From 9c0f59e47a90c54d0153f8ddc0f80d7a36207d0e Mon Sep 17 00:00:00 2001 From: Nam Cao Date: Mon, 18 Mar 2024 11:59:02 +0100 Subject: [PATCH 001/313] HID: i2c-hid: remove I2C_HID_READ_PENDING flag to prevent lock-up The flag I2C_HID_READ_PENDING is used to serialize I2C operations. However, this is not necessary, because I2C core already has its own locking for that. More importantly, this flag can cause a lock-up: if the flag is set in i2c_hid_xfer() and an interrupt happens, the interrupt handler (i2c_hid_irq) will check this flag and return immediately without doing anything, then the interrupt handler will be invoked again in an infinite loop. Since interrupt handler is an RT task, it takes over the CPU and the flag-clearing task never gets scheduled, thus we have a lock-up. Delete this unnecessary flag. Reported-and-tested-by: Eva Kurchatova Closes: https://lore.kernel.org/r/CA+eeCSPUDpUg76ZO8dszSbAGn+UHjcyv8F1J-CUPVARAzEtW9w@mail.gmail.com Fixes: 4a200c3b9a40 ("HID: i2c-hid: introduce HID over i2c specification implementation") Cc: Signed-off-by: Nam Cao Signed-off-by: Jiri Kosina --- drivers/hid/i2c-hid/i2c-hid-core.c | 9 --------- 1 file changed, 9 deletions(-) diff --git a/drivers/hid/i2c-hid/i2c-hid-core.c b/drivers/hid/i2c-hid/i2c-hid-core.c index 2df1ab3c31cc..1c86c97688e9 100644 --- a/drivers/hid/i2c-hid/i2c-hid-core.c +++ b/drivers/hid/i2c-hid/i2c-hid-core.c @@ -64,7 +64,6 @@ /* flags */ #define I2C_HID_STARTED 0 #define I2C_HID_RESET_PENDING 1 -#define I2C_HID_READ_PENDING 2 #define I2C_HID_PWR_ON 0x00 #define I2C_HID_PWR_SLEEP 0x01 @@ -190,15 +189,10 @@ static int i2c_hid_xfer(struct i2c_hid *ihid, msgs[n].len = recv_len; msgs[n].buf = recv_buf; n++; - - set_bit(I2C_HID_READ_PENDING, &ihid->flags); } ret = i2c_transfer(client->adapter, msgs, n); - if (recv_len) - clear_bit(I2C_HID_READ_PENDING, &ihid->flags); - if (ret != n) return ret < 0 ? 
ret : -EIO; @@ -556,9 +550,6 @@ static irqreturn_t i2c_hid_irq(int irq, void *dev_id) { struct i2c_hid *ihid = dev_id; - if (test_bit(I2C_HID_READ_PENDING, &ihid->flags)) - return IRQ_HANDLED; - i2c_hid_get_input(ihid); return IRQ_HANDLED; From 92826905ae340b7f2b25759a06c8c60bfc476b9f Mon Sep 17 00:00:00 2001 From: Zhang Lixu Date: Wed, 6 Mar 2024 00:44:04 +0000 Subject: [PATCH 002/313] HID: intel-ish-hid: ipc: Fix dev_err usage with uninitialized dev->devc The variable dev->devc in ish_dev_init was utilized by dev_err before it was properly assigned. To rectify this, the assignment of dev->devc has been moved to immediately follow memory allocation. Without this change "(NULL device *)" is printed for device information. Fixes: 8ae2f2b0a284 ("HID: intel-ish-hid: ipc: Fix potential use-after-free in work function") Fixes: ae02e5d40d5f ("HID: intel-ish-hid: ipc layer") Signed-off-by: Zhang Lixu Acked-by: Srinivas Pandruvada Signed-off-by: Jiri Kosina --- drivers/hid/intel-ish-hid/ipc/ipc.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/hid/intel-ish-hid/ipc/ipc.c b/drivers/hid/intel-ish-hid/ipc/ipc.c index a49c6affd7c4..dd5fc60874ba 100644 --- a/drivers/hid/intel-ish-hid/ipc/ipc.c +++ b/drivers/hid/intel-ish-hid/ipc/ipc.c @@ -948,6 +948,7 @@ struct ishtp_device *ish_dev_init(struct pci_dev *pdev) if (!dev) return NULL; + dev->devc = &pdev->dev; ishtp_device_init(dev); init_waitqueue_head(&dev->wait_hw_ready); @@ -983,7 +984,6 @@ struct ishtp_device *ish_dev_init(struct pci_dev *pdev) } dev->ops = &ish_hw_ops; - dev->devc = &pdev->dev; dev->mtu = IPC_PAYLOAD_SIZE - sizeof(struct ishtp_msg_hdr); return dev; } From 139b4c37e9cb0943e51adbb9c20c45bf60e44422 Mon Sep 17 00:00:00 2001 From: Benjamin Tissoires Date: Thu, 21 Mar 2024 16:19:27 +0100 Subject: [PATCH 003/313] MAINTAINERS: update Benjamin's email address Update my email address to the kernel.org one, as it's getting more convenient this way. 
Signed-off-by: Benjamin Tissoires Signed-off-by: Jiri Kosina --- .mailmap | 2 ++ MAINTAINERS | 4 ++-- 2 files changed, 4 insertions(+), 2 deletions(-) diff --git a/.mailmap b/.mailmap index e90797de3256..25019ddf467c 100644 --- a/.mailmap +++ b/.mailmap @@ -96,6 +96,8 @@ Ben Widawsky Ben Widawsky Ben Widawsky Benjamin Poirier +Benjamin Tissoires +Benjamin Tissoires Bjorn Andersson Bjorn Andersson Bjorn Andersson diff --git a/MAINTAINERS b/MAINTAINERS index c50e72258ba9..52bb6f2c3e7b 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -9546,7 +9546,7 @@ F: kernel/power/ HID CORE LAYER M: Jiri Kosina -M: Benjamin Tissoires +M: Benjamin Tissoires L: linux-input@vger.kernel.org S: Maintained T: git git://git.kernel.org/pub/scm/linux/kernel/git/hid/hid.git @@ -22685,7 +22685,7 @@ F: drivers/usb/host/ehci* USB HID/HIDBP DRIVERS (USB KEYBOARDS, MICE, REMOTE CONTROLS, ...) M: Jiri Kosina -M: Benjamin Tissoires +M: Benjamin Tissoires L: linux-usb@vger.kernel.org S: Maintained T: git git://git.kernel.org/pub/scm/linux/kernel/git/hid/hid.git From a2ac2a1b02590a22a236c43c455f421cdede45f5 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ar=C4=B1n=C3=A7=20=C3=9CNAL?= Date: Thu, 14 Mar 2024 15:24:35 +0300 Subject: [PATCH 004/313] arm64: dts: rockchip: set PHY address of MT7531 switch to 0x1f MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The MT7531 switch listens on PHY address 0x1f on an MDIO bus. I've got two findings that support this. There's no bootstrapping option to change the PHY address of the switch. The Linux driver hardcodes 0x1f as the PHY address of the switch. So the reg property on the device tree is currently ignored by the Linux driver. Therefore, describe the correct PHY address on Banana Pi BPI-R2 Pro that has this switch. 
Signed-off-by: Arınç ÜNAL Fixes: c1804463e5c6 ("arm64: dts: rockchip: Add mt7531 dsa node to BPI-R2-Pro board") Link: https://lore.kernel.org/r/20240314-for-rockchip-mt7531-phy-address-v1-1-743b5873358f@arinc9.com Signed-off-by: Heiko Stuebner --- arch/arm64/boot/dts/rockchip/rk3568-bpi-r2-pro.dts | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/arch/arm64/boot/dts/rockchip/rk3568-bpi-r2-pro.dts b/arch/arm64/boot/dts/rockchip/rk3568-bpi-r2-pro.dts index 7b5f3904ef61..03d6d920446a 100644 --- a/arch/arm64/boot/dts/rockchip/rk3568-bpi-r2-pro.dts +++ b/arch/arm64/boot/dts/rockchip/rk3568-bpi-r2-pro.dts @@ -525,9 +525,9 @@ &mdio0 { #address-cells = <1>; #size-cells = <0>; - switch@0 { + switch@1f { compatible = "mediatek,mt7531"; - reg = <0>; + reg = <0x1f>; ports { #address-cells = <1>; From 0ac417b8f124427c90ec8c2ef4f632b821d924cc Mon Sep 17 00:00:00 2001 From: Iskander Amara Date: Fri, 8 Mar 2024 09:52:42 +0100 Subject: [PATCH 005/313] arm64: dts: rockchip: enable internal pull-up for Q7_THRM# on RK3399 Puma Q7_THRM# pin is connected to a diode on the module which is used as a level shifter, and the pin has a pull-down enabled by default. We need to configure it to internal pull-up, otherwise whenever the pin is configured as INPUT and we try to control it externally the value will always remain zero.
Signed-off-by: Iskander Amara Fixes: 2c66fc34e945 ("arm64: dts: rockchip: add RK3399-Q7 (Puma) SoM") Reviewed-by: Quentin Schulz Cc: stable@vger.kernel.org Link: https://lore.kernel.org/r/20240308085243.69903-1-iskander.amara@theobroma-systems.com Signed-off-by: Heiko Stuebner --- arch/arm64/boot/dts/rockchip/rk3399-puma.dtsi | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/arch/arm64/boot/dts/rockchip/rk3399-puma.dtsi b/arch/arm64/boot/dts/rockchip/rk3399-puma.dtsi index c08e69391c01..06f3e97af7cd 100644 --- a/arch/arm64/boot/dts/rockchip/rk3399-puma.dtsi +++ b/arch/arm64/boot/dts/rockchip/rk3399-puma.dtsi @@ -426,6 +426,16 @@ &pwm2 { }; &pinctrl { + pinctrl-names = "default"; + pinctrl-0 = <&q7_thermal_pin>; + + gpios { + q7_thermal_pin: q7-thermal-pin { + rockchip,pins = + <0 RK_PA3 RK_FUNC_GPIO &pcfg_pull_up>; + }; + }; + i2c8 { i2c8_xfer_a: i2c8-xfer { rockchip,pins = From f0abb4b2c7acf3c3e4130dc3f54cd90cf2ae62bc Mon Sep 17 00:00:00 2001 From: Iskander Amara Date: Fri, 8 Mar 2024 09:52:43 +0100 Subject: [PATCH 006/313] arm64: dts: rockchip: fix alphabetical ordering RK3399 puma Nodes overridden by their reference should be ordered alphabetically to make it easier to read the DTS. pinctrl node is defined in the wrong location so let's reorder it. 
Signed-off-by: Iskander Amara Reviewed-by: Quentin Schulz Link: https://lore.kernel.org/r/20240308085243.69903-2-iskander.amara@theobroma-systems.com Signed-off-by: Heiko Stuebner --- arch/arm64/boot/dts/rockchip/rk3399-puma.dtsi | 18 +++++++++--------- 1 file changed, 9 insertions(+), 9 deletions(-) diff --git a/arch/arm64/boot/dts/rockchip/rk3399-puma.dtsi b/arch/arm64/boot/dts/rockchip/rk3399-puma.dtsi index 06f3e97af7cd..214ea62b24a5 100644 --- a/arch/arm64/boot/dts/rockchip/rk3399-puma.dtsi +++ b/arch/arm64/boot/dts/rockchip/rk3399-puma.dtsi @@ -416,15 +416,6 @@ &io_domains { gpio1830-supply = <&vcc_1v8>; }; -&pmu_io_domains { - status = "okay"; - pmu1830-supply = <&vcc_1v8>; -}; - -&pwm2 { - status = "okay"; -}; - &pinctrl { pinctrl-names = "default"; pinctrl-0 = <&q7_thermal_pin>; @@ -473,6 +464,15 @@ usb3_id: usb3-id { }; }; +&pmu_io_domains { + status = "okay"; + pmu1830-supply = <&vcc_1v8>; +}; + +&pwm2 { + status = "okay"; +}; + &sdhci { /* * Signal integrity isn't great at 200MHz but 100MHz has proven stable From e6b1168f37e3f86d9966276c5a3fff9eb0df3e5f Mon Sep 17 00:00:00 2001 From: Quentin Schulz Date: Fri, 8 Mar 2024 16:46:07 +0100 Subject: [PATCH 007/313] arm64: dts: rockchip: enable internal pull-up on Q7_USB_ID for RK3399 Puma The Q7_USB_ID has a diode used as a level-shifter, and is used as an input pin. The SoC default for this pin is a pull-up, which is correct but the pinconf in the introducing commit missed that, so let's fix this oversight. 
Fixes: ed2c66a95c0c ("arm64: dts: rockchip: fix rk3399-puma-haikou USB OTG mode") Signed-off-by: Quentin Schulz Link: https://lore.kernel.org/r/20240308-puma-diode-pu-v2-1-309f83da110a@theobroma-systems.com Signed-off-by: Heiko Stuebner --- arch/arm64/boot/dts/rockchip/rk3399-puma.dtsi | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/arm64/boot/dts/rockchip/rk3399-puma.dtsi b/arch/arm64/boot/dts/rockchip/rk3399-puma.dtsi index 214ea62b24a5..a51ebb8f8b80 100644 --- a/arch/arm64/boot/dts/rockchip/rk3399-puma.dtsi +++ b/arch/arm64/boot/dts/rockchip/rk3399-puma.dtsi @@ -459,7 +459,7 @@ vcc5v0_host_en: vcc5v0-host-en { usb3 { usb3_id: usb3-id { rockchip,pins = - <1 RK_PC2 RK_FUNC_GPIO &pcfg_pull_none>; + <1 RK_PC2 RK_FUNC_GPIO &pcfg_pull_up>; }; }; }; From 945a7c8570916650a415757d15d83e0fa856a686 Mon Sep 17 00:00:00 2001 From: Quentin Schulz Date: Fri, 8 Mar 2024 16:46:08 +0100 Subject: [PATCH 008/313] arm64: dts: rockchip: enable internal pull-up on PCIE_WAKE# for RK3399 Puma The PCIE_WAKE# has a diode used as a level-shifter, and is used as an input pin. While the SoC default is to enable the pull-up, the core rk3399 pinconf for this pin opted for pull-none. So as to not disturb the behaviour of other boards which may rely on pull-none instead of pull-up, set the needed pull-up only for RK3399 Puma. 
Fixes: 60fd9f72ce8a ("arm64: dts: rockchip: add Haikou baseboard with RK3399-Q7 SoM") Signed-off-by: Quentin Schulz Link: https://lore.kernel.org/r/20240308-puma-diode-pu-v2-2-309f83da110a@theobroma-systems.com Signed-off-by: Heiko Stuebner --- arch/arm64/boot/dts/rockchip/rk3399-puma.dtsi | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/arch/arm64/boot/dts/rockchip/rk3399-puma.dtsi b/arch/arm64/boot/dts/rockchip/rk3399-puma.dtsi index a51ebb8f8b80..2484ad2bd86f 100644 --- a/arch/arm64/boot/dts/rockchip/rk3399-puma.dtsi +++ b/arch/arm64/boot/dts/rockchip/rk3399-puma.dtsi @@ -416,6 +416,11 @@ &io_domains { gpio1830-supply = <&vcc_1v8>; }; +&pcie_clkreqn_cpm { + rockchip,pins = + <2 RK_PD2 RK_FUNC_GPIO &pcfg_pull_up>; +}; + &pinctrl { pinctrl-names = "default"; pinctrl-0 = <&q7_thermal_pin>; From d7ed698abc28b2886c9fc71d17ca6b023fcf47f3 Mon Sep 17 00:00:00 2001 From: Quentin Schulz Date: Fri, 8 Mar 2024 16:46:09 +0100 Subject: [PATCH 009/313] arm64: dts: rockchip: add regulators for PCIe on RK3399 Puma Haikou The PCIe PHY requires two regulators and are present on the SoM directly, while the PCIe connector also exposes 3V3 and 12V power rails which are available on the baseboard. Considering that 3/4 regulators are always-on on HW level and that the last one depends on a regulator from the PMIC that is specified as always on, this commit should be purely cosmetic and no change in behavior is expected. Let's add all regulators for PCIe on RK3399 Puma Haikou. 
Reviewed-by: Dragan Simic Signed-off-by: Quentin Schulz Link: https://lore.kernel.org/r/20240308-puma-diode-pu-v2-3-309f83da110a@theobroma-systems.com Signed-off-by: Heiko Stuebner --- .../boot/dts/rockchip/rk3399-puma-haikou.dts | 2 ++ arch/arm64/boot/dts/rockchip/rk3399-puma.dtsi | 26 +++++++++++++++++++ 2 files changed, 28 insertions(+) diff --git a/arch/arm64/boot/dts/rockchip/rk3399-puma-haikou.dts b/arch/arm64/boot/dts/rockchip/rk3399-puma-haikou.dts index 2c3984a880af..f6f15946579e 100644 --- a/arch/arm64/boot/dts/rockchip/rk3399-puma-haikou.dts +++ b/arch/arm64/boot/dts/rockchip/rk3399-puma-haikou.dts @@ -194,6 +194,8 @@ &pcie0 { num-lanes = <4>; pinctrl-names = "default"; pinctrl-0 = <&pcie_clkreqn_cpm>; + vpcie3v3-supply = <&vcc3v3_baseboard>; + vpcie12v-supply = <&dc_12v>; status = "okay"; }; diff --git a/arch/arm64/boot/dts/rockchip/rk3399-puma.dtsi b/arch/arm64/boot/dts/rockchip/rk3399-puma.dtsi index 2484ad2bd86f..ccbe3a7a1d2c 100644 --- a/arch/arm64/boot/dts/rockchip/rk3399-puma.dtsi +++ b/arch/arm64/boot/dts/rockchip/rk3399-puma.dtsi @@ -79,6 +79,26 @@ vcc5v0_sys: vcc5v0-sys { regulator-max-microvolt = <5000000>; }; + vcca_0v9: vcca-0v9-regulator { + compatible = "regulator-fixed"; + regulator-name = "vcca_0v9"; + regulator-always-on; + regulator-boot-on; + regulator-min-microvolt = <900000>; + regulator-max-microvolt = <900000>; + vin-supply = <&vcc_1v8>; + }; + + vcca_1v8: vcca-1v8-regulator { + compatible = "regulator-fixed"; + regulator-name = "vcca_1v8"; + regulator-always-on; + regulator-boot-on; + regulator-min-microvolt = <1800000>; + regulator-max-microvolt = <1800000>; + vin-supply = <&vcc3v3_sys>; + }; + vdd_log: vdd-log { compatible = "pwm-regulator"; pwms = <&pwm2 0 25000 1>; @@ -416,6 +436,12 @@ &io_domains { gpio1830-supply = <&vcc_1v8>; }; +&pcie0 { + /* PCIe PHY supplies */ + vpcie0v9-supply = <&vcca_0v9>; + vpcie1v8-supply = <&vcca_1v8>; +}; + &pcie_clkreqn_cpm { rockchip,pins = <2 RK_PD2 RK_FUNC_GPIO &pcfg_pull_up>; From 
64da060dd4eb625646970d7c96a16de617412ec5 Mon Sep 17 00:00:00 2001 From: Andy Yan Date: Sun, 24 Mar 2024 19:28:33 +0800 Subject: [PATCH 010/313] arm64: dts: rockchip: Fix the i2c address of es8316 on Cool Pi CM5 According to the hardware design, the i2c address of audio codec es8316 on Cool Pi CM5 is 0x10. This fixes the read/write errors like below: es8316 7-0011: ASoC: error at soc_component_write_no_lock on es8316.7-0011 for register: [0x0000000c] -6 es8316 7-0011: ASoC: error at soc_component_write_no_lock on es8316.7-0011 for register: [0x00000003] -6 es8316 7-0011: ASoC: error at soc_component_read_no_lock on es8316.7-0011 for register: [0x00000016] -6 es8316 7-0011: ASoC: error at soc_component_read_no_lock on es8316.7-0011 for register: [0x00000016] -6 Fixes: 791c154c3982 ("arm64: dts: rockchip: Add support for rk3588 based board Cool Pi CM5 EVB") Signed-off-by: Andy Yan Link: https://lore.kernel.org/r/20240324112833.2181961-1-andyshrk@163.com [also adapted the node name to audio-codec@10] Signed-off-by: Heiko Stuebner --- arch/arm64/boot/dts/rockchip/rk3588-coolpi-cm5.dtsi | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/arch/arm64/boot/dts/rockchip/rk3588-coolpi-cm5.dtsi b/arch/arm64/boot/dts/rockchip/rk3588-coolpi-cm5.dtsi index cce1c8e83587..94ecb9b4f98f 100644 --- a/arch/arm64/boot/dts/rockchip/rk3588-coolpi-cm5.dtsi +++ b/arch/arm64/boot/dts/rockchip/rk3588-coolpi-cm5.dtsi @@ -216,9 +216,9 @@ &i2c7 { pinctrl-0 = <&i2c7m0_xfer>; status = "okay"; - es8316: audio-codec@11 { + es8316: audio-codec@10 { compatible = "everest,es8316"; - reg = <0x11>; + reg = <0x10>; assigned-clocks = <&cru I2S0_8CH_MCLKOUT>; assigned-clock-rates = <12288000>; clocks = <&cru I2S0_8CH_MCLKOUT>; From 4053caf60bb349ab9ea9e36ee30c64681b696198 Mon Sep 17 00:00:00 2001 From: William Zhang Date: Wed, 20 Mar 2024 15:26:22 -0700 Subject: [PATCH 011/313] mtd: rawnand: brcmnand: Fix data access violation for STB chip Florian reported the following kernel NULL pointer
dereference issue on a BCM7250 board: [ 2.829744] Unable to handle kernel NULL pointer dereference at virtual address 0000000c when read [ 2.838740] [0000000c] *pgd=80000000004003, *pmd=00000000 [ 2.844178] Internal error: Oops: 206 [#1] SMP ARM [ 2.848990] Modules linked in: [ 2.852061] CPU: 0 PID: 1 Comm: swapper/0 Not tainted 6.8.0-next-20240305-gd95fcdf4961d #66 [ 2.860436] Hardware name: Broadcom STB (Flattened Device Tree) [ 2.866371] PC is at brcmnand_read_by_pio+0x180/0x278 [ 2.871449] LR is at __wait_for_common+0x9c/0x1b0 [ 2.876178] pc : [] lr : [] psr: 60000053 [ 2.882460] sp : f0811a80 ip : 00000012 fp : 00000000 [ 2.887699] r10: 00000000 r9 : 00000000 r8 : c3790000 [ 2.892936] r7 : 00000000 r6 : 00000000 r5 : c35db440 r4 : ffe00000 [ 2.899479] r3 : f15cb814 r2 : 00000000 r1 : 00000000 r0 : 00000000 The issue only happens when dma mode is disabled or not supported on STB chip. The pio mode transfer calls brcmnand_read_data_bus function which dereferences ctrl->soc->read_data_bus. But the soc member in STB chip is NULL hence triggers the access violation. The function needs to check the soc pointer first. 
Fixes: 546e42599120 ("mtd: rawnand: brcmnand: Add BCMBCA read data bus interface") Reported-by: Florian Fainelli Tested-by: Florian Fainelli Signed-off-by: William Zhang Signed-off-by: Miquel Raynal Link: https://lore.kernel.org/linux-mtd/20240320222623.35604-1-william.zhang@broadcom.com --- drivers/mtd/nand/raw/brcmnand/brcmnand.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/mtd/nand/raw/brcmnand/brcmnand.c b/drivers/mtd/nand/raw/brcmnand/brcmnand.c index a8d12c71f987..1b2ec0fec60c 100644 --- a/drivers/mtd/nand/raw/brcmnand/brcmnand.c +++ b/drivers/mtd/nand/raw/brcmnand/brcmnand.c @@ -857,7 +857,7 @@ static inline void brcmnand_read_data_bus(struct brcmnand_controller *ctrl, struct brcmnand_soc *soc = ctrl->soc; int i; - if (soc->read_data_bus) { + if (soc && soc->read_data_bus) { soc->read_data_bus(soc, flash_cache, buffer, fc_words); } else { for (i = 0; i < fc_words; i++) From afc89870ea677bd5a44516eb981f7a259b74280c Mon Sep 17 00:00:00 2001 From: Vinod Koul Date: Thu, 28 Mar 2024 12:21:51 +0530 Subject: [PATCH 012/313] dmaengine: Revert "dmaengine: pl330: issue_pending waits until WFP state" This reverts commit 22a9d9585812 ("dmaengine: pl330: issue_pending waits until WFP state") as it seems to cause regression in pl330 driver. Note the issue now exists in mainline so a fix to be done. 
Cc: stable@vger.kernel.org Reported-by: karthikeyan Signed-off-by: Vinod Koul --- drivers/dma/pl330.c | 3 --- 1 file changed, 3 deletions(-) diff --git a/drivers/dma/pl330.c b/drivers/dma/pl330.c index 5f6d7f1e095f..ad8e3da1b2cd 100644 --- a/drivers/dma/pl330.c +++ b/drivers/dma/pl330.c @@ -1053,9 +1053,6 @@ static bool _trigger(struct pl330_thread *thrd) thrd->req_running = idx; - if (desc->rqtype == DMA_MEM_TO_DEV || desc->rqtype == DMA_DEV_TO_MEM) - UNTIL(thrd, PL330_STATE_WFP); - return true; } From 43c633ef93a5d293c96ebcedb40130df13128428 Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Fri, 22 Mar 2024 14:21:07 +0100 Subject: [PATCH 013/313] dmaengine: owl: fix register access functions When building with 'make W=1', clang notices that the computed register values are never actually written back but instead the wrong variable is set: drivers/dma/owl-dma.c:244:6: error: variable 'regval' set but not used [-Werror,-Wunused-but-set-variable] 244 | u32 regval; | ^ drivers/dma/owl-dma.c:268:6: error: variable 'regval' set but not used [-Werror,-Wunused-but-set-variable] 268 | u32 regval; | ^ Change these to what was most likely intended. 
Fixes: 47e20577c24d ("dmaengine: Add Actions Semi Owl family S900 DMA driver") Signed-off-by: Arnd Bergmann Reviewed-by: Peter Korsgaard Reviewed-by: Manivannan Sadhasivam Link: https://lore.kernel.org/r/20240322132116.906475-1-arnd@kernel.org Signed-off-by: Vinod Koul --- drivers/dma/owl-dma.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/dma/owl-dma.c b/drivers/dma/owl-dma.c index 4e76c4ec2d39..e001f4f7aa64 100644 --- a/drivers/dma/owl-dma.c +++ b/drivers/dma/owl-dma.c @@ -250,7 +250,7 @@ static void pchan_update(struct owl_dma_pchan *pchan, u32 reg, else regval &= ~val; - writel(val, pchan->base + reg); + writel(regval, pchan->base + reg); } static void pchan_writel(struct owl_dma_pchan *pchan, u32 reg, u32 data) @@ -274,7 +274,7 @@ static void dma_update(struct owl_dma *od, u32 reg, u32 val, bool state) else regval &= ~val; - writel(val, od->base + reg); + writel(regval, od->base + reg); } static void dma_writel(struct owl_dma *od, u32 reg, u32 data) From 30f0ced9971b2d8c8c24ae75786f9079489a012d Mon Sep 17 00:00:00 2001 From: Akhil R Date: Fri, 15 Mar 2024 18:14:11 +0530 Subject: [PATCH 014/313] dmaengine: tegra186: Fix residual calculation The existing residual calculation returns an incorrect value when bytes_xfer == bytes_req. This scenario occurs particularly with drivers like UART where DMA is scheduled for maximum number of bytes and is terminated when the bytes inflow stops. At higher baud rates, it could request the tx_status while there is no bytes left to transfer. This will lead to incorrect residual being set. Hence return residual as '0' when bytes transferred equals to the bytes requested. 
Fixes: ee17028009d4 ("dmaengine: tegra: Add tegra gpcdma driver") Signed-off-by: Akhil R Reviewed-by: Jon Hunter Acked-by: Thierry Reding Link: https://lore.kernel.org/r/20240315124411.17582-1-akhilrajeev@nvidia.com Signed-off-by: Vinod Koul --- drivers/dma/tegra186-gpc-dma.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/dma/tegra186-gpc-dma.c b/drivers/dma/tegra186-gpc-dma.c index 88547a23825b..3642508e88bb 100644 --- a/drivers/dma/tegra186-gpc-dma.c +++ b/drivers/dma/tegra186-gpc-dma.c @@ -746,6 +746,9 @@ static int tegra_dma_get_residual(struct tegra_dma_channel *tdc) bytes_xfer = dma_desc->bytes_xfer + sg_req[dma_desc->sg_idx].len - (wcount * 4); + if (dma_desc->bytes_req == bytes_xfer) + return 0; + residual = dma_desc->bytes_req - (bytes_xfer % dma_desc->bytes_req); return residual; From 9140ce47872bfd89fca888c2f992faa51d20c2bc Mon Sep 17 00:00:00 2001 From: Andy Shevchenko Date: Thu, 21 Mar 2024 14:04:21 +0200 Subject: [PATCH 015/313] idma64: Don't try to serve interrupts when device is powered off When iDMA 64-bit device is powered off, the IRQ status register is all 1:s. This never happens in a real case and signals that the device is simply powered off. Don't try to serve interrupts that are not ours.
Fixes: 667dfed98615 ("dmaengine: add a driver for Intel integrated DMA 64-bit") Reported-by: Heiner Kallweit Closes: https://lore.kernel.org/r/700bbb84-90e1-4505-8ff0-3f17ea8bc631@gmail.com Tested-by: Heiner Kallweit Signed-off-by: Andy Shevchenko Link: https://lore.kernel.org/r/20240321120453.1360138-1-andriy.shevchenko@linux.intel.com Signed-off-by: Vinod Koul --- drivers/dma/idma64.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/drivers/dma/idma64.c b/drivers/dma/idma64.c index 78a938969d7d..1398814d8fbb 100644 --- a/drivers/dma/idma64.c +++ b/drivers/dma/idma64.c @@ -171,6 +171,10 @@ static irqreturn_t idma64_irq(int irq, void *dev) u32 status_err; unsigned short i; + /* Since IRQ may be shared, check if DMA controller is powered on */ + if (status == GENMASK(31, 0)) + return IRQ_NONE; + dev_vdbg(idma64->dma.dev, "%s: status=%#x\n", __func__, status); /* Check if we have any interrupt from the DMA controller */ From 63dc588e7af1392576071a1841298198c9cddee3 Mon Sep 17 00:00:00 2001 From: Vijendar Mukunda Date: Wed, 27 Mar 2024 12:01:43 +0530 Subject: [PATCH 016/313] soundwire: amd: fix for wake interrupt handling for clockstop mode When SoundWire Wake interrupt is enabled along with SoundWire Wake enable register, SoundWire wake interrupt will be reported when SoundWire manager is in D3 state and ACP is in D3 state. When SoundWire Wake interrupt is reported, it will invoke runtime resume of the SoundWire manager device. In case of system level suspend, for ClockStop Mode SoundWire Wake interrupt should be disabled. It should be enabled only for runtime suspend scenario. Change wake interrupt enable/disable sequence for ClockStop Mode in system level suspend and runtime suspend scenario.
Fixes: 9cf1efc5ed2d ("soundwire: amd: add pm_prepare callback and pm ops support") Signed-off-by: Vijendar Mukunda Link: https://lore.kernel.org/r/20240327063143.2266464-2-Vijendar.Mukunda@amd.com Signed-off-by: Vinod Koul --- drivers/soundwire/amd_manager.c | 15 +++++++++++++++ drivers/soundwire/amd_manager.h | 3 ++- 2 files changed, 17 insertions(+), 1 deletion(-) diff --git a/drivers/soundwire/amd_manager.c b/drivers/soundwire/amd_manager.c index 7cd24bd8e224..6bcf8e75273c 100644 --- a/drivers/soundwire/amd_manager.c +++ b/drivers/soundwire/amd_manager.c @@ -130,6 +130,19 @@ static void amd_sdw_set_frameshape(struct amd_sdw_manager *amd_manager) writel(frame_size, amd_manager->mmio + ACP_SW_FRAMESIZE); } +static void amd_sdw_wake_enable(struct amd_sdw_manager *amd_manager, bool enable) +{ + u32 wake_ctrl; + + wake_ctrl = readl(amd_manager->mmio + ACP_SW_STATE_CHANGE_STATUS_MASK_8TO11); + if (enable) + wake_ctrl |= AMD_SDW_WAKE_INTR_MASK; + else + wake_ctrl &= ~AMD_SDW_WAKE_INTR_MASK; + + writel(wake_ctrl, amd_manager->mmio + ACP_SW_STATE_CHANGE_STATUS_MASK_8TO11); +} + static void amd_sdw_ctl_word_prep(u32 *lower_word, u32 *upper_word, struct sdw_msg *msg, int cmd_offset) { @@ -1095,6 +1108,7 @@ static int __maybe_unused amd_suspend(struct device *dev) } if (amd_manager->power_mode_mask & AMD_SDW_CLK_STOP_MODE) { + amd_sdw_wake_enable(amd_manager, false); return amd_sdw_clock_stop(amd_manager); } else if (amd_manager->power_mode_mask & AMD_SDW_POWER_OFF_MODE) { /* @@ -1121,6 +1135,7 @@ static int __maybe_unused amd_suspend_runtime(struct device *dev) return 0; } if (amd_manager->power_mode_mask & AMD_SDW_CLK_STOP_MODE) { + amd_sdw_wake_enable(amd_manager, true); return amd_sdw_clock_stop(amd_manager); } else if (amd_manager->power_mode_mask & AMD_SDW_POWER_OFF_MODE) { ret = amd_sdw_clock_stop(amd_manager); diff --git a/drivers/soundwire/amd_manager.h b/drivers/soundwire/amd_manager.h index 418b679e0b1a..707065468e05 100644 --- a/drivers/soundwire/amd_manager.h 
+++ b/drivers/soundwire/amd_manager.h @@ -152,7 +152,7 @@ #define AMD_SDW0_EXT_INTR_MASK 0x200000 #define AMD_SDW1_EXT_INTR_MASK 4 #define AMD_SDW_IRQ_MASK_0TO7 0x77777777 -#define AMD_SDW_IRQ_MASK_8TO11 0x000d7777 +#define AMD_SDW_IRQ_MASK_8TO11 0x000c7777 #define AMD_SDW_IRQ_ERROR_MASK 0xff #define AMD_SDW_MAX_FREQ_NUM 1 #define AMD_SDW0_MAX_TX_PORTS 3 @@ -190,6 +190,7 @@ #define AMD_SDW_CLK_RESUME_REQ 2 #define AMD_SDW_CLK_RESUME_DONE 3 #define AMD_SDW_WAKE_STAT_MASK BIT(16) +#define AMD_SDW_WAKE_INTR_MASK BIT(16) static u32 amd_sdw_freq_tbl[AMD_SDW_MAX_FREQ_NUM] = { AMD_SDW_DEFAULT_CLK_FREQ, From 7d49f53af4b988b188d3932deac2c9c80fd7d9ce Mon Sep 17 00:00:00 2001 From: Alice Ryhl Date: Fri, 8 Mar 2024 09:36:31 +0000 Subject: [PATCH 017/313] rust: don't select CONSTRUCTORS This was originally part of commit 4b9a68f2e59a0 ("rust: add support for static synchronisation primitives") from the old Rust branch, which used module constructors to initialize globals containing various synchronisation primitives with pin-init. That commit has never been upstreamed, but the `select CONSTRUCTORS` statement ended up being included in the patch that initially added Rust support to the Linux Kernel. We are not using module constructors, so let's remove the select. Signed-off-by: Alice Ryhl Reviewed-by: Benno Lossin Cc: stable@vger.kernel.org Fixes: 2f7ab1267dc9 ("Kbuild: add Rust support") Link: https://lore.kernel.org/r/20240308-constructors-v1-1-4c811342391c@google.com Signed-off-by: Miguel Ojeda --- init/Kconfig | 1 - 1 file changed, 1 deletion(-) diff --git a/init/Kconfig b/init/Kconfig index aa02aec6aa7d..b9a336a3d7d8 100644 --- a/init/Kconfig +++ b/init/Kconfig @@ -1903,7 +1903,6 @@ config RUST depends on !GCC_PLUGINS depends on !RANDSTRUCT depends on !DEBUG_INFO_BTF || PAHOLE_HAS_LANG_EXCLUDE - select CONSTRUCTORS help Enables Rust support in the kernel. 
From 01848eee20c6396e5a96cfbc9061dc37481e06fd Mon Sep 17 00:00:00 2001 From: Bo-Wei Chen Date: Sun, 24 Mar 2024 09:09:15 +0800 Subject: [PATCH 018/313] docs: rust: fix improper rendering in Arch Support page Fix improper rendering of table cell (empty bullet list) by rendering as a dash using the backslash escaping mechanism [1]. Link: https://docutils.sourceforge.io/docs/ref/rst/restructuredtext.html#escaping-mechanism [1] Reported-by: Miguel Ojeda Closes: https://github.com/Rust-for-Linux/linux/issues/1069 Signed-off-by: Bo-Wei Chen Reviewed-by: Benno Lossin Fixes: 90868ff9cade ("LoongArch: Enable initial Rust support") Link: https://lore.kernel.org/r/20240324010915.3089934-1-tim.chenbw@gmail.com [ Reworded slightly title and message; use "Link:" tag. ] Signed-off-by: Miguel Ojeda --- Documentation/rust/arch-support.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Documentation/rust/arch-support.rst b/Documentation/rust/arch-support.rst index 5c4fa9f5d1cd..c9137710633a 100644 --- a/Documentation/rust/arch-support.rst +++ b/Documentation/rust/arch-support.rst @@ -16,7 +16,7 @@ support corresponds to ``S`` values in the ``MAINTAINERS`` file. Architecture Level of support Constraints ============= ================ ============================================== ``arm64`` Maintained Little Endian only. -``loongarch`` Maintained - +``loongarch`` Maintained \- ``um`` Maintained ``x86_64`` only. ``x86`` Maintained ``x86_64`` only. ============= ================ ============================================== From 43853e843aa6c3d47ff2b0cce898318839483d05 Mon Sep 17 00:00:00 2001 From: Dragan Simic Date: Mon, 1 Apr 2024 00:20:56 +0200 Subject: [PATCH 019/313] arm64: dts: rockchip: Remove unsupported node from the Pinebook Pro dts Remove a redundant node from the Pine64 Pinebook Pro dts, which is intended to provide a value for the delay in PCI Express enumeration, but that isn't supported without additional out-of-tree kernel patches. 
There were already efforts to upstream those kernel patches, because they reportedly make some PCI Express cards (such as LSI SAS HBAs) usable in Pine64 RockPro64 (which is also based on the RK3399); otherwise, those PCI Express cards fail to enumerate. However, providing the required background and explanations proved to be a tough nut to crack, which is the reason why those patches remain outside of the kernel mainline for now. If those out-of-tree patches eventually become upstreamed, the resulting device-tree changes will almost surely belong to the RK3399 SoC dtsi. Also, the above-mentioned unusable-without-out-of-tree-patches PCI Express devices are in all fairness not usable in a Pinebook Pro without some extensive hardware modifications, which is another reason to delete this redundant node. When it comes to the Pinebook Pro, only M.2 NVMe SSDs can be installed out of the box (using an additional passive adapter PCB sold separately by Pine64), which reportedly works fine with no additional patches. 
Fixes: 5a65505a6988 ("arm64: dts: rockchip: Add initial support for Pinebook Pro") Signed-off-by: Dragan Simic Link: https://lore.kernel.org/r/0f82c3f97cb798d012270d13b34d8d15305ef293.1711923520.git.dsimic@manjaro.org Signed-off-by: Heiko Stuebner --- arch/arm64/boot/dts/rockchip/rk3399-pinebook-pro.dts | 1 - 1 file changed, 1 deletion(-) diff --git a/arch/arm64/boot/dts/rockchip/rk3399-pinebook-pro.dts b/arch/arm64/boot/dts/rockchip/rk3399-pinebook-pro.dts index 054c6a4d1a45..294eb2de263d 100644 --- a/arch/arm64/boot/dts/rockchip/rk3399-pinebook-pro.dts +++ b/arch/arm64/boot/dts/rockchip/rk3399-pinebook-pro.dts @@ -779,7 +779,6 @@ &pcie_phy { }; &pcie0 { - bus-scan-delay-ms = <1000>; ep-gpios = <&gpio2 RK_PD4 GPIO_ACTIVE_HIGH>; num-lanes = <4>; pinctrl-names = "default"; From 4ddc13461740308d3133c2defda97d9e3a30ede8 Mon Sep 17 00:00:00 2001 From: Krzysztof Kozlowski Date: Mon, 1 Apr 2024 16:09:39 +0200 Subject: [PATCH 020/313] arm64: dts: rockchip: drop panel port unit address in GRU Scarlet Panel port does not have "reg", thus it should not have unit address, as reported by dtc W=1 warning: rk3399-gru-scarlet.dtsi:666.32-668.7: Warning (unit_address_vs_reg): /dsi@ff960000/panel@0/ports/port@1/endpoint@1: node has a unit name, but no reg or ranges property Signed-off-by: Krzysztof Kozlowski Link: https://lore.kernel.org/r/20240401140939.97808-1-krzk@kernel.org Signed-off-by: Heiko Stuebner --- arch/arm64/boot/dts/rockchip/rk3399-gru-scarlet.dtsi | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/arm64/boot/dts/rockchip/rk3399-gru-scarlet.dtsi b/arch/arm64/boot/dts/rockchip/rk3399-gru-scarlet.dtsi index 5846a11f0e84..6d1e3ca86392 100644 --- a/arch/arm64/boot/dts/rockchip/rk3399-gru-scarlet.dtsi +++ b/arch/arm64/boot/dts/rockchip/rk3399-gru-scarlet.dtsi @@ -663,7 +663,7 @@ mipi_in_panel: endpoint { port@1 { reg = <1>; - mipi1_in_panel: endpoint@1 { + mipi1_in_panel: endpoint { remote-endpoint = <&mipi1_out_panel>; }; }; From 
db70d9f9dcf8d5cda86303eeb381b1213a2ab191 Mon Sep 17 00:00:00 2001 From: Vineet Gupta Date: Wed, 27 Mar 2024 22:02:58 -0700 Subject: [PATCH 021/313] ARC: Fix -Wmissing-prototypes warnings | ../arch/arc/kernel/kprobes.c:193:15: warning: no previous prototype for 'arc_kprobe_handler' [-Wmissing-prototypes] | 193 | int __kprobes arc_kprobe_handler(unsigned long addr, struct pt_regs *regs) | |../arch/arc/kernel/ptrace.c:342:16: warning: no previous prototype for 'syscall_trace_enter' [-Wmissing-prototypes] | 342 | asmlinkage int syscall_trace_enter(struct pt_regs *regs) Link: https://qa-reports.linaro.org/lkft/linux-next-master/build/next-20240325/testrun/23149630/suite/build/test/gcc-9-defconfig/log Reported-by: Linux Kernel Functional Testing Signed-off-by: Vineet Gupta --- arch/arc/include/asm/ptrace.h | 2 +- arch/arc/kernel/kprobes.c | 7 ++++--- 2 files changed, 5 insertions(+), 4 deletions(-) diff --git a/arch/arc/include/asm/ptrace.h b/arch/arc/include/asm/ptrace.h index 00b9318e551e..cf79df0b2570 100644 --- a/arch/arc/include/asm/ptrace.h +++ b/arch/arc/include/asm/ptrace.h @@ -169,7 +169,7 @@ static inline unsigned long regs_get_register(struct pt_regs *regs, return *(unsigned long *)((unsigned long)regs + offset); } -extern int syscall_trace_entry(struct pt_regs *); +extern int syscall_trace_enter(struct pt_regs *); extern void syscall_trace_exit(struct pt_regs *); #endif /* !__ASSEMBLY__ */ diff --git a/arch/arc/kernel/kprobes.c b/arch/arc/kernel/kprobes.c index e71d64119d71..f8e2960832d9 100644 --- a/arch/arc/kernel/kprobes.c +++ b/arch/arc/kernel/kprobes.c @@ -190,7 +190,8 @@ static void __kprobes setup_singlestep(struct kprobe *p, struct pt_regs *regs) } } -int __kprobes arc_kprobe_handler(unsigned long addr, struct pt_regs *regs) +static int +__kprobes arc_kprobe_handler(unsigned long addr, struct pt_regs *regs) { struct kprobe *p; struct kprobe_ctlblk *kcb; @@ -241,8 +242,8 @@ int __kprobes arc_kprobe_handler(unsigned long addr, struct pt_regs *regs) 
return 0; } -static int __kprobes arc_post_kprobe_handler(unsigned long addr, - struct pt_regs *regs) +static int +__kprobes arc_post_kprobe_handler(unsigned long addr, struct pt_regs *regs) { struct kprobe *cur = kprobe_running(); struct kprobe_ctlblk *kcb = get_kprobe_ctlblk(); From d5272aaa8257920c7b398f953ada65e25c248f9a Mon Sep 17 00:00:00 2001 From: Vineet Gupta Date: Wed, 27 Mar 2024 22:19:25 -0700 Subject: [PATCH 022/313] ARC: mm: fix new code about cache aliasing Manual/partial revert of 8690bbcf3b70 ("Introduce cpu_dcache_is_aliasing() across all architectures") Current generation of ARCv2/ARCv3 based HSxx cores are only PIPT (to software at least). Legacy ARC700 cpus could be VIPT aliasing (based on cache geometry and PAGE_SIZE) [1] however recently that support was ripped out so VIPT aliasing cache is not relevant to ARC anymore. [1] http://lists.infradead.org/pipermail/linux-snps-arc/2023-February/006899.html Acked-by: Mathieu Desnoyers Signed-off-by: Vineet Gupta --- arch/arc/Kconfig | 1 - arch/arc/include/asm/cachetype.h | 9 --------- 2 files changed, 10 deletions(-) delete mode 100644 arch/arc/include/asm/cachetype.h diff --git a/arch/arc/Kconfig b/arch/arc/Kconfig index 99d2845f3feb..4092bec198be 100644 --- a/arch/arc/Kconfig +++ b/arch/arc/Kconfig @@ -6,7 +6,6 @@ config ARC def_bool y select ARC_TIMERS - select ARCH_HAS_CPU_CACHE_ALIASING select ARCH_HAS_CACHE_LINE_SIZE select ARCH_HAS_DEBUG_VM_PGTABLE select ARCH_HAS_DMA_PREP_COHERENT diff --git a/arch/arc/include/asm/cachetype.h b/arch/arc/include/asm/cachetype.h deleted file mode 100644 index 05fc7ed59712..000000000000 --- a/arch/arc/include/asm/cachetype.h +++ /dev/null @@ -1,9 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0 */ -#ifndef __ASM_ARC_CACHETYPE_H -#define __ASM_ARC_CACHETYPE_H - -#include - -#define cpu_dcache_is_aliasing() true - -#endif From ebfc2fd8873b4feb86f01835ad97282aede1e956 Mon Sep 17 00:00:00 2001 From: Bjorn Helgaas Date: Fri, 29 Mar 2024 17:14:32 -0500 Subject: [PATCH 
023/313] ARC: Fix typos Fix typos, most reported by "codespell arch/arc". Only touches comments, no code changes. Signed-off-by: Bjorn Helgaas Signed-off-by: Vineet Gupta --- arch/arc/boot/Makefile | 4 ++-- arch/arc/boot/dts/axc003.dtsi | 4 ++-- arch/arc/boot/dts/vdk_axs10x_mb.dtsi | 2 +- arch/arc/include/asm/dsp.h | 2 +- arch/arc/include/asm/entry-compact.h | 10 +++++----- arch/arc/include/asm/entry.h | 4 ++-- arch/arc/include/asm/irq.h | 2 +- arch/arc/include/asm/irqflags-compact.h | 2 +- arch/arc/include/asm/mmu_context.h | 2 +- arch/arc/include/asm/pgtable-bits-arcv2.h | 2 +- arch/arc/include/asm/shmparam.h | 2 +- arch/arc/include/asm/smp.h | 4 ++-- arch/arc/include/asm/thread_info.h | 2 +- arch/arc/include/uapi/asm/swab.h | 2 +- arch/arc/kernel/entry-arcv2.S | 8 ++++---- arch/arc/kernel/entry.S | 4 ++-- arch/arc/kernel/head.S | 2 +- arch/arc/kernel/intc-arcv2.c | 2 +- arch/arc/kernel/perf_event.c | 2 +- arch/arc/kernel/setup.c | 2 +- arch/arc/kernel/signal.c | 7 ++++--- arch/arc/kernel/traps.c | 2 +- arch/arc/kernel/vmlinux.lds.S | 4 ++-- arch/arc/mm/tlb.c | 4 ++-- arch/arc/mm/tlbex.S | 8 ++++---- 25 files changed, 45 insertions(+), 44 deletions(-) diff --git a/arch/arc/boot/Makefile b/arch/arc/boot/Makefile index 5648748c285f..5a8550124b73 100644 --- a/arch/arc/boot/Makefile +++ b/arch/arc/boot/Makefile @@ -1,8 +1,8 @@ # SPDX-License-Identifier: GPL-2.0 -# uImage build relies on mkimage being availble on your host for ARC target +# uImage build relies on mkimage being available on your host for ARC target # You will need to build u-boot for ARC, rename mkimage to arc-elf32-mkimage -# and make sure it's reacable from your PATH +# and make sure it's reachable from your PATH OBJCOPYFLAGS= -O binary -R .note -R .note.gnu.build-id -R .comment -S diff --git a/arch/arc/boot/dts/axc003.dtsi b/arch/arc/boot/dts/axc003.dtsi index 3434c8131ecd..c0a812674ce9 100644 --- a/arch/arc/boot/dts/axc003.dtsi +++ b/arch/arc/boot/dts/axc003.dtsi @@ -119,9 +119,9 @@ mmc@15000 { /* 
* The DW APB ICTL intc on MB is connected to CPU intc via a * DT "invisible" DW APB GPIO block, configured to simply pass thru - * interrupts - setup accordinly in platform init (plat-axs10x/ax10x.c) + * interrupts - setup accordingly in platform init (plat-axs10x/ax10x.c) * - * So here we mimic a direct connection betwen them, ignoring the + * So here we mimic a direct connection between them, ignoring the * ABPG GPIO. Thus set "interrupts = <24>" (DW APB GPIO to core) * instead of "interrupts = <12>" (DW APB ICTL to DW APB GPIO) * diff --git a/arch/arc/boot/dts/vdk_axs10x_mb.dtsi b/arch/arc/boot/dts/vdk_axs10x_mb.dtsi index 90a412026e64..0e0e2d337bf8 100644 --- a/arch/arc/boot/dts/vdk_axs10x_mb.dtsi +++ b/arch/arc/boot/dts/vdk_axs10x_mb.dtsi @@ -113,7 +113,7 @@ mmc@15000 { /* * Embedded Vision subsystem UIO mappings; only relevant for EV VDK * - * This node is intentionally put outside of MB above becase + * This node is intentionally put outside of MB above because * it maps areas outside of MB's 0xez-0xfz. */ uio_ev: uio@d0000000 { diff --git a/arch/arc/include/asm/dsp.h b/arch/arc/include/asm/dsp.h index 202c78e56704..f496dbc4640b 100644 --- a/arch/arc/include/asm/dsp.h +++ b/arch/arc/include/asm/dsp.h @@ -12,7 +12,7 @@ /* * DSP-related saved registers - need to be saved only when you are * scheduled out. - * structure fields name must correspond to aux register defenitions for + * structure fields name must correspond to aux register definitions for * automatic offset calculation in DSP_AUX_SAVE_RESTORE macros */ struct dsp_callee_regs { diff --git a/arch/arc/include/asm/entry-compact.h b/arch/arc/include/asm/entry-compact.h index 92c3e9f13252..00946fe04c9b 100644 --- a/arch/arc/include/asm/entry-compact.h +++ b/arch/arc/include/asm/entry-compact.h @@ -7,7 +7,7 @@ * Stack switching code can no longer reliably rely on the fact that * if we are NOT in user mode, stack is switched to kernel mode. * e.g. 
L2 IRQ interrupted a L1 ISR which had not yet completed - * it's prologue including stack switching from user mode + * its prologue including stack switching from user mode * * Vineetg: Aug 28th 2008: Bug #94984 * -Zero Overhead Loop Context shd be cleared when entering IRQ/EXcp/Trap @@ -143,7 +143,7 @@ * 2. L1 IRQ taken, ISR starts (CPU auto-switched to KERNEL mode) * 3. But before it could switch SP from USER to KERNEL stack * a L2 IRQ "Interrupts" L1 - * Thay way although L2 IRQ happened in Kernel mode, stack is still + * That way although L2 IRQ happened in Kernel mode, stack is still * not switched. * To handle this, we may need to switch stack even if in kernel mode * provided SP has values in range of USER mode stack ( < 0x7000_0000 ) @@ -173,7 +173,7 @@ GET_CURR_TASK_ON_CPU r9 - /* With current tsk in r9, get it's kernel mode stack base */ + /* With current tsk in r9, get its kernel mode stack base */ GET_TSK_STACK_BASE r9, r9 /* save U mode SP @ pt_regs->sp */ @@ -282,7 +282,7 @@ * NOTE: * * It is recommended that lp_count/ilink1/ilink2 not be used as a dest reg - * for memory load operations. If used in that way interrupts are deffered + * for memory load operations. If used in that way interrupts are deferred * by hardware and that is not good. *-------------------------------------------------------------*/ .macro EXCEPTION_EPILOGUE @@ -350,7 +350,7 @@ * NOTE: * * It is recommended that lp_count/ilink1/ilink2 not be used as a dest reg - * for memory load operations. If used in that way interrupts are deffered + * for memory load operations. If used in that way interrupts are deferred * by hardware and that is not good. 
*-------------------------------------------------------------*/ .macro INTERRUPT_EPILOGUE LVL diff --git a/arch/arc/include/asm/entry.h b/arch/arc/include/asm/entry.h index cf1ba376e992..38c35722cebf 100644 --- a/arch/arc/include/asm/entry.h +++ b/arch/arc/include/asm/entry.h @@ -7,7 +7,7 @@ #ifndef __ASM_ARC_ENTRY_H #define __ASM_ARC_ENTRY_H -#include /* For NR_syscalls defination */ +#include /* For NR_syscalls definition */ #include #include #include /* For VMALLOC_START */ @@ -56,7 +56,7 @@ .endm /*------------------------------------------------------------- - * given a tsk struct, get to the base of it's kernel mode stack + * given a tsk struct, get to the base of its kernel mode stack * tsk->thread_info is really a PAGE, whose bottom hoists stack * which grows upwards towards thread_info *------------------------------------------------------------*/ diff --git a/arch/arc/include/asm/irq.h b/arch/arc/include/asm/irq.h index c574712ad865..9cd79263acba 100644 --- a/arch/arc/include/asm/irq.h +++ b/arch/arc/include/asm/irq.h @@ -10,7 +10,7 @@ * ARCv2 can support 240 interrupts in the core interrupts controllers and * 128 interrupts in IDU. Thus 512 virtual IRQs must be enough for most * configurations of boards. 
- * This doesnt affect ARCompact, but we change it to same value + * This doesn't affect ARCompact, but we change it to same value */ #define NR_IRQS 512 diff --git a/arch/arc/include/asm/irqflags-compact.h b/arch/arc/include/asm/irqflags-compact.h index 0d63e568d64c..936a2f21f315 100644 --- a/arch/arc/include/asm/irqflags-compact.h +++ b/arch/arc/include/asm/irqflags-compact.h @@ -46,7 +46,7 @@ * IRQ Control Macros * * All of them have "memory" clobber (compiler barrier) which is needed to - * ensure that LD/ST requiring irq safetly (R-M-W when LLSC is not available) + * ensure that LD/ST requiring irq safety (R-M-W when LLSC is not available) * are redone after IRQs are re-enabled (and gcc doesn't reuse stale register) * * Noted at the time of Abilis Timer List corruption diff --git a/arch/arc/include/asm/mmu_context.h b/arch/arc/include/asm/mmu_context.h index dda471f5f05b..9963bb1a5733 100644 --- a/arch/arc/include/asm/mmu_context.h +++ b/arch/arc/include/asm/mmu_context.h @@ -165,7 +165,7 @@ static inline void switch_mm(struct mm_struct *prev, struct mm_struct *next, * for retiring-mm. However destroy_context( ) still needs to do that because * between mm_release( ) = >deactive_mm( ) and * mmput => .. => __mmdrop( ) => destroy_context( ) - * there is a good chance that task gets sched-out/in, making it's ASID valid + * there is a good chance that task gets sched-out/in, making its ASID valid * again (this teased me for a whole day). */ diff --git a/arch/arc/include/asm/pgtable-bits-arcv2.h b/arch/arc/include/asm/pgtable-bits-arcv2.h index f3eea3f30b2e..8ebec1b21d24 100644 --- a/arch/arc/include/asm/pgtable-bits-arcv2.h +++ b/arch/arc/include/asm/pgtable-bits-arcv2.h @@ -66,7 +66,7 @@ * Other rules which cause the divergence from 1:1 mapping * * 1. 
Although ARC700 can do exclusive execute/write protection (meaning R - * can be tracked independet of X/W unlike some other CPUs), still to + * can be tracked independently of X/W unlike some other CPUs), still to * keep things consistent with other archs: * -Write implies Read: W => R * -Execute implies Read: X => R diff --git a/arch/arc/include/asm/shmparam.h b/arch/arc/include/asm/shmparam.h index 8b0251464ffd..719112af0f41 100644 --- a/arch/arc/include/asm/shmparam.h +++ b/arch/arc/include/asm/shmparam.h @@ -6,7 +6,7 @@ #ifndef __ARC_ASM_SHMPARAM_H #define __ARC_ASM_SHMPARAM_H -/* Handle upto 2 cache bins */ +/* Handle up to 2 cache bins */ #define SHMLBA (2 * PAGE_SIZE) /* Enforce SHMLBA in shmat */ diff --git a/arch/arc/include/asm/smp.h b/arch/arc/include/asm/smp.h index e0913f52c2cd..990f834909f0 100644 --- a/arch/arc/include/asm/smp.h +++ b/arch/arc/include/asm/smp.h @@ -77,7 +77,7 @@ static inline const char *arc_platform_smp_cpuinfo(void) /* * ARC700 doesn't support atomic Read-Modify-Write ops. - * Originally Interrupts had to be disabled around code to gaurantee atomicity. + * Originally Interrupts had to be disabled around code to guarantee atomicity. * The LLOCK/SCOND insns allow writing interrupt-hassle-free based atomic ops * based on retry-if-irq-in-atomic (with hardware assist). * However despite these, we provide the IRQ disabling variant @@ -86,7 +86,7 @@ static inline const char *arc_platform_smp_cpuinfo(void) * support needed. * * (2) In a SMP setup, the LLOCK/SCOND atomicity across CPUs needs to be - * gaurantted by the platform (not something which core handles). + * guaranteed by the platform (not something which core handles). * Assuming a platform won't, SMP Linux needs to use spinlocks + local IRQ * disabling for atomicity. 
* diff --git a/arch/arc/include/asm/thread_info.h b/arch/arc/include/asm/thread_info.h index 4c530cf131f3..12daaf3a61ea 100644 --- a/arch/arc/include/asm/thread_info.h +++ b/arch/arc/include/asm/thread_info.h @@ -38,7 +38,7 @@ struct thread_info { unsigned long flags; /* low level flags */ unsigned long ksp; /* kernel mode stack top in __switch_to */ - int preempt_count; /* 0 => preemptable, <0 => BUG */ + int preempt_count; /* 0 => preemptible, <0 => BUG */ int cpu; /* current CPU */ unsigned long thr_ptr; /* TLS ptr */ struct task_struct *task; /* main task structure */ diff --git a/arch/arc/include/uapi/asm/swab.h b/arch/arc/include/uapi/asm/swab.h index 02109cd48ee1..8d1f1ef44ba7 100644 --- a/arch/arc/include/uapi/asm/swab.h +++ b/arch/arc/include/uapi/asm/swab.h @@ -62,7 +62,7 @@ * 8051fdc4: st r2,[r1,20] ; Mem op : save result back to mem * * Joern suggested a better "C" algorithm which is great since - * (1) It is portable to any architecure + * (1) It is portable to any architecture * (2) At the same time it takes advantage of ARC ISA (rotate intrns) */ diff --git a/arch/arc/kernel/entry-arcv2.S b/arch/arc/kernel/entry-arcv2.S index 2e49c81c8086..e238b5fd3c8c 100644 --- a/arch/arc/kernel/entry-arcv2.S +++ b/arch/arc/kernel/entry-arcv2.S @@ -5,7 +5,7 @@ * Copyright (C) 2013 Synopsys, Inc. 
(www.synopsys.com) */ -#include /* ARC_{EXTRY,EXIT} */ +#include /* ARC_{ENTRY,EXIT} */ #include /* SAVE_ALL_{INT1,INT2,TRAP...} */ #include #include @@ -31,7 +31,7 @@ VECTOR res_service ; Reset Vector VECTOR mem_service ; Mem exception VECTOR instr_service ; Instrn Error VECTOR EV_MachineCheck ; Fatal Machine check -VECTOR EV_TLBMissI ; Intruction TLB miss +VECTOR EV_TLBMissI ; Instruction TLB miss VECTOR EV_TLBMissD ; Data TLB miss VECTOR EV_TLBProtV ; Protection Violation VECTOR EV_PrivilegeV ; Privilege Violation @@ -76,11 +76,11 @@ ENTRY(handle_interrupt) # query in hard ISR path would return false (since .IE is set) which would # trips genirq interrupt handling asserts. # - # So do a "soft" disable of interrutps here. + # So do a "soft" disable of interrupts here. # # Note this disable is only for consistent book-keeping as further interrupts # will be disabled anyways even w/o this. Hardware tracks active interrupts - # seperately in AUX_IRQ_ACT.active and will not take new interrupts + # separately in AUX_IRQ_ACT.active and will not take new interrupts # unless this one returns (or higher prio becomes pending in 2-prio scheme) IRQ_DISABLE diff --git a/arch/arc/kernel/entry.S b/arch/arc/kernel/entry.S index 089f6680518f..3c7e74aba679 100644 --- a/arch/arc/kernel/entry.S +++ b/arch/arc/kernel/entry.S @@ -95,7 +95,7 @@ ENTRY(EV_MachineCheck) lr r0, [efa] mov r1, sp - ; MC excpetions disable MMU + ; MC exceptions disable MMU ARC_MMU_REENABLE r3 lsr r3, r10, 8 @@ -209,7 +209,7 @@ trap_with_param: ; --------------------------------------------- ; syscall TRAP -; ABI: (r0-r7) upto 8 args, (r8) syscall number +; ABI: (r0-r7) up to 8 args, (r8) syscall number ; --------------------------------------------- ENTRY(EV_Trap) diff --git a/arch/arc/kernel/head.S b/arch/arc/kernel/head.S index 9152782444b5..8d541f53fae3 100644 --- a/arch/arc/kernel/head.S +++ b/arch/arc/kernel/head.S @@ -165,7 +165,7 @@ ENTRY(first_lines_of_secondary) ; setup stack (fp, sp) mov fp, 0 - ; 
set it's stack base to tsk->thread_info bottom + ; set its stack base to tsk->thread_info bottom GET_TSK_STACK_BASE r0, sp j start_kernel_secondary diff --git a/arch/arc/kernel/intc-arcv2.c b/arch/arc/kernel/intc-arcv2.c index 678898757e47..f324f0e3341a 100644 --- a/arch/arc/kernel/intc-arcv2.c +++ b/arch/arc/kernel/intc-arcv2.c @@ -56,7 +56,7 @@ void arc_init_IRQ(void) WRITE_AUX(AUX_IRQ_CTRL, ictrl); /* - * ARCv2 core intc provides multiple interrupt priorities (upto 16). + * ARCv2 core intc provides multiple interrupt priorities (up to 16). * Typical builds though have only two levels (0-high, 1-low) * Linux by default uses lower prio 1 for most irqs, reserving 0 for * NMI style interrupts in future (say perf) diff --git a/arch/arc/kernel/perf_event.c b/arch/arc/kernel/perf_event.c index adff957962da..6e5a651cd75c 100644 --- a/arch/arc/kernel/perf_event.c +++ b/arch/arc/kernel/perf_event.c @@ -38,7 +38,7 @@ * (based on a specific RTL build) * Below is the static map between perf generic/arc specific event_id and * h/w condition names. - * At the time of probe, we loop thru each index and find it's name to + * At the time of probe, we loop thru each index and find its name to * complete the mapping of perf event_id to h/w index as latter is needed * to program the counter really */ diff --git a/arch/arc/kernel/setup.c b/arch/arc/kernel/setup.c index d08a5092c2b4..7b6a9beba9db 100644 --- a/arch/arc/kernel/setup.c +++ b/arch/arc/kernel/setup.c @@ -390,7 +390,7 @@ static void arc_chk_core_config(struct cpuinfo_arc *info) #ifdef CONFIG_ARC_HAS_DCCM /* * DCCM can be arbit placed in hardware. 
- * Make sure it's placement/sz matches what Linux is built with + * Make sure its placement/sz matches what Linux is built with */ if ((unsigned int)__arc_dccm_base != info->dccm.base) panic("Linux built with incorrect DCCM Base address\n"); diff --git a/arch/arc/kernel/signal.c b/arch/arc/kernel/signal.c index 8f6f4a542964..fefa705a8638 100644 --- a/arch/arc/kernel/signal.c +++ b/arch/arc/kernel/signal.c @@ -8,15 +8,16 @@ * * vineetg: Nov 2009 (Everything needed for TIF_RESTORE_SIGMASK) * -do_signal() supports TIF_RESTORE_SIGMASK - * -do_signal() no loner needs oldset, required by OLD sys_sigsuspend - * -sys_rt_sigsuspend() now comes from generic code, so discard arch implemen + * -do_signal() no longer needs oldset, required by OLD sys_sigsuspend + * -sys_rt_sigsuspend() now comes from generic code, so discard arch + * implementation * -sys_sigsuspend() no longer needs to fudge ptregs, hence that arg removed * -sys_sigsuspend() no longer loops for do_signal(), sets TIF_xxx and leaves * the job to do_signal() * * vineetg: July 2009 * -Modified Code to support the uClibc provided userland sigreturn stub - * to avoid kernel synthesing it on user stack at runtime, costing TLB + * to avoid kernel synthesizing it on user stack at runtime, costing TLB * probes and Cache line flushes. 
* * vineetg: July 2009 diff --git a/arch/arc/kernel/traps.c b/arch/arc/kernel/traps.c index 9b9570b79362..a19751e824fb 100644 --- a/arch/arc/kernel/traps.c +++ b/arch/arc/kernel/traps.c @@ -89,7 +89,7 @@ int do_misaligned_access(unsigned long address, struct pt_regs *regs, /* * Entry point for miscll errors such as Nested Exceptions - * -Duplicate TLB entry is handled seperately though + * -Duplicate TLB entry is handled separately though */ void do_machine_check_fault(unsigned long address, struct pt_regs *regs) { diff --git a/arch/arc/kernel/vmlinux.lds.S b/arch/arc/kernel/vmlinux.lds.S index 549c3f407918..61a1b2b96e1d 100644 --- a/arch/arc/kernel/vmlinux.lds.S +++ b/arch/arc/kernel/vmlinux.lds.S @@ -41,8 +41,8 @@ SECTIONS #endif /* - * The reason for having a seperate subsection .init.ramfs is to - * prevent objump from including it in kernel dumps + * The reason for having a separate subsection .init.ramfs is to + * prevent objdump from including it in kernel dumps * * Reason for having .init.ramfs above .init is to make sure that the * binary blob is tucked away to one side, reducing the displacement diff --git a/arch/arc/mm/tlb.c b/arch/arc/mm/tlb.c index ad702b49aeb3..cae4a7aae0ed 100644 --- a/arch/arc/mm/tlb.c +++ b/arch/arc/mm/tlb.c @@ -212,7 +212,7 @@ void local_flush_tlb_range(struct vm_area_struct *vma, unsigned long start, unsigned long flags; /* If range @start to @end is more than 32 TLB entries deep, - * its better to move to a new ASID rather than searching for + * it's better to move to a new ASID rather than searching for * individual entries and then shooting them down * * The calc above is rough, doesn't account for unaligned parts, @@ -408,7 +408,7 @@ static void create_tlb(struct vm_area_struct *vma, unsigned long vaddr, pte_t *p * -More importantly it makes this handler inconsistent with fast-path * TLB Refill handler which always deals with "current" * - * Lets see the use cases when current->mm != vma->mm and we land here + * Let's see the 
use cases when current->mm != vma->mm and we land here * 1. execve->copy_strings()->__get_user_pages->handle_mm_fault * Here VM wants to pre-install a TLB entry for user stack while * current->mm still points to pre-execve mm (hence the condition). diff --git a/arch/arc/mm/tlbex.S b/arch/arc/mm/tlbex.S index e054780a8fe0..dc65e87a531f 100644 --- a/arch/arc/mm/tlbex.S +++ b/arch/arc/mm/tlbex.S @@ -5,19 +5,19 @@ * Copyright (C) 2004, 2007-2010, 2011-2012 Synopsys, Inc. (www.synopsys.com) * * Vineetg: April 2011 : - * -MMU v1: moved out legacy code into a seperate file + * -MMU v1: moved out legacy code into a separate file * -MMU v3: PD{0,1} bits layout changed: They don't overlap anymore, * helps avoid a shift when preparing PD0 from PTE * * Vineetg: July 2009 - * -For MMU V2, we need not do heuristics at the time of commiting a D-TLB - * entry, so that it doesn't knock out it's I-TLB entry + * -For MMU V2, we need not do heuristics at the time of committing a D-TLB + * entry, so that it doesn't knock out its I-TLB entry * -Some more fine tuning: * bmsk instead of add, asl.cc instead of branch, delay slot utilise etc * * Vineetg: July 2009 * -Practically rewrote the I/D TLB Miss handlers - * Now 40 and 135 instructions a peice as compared to 131 and 449 resp. + * Now 40 and 135 instructions apiece as compared to 131 and 449 resp. * Hence Leaner by 1.5 K * Used Conditional arithmetic to replace excessive branching * Also used short instructions wherever possible From f8def10f73a516b771051a2f70f2f0446902cb4f Mon Sep 17 00:00:00 2001 From: Mantas Pucka Date: Thu, 21 Mar 2024 14:30:01 +0000 Subject: [PATCH 024/313] mmc: sdhci-msm: prevent access to suspended controller Generic sdhci code registers LED device and uses host->runtime_suspended flag to protect access to it. The sdhci-msm driver doesn't set this flag, which causes a crash when LED is accessed while controller is runtime suspended. Fix this by setting the flag correctly. 
Cc: stable@vger.kernel.org Fixes: 67e6db113c90 ("mmc: sdhci-msm: Add pm_runtime and system PM support") Signed-off-by: Mantas Pucka Acked-by: Adrian Hunter Link: https://lore.kernel.org/r/20240321-sdhci-mmc-suspend-v1-1-fbc555a64400@8devices.com Signed-off-by: Ulf Hansson --- drivers/mmc/host/sdhci-msm.c | 16 +++++++++++++++- 1 file changed, 15 insertions(+), 1 deletion(-) diff --git a/drivers/mmc/host/sdhci-msm.c b/drivers/mmc/host/sdhci-msm.c index 668e0aceeeba..e113b99a3eab 100644 --- a/drivers/mmc/host/sdhci-msm.c +++ b/drivers/mmc/host/sdhci-msm.c @@ -2694,6 +2694,11 @@ static __maybe_unused int sdhci_msm_runtime_suspend(struct device *dev) struct sdhci_host *host = dev_get_drvdata(dev); struct sdhci_pltfm_host *pltfm_host = sdhci_priv(host); struct sdhci_msm_host *msm_host = sdhci_pltfm_priv(pltfm_host); + unsigned long flags; + + spin_lock_irqsave(&host->lock, flags); + host->runtime_suspended = true; + spin_unlock_irqrestore(&host->lock, flags); /* Drop the performance vote */ dev_pm_opp_set_rate(dev, 0); @@ -2708,6 +2713,7 @@ static __maybe_unused int sdhci_msm_runtime_resume(struct device *dev) struct sdhci_host *host = dev_get_drvdata(dev); struct sdhci_pltfm_host *pltfm_host = sdhci_priv(host); struct sdhci_msm_host *msm_host = sdhci_pltfm_priv(pltfm_host); + unsigned long flags; int ret; ret = clk_bulk_prepare_enable(ARRAY_SIZE(msm_host->bulk_clks), @@ -2726,7 +2732,15 @@ static __maybe_unused int sdhci_msm_runtime_resume(struct device *dev) dev_pm_opp_set_rate(dev, msm_host->clk_rate); - return sdhci_msm_ice_resume(msm_host); + ret = sdhci_msm_ice_resume(msm_host); + if (ret) + return ret; + + spin_lock_irqsave(&host->lock, flags); + host->runtime_suspended = false; + spin_unlock_irqrestore(&host->lock, flags); + + return ret; } static const struct dev_pm_ops sdhci_msm_pm_ops = { From 1781f2c461804c0123f59afc7350e520a88edffb Mon Sep 17 00:00:00 2001 From: Ikjoon Jang Date: Fri, 23 Feb 2024 17:11:21 +0800 Subject: [PATCH 025/313] arm64: dts: mediatek: 
mt8183: Add power-domains property to mfgcfg mfgcfg clock is under MFG_ASYNC power domain. Fixes: e526c9bc11f8 ("arm64: dts: Add Mediatek SoC MT8183 and evaluation board dts and Makefile") Fixes: 37fb78b9aeb7 ("arm64: dts: mediatek: Add mt8183 power domains controller") Signed-off-by: Weiyi Lu Signed-off-by: Ikjoon Jang Reviewed-by: Enric Balletbo i Serra Signed-off-by: Chen-Yu Tsai Link: https://lore.kernel.org/r/20240223091122.2430037-1-wenst@chromium.org Signed-off-by: AngeloGioacchino Del Regno --- arch/arm64/boot/dts/mediatek/mt8183.dtsi | 1 + 1 file changed, 1 insertion(+) diff --git a/arch/arm64/boot/dts/mediatek/mt8183.dtsi b/arch/arm64/boot/dts/mediatek/mt8183.dtsi index 93dfbf130231..774ae5d9143f 100644 --- a/arch/arm64/boot/dts/mediatek/mt8183.dtsi +++ b/arch/arm64/boot/dts/mediatek/mt8183.dtsi @@ -1637,6 +1637,7 @@ mfgcfg: syscon@13000000 { compatible = "mediatek,mt8183-mfgcfg", "syscon"; reg = <0 0x13000000 0 0x1000>; #clock-cells = <1>; + power-domains = <&spm MT8183_POWER_DOMAIN_MFG_ASYNC>; }; gpu: gpu@13040000 { From 00bcc8810d9dd69d3899a4189e2f3964f263a600 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?N=C3=ADcolas=20F=2E=20R=2E=20A=2E=20Prado?= Date: Thu, 29 Feb 2024 14:44:28 -0500 Subject: [PATCH 026/313] arm64: dts: mediatek: mt8192: Add missing gce-client-reg to mutex MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Add the missing mediatek,gce-client-reg property to the mutex node to allow it to use the GCE. This prevents the "can't parse gce-client-reg property" error from being printed and should result in better performance. Fixes: b4b75bac952b ("arm64: dts: mt8192: Add display nodes") Suggested-by: AngeloGioacchino Del Regno Signed-off-by: Nícolas F. R. A. 
Prado Reviewed-by: AngeloGioacchino Del Regno Link: https://lore.kernel.org/r/20240229-gce-client-reg-add-missing-mt8192-95-v1-1-b12c233a8a33@collabora.com Signed-off-by: AngeloGioacchino Del Regno --- arch/arm64/boot/dts/mediatek/mt8192.dtsi | 1 + 1 file changed, 1 insertion(+) diff --git a/arch/arm64/boot/dts/mediatek/mt8192.dtsi b/arch/arm64/boot/dts/mediatek/mt8192.dtsi index 05e401670bce..84cbdf6e9eb0 100644 --- a/arch/arm64/boot/dts/mediatek/mt8192.dtsi +++ b/arch/arm64/boot/dts/mediatek/mt8192.dtsi @@ -1464,6 +1464,7 @@ mutex: mutex@14001000 { reg = <0 0x14001000 0 0x1000>; interrupts = ; clocks = <&mmsys CLK_MM_DISP_MUTEX0>; + mediatek,gce-client-reg = <&gce SUBSYS_1400XXXX 0x1000 0x1000>; mediatek,gce-events = , ; power-domains = <&spm MT8192_POWER_DOMAIN_DISP>; From 96b0c1528ef41fe754f5d1378b1db6c098a2e33f Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?N=C3=ADcolas=20F=2E=20R=2E=20A=2E=20Prado?= Date: Thu, 29 Feb 2024 14:44:29 -0500 Subject: [PATCH 027/313] arm64: dts: mediatek: mt8195: Add missing gce-client-reg to vpp/vdosys MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Add the missing mediatek,gce-client-reg property to the vppsys and vdosys nodes to allow them to use the GCE. This prevents the "can't parse gce-client-reg property" error from being printed and should result in better performance. Fixes: 6aa5b46d1755 ("arm64: dts: mt8195: Add vdosys and vppsys clock nodes") Suggested-by: AngeloGioacchino Del Regno Signed-off-by: Nícolas F. R. A. 
Prado Link: https://lore.kernel.org/r/20240229-gce-client-reg-add-missing-mt8192-95-v1-2-b12c233a8a33@collabora.com Signed-off-by: AngeloGioacchino Del Regno --- arch/arm64/boot/dts/mediatek/mt8195.dtsi | 3 +++ 1 file changed, 3 insertions(+) diff --git a/arch/arm64/boot/dts/mediatek/mt8195.dtsi b/arch/arm64/boot/dts/mediatek/mt8195.dtsi index ea6dc220e1cc..f2912e1a0e49 100644 --- a/arch/arm64/boot/dts/mediatek/mt8195.dtsi +++ b/arch/arm64/boot/dts/mediatek/mt8195.dtsi @@ -2028,6 +2028,7 @@ vppsys0: syscon@14000000 { compatible = "mediatek,mt8195-vppsys0", "syscon"; reg = <0 0x14000000 0 0x1000>; #clock-cells = <1>; + mediatek,gce-client-reg = <&gce1 SUBSYS_1400XXXX 0 0x1000>; }; dma-controller@14001000 { @@ -2251,6 +2252,7 @@ vppsys1: syscon@14f00000 { compatible = "mediatek,mt8195-vppsys1", "syscon"; reg = <0 0x14f00000 0 0x1000>; #clock-cells = <1>; + mediatek,gce-client-reg = <&gce1 SUBSYS_14f0XXXX 0 0x1000>; }; mutex@14f01000 { @@ -3080,6 +3082,7 @@ vdosys0: syscon@1c01a000 { reg = <0 0x1c01a000 0 0x1000>; mboxes = <&gce0 0 CMDQ_THR_PRIO_4>; #clock-cells = <1>; + mediatek,gce-client-reg = <&gce0 SUBSYS_1c01XXXX 0xa000 0x1000>; }; From 3b129949184a1251e6a42db714f6d68b75fabedd Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?N=C3=ADcolas=20F=2E=20R=2E=20A=2E=20Prado?= Date: Thu, 29 Feb 2024 14:44:30 -0500 Subject: [PATCH 028/313] arm64: dts: mediatek: mt8195: Add missing gce-client-reg to mutex MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Add the missing mediatek,gce-client-reg property to the mutex node to allow it to use the GCE. This prevents the "can't parse gce-client-reg property" error from being printed and should result in better performance. Fixes: b852ee68fd72 ("arm64: dts: mt8195: Add display node for vdosys0") Suggested-by: AngeloGioacchino Del Regno Signed-off-by: Nícolas F. R. A. 
Prado Link: https://lore.kernel.org/r/20240229-gce-client-reg-add-missing-mt8192-95-v1-3-b12c233a8a33@collabora.com Signed-off-by: AngeloGioacchino Del Regno --- arch/arm64/boot/dts/mediatek/mt8195.dtsi | 1 + 1 file changed, 1 insertion(+) diff --git a/arch/arm64/boot/dts/mediatek/mt8195.dtsi b/arch/arm64/boot/dts/mediatek/mt8195.dtsi index f2912e1a0e49..fd074103979c 100644 --- a/arch/arm64/boot/dts/mediatek/mt8195.dtsi +++ b/arch/arm64/boot/dts/mediatek/mt8195.dtsi @@ -3264,6 +3264,7 @@ mutex: mutex@1c016000 { interrupts = ; power-domains = <&spm MT8195_POWER_DOMAIN_VDOSYS0>; clocks = <&vdosys0 CLK_VDO0_DISP_MUTEX0>; + mediatek,gce-client-reg = <&gce0 SUBSYS_1c01XXXX 0x6000 0x1000>; mediatek,gce-events = ; }; From 58f126296c3c52d02bf3fad1f68c331d718c4a9b Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?N=C3=ADcolas=20F=2E=20R=2E=20A=2E=20Prado?= Date: Thu, 29 Feb 2024 14:44:31 -0500 Subject: [PATCH 029/313] arm64: dts: mediatek: mt8195: Add missing gce-client-reg to mutex1 MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Add the missing mediatek,gce-client-reg property to the mutex1 node to allow it to use the GCE. This prevents the "can't parse gce-client-reg property" error from being printed and should result in better performance. Fixes: 92d2c23dc269 ("arm64: dts: mt8195: add display node for vdosys1") Suggested-by: AngeloGioacchino Del Regno Signed-off-by: Nícolas F. R. A. 
Prado Link: https://lore.kernel.org/r/20240229-gce-client-reg-add-missing-mt8192-95-v1-4-b12c233a8a33@collabora.com Signed-off-by: AngeloGioacchino Del Regno --- arch/arm64/boot/dts/mediatek/mt8195.dtsi | 1 + 1 file changed, 1 insertion(+) diff --git a/arch/arm64/boot/dts/mediatek/mt8195.dtsi b/arch/arm64/boot/dts/mediatek/mt8195.dtsi index fd074103979c..5d8b68f86ce4 100644 --- a/arch/arm64/boot/dts/mediatek/mt8195.dtsi +++ b/arch/arm64/boot/dts/mediatek/mt8195.dtsi @@ -3335,6 +3335,7 @@ mutex1: mutex@1c101000 { power-domains = <&spm MT8195_POWER_DOMAIN_VDOSYS1>; clocks = <&vdosys1 CLK_VDO1_DISP_MUTEX>; clock-names = "vdo1_mutex"; + mediatek,gce-client-reg = <&gce0 SUBSYS_1c10XXXX 0x1000 0x1000>; mediatek,gce-events = ; }; From 17b33dd9e4a38fbaca87c68e532b52f9d0492ba7 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?N=C3=ADcolas=20F=2E=20R=2E=20A=2E=20Prado?= Date: Wed, 10 Jan 2024 11:23:01 -0300 Subject: [PATCH 030/313] arm64: dts: mediatek: cherry: Describe CPU supplies MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Describe in each CPU node the regulator supplying it. Fixes: 260c04d425eb ("arm64: dts: mediatek: cherry: Enable MT6315 regulators on SPMI bus") Signed-off-by: Nícolas F. R. A. 
Prado Reviewed-by: AngeloGioacchino Del Regno Link: https://lore.kernel.org/r/20240110142305.755367-2-nfraprado@collabora.com Signed-off-by: AngeloGioacchino Del Regno --- .../boot/dts/mediatek/mt8195-cherry.dtsi | 32 +++++++++++++++++++ 1 file changed, 32 insertions(+) diff --git a/arch/arm64/boot/dts/mediatek/mt8195-cherry.dtsi b/arch/arm64/boot/dts/mediatek/mt8195-cherry.dtsi index f94c07f8b933..f12322eac389 100644 --- a/arch/arm64/boot/dts/mediatek/mt8195-cherry.dtsi +++ b/arch/arm64/boot/dts/mediatek/mt8195-cherry.dtsi @@ -264,6 +264,38 @@ &auxadc { status = "okay"; }; +&cpu0 { + cpu-supply = <&mt6359_vcore_buck_reg>; +}; + +&cpu1 { + cpu-supply = <&mt6359_vcore_buck_reg>; +}; + +&cpu2 { + cpu-supply = <&mt6359_vcore_buck_reg>; +}; + +&cpu3 { + cpu-supply = <&mt6359_vcore_buck_reg>; +}; + +&cpu4 { + cpu-supply = <&mt6315_6_vbuck1>; +}; + +&cpu5 { + cpu-supply = <&mt6315_6_vbuck1>; +}; + +&cpu6 { + cpu-supply = <&mt6315_6_vbuck1>; +}; + +&cpu7 { + cpu-supply = <&mt6315_6_vbuck1>; +}; + &dp_intf0 { status = "okay"; From 374a7c6400e314458178255a63c37d6347845092 Mon Sep 17 00:00:00 2001 From: Pin-yen Lin Date: Fri, 15 Mar 2024 19:16:02 +0800 Subject: [PATCH 031/313] arm64: dts: mediatek: mt8192-asurada: Update min voltage constraint for MT6315 Update the minimum voltage from 300000 uV to 400000 uV so it matches the MT6315 datasheet. Also update the minimum voltage for Vgpu regulator from 606250 uV to 400000 uV because the requested voltage could be lower than the minimum voltage on the GPU OPP table when the MTK Smart Voltage Scaling (SVS) driver is enabled. 
Fixes: 3183cb62b033 ("arm64: dts: mediatek: asurada: Add SPMI regulators") Signed-off-by: Pin-yen Lin Reviewed-by: AngeloGioacchino Del Regno Link: https://lore.kernel.org/r/20240315111621.2263159-2-treapking@chromium.org Signed-off-by: AngeloGioacchino Del Regno --- arch/arm64/boot/dts/mediatek/mt8192-asurada.dtsi | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/arch/arm64/boot/dts/mediatek/mt8192-asurada.dtsi b/arch/arm64/boot/dts/mediatek/mt8192-asurada.dtsi index 9b738f6a5d21..7a704246678f 100644 --- a/arch/arm64/boot/dts/mediatek/mt8192-asurada.dtsi +++ b/arch/arm64/boot/dts/mediatek/mt8192-asurada.dtsi @@ -1421,7 +1421,7 @@ regulators { mt6315_6_vbuck1: vbuck1 { regulator-compatible = "vbuck1"; regulator-name = "Vbcpu"; - regulator-min-microvolt = <300000>; + regulator-min-microvolt = <400000>; regulator-max-microvolt = <1193750>; regulator-enable-ramp-delay = <256>; regulator-allowed-modes = <0 1 2>; @@ -1431,7 +1431,7 @@ mt6315_6_vbuck1: vbuck1 { mt6315_6_vbuck3: vbuck3 { regulator-compatible = "vbuck3"; regulator-name = "Vlcpu"; - regulator-min-microvolt = <300000>; + regulator-min-microvolt = <400000>; regulator-max-microvolt = <1193750>; regulator-enable-ramp-delay = <256>; regulator-allowed-modes = <0 1 2>; @@ -1448,7 +1448,7 @@ regulators { mt6315_7_vbuck1: vbuck1 { regulator-compatible = "vbuck1"; regulator-name = "Vgpu"; - regulator-min-microvolt = <606250>; + regulator-min-microvolt = <400000>; regulator-max-microvolt = <800000>; regulator-enable-ramp-delay = <256>; regulator-allowed-modes = <0 1 2>; From e9a6b8b5c61350535c7eb5ea9b2dde0d5745bd1b Mon Sep 17 00:00:00 2001 From: Pin-yen Lin Date: Fri, 15 Mar 2024 19:16:03 +0800 Subject: [PATCH 032/313] arm64: dts: mediatek: mt8195-cherry: Update min voltage constraint for MT6315 Update the minimum voltage from 300000 uV to 400000 uV so it matches the MT6315 datasheet. 
Also update the minimum voltage for Vgpu regulator from 625000 uV to 400000 uV because the requested voltage could be lower than the minimum voltage on the GPU OPP table when the MTK Smart Voltage Scaling (SVS) driver is enabled. Fixes: 260c04d425eb ("arm64: dts: mediatek: cherry: Enable MT6315 regulators on SPMI bus") Signed-off-by: Pin-yen Lin Reviewed-by: AngeloGioacchino Del Regno Link: https://lore.kernel.org/r/20240315111621.2263159-3-treapking@chromium.org Signed-off-by: AngeloGioacchino Del Regno --- arch/arm64/boot/dts/mediatek/mt8195-cherry.dtsi | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/arch/arm64/boot/dts/mediatek/mt8195-cherry.dtsi b/arch/arm64/boot/dts/mediatek/mt8195-cherry.dtsi index f12322eac389..4a11918da370 100644 --- a/arch/arm64/boot/dts/mediatek/mt8195-cherry.dtsi +++ b/arch/arm64/boot/dts/mediatek/mt8195-cherry.dtsi @@ -1246,7 +1246,7 @@ regulators { mt6315_6_vbuck1: vbuck1 { regulator-compatible = "vbuck1"; regulator-name = "Vbcpu"; - regulator-min-microvolt = <300000>; + regulator-min-microvolt = <400000>; regulator-max-microvolt = <1193750>; regulator-enable-ramp-delay = <256>; regulator-ramp-delay = <6250>; @@ -1264,7 +1264,7 @@ regulators { mt6315_7_vbuck1: vbuck1 { regulator-compatible = "vbuck1"; regulator-name = "Vgpu"; - regulator-min-microvolt = <625000>; + regulator-min-microvolt = <400000>; regulator-max-microvolt = <1193750>; regulator-enable-ramp-delay = <256>; regulator-ramp-delay = <6250>; From 296118a8dc297de47d9b3a364b9743f8446bd612 Mon Sep 17 00:00:00 2001 From: Pin-yen Lin Date: Fri, 15 Mar 2024 19:16:04 +0800 Subject: [PATCH 033/313] arm64: dts: mediatek: mt8183-kukui: Use default min voltage for MT6358 The requested voltage could be lower than the minimum voltage on the GPU OPP table when the MTK Smart Voltage Scaling (SVS) driver is enabled, so removing the definition in mt8183-kukui to use the default minimum voltage (500000 uV) defined in mt6358.dtsi. 
Fixes: 31c6732da9d5 ("arm64: dts: mediatek: mt8183-kukui: Override vgpu/vsram_gpu constraints") Signed-off-by: Pin-yen Lin Reviewed-by: AngeloGioacchino Del Regno Link: https://lore.kernel.org/r/20240315111621.2263159-4-treapking@chromium.org Signed-off-by: AngeloGioacchino Del Regno --- arch/arm64/boot/dts/mediatek/mt8183-kukui.dtsi | 1 - 1 file changed, 1 deletion(-) diff --git a/arch/arm64/boot/dts/mediatek/mt8183-kukui.dtsi b/arch/arm64/boot/dts/mediatek/mt8183-kukui.dtsi index 6bd7424ef66c..100191c6453b 100644 --- a/arch/arm64/boot/dts/mediatek/mt8183-kukui.dtsi +++ b/arch/arm64/boot/dts/mediatek/mt8183-kukui.dtsi @@ -433,7 +433,6 @@ &mt6358regulator { }; &mt6358_vgpu_reg { - regulator-min-microvolt = <625000>; regulator-max-microvolt = <900000>; regulator-coupled-with = <&mt6358_vsram_gpu_reg>; From 366940c860bc27cc1cc92061e6626a4fa56bab3c Mon Sep 17 00:00:00 2001 From: Pin-yen Lin Date: Fri, 15 Mar 2024 19:16:05 +0800 Subject: [PATCH 034/313] arm64: dts: mediatek: mt8186-corsola: Update min voltage constraint for Vgpu The requested voltage could be lower than the minimum voltage on the GPU OPP table when the MTK Smart Voltage Scaling (SVS) driver is enabled, so update the minimum voltage constraint from 600000 uV to 500000 uV as listed on the mt6366 datasheet. 
Fixes: 8855d01fb81f ("arm64: dts: mediatek: Add MT8186 Krabby platform based Tentacruel / Tentacool") Signed-off-by: Pin-yen Lin Reviewed-by: AngeloGioacchino Del Regno Link: https://lore.kernel.org/r/20240315111621.2263159-5-treapking@chromium.org Signed-off-by: AngeloGioacchino Del Regno --- arch/arm64/boot/dts/mediatek/mt8186-corsola.dtsi | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/arm64/boot/dts/mediatek/mt8186-corsola.dtsi b/arch/arm64/boot/dts/mediatek/mt8186-corsola.dtsi index 3dea28f1d806..1807e9d6cb0e 100644 --- a/arch/arm64/boot/dts/mediatek/mt8186-corsola.dtsi +++ b/arch/arm64/boot/dts/mediatek/mt8186-corsola.dtsi @@ -1296,7 +1296,7 @@ mt6366_vgpu_reg: vgpu { * regulator coupling requirements. */ regulator-name = "ppvar_dvdd_vgpu"; - regulator-min-microvolt = <600000>; + regulator-min-microvolt = <500000>; regulator-max-microvolt = <950000>; regulator-ramp-delay = <6250>; regulator-enable-ramp-delay = <200>; From 3ba5a61594347ab46e7c2cff6cd63ea0f1282efb Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Rafa=C5=82=20Mi=C5=82ecki?= Date: Sun, 17 Mar 2024 23:10:47 +0100 Subject: [PATCH 035/313] arm64: dts: mediatek: mt7622: fix clock controllers MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit 1. Drop unneeded "syscon"s (bindings were updated recently) 2. Use "clock-controller" in nodenames 3. 
Add missing "#clock-cells" Fixes: d7167881e03e ("arm64: dts: mt7622: add clock controller device nodes") Fixes: e9b65ecb7c30 ("arm64: dts: mediatek: mt7622: introduce nodes for Wireless Ethernet Dispatch") Signed-off-by: Rafał Miłecki Reviewed-by: AngeloGioacchino Del Regno Link: https://lore.kernel.org/r/20240317221050.18595-2-zajec5@gmail.com Signed-off-by: AngeloGioacchino Del Regno --- arch/arm64/boot/dts/mediatek/mt7622.dtsi | 27 +++++++++++------------- 1 file changed, 12 insertions(+), 15 deletions(-) diff --git a/arch/arm64/boot/dts/mediatek/mt7622.dtsi b/arch/arm64/boot/dts/mediatek/mt7622.dtsi index 3ee9266fa8e9..283fdf7d2d8b 100644 --- a/arch/arm64/boot/dts/mediatek/mt7622.dtsi +++ b/arch/arm64/boot/dts/mediatek/mt7622.dtsi @@ -283,16 +283,14 @@ thermal_calibration: calib@198 { }; }; - apmixedsys: apmixedsys@10209000 { - compatible = "mediatek,mt7622-apmixedsys", - "syscon"; + apmixedsys: clock-controller@10209000 { + compatible = "mediatek,mt7622-apmixedsys"; reg = <0 0x10209000 0 0x1000>; #clock-cells = <1>; }; - topckgen: topckgen@10210000 { - compatible = "mediatek,mt7622-topckgen", - "syscon"; + topckgen: clock-controller@10210000 { + compatible = "mediatek,mt7622-topckgen"; reg = <0 0x10210000 0 0x1000>; #clock-cells = <1>; }; @@ -734,9 +732,8 @@ wmac: wmac@18000000 { power-domains = <&scpsys MT7622_POWER_DOMAIN_WB>; }; - ssusbsys: ssusbsys@1a000000 { - compatible = "mediatek,mt7622-ssusbsys", - "syscon"; + ssusbsys: clock-controller@1a000000 { + compatible = "mediatek,mt7622-ssusbsys"; reg = <0 0x1a000000 0 0x1000>; #clock-cells = <1>; #reset-cells = <1>; @@ -793,9 +790,8 @@ u2port1: usb-phy@1a0c5000 { }; }; - pciesys: pciesys@1a100800 { - compatible = "mediatek,mt7622-pciesys", - "syscon"; + pciesys: clock-controller@1a100800 { + compatible = "mediatek,mt7622-pciesys"; reg = <0 0x1a100800 0 0x1000>; #clock-cells = <1>; #reset-cells = <1>; @@ -921,12 +917,13 @@ sata_port: sata-phy@1a243000 { }; }; - hifsys: syscon@1af00000 { - compatible = 
"mediatek,mt7622-hifsys", "syscon"; + hifsys: clock-controller@1af00000 { + compatible = "mediatek,mt7622-hifsys"; reg = <0 0x1af00000 0 0x70>; + #clock-cells = <1>; }; - ethsys: syscon@1b000000 { + ethsys: clock-controller@1b000000 { compatible = "mediatek,mt7622-ethsys", "syscon"; reg = <0 0x1b000000 0 0x1000>; From 800dc93c3941e372c94278bf4059e6e82f60bd66 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Rafa=C5=82=20Mi=C5=82ecki?= Date: Sun, 17 Mar 2024 23:10:48 +0100 Subject: [PATCH 036/313] arm64: dts: mediatek: mt7622: fix IR nodename MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Fix following validation error: arch/arm64/boot/dts/mediatek/mt7622-rfb1.dtb: cir@10009000: $nodename:0: 'cir@10009000' does not match '^ir(-receiver)?(@[a-f0-9]+)?$' from schema $id: http://devicetree.org/schemas/media/mediatek,mt7622-cir.yaml# Fixes: ae457b7679c4 ("arm64: dts: mt7622: add SoC and peripheral related device nodes") Signed-off-by: Rafał Miłecki Reviewed-by: AngeloGioacchino Del Regno Link: https://lore.kernel.org/r/20240317221050.18595-3-zajec5@gmail.com Signed-off-by: AngeloGioacchino Del Regno --- arch/arm64/boot/dts/mediatek/mt7622.dtsi | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/arm64/boot/dts/mediatek/mt7622.dtsi b/arch/arm64/boot/dts/mediatek/mt7622.dtsi index 283fdf7d2d8b..4c8a71c8184b 100644 --- a/arch/arm64/boot/dts/mediatek/mt7622.dtsi +++ b/arch/arm64/boot/dts/mediatek/mt7622.dtsi @@ -252,7 +252,7 @@ scpsys: power-controller@10006000 { clock-names = "hif_sel"; }; - cir: cir@10009000 { + cir: ir-receiver@10009000 { compatible = "mediatek,mt7622-cir"; reg = <0 0x10009000 0 0x1000>; interrupts = ; From 208add29ce5b7291f6c466e4dfd9cbf61c72888e Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Rafa=C5=82=20Mi=C5=82ecki?= Date: Sun, 17 Mar 2024 23:10:49 +0100 Subject: [PATCH 037/313] arm64: dts: mediatek: mt7622: fix ethernet controller "compatible" MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 
Content-Transfer-Encoding: 8bit Fix following validation error: arch/arm64/boot/dts/mediatek/mt7622-rfb1.dtb: ethernet@1b100000: compatible: ['mediatek,mt7622-eth', 'mediatek,mt2701-eth', 'syscon'] is too long from schema $id: http://devicetree.org/schemas/net/mediatek,net.yaml# (and other complaints about wrong clocks). Fixes: 5f599b3a0bb8 ("arm64: dts: mt7622: add ethernet device nodes") Signed-off-by: Rafał Miłecki Reviewed-by: AngeloGioacchino Del Regno Link: https://lore.kernel.org/r/20240317221050.18595-4-zajec5@gmail.com Signed-off-by: AngeloGioacchino Del Regno --- arch/arm64/boot/dts/mediatek/mt7622.dtsi | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/arch/arm64/boot/dts/mediatek/mt7622.dtsi b/arch/arm64/boot/dts/mediatek/mt7622.dtsi index 4c8a71c8184b..8e46480b5364 100644 --- a/arch/arm64/boot/dts/mediatek/mt7622.dtsi +++ b/arch/arm64/boot/dts/mediatek/mt7622.dtsi @@ -963,9 +963,7 @@ wed1: wed@1020b000 { }; eth: ethernet@1b100000 { - compatible = "mediatek,mt7622-eth", - "mediatek,mt2701-eth", - "syscon"; + compatible = "mediatek,mt7622-eth"; reg = <0 0x1b100000 0 0x20000>; interrupts = , , From ecb5b0034f5bcc35003b4b965cf50c6e98316e79 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Rafa=C5=82=20Mi=C5=82ecki?= Date: Sun, 17 Mar 2024 23:10:50 +0100 Subject: [PATCH 038/313] arm64: dts: mediatek: mt7622: drop "reset-names" from thermal block MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Binding doesn't specify "reset-names" property and Linux driver also doesn't use it.
Fix following validation error: arch/arm64/boot/dts/mediatek/mt7622-rfb1.dtb: thermal@1100b000: Unevaluated properties are not allowed ('reset-names' was unexpected) from schema $id: http://devicetree.org/schemas/thermal/mediatek,thermal.yaml# Fixes: ae457b7679c4 ("arm64: dts: mt7622: add SoC and peripheral related device nodes") Signed-off-by: Rafał Miłecki Reviewed-by: AngeloGioacchino Del Regno Link: https://lore.kernel.org/r/20240317221050.18595-5-zajec5@gmail.com Signed-off-by: AngeloGioacchino Del Regno --- arch/arm64/boot/dts/mediatek/mt7622.dtsi | 1 - 1 file changed, 1 deletion(-) diff --git a/arch/arm64/boot/dts/mediatek/mt7622.dtsi b/arch/arm64/boot/dts/mediatek/mt7622.dtsi index 8e46480b5364..917fa39a74f8 100644 --- a/arch/arm64/boot/dts/mediatek/mt7622.dtsi +++ b/arch/arm64/boot/dts/mediatek/mt7622.dtsi @@ -513,7 +513,6 @@ thermal: thermal@1100b000 { <&pericfg CLK_PERI_AUXADC_PD>; clock-names = "therm", "auxadc"; resets = <&pericfg MT7622_PERI_THERM_SW_RST>; - reset-names = "therm"; mediatek,auxadc = <&auxadc>; mediatek,apmixedsys = <&apmixedsys>; nvmem-cells = <&thermal_calibration>; From 8db8c77059e75a0f418b10ede39dd82a9eb031fa Mon Sep 17 00:00:00 2001 From: Nuno Pereira Date: Mon, 26 Feb 2024 22:39:31 +0000 Subject: [PATCH 039/313] HID: nintendo: Fix N64 controller being identified as mouse This patch is regarding the recent addition of support for the NSO controllers to hid-nintendo. All controllers are working correctly with the exception of the N64 controller, which is being identified as a mouse by udev. This results in the joystick controlling the mouse cursor and the controller not being detected by games. The reason for this is because the N64's C buttons have been attributed to BTN_FORWARD, BTN_BACK, BTN_LEFT, BTN_RIGHT, which are buttons typically attributed to mice. This patch changes those buttons to controller buttons, making the controller be correctly identified as such. 
Signed-off-by: Nuno Pereira Signed-off-by: Jiri Kosina --- drivers/hid/hid-nintendo.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/drivers/hid/hid-nintendo.c b/drivers/hid/hid-nintendo.c index ab5953fc2436..80e0f23c1c33 100644 --- a/drivers/hid/hid-nintendo.c +++ b/drivers/hid/hid-nintendo.c @@ -481,10 +481,10 @@ static const struct joycon_ctlr_button_mapping n64con_button_mappings[] = { { BTN_TR, JC_BTN_R, }, { BTN_TR2, JC_BTN_LSTICK, }, /* ZR */ { BTN_START, JC_BTN_PLUS, }, - { BTN_FORWARD, JC_BTN_Y, }, /* C UP */ - { BTN_BACK, JC_BTN_ZR, }, /* C DOWN */ - { BTN_LEFT, JC_BTN_X, }, /* C LEFT */ - { BTN_RIGHT, JC_BTN_MINUS, }, /* C RIGHT */ + { BTN_SELECT, JC_BTN_Y, }, /* C UP */ + { BTN_X, JC_BTN_ZR, }, /* C DOWN */ + { BTN_Y, JC_BTN_X, }, /* C LEFT */ + { BTN_C, JC_BTN_MINUS, }, /* C RIGHT */ { BTN_MODE, JC_BTN_HOME, }, { BTN_Z, JC_BTN_CAP, }, { /* sentinel */ }, From ea36bf1827462e4a52365bf8e3f7d1712c5d9600 Mon Sep 17 00:00:00 2001 From: Kenny Levinsen Date: Tue, 2 Apr 2024 13:10:04 +0200 Subject: [PATCH 040/313] HID: i2c-hid: Revert to await reset ACK before reading report descriptor In af93a167eda9, i2c_hid_parse was changed to continue with reading the report descriptor before waiting for reset to be acknowledged. This has led to two regressions: 1. We fail to handle reset acknowledgment if it happens while reading the report descriptor. The transfer sets I2C_HID_READ_PENDING, which causes the IRQ handler to return without doing anything. This affects both a Wacom touchscreen and a Sensel touchpad. 2. On a Sensel touchpad, reading the report descriptor this quickly after reset results in all zeroes or partial zeroes. The issues were observed on the Lenovo Thinkpad Z16 Gen 2. The change in question was made based on a Microsoft article[0] stating that Windows 8 *may* read the report descriptor in parallel with awaiting reset acknowledgment, intended as a slight reset performance optimization.
Perhaps they only do this if reset is not completing quickly enough for their tastes? As the code is not currently ready to read registers in parallel with a pending reset acknowledgment, and as reading quickly breaks the report descriptor on the Sensel touchpad, revert to waiting for reset acknowledgment before proceeding to read the report descriptor. [0]: https://learn.microsoft.com/en-us/windows-hardware/drivers/hid/plug-and-play-support-and-power-management Fixes: af93a167eda9 ("HID: i2c-hid: Move i2c_hid_finish_hwreset() to after reading the report-descriptor") Closes: https://bugzilla.redhat.com/show_bug.cgi?id=2271136 Cc: stable@vger.kernel.org Signed-off-by: Kenny Levinsen Link: https://lore.kernel.org/r/20240331182440.14477-1-kl@kl.wtf [hdegoede@redhat.com Drop no longer necessary abort_reset error exit path] Signed-off-by: Hans de Goede Tested-by: Mark Pearson Signed-off-by: Jiri Kosina --- drivers/hid/i2c-hid/i2c-hid-core.c | 29 ++++++++--------------------- 1 file changed, 8 insertions(+), 21 deletions(-) diff --git a/drivers/hid/i2c-hid/i2c-hid-core.c b/drivers/hid/i2c-hid/i2c-hid-core.c index 1c86c97688e9..d965382196c6 100644 --- a/drivers/hid/i2c-hid/i2c-hid-core.c +++ b/drivers/hid/i2c-hid/i2c-hid-core.c @@ -726,12 +726,15 @@ static int i2c_hid_parse(struct hid_device *hid) mutex_lock(&ihid->reset_lock); do { ret = i2c_hid_start_hwreset(ihid); - if (ret) + if (ret == 0) + ret = i2c_hid_finish_hwreset(ihid); + else msleep(1000); } while (tries-- > 0 && ret); + mutex_unlock(&ihid->reset_lock); if (ret) - goto abort_reset; + return ret; use_override = i2c_hid_get_dmi_hid_report_desc_override(client->name, &rsize); @@ -741,11 +744,8 @@ static int i2c_hid_parse(struct hid_device *hid) i2c_hid_dbg(ihid, "Using a HID report descriptor override\n"); } else { rdesc = kzalloc(rsize, GFP_KERNEL); - - if (!rdesc) { - ret = -ENOMEM; - goto abort_reset; - } + if (!rdesc) + return -ENOMEM; i2c_hid_dbg(ihid, "asking HID report descriptor\n"); @@ -754,23 +754,10 @@ 
static int i2c_hid_parse(struct hid_device *hid) rdesc, rsize); if (ret) { hid_err(hid, "reading report descriptor failed\n"); - goto abort_reset; + goto out; } } - /* - * Windows directly reads the report-descriptor after sending reset - * and then waits for resets completion afterwards. Some touchpads - * actually wait for the report-descriptor to be read before signalling - * reset completion. - */ - ret = i2c_hid_finish_hwreset(ihid); -abort_reset: - clear_bit(I2C_HID_RESET_PENDING, &ihid->flags); - mutex_unlock(&ihid->reset_lock); - if (ret) - goto out; - i2c_hid_dbg(ihid, "Report Descriptor: %*ph\n", rsize, rdesc); ret = hid_parse_report(hid, rdesc, rsize); From 21f28a7eb78dea6c59be6b0a5e0b47bf3d25fcbb Mon Sep 17 00:00:00 2001 From: Yaraslau Furman Date: Wed, 3 Apr 2024 19:54:24 +0300 Subject: [PATCH 041/313] HID: logitech-dj: allow mice to use all types of reports You can bind whatever action you want to the mouse's reprogrammable buttons using Windows application. Allow Linux to receive multimedia keycodes. 
Fixes: 3ed224e273ac ("HID: logitech-dj: Fix 064d:c52f receiver support") Signed-off-by: Yaraslau Furman Reviewed-by: Hans de Goede Signed-off-by: Jiri Kosina --- drivers/hid/hid-logitech-dj.c | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/drivers/hid/hid-logitech-dj.c b/drivers/hid/hid-logitech-dj.c index e6a8b6d8eab7..3c3c497b6b91 100644 --- a/drivers/hid/hid-logitech-dj.c +++ b/drivers/hid/hid-logitech-dj.c @@ -965,9 +965,7 @@ static void logi_hidpp_dev_conn_notif_equad(struct hid_device *hdev, } break; case REPORT_TYPE_MOUSE: - workitem->reports_supported |= STD_MOUSE | HIDPP; - if (djrcv_dev->type == recvr_type_mouse_only) - workitem->reports_supported |= MULTIMEDIA; + workitem->reports_supported |= STD_MOUSE | HIDPP | MULTIMEDIA; break; } } From ace323f80b9bc6734289a4e8a77938a3ce964c7d Mon Sep 17 00:00:00 2001 From: Maksim Kiselev Date: Tue, 2 Apr 2024 12:35:39 +0300 Subject: [PATCH 042/313] mmc: sdhci-of-dwcmshc: th1520: Increase tuning loop count to 128 Fix SD card tuning error by increasing tuning loop count from 40(MAX_TUNING_LOOP) to 128. For some reason the tuning algorithm requires to move through all the taps of delay line even if the THRESHOLD_MODE (bit 2 in AT_CTRL_R) is used instead of the LARGEST_WIN_MODE. 
Tested-by: Drew Fustini Tested-by: Xi Ruoyao Signed-off-by: Maksim Kiselev Acked-by: Adrian Hunter Fixes: 43658a542ebf ("mmc: sdhci-of-dwcmshc: Add support for T-Head TH1520") Cc: stable@vger.kernel.org Link: https://lore.kernel.org/r/20240402093539.184287-1-bigunclemax@gmail.com Signed-off-by: Ulf Hansson --- drivers/mmc/host/sdhci-of-dwcmshc.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/mmc/host/sdhci-of-dwcmshc.c b/drivers/mmc/host/sdhci-of-dwcmshc.c index 1d8f5a76096a..f2e4a93ed1d6 100644 --- a/drivers/mmc/host/sdhci-of-dwcmshc.c +++ b/drivers/mmc/host/sdhci-of-dwcmshc.c @@ -626,6 +626,7 @@ static int th1520_execute_tuning(struct sdhci_host *host, u32 opcode) /* perform tuning */ sdhci_start_tuning(host); + host->tuning_loop_count = 128; host->tuning_err = __sdhci_execute_tuning(host, opcode); if (host->tuning_err) { /* disable auto-tuning upon tuning error */ From 49ceae68a0df9a92617a61e9ce8a0efcf6419585 Mon Sep 17 00:00:00 2001 From: Laine Taffin Altman Date: Wed, 3 Apr 2024 14:06:59 -0700 Subject: [PATCH 043/313] rust: init: remove impl Zeroable for Infallible In Rust, producing an invalid value of any type is immediate undefined behavior (UB); this includes via zeroing memory. Therefore, since an uninhabited type has no valid values, producing any values at all for it is UB. The Rust standard library type `core::convert::Infallible` is uninhabited, by virtue of having been declared as an enum with no cases, which always produces uninhabited types in Rust. The current kernel code allows this UB to be triggered, for example by code like `Box::<core::convert::Infallible>::init(kernel::init::zeroed())`. Thus, remove the implementation of `Zeroable` for `Infallible`, thereby avoiding the unsoundness (potential for future UB).
Cc: stable@vger.kernel.org Fixes: 38cde0bd7b67 ("rust: init: add `Zeroable` trait and `init::zeroed` function") Closes: https://github.com/Rust-for-Linux/pinned-init/pull/13 Signed-off-by: Laine Taffin Altman Reviewed-by: Alice Ryhl Reviewed-by: Boqun Feng Reviewed-by: Benno Lossin Link: https://lore.kernel.org/r/CA160A4E-561E-4918-837E-3DCEBA74F808@me.com [ Reformatted the comment slightly. ] Signed-off-by: Miguel Ojeda --- rust/kernel/init.rs | 11 +++++++++-- 1 file changed, 9 insertions(+), 2 deletions(-) diff --git a/rust/kernel/init.rs b/rust/kernel/init.rs index 424257284d16..09004b56fb65 100644 --- a/rust/kernel/init.rs +++ b/rust/kernel/init.rs @@ -1292,8 +1292,15 @@ macro_rules! impl_zeroable { i8, i16, i32, i64, i128, isize, f32, f64, - // SAFETY: These are ZSTs, there is nothing to zero. - {} PhantomData, core::marker::PhantomPinned, Infallible, (), + // Note: do not add uninhabited types (such as `!` or `core::convert::Infallible`) to this list; + // creating an instance of an uninhabited type is immediate undefined behavior. For more on + // uninhabited/empty types, consult The Rustonomicon: + // . The Rust Reference + // also has information on undefined behavior: + // . + // + // SAFETY: These are inhabited ZSTs; there is nothing to zero and a valid value exists. + {} PhantomData, core::marker::PhantomPinned, (), // SAFETY: Type is allowed to take any value, including all zeros. {} MaybeUninit, From 3b449bfd2ff6c5d3ceecfcb18528ff8e1b4ac2fd Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Rafa=C5=82=20Mi=C5=82ecki?= Date: Tue, 13 Feb 2024 06:37:37 +0100 Subject: [PATCH 044/313] arm64: dts: mediatek: mt7986: drop invalid properties from ethsys MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Mediatek ethsys controller / syscon binding doesn't allow any subnodes so "#address-cells" and "#size-cells" are redundant (actually: disallowed). 
This fixes: arch/arm64/boot/dts/mediatek/mt7986a-bananapi-bpi-r3.dtb: syscon@15000000: '#address-cells', '#size-cells' do not match any of the regexes: 'pinctrl-[0-9]+' from schema $id: http://devicetree.org/schemas/clock/mediatek,ethsys.yaml# Fixes: 1f9986b258c2 ("arm64: dts: mediatek: add clock support for mt7986a") Cc: Sam Shih Signed-off-by: Rafał Miłecki Reviewed-by: AngeloGioacchino Del Regno Link: https://lore.kernel.org/r/20240213053739.14387-1-zajec5@gmail.com Signed-off-by: AngeloGioacchino Del Regno --- arch/arm64/boot/dts/mediatek/mt7986a.dtsi | 2 -- 1 file changed, 2 deletions(-) diff --git a/arch/arm64/boot/dts/mediatek/mt7986a.dtsi b/arch/arm64/boot/dts/mediatek/mt7986a.dtsi index b3f416b9a7a4..228e02954e85 100644 --- a/arch/arm64/boot/dts/mediatek/mt7986a.dtsi +++ b/arch/arm64/boot/dts/mediatek/mt7986a.dtsi @@ -492,8 +492,6 @@ ethsys: syscon@15000000 { compatible = "mediatek,mt7986-ethsys", "syscon"; reg = <0 0x15000000 0 0x1000>; - #address-cells = <1>; - #size-cells = <1>; #clock-cells = <1>; #reset-cells = <1>; }; From 9bd88afc94c3570289a0f1c696578b3e1f4e3169 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Rafa=C5=82=20Mi=C5=82ecki?= Date: Tue, 13 Feb 2024 06:37:38 +0100 Subject: [PATCH 045/313] arm64: dts: mediatek: mt7986: drop "#reset-cells" from Ethernet controller MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Ethernet block doesn't include or act as a reset controller. Documentation also doesn't document "#reset-cells" for it. 
This fixes: arch/arm64/boot/dts/mediatek/mt7986a-bananapi-bpi-r3.dtb: ethernet@15100000: Unevaluated properties are not allowed ('#reset-cells' was unexpected) from schema $id: http://devicetree.org/schemas/net/mediatek,net.yaml# Fixes: 082ff36bd5c0 ("arm64: dts: mediatek: mt7986: introduce ethernet nodes") Cc: Lorenzo Bianconi Signed-off-by: Rafał Miłecki Reviewed-by: AngeloGioacchino Del Regno Link: https://lore.kernel.org/r/20240213053739.14387-2-zajec5@gmail.com Signed-off-by: AngeloGioacchino Del Regno --- arch/arm64/boot/dts/mediatek/mt7986a.dtsi | 1 - 1 file changed, 1 deletion(-) diff --git a/arch/arm64/boot/dts/mediatek/mt7986a.dtsi b/arch/arm64/boot/dts/mediatek/mt7986a.dtsi index 228e02954e85..f3a2a89fada4 100644 --- a/arch/arm64/boot/dts/mediatek/mt7986a.dtsi +++ b/arch/arm64/boot/dts/mediatek/mt7986a.dtsi @@ -554,7 +554,6 @@ eth: ethernet@15100000 { <&topckgen CLK_TOP_SGM_325M_SEL>; assigned-clock-parents = <&apmixedsys CLK_APMIXED_NET2PLL>, <&apmixedsys CLK_APMIXED_SGMPLL>; - #reset-cells = <1>; #address-cells = <1>; #size-cells = <0>; mediatek,ethsys = <&ethsys>; From 970f8b01bd7719a22e577ba6c78e27f9ccf22783 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Rafa=C5=82=20Mi=C5=82ecki?= Date: Tue, 13 Feb 2024 06:37:39 +0100 Subject: [PATCH 046/313] arm64: dts: mediatek: mt7986: drop invalid thermal block clock MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Thermal block uses only two clocks. Its binding doesn't document or allow "adc_32k". Also Linux driver doesn't support it. It has been additionally verified by Angelo by his detailed research on MT7981 / MT7986 clocks (thanks!).
This fixes: arch/arm64/boot/dts/mediatek/mt7986a-bananapi-bpi-r3.dtb: thermal@1100c800: clocks: [[4, 27], [4, 44], [4, 45]] is too long from schema $id: http://devicetree.org/schemas/thermal/mediatek,thermal.yaml# arch/arm64/boot/dts/mediatek/mt7986a-bananapi-bpi-r3.dtb: thermal@1100c800: clock-names: ['therm', 'auxadc', 'adc_32k'] is too long from schema $id: http://devicetree.org/schemas/thermal/mediatek,thermal.yaml# Fixes: 0a9615d58d04 ("arm64: dts: mt7986: add thermal and efuse") Cc: Daniel Golle Link: https://lore.kernel.org/linux-devicetree/17d143aa-576e-4d67-a0ea-b79f3518b81c@collabora.com/ Signed-off-by: Rafał Miłecki Reviewed-by: AngeloGioacchino Del Regno Link: https://lore.kernel.org/r/20240213053739.14387-3-zajec5@gmail.com Signed-off-by: AngeloGioacchino Del Regno --- arch/arm64/boot/dts/mediatek/mt7986a.dtsi | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/arch/arm64/boot/dts/mediatek/mt7986a.dtsi b/arch/arm64/boot/dts/mediatek/mt7986a.dtsi index f3a2a89fada4..559990dcd1d1 100644 --- a/arch/arm64/boot/dts/mediatek/mt7986a.dtsi +++ b/arch/arm64/boot/dts/mediatek/mt7986a.dtsi @@ -332,9 +332,8 @@ thermal: thermal@1100c800 { reg = <0 0x1100c800 0 0x800>; interrupts = ; clocks = <&infracfg CLK_INFRA_THERM_CK>, - <&infracfg CLK_INFRA_ADC_26M_CK>, - <&infracfg CLK_INFRA_ADC_FRC_CK>; - clock-names = "therm", "auxadc", "adc_32k"; + <&infracfg CLK_INFRA_ADC_26M_CK>; + clock-names = "therm", "auxadc"; nvmem-cells = <&thermal_calibration>; nvmem-cell-names = "calibration-data"; #thermal-sensor-cells = <1>; From f8c65a5e4560781f2ea175d8f26cd75ac98e8d78 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Rafa=C5=82=20Mi=C5=82ecki?= Date: Tue, 13 Feb 2024 07:14:59 +0100 Subject: [PATCH 047/313] arm64: dts: mediatek: mt7986: prefix BPI-R3 cooling maps with "map-" MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This fixes: arch/arm64/boot/dts/mediatek/mt7986a-bananapi-bpi-r3.dtb: thermal-zones: 
cpu-thermal:cooling-maps: 'cpu-active-high', 'cpu-active-low', 'cpu-active-med' do not match any of the regexes: '^map[-a-zA-Z0-9]*$', 'pinctrl-[0-9]+' from schema $id: http://devicetree.org/schemas/thermal/thermal-zones.yaml# Fixes: c26f779a2295 ("arm64: dts: mt7986: add pwm-fan and cooling-maps to BPI-R3 dts") Cc: Daniel Golle Signed-off-by: Rafał Miłecki Reviewed-by: AngeloGioacchino Del Regno Link: https://lore.kernel.org/r/20240213061459.17917-1-zajec5@gmail.com Signed-off-by: AngeloGioacchino Del Regno --- arch/arm64/boot/dts/mediatek/mt7986a-bananapi-bpi-r3.dts | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/arch/arm64/boot/dts/mediatek/mt7986a-bananapi-bpi-r3.dts b/arch/arm64/boot/dts/mediatek/mt7986a-bananapi-bpi-r3.dts index e04b1c0c0ebb..ed79ad1ae871 100644 --- a/arch/arm64/boot/dts/mediatek/mt7986a-bananapi-bpi-r3.dts +++ b/arch/arm64/boot/dts/mediatek/mt7986a-bananapi-bpi-r3.dts @@ -146,19 +146,19 @@ sfp2: sfp-2 { &cpu_thermal { cooling-maps { - cpu-active-high { + map-cpu-active-high { /* active: set fan to cooling level 2 */ cooling-device = <&fan 2 2>; trip = <&cpu_trip_active_high>; }; - cpu-active-med { + map-cpu-active-med { /* active: set fan to cooling level 1 */ cooling-device = <&fan 1 1>; trip = <&cpu_trip_active_med>; }; - cpu-active-low { + map-cpu-active-low { /* active: set fan to cooling level 0 */ cooling-device = <&fan 0 0>; trip = <&cpu_trip_active_low>; From 3baac7291effb501c4d52df7019ebf52011e5772 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Rafa=C5=82=20Mi=C5=82ecki?= Date: Fri, 1 Mar 2024 08:47:41 +0100 Subject: [PATCH 048/313] arm64: dts: mediatek: mt2712: fix validation errors MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit 1. Fixup infracfg clock controller binding It also acts as reset controller so #reset-cells is required. 2. 
Use -pins suffix for pinctrl This fixes: arch/arm64/boot/dts/mediatek/mt2712-evb.dtb: syscon@10001000: '#reset-cells' is a required property from schema $id: http://devicetree.org/schemas/arm/mediatek/mediatek,infracfg.yaml# arch/arm64/boot/dts/mediatek/mt2712-evb.dtb: pinctrl@1000b000: 'eth_default', 'eth_sleep', 'usb0_iddig', 'usb1_iddig' do not match any of the regexes: 'pinctrl-[0-9]+', 'pins$' from schema $id: http://devicetree.org/schemas/pinctrl/mediatek,mt65xx-pinctrl.yaml# Signed-off-by: Rafał Miłecki Reviewed-by: AngeloGioacchino Del Regno Link: https://lore.kernel.org/r/20240301074741.8362-1-zajec5@gmail.com [Angelo: Added Fixes tags] Fixes: 5d4839709c8e ("arm64: dts: mt2712: Add clock controller device nodes") Fixes: 1724f4cc5133 ("arm64: dts: Add USB3 related nodes for MT2712") Signed-off-by: AngeloGioacchino Del Regno --- arch/arm64/boot/dts/mediatek/mt2712-evb.dts | 8 ++++---- arch/arm64/boot/dts/mediatek/mt2712e.dtsi | 3 ++- 2 files changed, 6 insertions(+), 5 deletions(-) diff --git a/arch/arm64/boot/dts/mediatek/mt2712-evb.dts b/arch/arm64/boot/dts/mediatek/mt2712-evb.dts index 0c38f7b51763..234e3b23d7a8 100644 --- a/arch/arm64/boot/dts/mediatek/mt2712-evb.dts +++ b/arch/arm64/boot/dts/mediatek/mt2712-evb.dts @@ -129,7 +129,7 @@ ethernet_phy0: ethernet-phy@5 { }; &pio { - eth_default: eth_default { + eth_default: eth-default-pins { tx_pins { pinmux = , , @@ -156,7 +156,7 @@ mdio_pins { }; }; - eth_sleep: eth_sleep { + eth_sleep: eth-sleep-pins { tx_pins { pinmux = , , @@ -182,14 +182,14 @@ mdio_pins { }; }; - usb0_id_pins_float: usb0_iddig { + usb0_id_pins_float: usb0-iddig-pins { pins_iddig { pinmux = ; bias-pull-up; }; }; - usb1_id_pins_float: usb1_iddig { + usb1_id_pins_float: usb1-iddig-pins { pins_iddig { pinmux = ; bias-pull-up; diff --git a/arch/arm64/boot/dts/mediatek/mt2712e.dtsi b/arch/arm64/boot/dts/mediatek/mt2712e.dtsi index 6d218caa198c..082672efba0a 100644 --- a/arch/arm64/boot/dts/mediatek/mt2712e.dtsi +++ 
b/arch/arm64/boot/dts/mediatek/mt2712e.dtsi @@ -249,10 +249,11 @@ topckgen: syscon@10000000 { #clock-cells = <1>; }; - infracfg: syscon@10001000 { + infracfg: clock-controller@10001000 { compatible = "mediatek,mt2712-infracfg", "syscon"; reg = <0 0x10001000 0 0x1000>; #clock-cells = <1>; + #reset-cells = <1>; }; pericfg: syscon@10003000 { From e4308bc22b9d46cf33165c9dfaeebcf29cd56f04 Mon Sep 17 00:00:00 2001 From: Mikhail Kobuk Date: Thu, 21 Mar 2024 19:47:30 +0300 Subject: [PATCH 049/313] phy: marvell: a3700-comphy: Fix out of bounds read There is an out of bounds read access of 'gbe_phy_init_fix[fix_idx].addr' every iteration after 'fix_idx' reaches 'ARRAY_SIZE(gbe_phy_init_fix)'. Make sure 'gbe_phy_init[addr]' is used when all elements of 'gbe_phy_init_fix' array are handled. Found by Linux Verification Center (linuxtesting.org) with SVACE. Fixes: 934337080c6c ("phy: marvell: phy-mvebu-a3700-comphy: Add native kernel implementation") Signed-off-by: Mikhail Kobuk Reviewed-by: Miquel Raynal Link: https://lore.kernel.org/r/20240321164734.49273-1-m.kobuk@ispras.ru Signed-off-by: Vinod Koul --- drivers/phy/marvell/phy-mvebu-a3700-comphy.c | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/drivers/phy/marvell/phy-mvebu-a3700-comphy.c b/drivers/phy/marvell/phy-mvebu-a3700-comphy.c index 41162d7228c9..68710ad1ad70 100644 --- a/drivers/phy/marvell/phy-mvebu-a3700-comphy.c +++ b/drivers/phy/marvell/phy-mvebu-a3700-comphy.c @@ -611,11 +611,12 @@ static void comphy_gbe_phy_init(struct mvebu_a3700_comphy_lane *lane, * comparison to 3.125 Gbps values. These register values are * stored in "gbe_phy_init_fix" array. 
*/ - if (!is_1gbps && gbe_phy_init_fix[fix_idx].addr == addr) { + if (!is_1gbps && + fix_idx < ARRAY_SIZE(gbe_phy_init_fix) && + gbe_phy_init_fix[fix_idx].addr == addr) { /* Use new value */ val = gbe_phy_init_fix[fix_idx].value; - if (fix_idx < ARRAY_SIZE(gbe_phy_init_fix)) - fix_idx++; + fix_idx++; } else { val = gbe_phy_init[addr]; } From 627207703b73615653eea5ab7a841d5b478d961e Mon Sep 17 00:00:00 2001 From: Mikhail Kobuk Date: Thu, 21 Mar 2024 19:47:31 +0300 Subject: [PATCH 050/313] phy: marvell: a3700-comphy: Fix hardcoded array size Replace hardcoded 'gbe_phy_init' array size by explicit one. Fixes: 934337080c6c ("phy: marvell: phy-mvebu-a3700-comphy: Add native kernel implementation") Signed-off-by: Mikhail Kobuk Link: https://lore.kernel.org/r/20240321164734.49273-2-m.kobuk@ispras.ru Signed-off-by: Vinod Koul --- drivers/phy/marvell/phy-mvebu-a3700-comphy.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/phy/marvell/phy-mvebu-a3700-comphy.c b/drivers/phy/marvell/phy-mvebu-a3700-comphy.c index 68710ad1ad70..1d1db1737422 100644 --- a/drivers/phy/marvell/phy-mvebu-a3700-comphy.c +++ b/drivers/phy/marvell/phy-mvebu-a3700-comphy.c @@ -603,7 +603,7 @@ static void comphy_gbe_phy_init(struct mvebu_a3700_comphy_lane *lane, u16 val; fix_idx = 0; - for (addr = 0; addr < 512; addr++) { + for (addr = 0; addr < ARRAY_SIZE(gbe_phy_init); addr++) { /* * All PHY register values are defined in full for 3.125Gbps * SERDES speed. The values required for 1.25 Gbps are almost From 3a161017f1de55cc48be81f6156004c151f32677 Mon Sep 17 00:00:00 2001 From: Marcel Ziswiler Date: Fri, 22 Mar 2024 14:06:32 +0100 Subject: [PATCH 051/313] phy: freescale: imx8m-pcie: fix pcie link-up instability Leaving AUX_PLL_REFCLK_SEL at its reset default of AUX_IN (PLL clock) proves to be more stable on the i.MX 8M Mini. 
Fixes: 1aa97b002258 ("phy: freescale: pcie: Initialize the imx8 pcie standalone phy driver") Signed-off-by: Marcel Ziswiler Reviewed-by: Richard Zhu Link: https://lore.kernel.org/r/20240322130646.1016630-2-marcel@ziswiler.com Signed-off-by: Vinod Koul --- drivers/phy/freescale/phy-fsl-imx8m-pcie.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/drivers/phy/freescale/phy-fsl-imx8m-pcie.c b/drivers/phy/freescale/phy-fsl-imx8m-pcie.c index b700f52b7b67..11fcb1867118 100644 --- a/drivers/phy/freescale/phy-fsl-imx8m-pcie.c +++ b/drivers/phy/freescale/phy-fsl-imx8m-pcie.c @@ -110,8 +110,10 @@ static int imx8_pcie_phy_power_on(struct phy *phy) /* Source clock from SoC internal PLL */ writel(ANA_PLL_CLK_OUT_TO_EXT_IO_SEL, imx8_phy->base + IMX8MM_PCIE_PHY_CMN_REG062); - writel(AUX_PLL_REFCLK_SEL_SYS_PLL, - imx8_phy->base + IMX8MM_PCIE_PHY_CMN_REG063); + if (imx8_phy->drvdata->variant != IMX8MM) { + writel(AUX_PLL_REFCLK_SEL_SYS_PLL, + imx8_phy->base + IMX8MM_PCIE_PHY_CMN_REG063); + } val = ANA_AUX_RX_TX_SEL_TX | ANA_AUX_TX_TERM; writel(val | ANA_AUX_RX_TERM_GND_EN, imx8_phy->base + IMX8MM_PCIE_PHY_CMN_REG064); From f8020dfb311d2b6cf657668792aaa5fa8863a7dd Mon Sep 17 00:00:00 2001 From: Michal Tomek Date: Thu, 4 Apr 2024 19:11:26 +0200 Subject: [PATCH 052/313] phy: rockchip-snps-pcie3: fix bifurcation on rk3588 So far all RK3588 boards use fully aggregated PCIe. CM3588 is one of the few boards using this feature and apparently it is broken. 
The PHY offers the following mapping options: port 0 lane 0 - always mapped to controller 0 (4L) port 0 lane 1 - to controller 0 or 2 (1L0) port 1 lane 0 - to controller 0 or 1 (2L) port 1 lane 1 - to controller 0, 1 or 3 (1L1) The data-lanes DT property maps these as follows: 0 = no controller (unsupported by the HW) 1 = 4L 2 = 2L 3 = 1L0 4 = 1L1 That allows the following configurations with first column being the mainline data-lane mapping, second column being the downstream name, third column being PCIE3PHY_GRF_CMN_CON0 and PHP_GRF_PCIESEL register values and final column being the user visible lane setup: <1 1 1 1> = AGGREG = [4 0] = x4 (aggregation) <1 1 2 2> = NANBNB = [0 0] = x2 x2 (no bif.) <1 3 2 2> = NANBBI = [1 1] = x2 x1x1 (bif. of port 0) <1 1 2 4> = NABINB = [2 2] = x1x1 x2 (bif. of port 1) <1 3 2 4> = NABIBI = [3 3] = x1x1 x1x1 (bif. of both ports) The driver currently does not program PHP_GRF_PCIESEL correctly, which is fixed by this patch. As a side-effect the new logic is much simpler than the old logic. 
Fixes: 2e9bffc4f713 ("phy: rockchip: Support PCIe v3") Signed-off-by: Michal Tomek Signed-off-by: Sebastian Reichel Acked-by: Heiko Stuebner Link: https://lore.kernel.org/r/20240404-rk3588-pcie-bifurcation-fixes-v1-1-9907136eeafd@kernel.org Signed-off-by: Vinod Koul --- .../phy/rockchip/phy-rockchip-snps-pcie3.c | 24 +++++++------------ 1 file changed, 8 insertions(+), 16 deletions(-) diff --git a/drivers/phy/rockchip/phy-rockchip-snps-pcie3.c b/drivers/phy/rockchip/phy-rockchip-snps-pcie3.c index 121e5961ce11..d5bcc9c42b28 100644 --- a/drivers/phy/rockchip/phy-rockchip-snps-pcie3.c +++ b/drivers/phy/rockchip/phy-rockchip-snps-pcie3.c @@ -132,7 +132,7 @@ static const struct rockchip_p3phy_ops rk3568_ops = { static int rockchip_p3phy_rk3588_init(struct rockchip_p3phy_priv *priv) { u32 reg = 0; - u8 mode = 0; + u8 mode = RK3588_LANE_AGGREGATION; /* default */ int ret; /* Deassert PCIe PMA output clamp mode */ @@ -140,28 +140,20 @@ static int rockchip_p3phy_rk3588_init(struct rockchip_p3phy_priv *priv) /* Set bifurcation if needed */ for (int i = 0; i < priv->num_lanes; i++) { - if (!priv->lanes[i]) - mode |= (BIT(i) << 3); - if (priv->lanes[i] > 1) - mode |= (BIT(i) >> 1); - } - - if (!mode) - reg = RK3588_LANE_AGGREGATION; - else { - if (mode & (BIT(0) | BIT(1))) - reg |= RK3588_BIFURCATION_LANE_0_1; - - if (mode & (BIT(2) | BIT(3))) - reg |= RK3588_BIFURCATION_LANE_2_3; + mode &= ~RK3588_LANE_AGGREGATION; + if (priv->lanes[i] == 3) + mode |= RK3588_BIFURCATION_LANE_0_1; + if (priv->lanes[i] == 4) + mode |= RK3588_BIFURCATION_LANE_2_3; } + reg = mode; regmap_write(priv->phy_grf, RK3588_PCIE3PHY_GRF_CMN_CON0, (0x7<<16) | reg); /* Set pcie1ln_sel in PHP_GRF_PCIESEL_CON */ if (!IS_ERR(priv->pipe_grf)) { - reg = (mode & (BIT(6) | BIT(7))) >> 6; + reg = mode & 3; if (reg) regmap_write(priv->pipe_grf, PHP_GRF_PCIESEL_CON, (reg << 16) | reg); From 55491a5fa163bf15158f34f3650b3985f25622b9 Mon Sep 17 00:00:00 2001 From: Sebastian Reichel Date: Thu, 4 Apr 2024 19:11:27 +0200 
Subject: [PATCH 053/313] phy: rockchip-snps-pcie3: fix clearing PHP_GRF_PCIESEL_CON bits Currently the PCIe v3 PHY driver only sets the pcie1ln_sel bits, but does not clear them because of an incorrect write mask. This fixes up the issue by using a newly introduced constant for the write mask. While at it also introduces a proper GENMASK based constant for the PCIE30_PHY_MODE. Fixes: 2e9bffc4f713 ("phy: rockchip: Support PCIe v3") Signed-off-by: Sebastian Reichel Reviewed-by: Heiko Stuebner Link: https://lore.kernel.org/r/20240404-rk3588-pcie-bifurcation-fixes-v1-2-9907136eeafd@kernel.org Signed-off-by: Vinod Koul --- drivers/phy/rockchip/phy-rockchip-snps-pcie3.c | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/drivers/phy/rockchip/phy-rockchip-snps-pcie3.c b/drivers/phy/rockchip/phy-rockchip-snps-pcie3.c index d5bcc9c42b28..9857ee45b89e 100644 --- a/drivers/phy/rockchip/phy-rockchip-snps-pcie3.c +++ b/drivers/phy/rockchip/phy-rockchip-snps-pcie3.c @@ -40,6 +40,8 @@ #define RK3588_BIFURCATION_LANE_0_1 BIT(0) #define RK3588_BIFURCATION_LANE_2_3 BIT(1) #define RK3588_LANE_AGGREGATION BIT(2) +#define RK3588_PCIE1LN_SEL_EN (GENMASK(1, 0) << 16) +#define RK3588_PCIE30_PHY_MODE_EN (GENMASK(2, 0) << 16) struct rockchip_p3phy_ops; @@ -149,14 +151,15 @@ static int rockchip_p3phy_rk3588_init(struct rockchip_p3phy_priv *priv) } reg = mode; - regmap_write(priv->phy_grf, RK3588_PCIE3PHY_GRF_CMN_CON0, (0x7<<16) | reg); + regmap_write(priv->phy_grf, RK3588_PCIE3PHY_GRF_CMN_CON0, + RK3588_PCIE30_PHY_MODE_EN | reg); /* Set pcie1ln_sel in PHP_GRF_PCIESEL_CON */ if (!IS_ERR(priv->pipe_grf)) { - reg = mode & 3; + reg = mode & (RK3588_BIFURCATION_LANE_0_1 | RK3588_BIFURCATION_LANE_2_3); if (reg) regmap_write(priv->pipe_grf, PHP_GRF_PCIESEL_CON, - (reg << 16) | reg); + RK3588_PCIE1LN_SEL_EN | reg); } reset_control_deassert(priv->p30phy); From d16d4002fea69b6609b852dd8db1f5844c02fbe4 Mon Sep 17 00:00:00 2001 From: Sebastian Reichel Date: Thu, 4 Apr 2024 
19:11:28 +0200 Subject: [PATCH 054/313] phy: rockchip: naneng-combphy: Fix mux on rk3588 The pcie1l0_sel and pcie1l1_sel bits in PCIESEL_CON configure the mux for PCIe1L0 and PCIe1L1 to either the PIPE Combo PHYs or the PCIe3 PHY. Thus this configuration interferes with the data-lanes configuration done by the PCIe3 PHY. RK3588 has three Combo PHYs. The first one has a dedicated PCIe controller and is not affected by this. For the other two Combo PHYs, there is one mux for each of them. pcie1l0_sel selects if PCIe 1L0 is muxed to Combo PHY 1 when bit is set to 0 or to the PCIe3 PHY when bit is set to 1. pcie1l1_sel selects if PCIe 1L1 is muxed to Combo PHY 2 when bit is set to 0 or to the PCIe3 PHY when bit is set to 1. Currently the code always muxes 1L0 and 1L1 to the Combo PHYs once one of them is being used in PCIe mode. This is obviously wrong when at least one of the ports should be muxed to the PCIe3 PHY. Fix this by introducing Combo PHY identification and then only setting up the required bit.
Fixes: a03c44277253 ("phy: rockchip: Add naneng combo phy support for RK3588") Reported-by: Michal Tomek Signed-off-by: Sebastian Reichel Reviewed-by: Heiko Stuebner Link: https://lore.kernel.org/r/20240404-rk3588-pcie-bifurcation-fixes-v1-3-9907136eeafd@kernel.org Signed-off-by: Vinod Koul --- .../rockchip/phy-rockchip-naneng-combphy.c | 36 +++++++++++++++++-- 1 file changed, 33 insertions(+), 3 deletions(-) diff --git a/drivers/phy/rockchip/phy-rockchip-naneng-combphy.c b/drivers/phy/rockchip/phy-rockchip-naneng-combphy.c index 76b9cf417591..bf74e429ff46 100644 --- a/drivers/phy/rockchip/phy-rockchip-naneng-combphy.c +++ b/drivers/phy/rockchip/phy-rockchip-naneng-combphy.c @@ -125,12 +125,15 @@ struct rockchip_combphy_grfcfg { }; struct rockchip_combphy_cfg { + unsigned int num_phys; + unsigned int phy_ids[3]; const struct rockchip_combphy_grfcfg *grfcfg; int (*combphy_cfg)(struct rockchip_combphy_priv *priv); }; struct rockchip_combphy_priv { u8 type; + int id; void __iomem *mmio; int num_clks; struct clk_bulk_data *clks; @@ -320,7 +323,7 @@ static int rockchip_combphy_probe(struct platform_device *pdev) struct rockchip_combphy_priv *priv; const struct rockchip_combphy_cfg *phy_cfg; struct resource *res; - int ret; + int ret, id; phy_cfg = of_device_get_match_data(dev); if (!phy_cfg) { @@ -338,6 +341,15 @@ static int rockchip_combphy_probe(struct platform_device *pdev) return ret; } + /* find the phy-id from the io address */ + priv->id = -ENODEV; + for (id = 0; id < phy_cfg->num_phys; id++) { + if (res->start == phy_cfg->phy_ids[id]) { + priv->id = id; + break; + } + } + priv->dev = dev; priv->type = PHY_NONE; priv->cfg = phy_cfg; @@ -562,6 +574,12 @@ static const struct rockchip_combphy_grfcfg rk3568_combphy_grfcfgs = { }; static const struct rockchip_combphy_cfg rk3568_combphy_cfgs = { + .num_phys = 3, + .phy_ids = { + 0xfe820000, + 0xfe830000, + 0xfe840000, + }, .grfcfg = &rk3568_combphy_grfcfgs, .combphy_cfg = rk3568_combphy_cfg, }; @@ -578,8 +596,14 @@ 
static int rk3588_combphy_cfg(struct rockchip_combphy_priv *priv) rockchip_combphy_param_write(priv->phy_grf, &cfg->con1_for_pcie, true); rockchip_combphy_param_write(priv->phy_grf, &cfg->con2_for_pcie, true); rockchip_combphy_param_write(priv->phy_grf, &cfg->con3_for_pcie, true); - rockchip_combphy_param_write(priv->pipe_grf, &cfg->pipe_pcie1l0_sel, true); - rockchip_combphy_param_write(priv->pipe_grf, &cfg->pipe_pcie1l1_sel, true); + switch (priv->id) { + case 1: + rockchip_combphy_param_write(priv->pipe_grf, &cfg->pipe_pcie1l0_sel, true); + break; + case 2: + rockchip_combphy_param_write(priv->pipe_grf, &cfg->pipe_pcie1l1_sel, true); + break; + } break; case PHY_TYPE_USB3: /* Set SSC downward spread spectrum */ @@ -736,6 +760,12 @@ static const struct rockchip_combphy_grfcfg rk3588_combphy_grfcfgs = { }; static const struct rockchip_combphy_cfg rk3588_combphy_cfgs = { + .num_phys = 3, + .phy_ids = { + 0xfee00000, + 0xfee10000, + 0xfee20000, + }, .grfcfg = &rk3588_combphy_grfcfgs, .combphy_cfg = rk3588_combphy_cfg, }; From 5abed58a8bde6d349bde364a160510b5bb904d18 Mon Sep 17 00:00:00 2001 From: Stephen Boyd Date: Thu, 4 Apr 2024 16:43:44 -0700 Subject: [PATCH 055/313] phy: qcom: qmp-combo: Fix VCO div offset on v3 Commit ec17373aebd0 ("phy: qcom: qmp-combo: extract common function to setup clocks") changed the offset that is used to write to DP_PHY_VCO_DIV from QSERDES_V3_DP_PHY_VCO_DIV to QSERDES_V4_DP_PHY_VCO_DIV. Unfortunately, this offset is different between v3 and v4 phys: #define QSERDES_V3_DP_PHY_VCO_DIV 0x064 #define QSERDES_V4_DP_PHY_VCO_DIV 0x070 meaning that we write the wrong register on v3 phys now. Add another generic register to 'regs' and use it here instead of a version specific define to fix this. This was discovered after Abhinav looked over register dumps with me from sc7180 Trogdor devices that started failing to light up the external display with v6.6 based kernels. 
It turns out that some monitors are very specific about their link clk frequency and if the default power on reset value is still there the monitor will show a blank screen or a garbled display. Other monitors are perfectly happy to get a bad clock signal. Cc: Douglas Anderson Cc: Abhinav Kumar Cc: Dmitry Baryshkov Fixes: ec17373aebd0 ("phy: qcom: qmp-combo: extract common function to setup clocks") Signed-off-by: Stephen Boyd Reviewed-by: Abhinav Kumar Reviewed-by: Dmitry Baryshkov Link: https://lore.kernel.org/r/20240404234345.1446300-1-swboyd@chromium.org Signed-off-by: Vinod Koul --- drivers/phy/qualcomm/phy-qcom-qmp-combo.c | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/drivers/phy/qualcomm/phy-qcom-qmp-combo.c b/drivers/phy/qualcomm/phy-qcom-qmp-combo.c index 7d585a4bbbba..3b19d8ebf467 100644 --- a/drivers/phy/qualcomm/phy-qcom-qmp-combo.c +++ b/drivers/phy/qualcomm/phy-qcom-qmp-combo.c @@ -77,6 +77,7 @@ enum qphy_reg_layout { QPHY_COM_BIAS_EN_CLKBUFLR_EN, QPHY_DP_PHY_STATUS, + QPHY_DP_PHY_VCO_DIV, QPHY_TX_TX_POL_INV, QPHY_TX_TX_DRV_LVL, @@ -102,6 +103,7 @@ static const unsigned int qmp_v3_usb3phy_regs_layout[QPHY_LAYOUT_SIZE] = { [QPHY_COM_BIAS_EN_CLKBUFLR_EN] = QSERDES_V3_COM_BIAS_EN_CLKBUFLR_EN, [QPHY_DP_PHY_STATUS] = QSERDES_V3_DP_PHY_STATUS, + [QPHY_DP_PHY_VCO_DIV] = QSERDES_V3_DP_PHY_VCO_DIV, [QPHY_TX_TX_POL_INV] = QSERDES_V3_TX_TX_POL_INV, [QPHY_TX_TX_DRV_LVL] = QSERDES_V3_TX_TX_DRV_LVL, @@ -126,6 +128,7 @@ static const unsigned int qmp_v45_usb3phy_regs_layout[QPHY_LAYOUT_SIZE] = { [QPHY_COM_BIAS_EN_CLKBUFLR_EN] = QSERDES_V4_COM_BIAS_EN_CLKBUFLR_EN, [QPHY_DP_PHY_STATUS] = QSERDES_V4_DP_PHY_STATUS, + [QPHY_DP_PHY_VCO_DIV] = QSERDES_V4_DP_PHY_VCO_DIV, [QPHY_TX_TX_POL_INV] = QSERDES_V4_TX_TX_POL_INV, [QPHY_TX_TX_DRV_LVL] = QSERDES_V4_TX_TX_DRV_LVL, @@ -2162,6 +2165,7 @@ static int qmp_combo_configure_dp_clocks(struct qmp_combo *qmp) const struct phy_configure_opts_dp *dp_opts = &qmp->dp_opts; u32 phy_vco_div; unsigned long 
pixel_freq; + const struct qmp_phy_cfg *cfg = qmp->cfg; switch (dp_opts->link_rate) { case 1620: @@ -2184,7 +2188,7 @@ static int qmp_combo_configure_dp_clocks(struct qmp_combo *qmp) /* Other link rates aren't supported */ return -EINVAL; } - writel(phy_vco_div, qmp->dp_dp_phy + QSERDES_V4_DP_PHY_VCO_DIV); + writel(phy_vco_div, qmp->dp_dp_phy + cfg->regs[QPHY_DP_PHY_VCO_DIV]); clk_set_rate(qmp->dp_link_hw.clk, dp_opts->link_rate * 100000); clk_set_rate(qmp->dp_pixel_hw.clk, pixel_freq); From ee13e1f3c72b9464a4d73017c060ab503eed653a Mon Sep 17 00:00:00 2001 From: Stephen Boyd Date: Thu, 4 Apr 2024 17:01:03 -0700 Subject: [PATCH 056/313] phy: qcom: qmp-combo: Fix register base for QSERDES_DP_PHY_MODE The register base that was used to write to the QSERDES_DP_PHY_MODE register was 'dp_dp_phy' before commit 815891eee668 ("phy: qcom-qmp-combo: Introduce orientation variable"). There isn't any explanation in the commit why this is changed, so I suspect it was an oversight or happened while being extracted from some other series. Oddly the value being 0x4c or 0x5c doesn't seem to matter for me, so I suspect this is dead code, but that can be fixed in another patch. It's not good to write to the wrong register space, and maybe some other version of this phy relies on this. 
Cc: Douglas Anderson Cc: Abhinav Kumar Cc: Dmitry Baryshkov Cc: Neil Armstrong Cc: Abel Vesa Cc: Steev Klimaszewski Cc: Johan Hovold Cc: Bjorn Andersson Cc: stable@vger.kernel.org # 6.5 Fixes: 815891eee668 ("phy: qcom-qmp-combo: Introduce orientation variable") Signed-off-by: Stephen Boyd Reviewed-by: Abhinav Kumar Reviewed-by: Bjorn Andersson Reviewed-by: Dmitry Baryshkov Reviewed-by: Johan Hovold Link: https://lore.kernel.org/r/20240405000111.1450598-1-swboyd@chromium.org Signed-off-by: Vinod Koul --- drivers/phy/qualcomm/phy-qcom-qmp-combo.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/phy/qualcomm/phy-qcom-qmp-combo.c b/drivers/phy/qualcomm/phy-qcom-qmp-combo.c index 3b19d8ebf467..2a6f70b3e25f 100644 --- a/drivers/phy/qualcomm/phy-qcom-qmp-combo.c +++ b/drivers/phy/qualcomm/phy-qcom-qmp-combo.c @@ -2153,9 +2153,9 @@ static bool qmp_combo_configure_dp_mode(struct qmp_combo *qmp) writel(val, qmp->dp_dp_phy + QSERDES_DP_PHY_PD_CTL); if (reverse) - writel(0x4c, qmp->pcs + QSERDES_DP_PHY_MODE); + writel(0x4c, qmp->dp_dp_phy + QSERDES_DP_PHY_MODE); else - writel(0x5c, qmp->pcs + QSERDES_DP_PHY_MODE); + writel(0x5c, qmp->dp_dp_phy + QSERDES_DP_PHY_MODE); return reverse; } From 47b3e2f3914ae5e8d9025d65ae5cffcbb54bc9c3 Mon Sep 17 00:00:00 2001 From: Gabor Juhos Date: Sat, 6 Apr 2024 15:37:09 +0200 Subject: [PATCH 057/313] phy: qcom: m31: match requested regulator name with dt schema According to the 'qcom,ipq5332-usb-hsphy.yaml' schema, the 5V supply regulator must be defined via the 'vdd-supply' property. The driver however requests for the 'vdda-phy' regulator which results in the following message when the driver is probed on a IPQ5018 based board with a device tree matching to the schema: qcom-m31usb-phy 5b000.phy: supply vdda-phy not found, using dummy regulator qcom-m31usb-phy 5b000.phy: Registered M31 USB phy This means that the regulator specified in the device tree never gets enabled. 
Change the driver to use the 'vdd' name for the regulator as per defined in the schema in order to ensure that the corresponding regulator gets enabled. Fixes: 08e49af50701 ("phy: qcom: Introduce M31 USB PHY driver") Reviewed-by: Varadarajan Narayanan Signed-off-by: Gabor Juhos Reviewed-by: Dmitry Baryshkov Link: https://lore.kernel.org/r/20240406-phy-qcom-m31-regulator-fix-v2-1-c8e9795bc071@gmail.com Signed-off-by: Vinod Koul --- drivers/phy/qualcomm/phy-qcom-m31.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/phy/qualcomm/phy-qcom-m31.c b/drivers/phy/qualcomm/phy-qcom-m31.c index 03fb0d4b75d7..20d4c020a83c 100644 --- a/drivers/phy/qualcomm/phy-qcom-m31.c +++ b/drivers/phy/qualcomm/phy-qcom-m31.c @@ -297,7 +297,7 @@ static int m31usb_phy_probe(struct platform_device *pdev) return dev_err_probe(dev, PTR_ERR(qphy->phy), "failed to create phy\n"); - qphy->vreg = devm_regulator_get(dev, "vdda-phy"); + qphy->vreg = devm_regulator_get(dev, "vdd"); if (IS_ERR(qphy->vreg)) return dev_err_probe(dev, PTR_ERR(qphy->vreg), "failed to get vreg\n"); From d5638de827cff0fce77007e426ec0ffdedf68a44 Mon Sep 17 00:00:00 2001 From: Rex Zhang Date: Thu, 4 Apr 2024 15:39:49 -0700 Subject: [PATCH 058/313] dmaengine: idxd: Convert spinlock to mutex to lock evl workqueue drain_workqueue() cannot be called safely in a spinlocked context due to possible task rescheduling. In the multi-task scenario, calling queue_work() while drain_workqueue() will lead to a Call Trace as pushing a work on a draining workqueue is not permitted in spinlocked context. Call Trace: ? __warn+0x7d/0x140 ? __queue_work+0x2b2/0x440 ? report_bug+0x1f8/0x200 ? handle_bug+0x3c/0x70 ? exc_invalid_op+0x18/0x70 ? asm_exc_invalid_op+0x1a/0x20 ? __queue_work+0x2b2/0x440 queue_work_on+0x28/0x30 idxd_misc_thread+0x303/0x5a0 [idxd] ? __schedule+0x369/0xb40 ? __pfx_irq_thread_fn+0x10/0x10 ? irq_thread+0xbc/0x1b0 irq_thread_fn+0x21/0x70 irq_thread+0x102/0x1b0 ? preempt_count_add+0x74/0xa0 ? 
__pfx_irq_thread_dtor+0x10/0x10 ? __pfx_irq_thread+0x10/0x10 kthread+0x103/0x140 ? __pfx_kthread+0x10/0x10 ret_from_fork+0x31/0x50 ? __pfx_kthread+0x10/0x10 ret_from_fork_asm+0x1b/0x30 The current implementation uses a spinlock to protect event log workqueue and will lead to the Call Trace due to potential task rescheduling. To address the locking issue, convert the spinlock to mutex, allowing the drain_workqueue() to be called in a safe mutex-locked context. This change ensures proper synchronization when accessing the event log workqueue, preventing potential Call Trace and improving the overall robustness of the code. Fixes: c40bd7d9737b ("dmaengine: idxd: process user page faults for completion record") Signed-off-by: Rex Zhang Reviewed-by: Dave Jiang Reviewed-by: Fenghua Yu Reviewed-by: Lijun Pan Link: https://lore.kernel.org/r/20240404223949.2885604-1-fenghua.yu@intel.com Signed-off-by: Vinod Koul --- drivers/dma/idxd/cdev.c | 5 ++--- drivers/dma/idxd/debugfs.c | 4 ++-- drivers/dma/idxd/device.c | 8 ++++---- drivers/dma/idxd/idxd.h | 2 +- drivers/dma/idxd/init.c | 2 +- drivers/dma/idxd/irq.c | 4 ++-- 6 files changed, 12 insertions(+), 13 deletions(-) diff --git a/drivers/dma/idxd/cdev.c b/drivers/dma/idxd/cdev.c index 8078ab9acfbc..c095a2c8f659 100644 --- a/drivers/dma/idxd/cdev.c +++ b/drivers/dma/idxd/cdev.c @@ -342,7 +342,7 @@ static void idxd_cdev_evl_drain_pasid(struct idxd_wq *wq, u32 pasid) if (!evl) return; - spin_lock(&evl->lock); + mutex_lock(&evl->lock); status.bits = ioread64(idxd->reg_base + IDXD_EVLSTATUS_OFFSET); t = status.tail; h = status.head; @@ -354,9 +354,8 @@ static void idxd_cdev_evl_drain_pasid(struct idxd_wq *wq, u32 pasid) set_bit(h, evl->bmap); h = (h + 1) % size; } - spin_unlock(&evl->lock); - drain_workqueue(wq->wq); + mutex_unlock(&evl->lock); } static int idxd_cdev_release(struct inode *node, struct file *filep) diff --git a/drivers/dma/idxd/debugfs.c b/drivers/dma/idxd/debugfs.c index f3f25ee676f3..ad4245cb301d 100644 --- 
a/drivers/dma/idxd/debugfs.c +++ b/drivers/dma/idxd/debugfs.c @@ -66,7 +66,7 @@ static int debugfs_evl_show(struct seq_file *s, void *d) if (!evl || !evl->log) return 0; - spin_lock(&evl->lock); + mutex_lock(&evl->lock); evl_status.bits = ioread64(idxd->reg_base + IDXD_EVLSTATUS_OFFSET); t = evl_status.tail; @@ -87,7 +87,7 @@ static int debugfs_evl_show(struct seq_file *s, void *d) dump_event_entry(idxd, s, i, &count, processed); } - spin_unlock(&evl->lock); + mutex_unlock(&evl->lock); return 0; } diff --git a/drivers/dma/idxd/device.c b/drivers/dma/idxd/device.c index ecfdf4a8f1f8..c41ef195eeb9 100644 --- a/drivers/dma/idxd/device.c +++ b/drivers/dma/idxd/device.c @@ -775,7 +775,7 @@ static int idxd_device_evl_setup(struct idxd_device *idxd) goto err_alloc; } - spin_lock(&evl->lock); + mutex_lock(&evl->lock); evl->log = addr; evl->dma = dma_addr; evl->log_size = size; @@ -796,7 +796,7 @@ static int idxd_device_evl_setup(struct idxd_device *idxd) gencfg.evl_en = 1; iowrite32(gencfg.bits, idxd->reg_base + IDXD_GENCFG_OFFSET); - spin_unlock(&evl->lock); + mutex_unlock(&evl->lock); return 0; err_alloc: @@ -819,7 +819,7 @@ static void idxd_device_evl_free(struct idxd_device *idxd) if (!gencfg.evl_en) return; - spin_lock(&evl->lock); + mutex_lock(&evl->lock); gencfg.evl_en = 0; iowrite32(gencfg.bits, idxd->reg_base + IDXD_GENCFG_OFFSET); @@ -836,7 +836,7 @@ static void idxd_device_evl_free(struct idxd_device *idxd) evl_dma = evl->dma; evl->log = NULL; evl->size = IDXD_EVL_SIZE_MIN; - spin_unlock(&evl->lock); + mutex_unlock(&evl->lock); dma_free_coherent(dev, evl_log_size, evl_log, evl_dma); } diff --git a/drivers/dma/idxd/idxd.h b/drivers/dma/idxd/idxd.h index a4099a1e2340..7b98944135eb 100644 --- a/drivers/dma/idxd/idxd.h +++ b/drivers/dma/idxd/idxd.h @@ -293,7 +293,7 @@ struct idxd_driver_data { struct idxd_evl { /* Lock to protect event log access. 
*/ - spinlock_t lock; + struct mutex lock; void *log; dma_addr_t dma; /* Total size of event log = number of entries * entry size. */ diff --git a/drivers/dma/idxd/init.c b/drivers/dma/idxd/init.c index 4954adc6bb60..264c4e47d7cc 100644 --- a/drivers/dma/idxd/init.c +++ b/drivers/dma/idxd/init.c @@ -354,7 +354,7 @@ static int idxd_init_evl(struct idxd_device *idxd) if (!evl) return -ENOMEM; - spin_lock_init(&evl->lock); + mutex_init(&evl->lock); evl->size = IDXD_EVL_SIZE_MIN; idxd_name = dev_name(idxd_confdev(idxd)); diff --git a/drivers/dma/idxd/irq.c b/drivers/dma/idxd/irq.c index 348aa21389a9..8dc029c86551 100644 --- a/drivers/dma/idxd/irq.c +++ b/drivers/dma/idxd/irq.c @@ -363,7 +363,7 @@ static void process_evl_entries(struct idxd_device *idxd) evl_status.bits = 0; evl_status.int_pending = 1; - spin_lock(&evl->lock); + mutex_lock(&evl->lock); /* Clear interrupt pending bit */ iowrite32(evl_status.bits_upper32, idxd->reg_base + IDXD_EVLSTATUS_OFFSET + sizeof(u32)); @@ -380,7 +380,7 @@ static void process_evl_entries(struct idxd_device *idxd) evl_status.head = h; iowrite32(evl_status.bits_lower32, idxd->reg_base + IDXD_EVLSTATUS_OFFSET); - spin_unlock(&evl->lock); + mutex_unlock(&evl->lock); } irqreturn_t idxd_misc_thread(int vec, void *data) From 244296cc3a155199a8b080d19e645d7d49081a38 Mon Sep 17 00:00:00 2001 From: Sean Anderson Date: Fri, 8 Mar 2024 16:00:32 -0500 Subject: [PATCH 059/313] dma: xilinx_dpdma: Fix locking There are several places where either chan->lock or chan->vchan.lock was not held. Add appropriate locking. 
This fixes lockdep warnings like [ 31.077578] ------------[ cut here ]------------ [ 31.077831] WARNING: CPU: 2 PID: 40 at drivers/dma/xilinx/xilinx_dpdma.c:834 xilinx_dpdma_chan_queue_transfer+0x274/0x5e0 [ 31.077953] Modules linked in: [ 31.078019] CPU: 2 PID: 40 Comm: kworker/u12:1 Not tainted 6.6.20+ #98 [ 31.078102] Hardware name: xlnx,zynqmp (DT) [ 31.078169] Workqueue: events_unbound deferred_probe_work_func [ 31.078272] pstate: 600000c5 (nZCv daIF -PAN -UAO -TCO -DIT -SSBS BTYPE=--) [ 31.078377] pc : xilinx_dpdma_chan_queue_transfer+0x274/0x5e0 [ 31.078473] lr : xilinx_dpdma_chan_queue_transfer+0x270/0x5e0 [ 31.078550] sp : ffffffc083bb2e10 [ 31.078590] x29: ffffffc083bb2e10 x28: 0000000000000000 x27: ffffff880165a168 [ 31.078754] x26: ffffff880164e920 x25: ffffff880164eab8 x24: ffffff880164d480 [ 31.078920] x23: ffffff880165a148 x22: ffffff880164e988 x21: 0000000000000000 [ 31.079132] x20: ffffffc082aa3000 x19: ffffff880164e880 x18: 0000000000000000 [ 31.079295] x17: 0000000000000000 x16: 0000000000000000 x15: 0000000000000000 [ 31.079453] x14: 0000000000000000 x13: ffffff8802263dc0 x12: 0000000000000001 [ 31.079613] x11: 0001ffc083bb2e34 x10: 0001ff880164e98f x9 : 0001ffc082aa3def [ 31.079824] x8 : 0001ffc082aa3dec x7 : 0000000000000000 x6 : 0000000000000516 [ 31.079982] x5 : ffffffc7f8d43000 x4 : ffffff88003c9c40 x3 : ffffffffffffffff [ 31.080147] x2 : ffffffc7f8d43000 x1 : 00000000000000c0 x0 : 0000000000000000 [ 31.080307] Call trace: [ 31.080340] xilinx_dpdma_chan_queue_transfer+0x274/0x5e0 [ 31.080518] xilinx_dpdma_issue_pending+0x11c/0x120 [ 31.080595] zynqmp_disp_layer_update+0x180/0x3ac [ 31.080712] zynqmp_dpsub_plane_atomic_update+0x11c/0x21c [ 31.080825] drm_atomic_helper_commit_planes+0x20c/0x684 [ 31.080951] drm_atomic_helper_commit_tail+0x5c/0xb0 [ 31.081139] commit_tail+0x234/0x294 [ 31.081246] drm_atomic_helper_commit+0x1f8/0x210 [ 31.081363] drm_atomic_commit+0x100/0x140 [ 31.081477] drm_client_modeset_commit_atomic+0x318/0x384 [ 
31.081634] drm_client_modeset_commit_locked+0x8c/0x24c [ 31.081725] drm_client_modeset_commit+0x34/0x5c [ 31.081812] __drm_fb_helper_restore_fbdev_mode_unlocked+0x104/0x168 [ 31.081899] drm_fb_helper_set_par+0x50/0x70 [ 31.081971] fbcon_init+0x538/0xc48 [ 31.082047] visual_init+0x16c/0x23c [ 31.082207] do_bind_con_driver.isra.0+0x2d0/0x634 [ 31.082320] do_take_over_console+0x24c/0x33c [ 31.082429] do_fbcon_takeover+0xbc/0x1b0 [ 31.082503] fbcon_fb_registered+0x2d0/0x34c [ 31.082663] register_framebuffer+0x27c/0x38c [ 31.082767] __drm_fb_helper_initial_config_and_unlock+0x5c0/0x91c [ 31.082939] drm_fb_helper_initial_config+0x50/0x74 [ 31.083012] drm_fbdev_dma_client_hotplug+0xb8/0x108 [ 31.083115] drm_client_register+0xa0/0xf4 [ 31.083195] drm_fbdev_dma_setup+0xb0/0x1cc [ 31.083293] zynqmp_dpsub_drm_init+0x45c/0x4e0 [ 31.083431] zynqmp_dpsub_probe+0x444/0x5e0 [ 31.083616] platform_probe+0x8c/0x13c [ 31.083713] really_probe+0x258/0x59c [ 31.083793] __driver_probe_device+0xc4/0x224 [ 31.083878] driver_probe_device+0x70/0x1c0 [ 31.083961] __device_attach_driver+0x108/0x1e0 [ 31.084052] bus_for_each_drv+0x9c/0x100 [ 31.084125] __device_attach+0x100/0x298 [ 31.084207] device_initial_probe+0x14/0x20 [ 31.084292] bus_probe_device+0xd8/0xdc [ 31.084368] deferred_probe_work_func+0x11c/0x180 [ 31.084451] process_one_work+0x3ac/0x988 [ 31.084643] worker_thread+0x398/0x694 [ 31.084752] kthread+0x1bc/0x1c0 [ 31.084848] ret_from_fork+0x10/0x20 [ 31.084932] irq event stamp: 64549 [ 31.084970] hardirqs last enabled at (64548): [] _raw_spin_unlock_irqrestore+0x80/0x90 [ 31.085157] hardirqs last disabled at (64549): [] _raw_spin_lock_irqsave+0xc0/0xdc [ 31.085277] softirqs last enabled at (64503): [] __do_softirq+0x47c/0x500 [ 31.085390] softirqs last disabled at (64498): [] ____do_softirq+0x10/0x1c [ 31.085501] ---[ end trace 0000000000000000 ]--- Fixes: 7cbb0c63de3f ("dmaengine: xilinx: dpdma: Add the Xilinx DisplayPort DMA engine driver") Signed-off-by: Sean Anderson Reviewed-by: 
Tomi Valkeinen Link: https://lore.kernel.org/r/20240308210034.3634938-2-sean.anderson@linux.dev Signed-off-by: Vinod Koul --- drivers/dma/xilinx/xilinx_dpdma.c | 13 ++++++++++--- 1 file changed, 10 insertions(+), 3 deletions(-) diff --git a/drivers/dma/xilinx/xilinx_dpdma.c b/drivers/dma/xilinx/xilinx_dpdma.c index b82815e64d24..eb0637d90342 100644 --- a/drivers/dma/xilinx/xilinx_dpdma.c +++ b/drivers/dma/xilinx/xilinx_dpdma.c @@ -214,7 +214,8 @@ struct xilinx_dpdma_tx_desc { * @running: true if the channel is running * @first_frame: flag for the first frame of stream * @video_group: flag if multi-channel operation is needed for video channels - * @lock: lock to access struct xilinx_dpdma_chan + * @lock: lock to access struct xilinx_dpdma_chan. Must be taken before + * @vchan.lock, if both are to be held. * @desc_pool: descriptor allocation pool * @err_task: error IRQ bottom half handler * @desc: References to descriptors being processed @@ -1097,12 +1098,14 @@ static void xilinx_dpdma_chan_vsync_irq(struct xilinx_dpdma_chan *chan) * Complete the active descriptor, if any, promote the pending * descriptor to active, and queue the next transfer, if any. 
*/ + spin_lock(&chan->vchan.lock); if (chan->desc.active) vchan_cookie_complete(&chan->desc.active->vdesc); chan->desc.active = pending; chan->desc.pending = NULL; xilinx_dpdma_chan_queue_transfer(chan); + spin_unlock(&chan->vchan.lock); out: spin_unlock_irqrestore(&chan->lock, flags); @@ -1264,10 +1267,12 @@ static void xilinx_dpdma_issue_pending(struct dma_chan *dchan) struct xilinx_dpdma_chan *chan = to_xilinx_chan(dchan); unsigned long flags; - spin_lock_irqsave(&chan->vchan.lock, flags); + spin_lock_irqsave(&chan->lock, flags); + spin_lock(&chan->vchan.lock); if (vchan_issue_pending(&chan->vchan)) xilinx_dpdma_chan_queue_transfer(chan); - spin_unlock_irqrestore(&chan->vchan.lock, flags); + spin_unlock(&chan->vchan.lock); + spin_unlock_irqrestore(&chan->lock, flags); } static int xilinx_dpdma_config(struct dma_chan *dchan, @@ -1495,7 +1500,9 @@ static void xilinx_dpdma_chan_err_task(struct tasklet_struct *t) XILINX_DPDMA_EINTR_CHAN_ERR_MASK << chan->id); spin_lock_irqsave(&chan->lock, flags); + spin_lock(&chan->vchan.lock); xilinx_dpdma_chan_queue_transfer(chan); + spin_unlock(&chan->vchan.lock); spin_unlock_irqrestore(&chan->lock, flags); } From 5b9706bfc094314c600ab810a61208a7cbaa4cb3 Mon Sep 17 00:00:00 2001 From: Miquel Raynal Date: Wed, 27 Mar 2024 10:58:48 +0100 Subject: [PATCH 060/313] dmaengine: xilinx: xdma: Fix wrong offsets in the buffers addresses in dma descriptor The addition of interleaved transfers slightly changed the way addresses inside DMA descriptors are derived, breaking cyclic transfers. 
Fixes: 3e184e64c2e5 ("dmaengine: xilinx: xdma: Prepare the introduction of interleaved DMA transfers") Cc: stable@vger.kernel.org Signed-off-by: Miquel Raynal Signed-off-by: Louis Chauvet Link: https://lore.kernel.org/r/20240327-digigram-xdma-fixes-v1-1-45f4a52c0283@bootlin.com Signed-off-by: Vinod Koul --- drivers/dma/xilinx/xdma.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/dma/xilinx/xdma.c b/drivers/dma/xilinx/xdma.c index 170017ff2aad..b9788aa8f6b7 100644 --- a/drivers/dma/xilinx/xdma.c +++ b/drivers/dma/xilinx/xdma.c @@ -704,7 +704,7 @@ xdma_prep_dma_cyclic(struct dma_chan *chan, dma_addr_t address, desc_num = 0; for (i = 0; i < periods; i++) { desc_num += xdma_fill_descs(sw_desc, *src, *dst, period_size, desc_num); - addr += i * period_size; + addr += period_size; } tx_desc = vchan_tx_prep(&xdma_chan->vchan, &sw_desc->vdesc, flags); From 6a40fb8245965b481b4dcce011cd63f20bf91ee0 Mon Sep 17 00:00:00 2001 From: Louis Chauvet Date: Wed, 27 Mar 2024 10:58:49 +0100 Subject: [PATCH 061/313] dmaengine: xilinx: xdma: Fix synchronization issue The current xdma_synchronize method does not properly wait for the last transfer to be done. Due to limitations of the XDMA engine, it is not possible to stop a transfer in the middle of a descriptor. Said otherwise, if a stop is requested at the end of descriptor "N" and the OS is fast enough, the DMA controller will effectively stop immediately. However, if the OS is slightly too slow to request the stop and the DMA engine starts descriptor "N+1", the N+1 transfer will be performed until its end. This means that after a terminate_all, the last descriptor must remain valid and the synchronization must wait for this last descriptor to be terminated.
Fixes: 855c2e1d1842 ("dmaengine: xilinx: xdma: Rework xdma_terminate_all()") Fixes: f5c392d106e7 ("dmaengine: xilinx: xdma: Add terminate_all/synchronize callbacks") Cc: stable@vger.kernel.org Suggested-by: Miquel Raynal Signed-off-by: Louis Chauvet Link: https://lore.kernel.org/r/20240327-digigram-xdma-fixes-v1-2-45f4a52c0283@bootlin.com Signed-off-by: Vinod Koul --- drivers/dma/xilinx/xdma-regs.h | 3 +++ drivers/dma/xilinx/xdma.c | 26 ++++++++++++++++++-------- 2 files changed, 21 insertions(+), 8 deletions(-) diff --git a/drivers/dma/xilinx/xdma-regs.h b/drivers/dma/xilinx/xdma-regs.h index 98f5f6fb9ff9..6ad08878e938 100644 --- a/drivers/dma/xilinx/xdma-regs.h +++ b/drivers/dma/xilinx/xdma-regs.h @@ -117,6 +117,9 @@ struct xdma_hw_desc { CHAN_CTRL_IE_WRITE_ERROR | \ CHAN_CTRL_IE_DESC_ERROR) +/* bits of the channel status register */ +#define XDMA_CHAN_STATUS_BUSY BIT(0) + #define XDMA_CHAN_STATUS_MASK CHAN_CTRL_START #define XDMA_CHAN_ERROR_MASK (CHAN_CTRL_IE_DESC_ALIGN_MISMATCH | \ diff --git a/drivers/dma/xilinx/xdma.c b/drivers/dma/xilinx/xdma.c index b9788aa8f6b7..5a3a3293b21b 100644 --- a/drivers/dma/xilinx/xdma.c +++ b/drivers/dma/xilinx/xdma.c @@ -71,6 +71,8 @@ struct xdma_chan { enum dma_transfer_direction dir; struct dma_slave_config cfg; u32 irq; + struct completion last_interrupt; + bool stop_requested; }; /** @@ -376,6 +378,8 @@ static int xdma_xfer_start(struct xdma_chan *xchan) return ret; xchan->busy = true; + xchan->stop_requested = false; + reinit_completion(&xchan->last_interrupt); return 0; } @@ -387,7 +391,6 @@ static int xdma_xfer_start(struct xdma_chan *xchan) static int xdma_xfer_stop(struct xdma_chan *xchan) { int ret; - u32 val; struct xdma_device *xdev = xchan->xdev_hdl; /* clear run stop bit to prevent any further auto-triggering */ @@ -395,13 +398,7 @@ static int xdma_xfer_stop(struct xdma_chan *xchan) CHAN_CTRL_RUN_STOP); if (ret) return ret; - - /* Clear the channel status register */ - ret = regmap_read(xdev->rmap, xchan->base + 
XDMA_CHAN_STATUS_RC, &val); - if (ret) - return ret; - - return 0; + return ret; } /** @@ -474,6 +471,8 @@ static int xdma_alloc_channels(struct xdma_device *xdev, xchan->xdev_hdl = xdev; xchan->base = base + i * XDMA_CHAN_STRIDE; xchan->dir = dir; + xchan->stop_requested = false; + init_completion(&xchan->last_interrupt); ret = xdma_channel_init(xchan); if (ret) @@ -521,6 +520,7 @@ static int xdma_terminate_all(struct dma_chan *chan) spin_lock_irqsave(&xdma_chan->vchan.lock, flags); xdma_chan->busy = false; + xdma_chan->stop_requested = true; vd = vchan_next_desc(&xdma_chan->vchan); if (vd) { list_del(&vd->node); @@ -542,6 +542,13 @@ static int xdma_terminate_all(struct dma_chan *chan) static void xdma_synchronize(struct dma_chan *chan) { struct xdma_chan *xdma_chan = to_xdma_chan(chan); + struct xdma_device *xdev = xdma_chan->xdev_hdl; + int st = 0; + + /* If the engine continues running, wait for the last interrupt */ + regmap_read(xdev->rmap, xdma_chan->base + XDMA_CHAN_STATUS, &st); + if (st & XDMA_CHAN_STATUS_BUSY) + wait_for_completion_timeout(&xdma_chan->last_interrupt, msecs_to_jiffies(1000)); vchan_synchronize(&xdma_chan->vchan); } @@ -876,6 +883,9 @@ static irqreturn_t xdma_channel_isr(int irq, void *dev_id) u32 st; bool repeat_tx; + if (xchan->stop_requested) + complete(&xchan->last_interrupt); + spin_lock(&xchan->vchan.lock); /* get submitted request */ From 7a71c6dc21d5ae83ab27c39a67845d6d23ac271f Mon Sep 17 00:00:00 2001 From: Miquel Raynal Date: Wed, 27 Mar 2024 10:58:50 +0100 Subject: [PATCH 062/313] dmaengine: xilinx: xdma: Clarify kdoc in XDMA driver Clarify the kernel doc of xdma_fill_descs(), especially how big chunks will be handled. 
Signed-off-by: Miquel Raynal Signed-off-by: Louis Chauvet Link: https://lore.kernel.org/stable/20240327-digigram-xdma-fixes-v1-3-45f4a52c0283%40bootlin.com Link: https://lore.kernel.org/r/20240327-digigram-xdma-fixes-v1-3-45f4a52c0283@bootlin.com Signed-off-by: Vinod Koul --- drivers/dma/xilinx/xdma.c | 14 ++++++++------ 1 file changed, 8 insertions(+), 6 deletions(-) diff --git a/drivers/dma/xilinx/xdma.c b/drivers/dma/xilinx/xdma.c index 5a3a3293b21b..313b217388fe 100644 --- a/drivers/dma/xilinx/xdma.c +++ b/drivers/dma/xilinx/xdma.c @@ -554,12 +554,14 @@ static void xdma_synchronize(struct dma_chan *chan) } /** - * xdma_fill_descs - Fill hardware descriptors with contiguous memory block addresses - * @sw_desc: tx descriptor state container - * @src_addr: Value for a ->src_addr field of a first descriptor - * @dst_addr: Value for a ->dst_addr field of a first descriptor - * @size: Total size of a contiguous memory block - * @filled_descs_num: Number of filled hardware descriptors for corresponding sw_desc + * xdma_fill_descs() - Fill hardware descriptors for one contiguous memory chunk. + * More than one descriptor will be used if the size is bigger + * than XDMA_DESC_BLEN_MAX. + * @sw_desc: Descriptor container + * @src_addr: First value for the ->src_addr field + * @dst_addr: First value for the ->dst_addr field + * @size: Size of the contiguous memory block + * @filled_descs_num: Index of the first descriptor to take care of in @sw_desc */ static inline u32 xdma_fill_descs(struct xdma_desc *sw_desc, u64 src_addr, u64 dst_addr, u32 size, u32 filled_descs_num) From f221033f5c24659dc6ad7e5cf18fb1b075f4a8be Mon Sep 17 00:00:00 2001 From: Fenghua Yu Date: Wed, 13 Mar 2024 14:40:31 -0700 Subject: [PATCH 063/313] dmaengine: idxd: Fix oops during rmmod on single-CPU platforms During the removal of the idxd driver, registered offline callback is invoked as part of the clean up process. 
However, on systems with only one CPU online, no valid target is available to migrate the perf context, resulting in a kernel oops: BUG: unable to handle page fault for address: 000000000002a2b8 #PF: supervisor write access in kernel mode #PF: error_code(0x0002) - not-present page PGD 1470e1067 P4D 0 Oops: 0002 [#1] PREEMPT SMP NOPTI CPU: 0 PID: 20 Comm: cpuhp/0 Not tainted 6.8.0-rc6-dsa+ #57 Hardware name: Intel Corporation AvenueCity/AvenueCity, BIOS BHSDCRB1.86B.2492.D03.2307181620 07/18/2023 RIP: 0010:mutex_lock+0x2e/0x50 ... Call Trace: __die+0x24/0x70 page_fault_oops+0x82/0x160 do_user_addr_fault+0x65/0x6b0 __pfx___rdmsr_safe_on_cpu+0x10/0x10 exc_page_fault+0x7d/0x170 asm_exc_page_fault+0x26/0x30 mutex_lock+0x2e/0x50 mutex_lock+0x1e/0x50 perf_pmu_migrate_context+0x87/0x1f0 perf_event_cpu_offline+0x76/0x90 [idxd] cpuhp_invoke_callback+0xa2/0x4f0 __pfx_perf_event_cpu_offline+0x10/0x10 [idxd] cpuhp_thread_fun+0x98/0x150 smpboot_thread_fn+0x27/0x260 smpboot_thread_fn+0x1af/0x260 __pfx_smpboot_thread_fn+0x10/0x10 kthread+0x103/0x140 __pfx_kthread+0x10/0x10 ret_from_fork+0x31/0x50 __pfx_kthread+0x10/0x10 ret_from_fork_asm+0x1b/0x30 Fix the issue by preventing the migration of the perf context to an invalid target. 
Fixes: 81dd4d4d6178 ("dmaengine: idxd: Add IDXD performance monitor support") Reported-by: Terrence Xu Tested-by: Terrence Xu Signed-off-by: Fenghua Yu Link: https://lore.kernel.org/r/20240313214031.1658045-1-fenghua.yu@intel.com Signed-off-by: Vinod Koul --- drivers/dma/idxd/perfmon.c | 9 +++------ 1 file changed, 3 insertions(+), 6 deletions(-) diff --git a/drivers/dma/idxd/perfmon.c b/drivers/dma/idxd/perfmon.c index fdda6d604262..5e94247e1ea7 100644 --- a/drivers/dma/idxd/perfmon.c +++ b/drivers/dma/idxd/perfmon.c @@ -528,14 +528,11 @@ static int perf_event_cpu_offline(unsigned int cpu, struct hlist_node *node) return 0; target = cpumask_any_but(cpu_online_mask, cpu); - /* migrate events if there is a valid target */ - if (target < nr_cpu_ids) + if (target < nr_cpu_ids) { cpumask_set_cpu(target, &perfmon_dsa_cpu_mask); - else - target = -1; - - perf_pmu_migrate_context(&idxd_pmu->pmu, cpu, target); + perf_pmu_migrate_context(&idxd_pmu->pmu, cpu, target); + } return 0; } From 7c1c73bf84c50b641449f9811e2196cdc3ca4a1b Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Tue, 26 Mar 2024 21:38:58 +0100 Subject: [PATCH 064/313] wifi: mac80211: check EHT/TTLM action frame length Check the EHT action frame length before accessing the action code, if it's not present then the frame cannot be valid. 
Reported-by: syzbot+75af45a00cf13243ba39@syzkaller.appspotmail.com Closes: https://lore.kernel.org/r/0000000000006c06870614886611@google.com/ Fixes: 8f500fbc6c65 ("wifi: mac80211: process and save negotiated TID to Link mapping request") Link: https://msgid.link/20240326213858.19c84f34349f.I71b439f016b28f65284bb7646fe36343b74cbc9a@changeid Signed-off-by: Johannes Berg --- net/mac80211/rx.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/net/mac80211/rx.c b/net/mac80211/rx.c index c1f850138405..685185dc04f9 100644 --- a/net/mac80211/rx.c +++ b/net/mac80211/rx.c @@ -3780,6 +3780,10 @@ ieee80211_rx_h_action(struct ieee80211_rx_data *rx) } break; case WLAN_CATEGORY_PROTECTED_EHT: + if (len < offsetofend(typeof(*mgmt), + u.action.u.ttlm_req.action_code)) + break; + switch (mgmt->u.action.u.ttlm_req.action_code) { case WLAN_PROTECTED_EHT_ACTION_TTLM_REQ: if (sdata->vif.type != NL80211_IFTYPE_STATION) From ab9177d83c040eba58387914077ebca56f14fae6 Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Tue, 26 Mar 2024 22:08:54 +0100 Subject: [PATCH 065/313] wifi: mac80211: don't use rate mask for scanning The rate mask is intended for use during operation, and can be set to only have masks for the currently active band. As such, it cannot be used for scanning which can be on other bands as well. Simply ignore the rate masks during scanning to avoid warnings from incorrect settings. 
Reported-by: syzbot+fdc5123366fb9c3fdc6d@syzkaller.appspotmail.com Closes: https://syzkaller.appspot.com/bug?extid=fdc5123366fb9c3fdc6d Co-developed-by: Dmitry Antipov Signed-off-by: Dmitry Antipov Tested-by: Dmitry Antipov Link: https://msgid.link/20240326220854.9594cbb418ca.I7f86c0ba1f98cf7e27c2bacf6c2d417200ecea5c@changeid Signed-off-by: Johannes Berg --- include/net/mac80211.h | 3 +++ net/mac80211/rate.c | 6 +++++- net/mac80211/scan.c | 1 + net/mac80211/tx.c | 13 +++++++++---- 4 files changed, 18 insertions(+), 5 deletions(-) diff --git a/include/net/mac80211.h b/include/net/mac80211.h index 353488ab94a2..2d7f87bc5324 100644 --- a/include/net/mac80211.h +++ b/include/net/mac80211.h @@ -953,6 +953,8 @@ enum mac80211_tx_info_flags { * of their QoS TID or other priority field values. * @IEEE80211_TX_CTRL_MCAST_MLO_FIRST_TX: first MLO TX, used mostly internally * for sequence number assignment + * @IEEE80211_TX_CTRL_SCAN_TX: Indicates that this frame is transmitted + * due to scanning, not in normal operation on the interface. * @IEEE80211_TX_CTRL_MLO_LINK: If not @IEEE80211_LINK_UNSPECIFIED, this * frame should be transmitted on the specific link. 
This really is * only relevant for frames that do not have data present, and is @@ -973,6 +975,7 @@ enum mac80211_tx_control_flags { IEEE80211_TX_CTRL_NO_SEQNO = BIT(7), IEEE80211_TX_CTRL_DONT_REORDER = BIT(8), IEEE80211_TX_CTRL_MCAST_MLO_FIRST_TX = BIT(9), + IEEE80211_TX_CTRL_SCAN_TX = BIT(10), IEEE80211_TX_CTRL_MLO_LINK = 0xf0000000, }; diff --git a/net/mac80211/rate.c b/net/mac80211/rate.c index 23404b275457..4dc1def69548 100644 --- a/net/mac80211/rate.c +++ b/net/mac80211/rate.c @@ -877,6 +877,7 @@ void ieee80211_get_tx_rates(struct ieee80211_vif *vif, struct ieee80211_sub_if_data *sdata; struct ieee80211_tx_info *info = IEEE80211_SKB_CB(skb); struct ieee80211_supported_band *sband; + u32 mask = ~0; rate_control_fill_sta_table(sta, info, dest, max_rates); @@ -889,9 +890,12 @@ void ieee80211_get_tx_rates(struct ieee80211_vif *vif, if (ieee80211_is_tx_data(skb)) rate_control_apply_mask(sdata, sta, sband, dest, max_rates); + if (!(info->control.flags & IEEE80211_TX_CTRL_SCAN_TX)) + mask = sdata->rc_rateidx_mask[info->band]; + if (dest[0].idx < 0) __rate_control_send_low(&sdata->local->hw, sband, sta, info, - sdata->rc_rateidx_mask[info->band]); + mask); if (sta) rate_fixup_ratelist(vif, sband, info, dest, max_rates); diff --git a/net/mac80211/scan.c b/net/mac80211/scan.c index 0429e59ba387..73850312580f 100644 --- a/net/mac80211/scan.c +++ b/net/mac80211/scan.c @@ -648,6 +648,7 @@ static void ieee80211_send_scan_probe_req(struct ieee80211_sub_if_data *sdata, cpu_to_le16(IEEE80211_SN_TO_SEQ(sn)); } IEEE80211_SKB_CB(skb)->flags |= tx_flags; + IEEE80211_SKB_CB(skb)->control.flags |= IEEE80211_TX_CTRL_SCAN_TX; ieee80211_tx_skb_tid_band(sdata, skb, 7, channel->band); } } diff --git a/net/mac80211/tx.c b/net/mac80211/tx.c index 6bf223e6cd1a..cfd0a62d0152 100644 --- a/net/mac80211/tx.c +++ b/net/mac80211/tx.c @@ -698,11 +698,16 @@ ieee80211_tx_h_rate_ctrl(struct ieee80211_tx_data *tx) txrc.bss_conf = &tx->sdata->vif.bss_conf; txrc.skb = tx->skb; txrc.reported_rate.idx = 
-1; - txrc.rate_idx_mask = tx->sdata->rc_rateidx_mask[info->band]; - if (tx->sdata->rc_has_mcs_mask[info->band]) - txrc.rate_idx_mcs_mask = - tx->sdata->rc_rateidx_mcs_mask[info->band]; + if (unlikely(info->control.flags & IEEE80211_TX_CTRL_SCAN_TX)) { + txrc.rate_idx_mask = ~0; + } else { + txrc.rate_idx_mask = tx->sdata->rc_rateidx_mask[info->band]; + + if (tx->sdata->rc_has_mcs_mask[info->band]) + txrc.rate_idx_mcs_mask = + tx->sdata->rc_rateidx_mcs_mask[info->band]; + } txrc.bss = (tx->sdata->vif.type == NL80211_IFTYPE_AP || tx->sdata->vif.type == NL80211_IFTYPE_MESH_POINT || From d12b9779cc9ba29d65fbfc728eb8a037871dd331 Mon Sep 17 00:00:00 2001 From: Richard Kinder Date: Thu, 28 Mar 2024 11:57:25 +1100 Subject: [PATCH 066/313] wifi: mac80211: ensure beacon is non-S1G prior to extracting the beacon timestamp field Logic inside ieee80211_rx_mgmt_beacon accesses the mgmt->u.beacon.timestamp field without first checking whether the beacon received is non-S1G format. Fix the problem by checking the beacon is non-S1G format to avoid access of the mgmt->u.beacon.timestamp field. 
Signed-off-by: Richard Kinder Link: https://msgid.link/20240328005725.85355-1-richard.kinder@gmail.com Signed-off-by: Johannes Berg --- net/mac80211/mlme.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/net/mac80211/mlme.c b/net/mac80211/mlme.c index 96b70006b7fc..db7128f6c901 100644 --- a/net/mac80211/mlme.c +++ b/net/mac80211/mlme.c @@ -6193,7 +6193,8 @@ static void ieee80211_rx_mgmt_beacon(struct ieee80211_link_data *link, link->u.mgd.dtim_period = elems->dtim_period; link->u.mgd.have_beacon = true; ifmgd->assoc_data->need_beacon = false; - if (ieee80211_hw_check(&local->hw, TIMING_BEACON_ONLY)) { + if (ieee80211_hw_check(&local->hw, TIMING_BEACON_ONLY) && + !ieee80211_is_s1g_beacon(hdr->frame_control)) { link->conf->sync_tsf = le64_to_cpu(mgmt->u.beacon.timestamp); link->conf->sync_device_ts = From 9ef369973cd2c97cce3388d2c0c7e3c056656e8a Mon Sep 17 00:00:00 2001 From: Igor Artemiev Date: Fri, 5 Apr 2024 18:24:30 +0300 Subject: [PATCH 067/313] wifi: cfg80211: fix the order of arguments for trace events of the tx_rx_evt class The declarations of the tx_rx_evt class and the rdev_set_antenna event use the wrong order of arguments in the TP_ARGS macro. Fix the order of arguments in the TP_ARGS macro. Found by Linux Verification Center (linuxtesting.org) with SVACE. 
Signed-off-by: Igor Artemiev Link: https://msgid.link/20240405152431.270267-1-Igor.A.Artemiev@mcst.ru Signed-off-by: Johannes Berg --- net/wireless/trace.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/net/wireless/trace.h b/net/wireless/trace.h index cbbf347c6b2e..df013c98b80d 100644 --- a/net/wireless/trace.h +++ b/net/wireless/trace.h @@ -1758,7 +1758,7 @@ TRACE_EVENT(rdev_return_void_tx_rx, DECLARE_EVENT_CLASS(tx_rx_evt, TP_PROTO(struct wiphy *wiphy, u32 tx, u32 rx), - TP_ARGS(wiphy, rx, tx), + TP_ARGS(wiphy, tx, rx), TP_STRUCT__entry( WIPHY_ENTRY __field(u32, tx) @@ -1775,7 +1775,7 @@ DECLARE_EVENT_CLASS(tx_rx_evt, DEFINE_EVENT(tx_rx_evt, rdev_set_antenna, TP_PROTO(struct wiphy *wiphy, u32 tx, u32 rx), - TP_ARGS(wiphy, rx, tx) + TP_ARGS(wiphy, tx, rx) ); DECLARE_EVENT_CLASS(wiphy_netdev_id_evt, From b61bb5bc2c1cd00bb53db42f705735db6e8700f0 Mon Sep 17 00:00:00 2001 From: Christian Marangi Date: Thu, 4 Apr 2024 10:31:55 +0200 Subject: [PATCH 068/313] mtd: rawnand: qcom: Fix broken OP_RESET_DEVICE command in qcom_misc_cmd_type_exec() While migrating to exec_ops in commit a82990c8a409 ("mtd: rawnand: qcom: Add read/read_start ops in exec_op path"), OP_RESET_DEVICE command handling got broken unintentionally. Right now for the OP_RESET_DEVICE command, qcom_misc_cmd_type_exec() will simply return 0 without handling it. Even, if that gets fixed, an unnecessary FLASH_STATUS read descriptor command is being added in the middle and that seems to be causing the command to fail on IPQ806x devices. So let's fix the above two issues to make OP_RESET_DEVICE command working again. 
Fixes: a82990c8a409 ("mtd: rawnand: qcom: Add read/read_start ops in exec_op path") Cc: stable@vger.kernel.org Reviewed-by: Manivannan Sadhasivam Signed-off-by: Christian Marangi Signed-off-by: Miquel Raynal Link: https://lore.kernel.org/linux-mtd/20240404083157.940-1-ansuelsmth@gmail.com --- drivers/mtd/nand/raw/qcom_nandc.c | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/drivers/mtd/nand/raw/qcom_nandc.c b/drivers/mtd/nand/raw/qcom_nandc.c index b079605c84d3..b8cff9240b28 100644 --- a/drivers/mtd/nand/raw/qcom_nandc.c +++ b/drivers/mtd/nand/raw/qcom_nandc.c @@ -2815,7 +2815,7 @@ static int qcom_misc_cmd_type_exec(struct nand_chip *chip, const struct nand_sub host->cfg0_raw & ~(7 << CW_PER_PAGE)); nandc_set_reg(chip, NAND_DEV0_CFG1, host->cfg1_raw); instrs = 3; - } else { + } else if (q_op.cmd_reg != OP_RESET_DEVICE) { return 0; } @@ -2830,9 +2830,8 @@ static int qcom_misc_cmd_type_exec(struct nand_chip *chip, const struct nand_sub nandc_set_reg(chip, NAND_EXEC_CMD, 1); write_reg_dma(nandc, NAND_FLASH_CMD, instrs, NAND_BAM_NEXT_SGL); - (q_op.cmd_reg == OP_BLOCK_ERASE) ? 
write_reg_dma(nandc, NAND_DEV0_CFG0, - 2, NAND_BAM_NEXT_SGL) : read_reg_dma(nandc, - NAND_FLASH_STATUS, 1, NAND_BAM_NEXT_SGL); + if (q_op.cmd_reg == OP_BLOCK_ERASE) + write_reg_dma(nandc, NAND_DEV0_CFG0, 2, NAND_BAM_NEXT_SGL); write_reg_dma(nandc, NAND_EXEC_CMD, 1, NAND_BAM_NEXT_SGL); read_reg_dma(nandc, NAND_FLASH_STATUS, 1, NAND_BAM_NEXT_SGL); From 21c9fb611c25d5cd038f6fe485232e7884bb0b3d Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Fri, 5 Apr 2024 16:30:04 +0200 Subject: [PATCH 069/313] mtd: diskonchip: work around ubsan link failure I ran into a randconfig build failure with UBSAN using gcc-13.2: arm-linux-gnueabi-ld: error: unplaced orphan section `.bss..Lubsan_data31' from `drivers/mtd/nand/raw/diskonchip.o' I'm not entirely sure what is going on here, but I suspect this has something to do with the check for the end of the doc_locations[] array that contains an (unsigned long)0xffffffff element, which is compared against the signed (int)0xffffffff. If this is the case, we should get a runtime check for undefined behavior, but we instead get an unexpected build-time error. I would have expected this to work fine on 32-bit architectures despite the signed integer overflow, though on 64-bit architectures this likely won't ever work. Changing the condition to instead check for the size of the array makes the code safe everywhere and avoids the ubsan check that leads to the link error. The loop code goes back to before 2.6.12.
Cc: stable@vger.kernel.org Signed-off-by: Arnd Bergmann Signed-off-by: Miquel Raynal Link: https://lore.kernel.org/linux-mtd/20240405143015.717429-1-arnd@kernel.org --- drivers/mtd/nand/raw/diskonchip.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/mtd/nand/raw/diskonchip.c b/drivers/mtd/nand/raw/diskonchip.c index 5243fab9face..8db7fc424571 100644 --- a/drivers/mtd/nand/raw/diskonchip.c +++ b/drivers/mtd/nand/raw/diskonchip.c @@ -53,7 +53,7 @@ static unsigned long doc_locations[] __initdata = { 0xe8000, 0xea000, 0xec000, 0xee000, #endif #endif - 0xffffffff }; +}; static struct mtd_info *doclist = NULL; @@ -1554,7 +1554,7 @@ static int __init init_nanddoc(void) if (ret < 0) return ret; } else { - for (i = 0; (doc_locations[i] != 0xffffffff); i++) { + for (i = 0; i < ARRAY_SIZE(doc_locations); i++) { doc_probe(doc_locations[i]); } } From abe6acfa7d7b666d785eae706bd34b63f3c2b11f Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?G=C3=BCnther=20Noack?= Date: Fri, 5 Apr 2024 21:40:29 +0000 Subject: [PATCH 070/313] fs: Return ENOTTY directly if FS_IOC_GETUUID or FS_IOC_GETFSSYSFSPATH fail MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit These IOCTL commands should be implemented by setting attributes on the superblock, rather than in the IOCTL hooks in struct file_operations. By returning -ENOTTY instead of -ENOIOCTLCMD, we instruct the fs/ioctl.c logic to return -ENOTTY immediately, rather than attempting to call f_op->unlocked_ioctl() or f_op->compat_ioctl() as a fallback. Why this is safe: Before this change, fs/ioctl.c would unsuccessfully attempt calling the IOCTL hooks, and then return -ENOTTY. By returning -ENOTTY directly, we return the same error code immediately, but save ourselves the fallback attempt. Motivation: This simplifies the logic for these IOCTL commands and lets us reason about the side effects of these IOCTLs more easily. 
It will be possible to permit these IOCTLs under LSM IOCTL policies, without having to worry about them getting dispatched to problematic device drivers (which sometimes do work before looking at the IOCTL command number). Link: https://lore.kernel.org/all/cnwpkeovzbumhprco7q2c2y6zxzmxfpwpwe3tyy6c3gg2szgqd@vfzjaw5v5imr/ Cc: Kent Overstreet Cc: Christian Brauner Cc: Jan Kara Cc: Dave Chinner Cc: Darrick J. Wong Cc: Theodore Ts'o Cc: Josef Bacik Signed-off-by: Günther Noack Link: https://lore.kernel.org/r/20240405214040.101396-2-gnoack@google.com Acked-by: Kent Overstreet Signed-off-by: Christian Brauner --- fs/ioctl.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/fs/ioctl.c b/fs/ioctl.c index 1d5abfdf0f22..fb0628e680c4 100644 --- a/fs/ioctl.c +++ b/fs/ioctl.c @@ -769,7 +769,7 @@ static int ioctl_getfsuuid(struct file *file, void __user *argp) struct fsuuid2 u = { .len = sb->s_uuid_len, }; if (!sb->s_uuid_len) - return -ENOIOCTLCMD; + return -ENOTTY; memcpy(&u.uuid[0], &sb->s_uuid, sb->s_uuid_len); @@ -781,7 +781,7 @@ static int ioctl_get_fs_sysfs_path(struct file *file, void __user *argp) struct super_block *sb = file_inode(file)->i_sb; if (!strlen(sb->s_sysfs_name)) - return -ENOIOCTLCMD; + return -ENOTTY; struct fs_sysfs_path u = {}; From ed09f81eeaa8f9265e1787282cb283f10285c259 Mon Sep 17 00:00:00 2001 From: Maximilian Luz Date: Sat, 6 Apr 2024 15:01:09 +0200 Subject: [PATCH 071/313] firmware: qcom: uefisecapp: Fix memory related IO errors and crashes It turns out that while the QSEECOM APP_SEND command has specific fields for request and response buffers, uefisecapp expects them both to be in a single memory region. Failure to adhere to this has (so far) resulted in either no response being written to the response buffer (causing an EIO to be emitted down the line), the SCM call to fail with EINVAL (i.e., directly from TZ/firmware), or the device to be hard-reset. 
While this issue can be triggered deterministically, in the current form it seems to happen rather sporadically (which is why it has gone unnoticed during earlier testing). This is likely due to the two kzalloc() calls (for request and response) being directly after each other. Which means that those likely return consecutive regions most of the time, especially when not much else is going on in the system. Fix this by allocating a single memory region for both request and response buffers, properly aligning both structs inside it. This unfortunately also means that the qcom_scm_qseecom_app_send() interface needs to be restructured, as it should no longer map the DMA regions separately. Therefore, move the responsibility of DMA allocation (or mapping) to the caller. Fixes: 759e7a2b62eb ("firmware: Add support for Qualcomm UEFI Secure Application") Cc: stable@vger.kernel.org # 6.7 Tested-by: Johan Hovold Reviewed-by: Johan Hovold Signed-off-by: Maximilian Luz Tested-by: Konrad Dybcio # X13s Link: https://lore.kernel.org/r/20240406130125.1047436-1-luzmaximilian@gmail.com Signed-off-by: Bjorn Andersson --- .../firmware/qcom/qcom_qseecom_uefisecapp.c | 137 ++++++++++++------ drivers/firmware/qcom/qcom_scm.c | 37 +---- include/linux/firmware/qcom/qcom_qseecom.h | 55 ++++++- include/linux/firmware/qcom/qcom_scm.h | 10 +- 4 files changed, 153 insertions(+), 86 deletions(-) diff --git a/drivers/firmware/qcom/qcom_qseecom_uefisecapp.c b/drivers/firmware/qcom/qcom_qseecom_uefisecapp.c index 32188f098ef3..bc550ad0dbe0 100644 --- a/drivers/firmware/qcom/qcom_qseecom_uefisecapp.c +++ b/drivers/firmware/qcom/qcom_qseecom_uefisecapp.c @@ -221,6 +221,19 @@ struct qsee_rsp_uefi_query_variable_info { * alignment of 8 bytes (64 bits) for GUIDs. Our definition of efi_guid_t, * however, has an alignment of 4 byte (32 bits). So far, this seems to work * fine here. See also the comment on the typedef of efi_guid_t. 
+ * + * Note: It looks like uefisecapp is quite picky about how the memory passed to + * it is structured and aligned. In particular the request/response setup used + * for QSEE_CMD_UEFI_GET_VARIABLE. While qcom_qseecom_app_send(), in theory, + * accepts separate buffers/addresses for the request and response parts, in + * practice, however, it seems to expect them to be both part of a larger + * contiguous block. We initially allocated separate buffers for the request + * and response but this caused the QSEE_CMD_UEFI_GET_VARIABLE command to + * either not write any response to the response buffer or outright crash the + * device. Therefore, we now allocate a single contiguous block of DMA memory + * for both and properly align the data using the macros below. In particular, + * request and response structs are aligned at 8 byte (via __reqdata_offs()), + * following the driver that this has been reverse-engineered from. */ #define qcuefi_buf_align_fields(fields...) \ ({ \ @@ -244,6 +257,12 @@ struct qsee_rsp_uefi_query_variable_info { #define __array_offs(type, count, offset) \ __field_impl(sizeof(type) * (count), __alignof__(type), offset) +#define __array_offs_aligned(type, count, align, offset) \ + __field_impl(sizeof(type) * (count), align, offset) + +#define __reqdata_offs(size, offset) \ + __array_offs_aligned(u8, size, 8, offset) + #define __array(type, count) __array_offs(type, count, NULL) #define __field_offs(type, offset) __array_offs(type, 1, offset) #define __field(type) __array_offs(type, 1, NULL) @@ -277,10 +296,15 @@ static efi_status_t qsee_uefi_get_variable(struct qcuefi_client *qcuefi, const e unsigned long buffer_size = *data_size; efi_status_t efi_status = EFI_SUCCESS; unsigned long name_length; + dma_addr_t cmd_buf_dma; + size_t cmd_buf_size; + void *cmd_buf; size_t guid_offs; size_t name_offs; size_t req_size; size_t rsp_size; + size_t req_offs; + size_t rsp_offs; ssize_t status; if (!name || !guid) @@ -304,17 +328,19 @@ static efi_status_t 
qsee_uefi_get_variable(struct qcuefi_client *qcuefi, const e __array(u8, buffer_size) ); - req_data = kzalloc(req_size, GFP_KERNEL); - if (!req_data) { + cmd_buf_size = qcuefi_buf_align_fields( + __reqdata_offs(req_size, &req_offs) + __reqdata_offs(rsp_size, &rsp_offs) + ); + + cmd_buf = qseecom_dma_alloc(qcuefi->client, cmd_buf_size, &cmd_buf_dma, GFP_KERNEL); + if (!cmd_buf) { efi_status = EFI_OUT_OF_RESOURCES; goto out; } - rsp_data = kzalloc(rsp_size, GFP_KERNEL); - if (!rsp_data) { - efi_status = EFI_OUT_OF_RESOURCES; - goto out_free_req; - } + req_data = cmd_buf + req_offs; + rsp_data = cmd_buf + rsp_offs; req_data->command_id = QSEE_CMD_UEFI_GET_VARIABLE; req_data->data_size = buffer_size; @@ -332,7 +358,9 @@ static efi_status_t qsee_uefi_get_variable(struct qcuefi_client *qcuefi, const e memcpy(((void *)req_data) + req_data->guid_offset, guid, req_data->guid_size); - status = qcom_qseecom_app_send(qcuefi->client, req_data, req_size, rsp_data, rsp_size); + status = qcom_qseecom_app_send(qcuefi->client, + cmd_buf_dma + req_offs, req_size, + cmd_buf_dma + rsp_offs, rsp_size); if (status) { efi_status = EFI_DEVICE_ERROR; goto out_free; @@ -407,9 +435,7 @@ static efi_status_t qsee_uefi_get_variable(struct qcuefi_client *qcuefi, const e memcpy(data, ((void *)rsp_data) + rsp_data->data_offset, rsp_data->data_size); out_free: - kfree(rsp_data); -out_free_req: - kfree(req_data); + qseecom_dma_free(qcuefi->client, cmd_buf_size, cmd_buf, cmd_buf_dma); out: return efi_status; } @@ -422,10 +448,15 @@ static efi_status_t qsee_uefi_set_variable(struct qcuefi_client *qcuefi, const e struct qsee_rsp_uefi_set_variable *rsp_data; efi_status_t efi_status = EFI_SUCCESS; unsigned long name_length; + dma_addr_t cmd_buf_dma; + size_t cmd_buf_size; + void *cmd_buf; size_t name_offs; size_t guid_offs; size_t data_offs; size_t req_size; + size_t req_offs; + size_t rsp_offs; ssize_t status; if (!name || !guid) @@ -450,17 +481,19 @@ static efi_status_t qsee_uefi_set_variable(struct 
qcuefi_client *qcuefi, const e __array_offs(u8, data_size, &data_offs) ); - req_data = kzalloc(req_size, GFP_KERNEL); - if (!req_data) { + cmd_buf_size = qcuefi_buf_align_fields( + __reqdata_offs(req_size, &req_offs) + __reqdata_offs(sizeof(*rsp_data), &rsp_offs) + ); + + cmd_buf = qseecom_dma_alloc(qcuefi->client, cmd_buf_size, &cmd_buf_dma, GFP_KERNEL); + if (!cmd_buf) { efi_status = EFI_OUT_OF_RESOURCES; goto out; } - rsp_data = kzalloc(sizeof(*rsp_data), GFP_KERNEL); - if (!rsp_data) { - efi_status = EFI_OUT_OF_RESOURCES; - goto out_free_req; - } + req_data = cmd_buf + req_offs; + rsp_data = cmd_buf + rsp_offs; req_data->command_id = QSEE_CMD_UEFI_SET_VARIABLE; req_data->attributes = attributes; @@ -483,8 +516,9 @@ static efi_status_t qsee_uefi_set_variable(struct qcuefi_client *qcuefi, const e if (data_size) memcpy(((void *)req_data) + req_data->data_offset, data, req_data->data_size); - status = qcom_qseecom_app_send(qcuefi->client, req_data, req_size, rsp_data, - sizeof(*rsp_data)); + status = qcom_qseecom_app_send(qcuefi->client, + cmd_buf_dma + req_offs, req_size, + cmd_buf_dma + rsp_offs, sizeof(*rsp_data)); if (status) { efi_status = EFI_DEVICE_ERROR; goto out_free; @@ -507,9 +541,7 @@ static efi_status_t qsee_uefi_set_variable(struct qcuefi_client *qcuefi, const e } out_free: - kfree(rsp_data); -out_free_req: - kfree(req_data); + qseecom_dma_free(qcuefi->client, cmd_buf_size, cmd_buf, cmd_buf_dma); out: return efi_status; } @@ -521,10 +553,15 @@ static efi_status_t qsee_uefi_get_next_variable(struct qcuefi_client *qcuefi, struct qsee_req_uefi_get_next_variable *req_data; struct qsee_rsp_uefi_get_next_variable *rsp_data; efi_status_t efi_status = EFI_SUCCESS; + dma_addr_t cmd_buf_dma; + size_t cmd_buf_size; + void *cmd_buf; size_t guid_offs; size_t name_offs; size_t req_size; size_t rsp_size; + size_t req_offs; + size_t rsp_offs; ssize_t status; if (!name_size || !name || !guid) @@ -545,17 +582,19 @@ static efi_status_t qsee_uefi_get_next_variable(struct 
qcuefi_client *qcuefi, __array(*name, *name_size / sizeof(*name)) ); - req_data = kzalloc(req_size, GFP_KERNEL); - if (!req_data) { + cmd_buf_size = qcuefi_buf_align_fields( + __reqdata_offs(req_size, &req_offs) + __reqdata_offs(rsp_size, &rsp_offs) + ); + + cmd_buf = qseecom_dma_alloc(qcuefi->client, cmd_buf_size, &cmd_buf_dma, GFP_KERNEL); + if (!cmd_buf) { efi_status = EFI_OUT_OF_RESOURCES; goto out; } - rsp_data = kzalloc(rsp_size, GFP_KERNEL); - if (!rsp_data) { - efi_status = EFI_OUT_OF_RESOURCES; - goto out_free_req; - } + req_data = cmd_buf + req_offs; + rsp_data = cmd_buf + rsp_offs; req_data->command_id = QSEE_CMD_UEFI_GET_NEXT_VARIABLE; req_data->guid_offset = guid_offs; @@ -572,7 +611,9 @@ static efi_status_t qsee_uefi_get_next_variable(struct qcuefi_client *qcuefi, goto out_free; } - status = qcom_qseecom_app_send(qcuefi->client, req_data, req_size, rsp_data, rsp_size); + status = qcom_qseecom_app_send(qcuefi->client, + cmd_buf_dma + req_offs, req_size, + cmd_buf_dma + rsp_offs, rsp_size); if (status) { efi_status = EFI_DEVICE_ERROR; goto out_free; @@ -645,9 +686,7 @@ static efi_status_t qsee_uefi_get_next_variable(struct qcuefi_client *qcuefi, } out_free: - kfree(rsp_data); -out_free_req: - kfree(req_data); + qseecom_dma_free(qcuefi->client, cmd_buf_size, cmd_buf, cmd_buf_dma); out: return efi_status; } @@ -659,26 +698,34 @@ static efi_status_t qsee_uefi_query_variable_info(struct qcuefi_client *qcuefi, struct qsee_req_uefi_query_variable_info *req_data; struct qsee_rsp_uefi_query_variable_info *rsp_data; efi_status_t efi_status = EFI_SUCCESS; + dma_addr_t cmd_buf_dma; + size_t cmd_buf_size; + void *cmd_buf; + size_t req_offs; + size_t rsp_offs; int status; - req_data = kzalloc(sizeof(*req_data), GFP_KERNEL); - if (!req_data) { + cmd_buf_size = qcuefi_buf_align_fields( + __reqdata_offs(sizeof(*req_data), &req_offs) + __reqdata_offs(sizeof(*rsp_data), &rsp_offs) + ); + + cmd_buf = qseecom_dma_alloc(qcuefi->client, cmd_buf_size, &cmd_buf_dma, 
GFP_KERNEL); + if (!cmd_buf) { efi_status = EFI_OUT_OF_RESOURCES; goto out; } - rsp_data = kzalloc(sizeof(*rsp_data), GFP_KERNEL); - if (!rsp_data) { - efi_status = EFI_OUT_OF_RESOURCES; - goto out_free_req; - } + req_data = cmd_buf + req_offs; + rsp_data = cmd_buf + rsp_offs; req_data->command_id = QSEE_CMD_UEFI_QUERY_VARIABLE_INFO; req_data->attributes = attr; req_data->length = sizeof(*req_data); - status = qcom_qseecom_app_send(qcuefi->client, req_data, sizeof(*req_data), rsp_data, - sizeof(*rsp_data)); + status = qcom_qseecom_app_send(qcuefi->client, + cmd_buf_dma + req_offs, sizeof(*req_data), + cmd_buf_dma + rsp_offs, sizeof(*rsp_data)); if (status) { efi_status = EFI_DEVICE_ERROR; goto out_free; @@ -711,9 +758,7 @@ static efi_status_t qsee_uefi_query_variable_info(struct qcuefi_client *qcuefi, *max_variable_size = rsp_data->max_variable_size; out_free: - kfree(rsp_data); -out_free_req: - kfree(req_data); + qseecom_dma_free(qcuefi->client, cmd_buf_size, cmd_buf, cmd_buf_dma); out: return efi_status; } diff --git a/drivers/firmware/qcom/qcom_scm.c b/drivers/firmware/qcom/qcom_scm.c index 520de9b5633a..90283f160a22 100644 --- a/drivers/firmware/qcom/qcom_scm.c +++ b/drivers/firmware/qcom/qcom_scm.c @@ -1576,9 +1576,9 @@ EXPORT_SYMBOL_GPL(qcom_scm_qseecom_app_get_id); /** * qcom_scm_qseecom_app_send() - Send to and receive data from a given QSEE app. * @app_id: The ID of the target app. - * @req: Request buffer sent to the app (must be DMA-mappable). + * @req: DMA address of the request buffer sent to the app. * @req_size: Size of the request buffer. - * @rsp: Response buffer, written to by the app (must be DMA-mappable). + * @rsp: DMA address of the response buffer, written to by the app. * @rsp_size: Size of the response buffer. * * Sends a request to the QSEE app associated with the given ID and read back @@ -1589,33 +1589,13 @@ EXPORT_SYMBOL_GPL(qcom_scm_qseecom_app_get_id); * * Return: Zero on success, nonzero on failure. 
*/ -int qcom_scm_qseecom_app_send(u32 app_id, void *req, size_t req_size, void *rsp, - size_t rsp_size) +int qcom_scm_qseecom_app_send(u32 app_id, dma_addr_t req, size_t req_size, + dma_addr_t rsp, size_t rsp_size) { struct qcom_scm_qseecom_resp res = {}; struct qcom_scm_desc desc = {}; - dma_addr_t req_phys; - dma_addr_t rsp_phys; int status; - /* Map request buffer */ - req_phys = dma_map_single(__scm->dev, req, req_size, DMA_TO_DEVICE); - status = dma_mapping_error(__scm->dev, req_phys); - if (status) { - dev_err(__scm->dev, "qseecom: failed to map request buffer\n"); - return status; - } - - /* Map response buffer */ - rsp_phys = dma_map_single(__scm->dev, rsp, rsp_size, DMA_FROM_DEVICE); - status = dma_mapping_error(__scm->dev, rsp_phys); - if (status) { - dma_unmap_single(__scm->dev, req_phys, req_size, DMA_TO_DEVICE); - dev_err(__scm->dev, "qseecom: failed to map response buffer\n"); - return status; - } - - /* Set up SCM call data */ desc.owner = QSEECOM_TZ_OWNER_TZ_APPS; desc.svc = QSEECOM_TZ_SVC_APP_ID_PLACEHOLDER; desc.cmd = QSEECOM_TZ_CMD_APP_SEND; @@ -1623,18 +1603,13 @@ int qcom_scm_qseecom_app_send(u32 app_id, void *req, size_t req_size, void *rsp, QCOM_SCM_RW, QCOM_SCM_VAL, QCOM_SCM_RW, QCOM_SCM_VAL); desc.args[0] = app_id; - desc.args[1] = req_phys; + desc.args[1] = req; desc.args[2] = req_size; - desc.args[3] = rsp_phys; + desc.args[3] = rsp; desc.args[4] = rsp_size; - /* Perform call */ status = qcom_scm_qseecom_call(&desc, &res); - /* Unmap buffers */ - dma_unmap_single(__scm->dev, rsp_phys, rsp_size, DMA_FROM_DEVICE); - dma_unmap_single(__scm->dev, req_phys, req_size, DMA_TO_DEVICE); - if (status) return status; diff --git a/include/linux/firmware/qcom/qcom_qseecom.h b/include/linux/firmware/qcom/qcom_qseecom.h index 5c28298a98be..366243ee9609 100644 --- a/include/linux/firmware/qcom/qcom_qseecom.h +++ b/include/linux/firmware/qcom/qcom_qseecom.h @@ -10,6 +10,7 @@ #define __QCOM_QSEECOM_H #include +#include #include #include @@ -24,12 +25,57 @@ 
struct qseecom_client { u32 app_id; }; +/** + * qseecom_scm_dev() - Get the SCM device associated with the QSEECOM client. + * @client: The QSEECOM client device. + * + * Returns the SCM device under which the provided QSEECOM client device + * operates. This function is intended to be used for DMA allocations. + */ +static inline struct device *qseecom_scm_dev(struct qseecom_client *client) +{ + return client->aux_dev.dev.parent->parent; +} + +/** + * qseecom_dma_alloc() - Allocate DMA memory for a QSEECOM client. + * @client: The QSEECOM client to allocate the memory for. + * @size: The number of bytes to allocate. + * @dma_handle: Pointer to where the DMA address should be stored. + * @gfp: Allocation flags. + * + * Wrapper function for dma_alloc_coherent(), allocating DMA memory usable for + * TZ/QSEECOM communication. Refer to dma_alloc_coherent() for details. + */ +static inline void *qseecom_dma_alloc(struct qseecom_client *client, size_t size, + dma_addr_t *dma_handle, gfp_t gfp) +{ + return dma_alloc_coherent(qseecom_scm_dev(client), size, dma_handle, gfp); +} + +/** + * qseecom_dma_free() - Free QSEECOM DMA memory. + * @client: The QSEECOM client for which the memory has been allocated. + * @size: The number of bytes allocated. + * @cpu_addr: Virtual memory address to free. + * @dma_handle: DMA memory address to free. + * + * Wrapper function for dma_free_coherent(), freeing memory previously + * allocated with qseecom_dma_alloc(). Refer to dma_free_coherent() for + * details. + */ +static inline void qseecom_dma_free(struct qseecom_client *client, size_t size, + void *cpu_addr, dma_addr_t dma_handle) +{ + return dma_free_coherent(qseecom_scm_dev(client), size, cpu_addr, dma_handle); +} + /** * qcom_qseecom_app_send() - Send to and receive data from a given QSEE app. * @client: The QSEECOM client associated with the target app. - * @req: Request buffer sent to the app (must be DMA-mappable). + * @req: DMA address of the request buffer sent to the app.
* @req_size: Size of the request buffer. - * @rsp: Response buffer, written to by the app (must be DMA-mappable). + * @rsp: DMA address of the response buffer, written to by the app. * @rsp_size: Size of the response buffer. * * Sends a request to the QSEE app associated with the given client and read @@ -43,8 +89,9 @@ struct qseecom_client { * * Return: Zero on success, nonzero on failure. */ -static inline int qcom_qseecom_app_send(struct qseecom_client *client, void *req, size_t req_size, - void *rsp, size_t rsp_size) +static inline int qcom_qseecom_app_send(struct qseecom_client *client, + dma_addr_t req, size_t req_size, + dma_addr_t rsp, size_t rsp_size) { return qcom_scm_qseecom_app_send(client->app_id, req, req_size, rsp, rsp_size); } diff --git a/include/linux/firmware/qcom/qcom_scm.h b/include/linux/firmware/qcom/qcom_scm.h index ccaf28846054..aaa19f93ac43 100644 --- a/include/linux/firmware/qcom/qcom_scm.h +++ b/include/linux/firmware/qcom/qcom_scm.h @@ -118,8 +118,8 @@ bool qcom_scm_lmh_dcvsh_available(void); #ifdef CONFIG_QCOM_QSEECOM int qcom_scm_qseecom_app_get_id(const char *app_name, u32 *app_id); -int qcom_scm_qseecom_app_send(u32 app_id, void *req, size_t req_size, void *rsp, - size_t rsp_size); +int qcom_scm_qseecom_app_send(u32 app_id, dma_addr_t req, size_t req_size, + dma_addr_t rsp, size_t rsp_size); #else /* CONFIG_QCOM_QSEECOM */ @@ -128,9 +128,9 @@ static inline int qcom_scm_qseecom_app_get_id(const char *app_name, u32 *app_id) return -EINVAL; } -static inline int qcom_scm_qseecom_app_send(u32 app_id, void *req, - size_t req_size, void *rsp, - size_t rsp_size) +static inline int qcom_scm_qseecom_app_send(u32 app_id, + dma_addr_t req, size_t req_size, + dma_addr_t rsp, size_t rsp_size) { return -EINVAL; } From 6065e736f82c817c9a597a31ee67f0ce4628e948 Mon Sep 17 00:00:00 2001 From: Samuel Holland Date: Mon, 26 Feb 2024 16:34:46 -0800 Subject: [PATCH 072/313] riscv: Fix TASK_SIZE on 64-bit NOMMU On NOMMU, userspace memory can come from 
anywhere in physical RAM. The current definition of TASK_SIZE is wrong if any RAM exists above 4G, causing spurious failures in the userspace access routines. Fixes: 6bd33e1ece52 ("riscv: add nommu support") Fixes: c3f896dcf1e4 ("mm: switch the test_vmalloc module to use __vmalloc_node") Signed-off-by: Samuel Holland Reviewed-by: Jisheng Zhang Reviewed-by: Bo Gan Link: https://lore.kernel.org/r/20240227003630.3634533-2-samuel.holland@sifive.com Signed-off-by: Palmer Dabbelt --- arch/riscv/include/asm/pgtable.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/riscv/include/asm/pgtable.h b/arch/riscv/include/asm/pgtable.h index 0c94260b5d0c..a564a39e5676 100644 --- a/arch/riscv/include/asm/pgtable.h +++ b/arch/riscv/include/asm/pgtable.h @@ -882,7 +882,7 @@ static inline pte_t pte_swp_clear_exclusive(pte_t pte) #define PAGE_SHARED __pgprot(0) #define PAGE_KERNEL __pgprot(0) #define swapper_pg_dir NULL -#define TASK_SIZE 0xffffffffUL +#define TASK_SIZE _AC(-1, UL) #define VMALLOC_START _AC(0, UL) #define VMALLOC_END TASK_SIZE From aea702dde7e9876fb00571a2602f25130847bf0f Mon Sep 17 00:00:00 2001 From: Samuel Holland Date: Mon, 26 Feb 2024 16:34:47 -0800 Subject: [PATCH 073/313] riscv: Fix loading 64-bit NOMMU kernels past the start of RAM commit 3335068f8721 ("riscv: Use PUD/P4D/PGD pages for the linear mapping") added logic to allow using RAM below the kernel load address. However, this does not work for NOMMU, where PAGE_OFFSET is fixed to the kernel load address. Since that range of memory corresponds to PFNs below ARCH_PFN_OFFSET, mm initialization runs off the beginning of mem_map and corrupts adjacent kernel memory. Fix this by restoring the previous behavior for NOMMU kernels. 
Fixes: 3335068f8721 ("riscv: Use PUD/P4D/PGD pages for the linear mapping") Signed-off-by: Samuel Holland Link: https://lore.kernel.org/r/20240227003630.3634533-3-samuel.holland@sifive.com Signed-off-by: Palmer Dabbelt --- arch/riscv/include/asm/page.h | 2 +- arch/riscv/mm/init.c | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/arch/riscv/include/asm/page.h b/arch/riscv/include/asm/page.h index 57e887bfa34c..94b3d6930fc3 100644 --- a/arch/riscv/include/asm/page.h +++ b/arch/riscv/include/asm/page.h @@ -89,7 +89,7 @@ typedef struct page *pgtable_t; #define PTE_FMT "%08lx" #endif -#ifdef CONFIG_64BIT +#if defined(CONFIG_64BIT) && defined(CONFIG_MMU) /* * We override this value as its generic definition uses __pa too early in * the boot process (before kernel_map.va_pa_offset is set). diff --git a/arch/riscv/mm/init.c b/arch/riscv/mm/init.c index 32cad6a65ccd..b3e63dec3ab5 100644 --- a/arch/riscv/mm/init.c +++ b/arch/riscv/mm/init.c @@ -232,7 +232,7 @@ static void __init setup_bootmem(void) * In 64-bit, any use of __va/__pa before this point is wrong as we * did not know the start of DRAM before. */ - if (IS_ENABLED(CONFIG_64BIT)) + if (IS_ENABLED(CONFIG_64BIT) && IS_ENABLED(CONFIG_MMU)) kernel_map.va_pa_offset = PAGE_OFFSET - phys_ram_base; /* From c2b6d3a2bbf6352f7cddff2abe81dc4af4887672 Mon Sep 17 00:00:00 2001 From: Dragan Simic Date: Sun, 7 Apr 2024 19:56:24 +0200 Subject: [PATCH 074/313] arm64: dts: rockchip: Designate the system power controller on QuartzPro64 Designate the primary RK806 PMIC on the Pine64 QuartzPro64 as the system power controller, so the board shuts down properly on poweroff(8). 
Fixes: 152d3d070a9c ("arm64: dts: rockchip: Add QuartzPro64 SBC device tree") Signed-off-by: Dragan Simic Link: https://lore.kernel.org/r/c602dfb3972a0844f2a87b6245bdc5c3378c5989.1712512497.git.dsimic@manjaro.org Signed-off-by: Heiko Stuebner --- arch/arm64/boot/dts/rockchip/rk3588-quartzpro64.dts | 1 + 1 file changed, 1 insertion(+) diff --git a/arch/arm64/boot/dts/rockchip/rk3588-quartzpro64.dts b/arch/arm64/boot/dts/rockchip/rk3588-quartzpro64.dts index 67414d72e2b6..22bbfbe729c1 100644 --- a/arch/arm64/boot/dts/rockchip/rk3588-quartzpro64.dts +++ b/arch/arm64/boot/dts/rockchip/rk3588-quartzpro64.dts @@ -456,6 +456,7 @@ pmic@0 { <&rk806_dvs2_null>, <&rk806_dvs3_null>; pinctrl-names = "default"; spi-max-frequency = <1000000>; + system-power-controller; vcc1-supply = <&vcc4v0_sys>; vcc2-supply = <&vcc4v0_sys>; From 08cd20bdecd9cfde5c1aec6146fa22ca753efea1 Mon Sep 17 00:00:00 2001 From: Muhammed Efe Cetin Date: Sun, 7 Apr 2024 20:32:10 +0300 Subject: [PATCH 075/313] arm64: dts: rockchip: mark system power controller and fix typo on orangepi-5-plus Mark the PMIC as system power controller, so the board will shut-down properly and fix the typo on rk806_dvs1_null pins property. 
Fixes: 236d225e1ee7 ("arm64: dts: rockchip: Add board device tree for rk3588-orangepi-5-plus") Signed-off-by: Muhammed Efe Cetin Reviewed-by: Dragan Simic Link: https://lore.kernel.org/r/20240407173210.372585-1-efectn@6tel.net Signed-off-by: Heiko Stuebner --- arch/arm64/boot/dts/rockchip/rk3588-orangepi-5-plus.dts | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/arch/arm64/boot/dts/rockchip/rk3588-orangepi-5-plus.dts b/arch/arm64/boot/dts/rockchip/rk3588-orangepi-5-plus.dts index 1b606ea5b6cf..1a604429fb26 100644 --- a/arch/arm64/boot/dts/rockchip/rk3588-orangepi-5-plus.dts +++ b/arch/arm64/boot/dts/rockchip/rk3588-orangepi-5-plus.dts @@ -485,6 +485,7 @@ pmic@0 { pinctrl-0 = <&pmic_pins>, <&rk806_dvs1_null>, <&rk806_dvs2_null>, <&rk806_dvs3_null>; spi-max-frequency = <1000000>; + system-power-controller; vcc1-supply = <&vcc5v0_sys>; vcc2-supply = <&vcc5v0_sys>; @@ -506,7 +507,7 @@ pmic@0 { #gpio-cells = <2>; rk806_dvs1_null: dvs1-null-pins { - pins = "gpio_pwrctrl2"; + pins = "gpio_pwrctrl1"; function = "pin_fun0"; }; From 29148d841edea9335141fae86a0742f539fe1327 Mon Sep 17 00:00:00 2001 From: Krzysztof Kozlowski Date: Sun, 7 Apr 2024 12:28:51 +0200 Subject: [PATCH 076/313] arm64: dts: rockchip: drop redundant pcie-reset-suspend in Scarlet Dumo There is no "pcie-reset-suspend" property in the PCI bindings or Linux driver, so assume this was copied from downstream. Drop the property, but leave the comment, because it might be useful for someone. 
This fixes dtbs_check warning: rk3399-gru-scarlet-dumo.dtb: pcie@f8000000: Unevaluated properties are not allowed ('pcie-reset-suspend' was unexpected) Signed-off-by: Krzysztof Kozlowski Link: https://lore.kernel.org/r/20240407102854.38672-1-krzysztof.kozlowski@linaro.org Signed-off-by: Heiko Stuebner --- arch/arm64/boot/dts/rockchip/rk3399-gru-scarlet.dtsi | 1 - 1 file changed, 1 deletion(-) diff --git a/arch/arm64/boot/dts/rockchip/rk3399-gru-scarlet.dtsi b/arch/arm64/boot/dts/rockchip/rk3399-gru-scarlet.dtsi index 6d1e3ca86392..d5e035823eb5 100644 --- a/arch/arm64/boot/dts/rockchip/rk3399-gru-scarlet.dtsi +++ b/arch/arm64/boot/dts/rockchip/rk3399-gru-scarlet.dtsi @@ -689,7 +689,6 @@ &pcie0 { ep-gpios = <&gpio0 3 GPIO_ACTIVE_HIGH>; /* PERST# asserted in S3 */ - pcie-reset-suspend = <1>; vpcie3v3-supply = <&wlan_3v3>; vpcie1v8-supply = <&pp1800_pcie>; From cd0793fc3b03985d90f24232056853ef79ff555e Mon Sep 17 00:00:00 2001 From: Krzysztof Kozlowski Date: Sun, 7 Apr 2024 12:28:53 +0200 Subject: [PATCH 077/313] arm64: dts: rockchip: drop redundant disable-gpios in Lubancat 1 There is no "disable-gpios" property in the PCI bindings or Linux driver, so assume this was copied from downstream. This property looks like some real hardware, just described wrongly. Rockchip PCIe controller (DesignWare based) does not define any other GPIO-s property, except reset-gpios which is already there, so not sure what would be the real property for this GPIO. 
This fixes dtbs_check warning: rk3566-lubancat-1.dtb: pcie@fe260000: Unevaluated properties are not allowed ('disable-gpios' was unexpected) Signed-off-by: Krzysztof Kozlowski Link: https://lore.kernel.org/r/20240407102854.38672-3-krzysztof.kozlowski@linaro.org Signed-off-by: Heiko Stuebner --- arch/arm64/boot/dts/rockchip/rk3566-lubancat-1.dts | 1 - 1 file changed, 1 deletion(-) diff --git a/arch/arm64/boot/dts/rockchip/rk3566-lubancat-1.dts b/arch/arm64/boot/dts/rockchip/rk3566-lubancat-1.dts index 6ecdf5d28339..c1194d1e438d 100644 --- a/arch/arm64/boot/dts/rockchip/rk3566-lubancat-1.dts +++ b/arch/arm64/boot/dts/rockchip/rk3566-lubancat-1.dts @@ -447,7 +447,6 @@ rgmii_phy1: phy@0 { &pcie2x1 { reset-gpios = <&gpio0 RK_PB6 GPIO_ACTIVE_HIGH>; - disable-gpios = <&gpio0 RK_PA6 GPIO_ACTIVE_HIGH>; vpcie3v3-supply = <&vcc3v3_pcie>; status = "okay"; }; From d892a6f34adc371ee0dbaa5ba684d02c4431f2e3 Mon Sep 17 00:00:00 2001 From: Krzysztof Kozlowski Date: Sun, 7 Apr 2024 12:28:54 +0200 Subject: [PATCH 078/313] arm64: dts: rockchip: drop redundant disable-gpios in Lubancat 2 There is no "disable-gpios" property in the PCI bindings or Linux driver, so assume this was copied from downstream. This property looks like some real hardware, just described wrongly. Rockchip PCIe controller (DesignWare based) does not define any other GPIO-s property, except reset-gpios which is already there, so not sure what would be the real property for this GPIO. 
This fixes dtbs_check warning: rk3568-lubancat-2.dtb: pcie@fe260000: Unevaluated properties are not allowed ('disable-gpios' was unexpected) Signed-off-by: Krzysztof Kozlowski Link: https://lore.kernel.org/r/20240407102854.38672-4-krzysztof.kozlowski@linaro.org Signed-off-by: Heiko Stuebner --- arch/arm64/boot/dts/rockchip/rk3568-lubancat-2.dts | 1 - 1 file changed, 1 deletion(-) diff --git a/arch/arm64/boot/dts/rockchip/rk3568-lubancat-2.dts b/arch/arm64/boot/dts/rockchip/rk3568-lubancat-2.dts index a8a4cc190eb3..a3112d5df200 100644 --- a/arch/arm64/boot/dts/rockchip/rk3568-lubancat-2.dts +++ b/arch/arm64/boot/dts/rockchip/rk3568-lubancat-2.dts @@ -523,7 +523,6 @@ &pcie3x2 { &pcie2x1 { reset-gpios = <&gpio3 RK_PC1 GPIO_ACTIVE_HIGH>; - disable-gpios = <&gpio3 RK_PC2 GPIO_ACTIVE_HIGH>; vpcie3v3-supply = <&vcc3v3_mini_pcie>; status = "okay"; }; From d41201c90f825f19a46afbfb502f22f612d8ccc4 Mon Sep 17 00:00:00 2001 From: Rob Herring Date: Mon, 1 Apr 2024 15:49:58 -0500 Subject: [PATCH 079/313] dt-bindings: rockchip: grf: Add missing type to 'pcie-phy' node 'pcie-phy' is missing any type. Add 'type: object' to indicate it's a node. 
Signed-off-by: Rob Herring Reviewed-by: Heiko Stuebner Acked-by: Conor Dooley Link: https://lore.kernel.org/r/20240401204959.1698106-1-robh@kernel.org Signed-off-by: Heiko Stuebner --- Documentation/devicetree/bindings/soc/rockchip/grf.yaml | 1 + 1 file changed, 1 insertion(+) diff --git a/Documentation/devicetree/bindings/soc/rockchip/grf.yaml b/Documentation/devicetree/bindings/soc/rockchip/grf.yaml index 0b87c266760c..79798c747476 100644 --- a/Documentation/devicetree/bindings/soc/rockchip/grf.yaml +++ b/Documentation/devicetree/bindings/soc/rockchip/grf.yaml @@ -171,6 +171,7 @@ allOf: unevaluatedProperties: false pcie-phy: + type: object description: Documentation/devicetree/bindings/phy/rockchip-pcie-phy.txt From 433d54818f64a2fe0562f8c04c7a81f562368515 Mon Sep 17 00:00:00 2001 From: Jose Ignacio Tornos Martinez Date: Tue, 5 Mar 2024 15:32:18 +0100 Subject: [PATCH 080/313] arm64: dts: rockchip: regulator for sd needs to be always on for BPI-R2Pro With default dts configuration for BPI-R2Pro, the regulator for sd card is powered off when reboot is commanded, and the only solution to detect the sd card again, and therefore, allow rebooting from there, is to do a hardware reset. Configure the regulator for sd to be always on for BPI-R2Pro in order to avoid this issue. 
Fixes: f901aaadaa2a ("arm64: dts: rockchip: Add Bananapi R2 Pro") Signed-off-by: Jose Ignacio Tornos Martinez Link: https://lore.kernel.org/r/20240305143222.189413-1-jtornosm@redhat.com Signed-off-by: Heiko Stuebner --- arch/arm64/boot/dts/rockchip/rk3568-bpi-r2-pro.dts | 2 ++ 1 file changed, 2 insertions(+) diff --git a/arch/arm64/boot/dts/rockchip/rk3568-bpi-r2-pro.dts b/arch/arm64/boot/dts/rockchip/rk3568-bpi-r2-pro.dts index 03d6d920446a..c87fad2c34cb 100644 --- a/arch/arm64/boot/dts/rockchip/rk3568-bpi-r2-pro.dts +++ b/arch/arm64/boot/dts/rockchip/rk3568-bpi-r2-pro.dts @@ -416,6 +416,8 @@ regulator-state-mem { vccio_sd: LDO_REG5 { regulator-name = "vccio_sd"; + regulator-always-on; + regulator-boot-on; regulator-min-microvolt = <1800000>; regulator-max-microvolt = <3300000>; From f141dde5dc51ecab18e8b12b76eb416cda0d6798 Mon Sep 17 00:00:00 2001 From: Matthew Sakai Date: Fri, 5 Apr 2024 21:26:21 +0200 Subject: [PATCH 081/313] dm vdo murmurhash: remove unneeded semicolon Reported-by: kernel test robot Closes: https://lore.kernel.org/oe-kbuild-all/202404050327.4ebVLBD3-lkp@intel.com/ Signed-off-by: Matthew Sakai Signed-off-by: Mikulas Patocka Signed-off-by: Mike Snitzer --- drivers/md/dm-vdo/murmurhash3.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/md/dm-vdo/murmurhash3.c b/drivers/md/dm-vdo/murmurhash3.c index 01d2743444ec..3a989efae142 100644 --- a/drivers/md/dm-vdo/murmurhash3.c +++ b/drivers/md/dm-vdo/murmurhash3.c @@ -137,7 +137,7 @@ void murmurhash3_128(const void *key, const int len, const u32 seed, void *out) break; default: break; - }; + } } /* finalization */ From 9617cd6f24b294552a817f80f5225431ef67b540 Mon Sep 17 00:00:00 2001 From: Yu Kuai Date: Sat, 6 Apr 2024 17:09:25 +0800 Subject: [PATCH 082/313] block: fix module reference leakage from bdev_open_by_dev error path At the time bdev_may_open() is called, module reference is grabbed already, hence module reference should be released if bdev_may_open() failed. 
This problem is found by code review. Fixes: ed5cc702d311 ("block: Add config option to not allow writing to mounted devices") Signed-off-by: Yu Kuai Link: https://lore.kernel.org/r/20240406090930.2252838-22-yukuai1@huaweicloud.com Signed-off-by: Christian Brauner --- block/bdev.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/block/bdev.c b/block/bdev.c index b8e32d933a63..3caf24e66559 100644 --- a/block/bdev.c +++ b/block/bdev.c @@ -873,7 +873,7 @@ int bdev_open(struct block_device *bdev, blk_mode_t mode, void *holder, goto abort_claiming; ret = -EBUSY; if (!bdev_may_open(bdev, mode)) - goto abort_claiming; + goto put_module; if (bdev_is_partition(bdev)) ret = blkdev_get_part(bdev, mode); else From e1c9216bec2793d051f83d77d93d3d6a899d06d1 Mon Sep 17 00:00:00 2001 From: Cristian Ciocaltea Date: Tue, 9 Apr 2024 01:29:25 +0300 Subject: [PATCH 083/313] phy: phy-rockchip-samsung-hdptx: Select CONFIG_RATIONAL Ensure CONFIG_RATIONAL is selected in order to fix the following link error with some kernel configurations: drivers/phy/rockchip/phy-rockchip-samsung-hdptx.o: in function `rk_hdptx_ropll_tmds_cmn_config': phy-rockchip-samsung-hdptx.c:(.text+0x950): undefined reference to `rational_best_approximation' Fixes: 553be2830c5f ("phy: rockchip: Add Samsung HDMI/eDP Combo PHY driver") Reported-by: kernel test robot Closes: https://lore.kernel.org/oe-kbuild-all/202404090540.2l1TEkDF-lkp@intel.com/ Signed-off-by: Cristian Ciocaltea Reviewed-by: Heiko Stuebner Link: https://lore.kernel.org/r/20240408222926.32708-1-cristian.ciocaltea@collabora.com Signed-off-by: Vinod Koul --- drivers/phy/rockchip/Kconfig | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/phy/rockchip/Kconfig b/drivers/phy/rockchip/Kconfig index a34f67bb7e61..b60a4b60451e 100644 --- a/drivers/phy/rockchip/Kconfig +++ b/drivers/phy/rockchip/Kconfig @@ -87,6 +87,7 @@ config PHY_ROCKCHIP_SAMSUNG_HDPTX tristate "Rockchip Samsung HDMI/eDP Combo PHY driver" depends on (ARCH_ROCKCHIP || 
COMPILE_TEST) && OF select GENERIC_PHY + select RATIONAL help Enable this to support the Rockchip HDMI/eDP Combo PHY with Samsung IP block. From 025a6f7448f7bb5f4fceb62498ee33d89ae266bb Mon Sep 17 00:00:00 2001 From: Johan Hovold Date: Mon, 8 Apr 2024 11:30:23 +0200 Subject: [PATCH 084/313] phy: qcom: qmp-combo: fix VCO div offset on v5_5nm and v6 Commit 5abed58a8bde ("phy: qcom: qmp-combo: Fix VCO div offset on v3") fixed a regression introduced in 6.5 by making sure that the correct offset is used for the DP_PHY_VCO_DIV register on v3 hardware. Unfortunately, that fix instead broke DisplayPort on v5_5nm and v6 hardware as it failed to add the corresponding offsets also to those register tables. Fixes: 815891eee668 ("phy: qcom-qmp-combo: Introduce orientation variable") Fixes: 5abed58a8bde ("phy: qcom: qmp-combo: Fix VCO div offset on v3") Cc: stable@vger.kernel.org # 6.5: 5abed58a8bde Cc: Stephen Boyd Cc: Abhinav Kumar Cc: Dmitry Baryshkov Signed-off-by: Johan Hovold Reviewed-by: Dmitry Baryshkov Reviewed-by: Stephen Boyd Reviewed-by: Abhinav Kumar Link: https://lore.kernel.org/r/20240408093023.506-1-johan+linaro@kernel.org Signed-off-by: Vinod Koul --- drivers/phy/qualcomm/phy-qcom-qmp-combo.c | 2 ++ drivers/phy/qualcomm/phy-qcom-qmp-dp-phy-v5.h | 1 + drivers/phy/qualcomm/phy-qcom-qmp-dp-phy-v6.h | 1 + 3 files changed, 4 insertions(+) diff --git a/drivers/phy/qualcomm/phy-qcom-qmp-combo.c b/drivers/phy/qualcomm/phy-qcom-qmp-combo.c index 2a6f70b3e25f..c21cdb8dbfe7 100644 --- a/drivers/phy/qualcomm/phy-qcom-qmp-combo.c +++ b/drivers/phy/qualcomm/phy-qcom-qmp-combo.c @@ -153,6 +153,7 @@ static const unsigned int qmp_v5_5nm_usb3phy_regs_layout[QPHY_LAYOUT_SIZE] = { [QPHY_COM_BIAS_EN_CLKBUFLR_EN] = QSERDES_V5_COM_BIAS_EN_CLKBUFLR_EN, [QPHY_DP_PHY_STATUS] = QSERDES_V5_DP_PHY_STATUS, + [QPHY_DP_PHY_VCO_DIV] = QSERDES_V5_DP_PHY_VCO_DIV, [QPHY_TX_TX_POL_INV] = QSERDES_V5_5NM_TX_TX_POL_INV, [QPHY_TX_TX_DRV_LVL] = QSERDES_V5_5NM_TX_TX_DRV_LVL, @@ -177,6 +178,7 @@ static 
const unsigned int qmp_v6_usb3phy_regs_layout[QPHY_LAYOUT_SIZE] = { [QPHY_COM_BIAS_EN_CLKBUFLR_EN] = QSERDES_V6_COM_PLL_BIAS_EN_CLK_BUFLR_EN, [QPHY_DP_PHY_STATUS] = QSERDES_V6_DP_PHY_STATUS, + [QPHY_DP_PHY_VCO_DIV] = QSERDES_V6_DP_PHY_VCO_DIV, [QPHY_TX_TX_POL_INV] = QSERDES_V6_TX_TX_POL_INV, [QPHY_TX_TX_DRV_LVL] = QSERDES_V6_TX_TX_DRV_LVL, diff --git a/drivers/phy/qualcomm/phy-qcom-qmp-dp-phy-v5.h b/drivers/phy/qualcomm/phy-qcom-qmp-dp-phy-v5.h index f5cfacf9be96..181057421c11 100644 --- a/drivers/phy/qualcomm/phy-qcom-qmp-dp-phy-v5.h +++ b/drivers/phy/qualcomm/phy-qcom-qmp-dp-phy-v5.h @@ -7,6 +7,7 @@ #define QCOM_PHY_QMP_DP_PHY_V5_H_ /* Only for QMP V5 PHY - DP PHY registers */ +#define QSERDES_V5_DP_PHY_VCO_DIV 0x070 #define QSERDES_V5_DP_PHY_AUX_INTERRUPT_STATUS 0x0d8 #define QSERDES_V5_DP_PHY_STATUS 0x0dc diff --git a/drivers/phy/qualcomm/phy-qcom-qmp-dp-phy-v6.h b/drivers/phy/qualcomm/phy-qcom-qmp-dp-phy-v6.h index 01a20d3be4b8..fa967a1af058 100644 --- a/drivers/phy/qualcomm/phy-qcom-qmp-dp-phy-v6.h +++ b/drivers/phy/qualcomm/phy-qcom-qmp-dp-phy-v6.h @@ -7,6 +7,7 @@ #define QCOM_PHY_QMP_DP_PHY_V6_H_ /* Only for QMP V6 PHY - DP PHY registers */ +#define QSERDES_V6_DP_PHY_VCO_DIV 0x070 #define QSERDES_V6_DP_PHY_AUX_INTERRUPT_STATUS 0x0e0 #define QSERDES_V6_DP_PHY_STATUS 0x0e4 From bf6e4ee5c43690e4c5a8a057bbcd4ff986bed052 Mon Sep 17 00:00:00 2001 From: Hans de Goede Date: Sat, 6 Apr 2024 16:08:21 +0200 Subject: [PATCH 085/313] phy: ti: tusb1210: Resolve charger-det crash if charger psy is unregistered The power_supply frame-work is not really designed for there to be long living in kernel references to power_supply devices. 
Specifically unregistering a power_supply while some other code has a reference to it triggers a WARN in power_supply_unregister(): WARN_ON(atomic_dec_return(&psy->use_cnt)); Followed by the power_supply still getting removed and the backing data freed anyway, leaving the tusb1210 charger-detect code with a dangling reference, resulting in a crash the next time tusb1210_get_online() is called. Fix this by only holding the reference in tusb1210_get_online(), freeing it at the end of the function. Note this still leaves a theoretical race window, but it avoids the issue when manually rmmod-ing the charger chip driver during development. Fixes: 48969a5623ed ("phy: ti: tusb1210: Add charger detection") Signed-off-by: Hans de Goede Link: https://lore.kernel.org/r/20240406140821.18624-1-hdegoede@redhat.com Signed-off-by: Vinod Koul --- drivers/phy/ti/phy-tusb1210.c | 23 ++++++++++++----------- 1 file changed, 12 insertions(+), 11 deletions(-) diff --git a/drivers/phy/ti/phy-tusb1210.c b/drivers/phy/ti/phy-tusb1210.c index 13cd614e12a1..751fecd466e3 100644 --- a/drivers/phy/ti/phy-tusb1210.c +++ b/drivers/phy/ti/phy-tusb1210.c @@ -69,7 +69,6 @@ struct tusb1210 { struct delayed_work chg_det_work; struct notifier_block psy_nb; struct power_supply *psy; - struct power_supply *charger; #endif }; @@ -236,19 +235,24 @@ static const char * const tusb1210_chargers[] = { static bool tusb1210_get_online(struct tusb1210 *tusb) { + struct power_supply *charger = NULL; union power_supply_propval val; - int i; + bool online = false; + int i, ret; - for (i = 0; i < ARRAY_SIZE(tusb1210_chargers) && !tusb->charger; i++) - tusb->charger = power_supply_get_by_name(tusb1210_chargers[i]); + for (i = 0; i < ARRAY_SIZE(tusb1210_chargers) && !charger; i++) + charger = power_supply_get_by_name(tusb1210_chargers[i]); - if (!tusb->charger) + if (!charger) return false; - if (power_supply_get_property(tusb->charger, POWER_SUPPLY_PROP_ONLINE, &val)) - return false; + ret =
power_supply_get_property(charger, POWER_SUPPLY_PROP_ONLINE, &val); + if (ret == 0) + online = val.intval; - return val.intval; + power_supply_put(charger); + + return online; } static void tusb1210_chg_det_work(struct work_struct *work) @@ -473,9 +477,6 @@ static void tusb1210_remove_charger_detect(struct tusb1210 *tusb) cancel_delayed_work_sync(&tusb->chg_det_work); power_supply_unregister(tusb->psy); } - - if (tusb->charger) - power_supply_put(tusb->charger); } #else static void tusb1210_probe_charger_detect(struct tusb1210 *tusb) { } From 3cba9cfcc1520a2307a29f6fab887bcfc121c417 Mon Sep 17 00:00:00 2001 From: Abdelrahman Morsy Date: Tue, 2 Apr 2024 14:14:06 +0200 Subject: [PATCH 086/313] HID: mcp-2221: cancel delayed_work only when CONFIG_IIO is enabled If the device is unplugged and CONFIG_IIO is not supported, this will result in a warning message at kernel/workqueue. Only cancel delayed work in mcp2221_remove(), when CONFIG_IIO is enabled. Signed-off-by: Abdelrahman Morsy Signed-off-by: Jiri Kosina --- drivers/hid/hid-mcp2221.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/hid/hid-mcp2221.c b/drivers/hid/hid-mcp2221.c index f9cceaeffd08..da5ea5a23b08 100644 --- a/drivers/hid/hid-mcp2221.c +++ b/drivers/hid/hid-mcp2221.c @@ -944,9 +944,11 @@ static void mcp2221_hid_unregister(void *ptr) /* This is needed to be sure hid_hw_stop() isn't called twice by the subsystem */ static void mcp2221_remove(struct hid_device *hdev) { +#if IS_REACHABLE(CONFIG_IIO) struct mcp2221 *mcp = hid_get_drvdata(hdev); cancel_delayed_work_sync(&mcp->init_work); +#endif } #if IS_REACHABLE(CONFIG_IIO) From f011688162ec4c492c12ee7cced74c097270baa2 Mon Sep 17 00:00:00 2001 From: Luca Weiss Date: Mon, 19 Feb 2024 15:33:27 +0100 Subject: [PATCH 087/313] arm64: dts: qcom: Fix type of "wdog" IRQs for remoteprocs The code in qcom_q6v5_init() requests the "wdog" IRQ as IRQF_TRIGGER_RISING. 
If dt defines the interrupt type as LEVEL_HIGH then the driver will have issues getting the IRQ again after probe deferral with an error like: irq: type mismatch, failed to map hwirq-14 for interrupt-controller@b220000! Fix that by updating the devicetrees to use IRQ_TYPE_EDGE_RISING for these interrupts, as is already used in most dt's. Also the driver was already using the interrupts with that type. Fixes: 3658e411efcb ("arm64: dts: qcom: sc7280: Add ADSP node") Fixes: df62402e5ff9 ("arm64: dts: qcom: sc7280: Add CDSP node") Fixes: 152d1faf1e2f ("arm64: dts: qcom: add SC8280XP platform") Fixes: 8eb5287e8a42 ("arm64: dts: qcom: sm6350: Add CDSP nodes") Fixes: efc33c969f23 ("arm64: dts: qcom: sm6350: Add ADSP nodes") Fixes: fe6fd26aeddf ("arm64: dts: qcom: sm6375: Add ADSP&CDSP") Fixes: 23a8903785b9 ("arm64: dts: qcom: sm8250: Add remoteprocs") Signed-off-by: Luca Weiss Link: https://lore.kernel.org/r/20240219-remoteproc-irqs-v1-1-c5aeb02334bd@fairphone.com [bjorn: Added fixes references] Signed-off-by: Bjorn Andersson --- arch/arm64/boot/dts/qcom/sc7280.dtsi | 4 ++-- arch/arm64/boot/dts/qcom/sc8280xp.dtsi | 6 +++--- arch/arm64/boot/dts/qcom/sm6350.dtsi | 4 ++-- arch/arm64/boot/dts/qcom/sm6375.dtsi | 2 +- arch/arm64/boot/dts/qcom/sm8250.dtsi | 6 +++--- 5 files changed, 11 insertions(+), 11 deletions(-) diff --git a/arch/arm64/boot/dts/qcom/sc7280.dtsi b/arch/arm64/boot/dts/qcom/sc7280.dtsi index 7e7f0f0fb41b..41f51d326111 100644 --- a/arch/arm64/boot/dts/qcom/sc7280.dtsi +++ b/arch/arm64/boot/dts/qcom/sc7280.dtsi @@ -3707,7 +3707,7 @@ remoteproc_adsp: remoteproc@3700000 { compatible = "qcom,sc7280-adsp-pas"; reg = <0 0x03700000 0 0x100>; - interrupts-extended = <&pdc 6 IRQ_TYPE_LEVEL_HIGH>, + interrupts-extended = <&pdc 6 IRQ_TYPE_EDGE_RISING>, <&adsp_smp2p_in 0 IRQ_TYPE_EDGE_RISING>, <&adsp_smp2p_in 1 IRQ_TYPE_EDGE_RISING>, <&adsp_smp2p_in 2 IRQ_TYPE_EDGE_RISING>, @@ -3944,7 +3944,7 @@ remoteproc_cdsp: remoteproc@a300000 { compatible = "qcom,sc7280-cdsp-pas"; reg 
= <0 0x0a300000 0 0x10000>; - interrupts-extended = <&intc GIC_SPI 578 IRQ_TYPE_LEVEL_HIGH>, + interrupts-extended = <&intc GIC_SPI 578 IRQ_TYPE_EDGE_RISING>, <&cdsp_smp2p_in 0 IRQ_TYPE_EDGE_RISING>, <&cdsp_smp2p_in 1 IRQ_TYPE_EDGE_RISING>, <&cdsp_smp2p_in 2 IRQ_TYPE_EDGE_RISING>, diff --git a/arch/arm64/boot/dts/qcom/sc8280xp.dtsi b/arch/arm64/boot/dts/qcom/sc8280xp.dtsi index a5b194813079..c9058c7fc1a3 100644 --- a/arch/arm64/boot/dts/qcom/sc8280xp.dtsi +++ b/arch/arm64/boot/dts/qcom/sc8280xp.dtsi @@ -2641,7 +2641,7 @@ remoteproc_adsp: remoteproc@3000000 { compatible = "qcom,sc8280xp-adsp-pas"; reg = <0 0x03000000 0 0x100>; - interrupts-extended = <&intc GIC_SPI 162 IRQ_TYPE_LEVEL_HIGH>, + interrupts-extended = <&intc GIC_SPI 162 IRQ_TYPE_EDGE_RISING>, <&smp2p_adsp_in 0 IRQ_TYPE_EDGE_RISING>, <&smp2p_adsp_in 1 IRQ_TYPE_EDGE_RISING>, <&smp2p_adsp_in 2 IRQ_TYPE_EDGE_RISING>, @@ -4977,7 +4977,7 @@ remoteproc_nsp0: remoteproc@1b300000 { compatible = "qcom,sc8280xp-nsp0-pas"; reg = <0 0x1b300000 0 0x100>; - interrupts-extended = <&intc GIC_SPI 578 IRQ_TYPE_LEVEL_HIGH>, + interrupts-extended = <&intc GIC_SPI 578 IRQ_TYPE_EDGE_RISING>, <&smp2p_nsp0_in 0 IRQ_TYPE_EDGE_RISING>, <&smp2p_nsp0_in 1 IRQ_TYPE_EDGE_RISING>, <&smp2p_nsp0_in 2 IRQ_TYPE_EDGE_RISING>, @@ -5108,7 +5108,7 @@ remoteproc_nsp1: remoteproc@21300000 { compatible = "qcom,sc8280xp-nsp1-pas"; reg = <0 0x21300000 0 0x100>; - interrupts-extended = <&intc GIC_SPI 887 IRQ_TYPE_LEVEL_HIGH>, + interrupts-extended = <&intc GIC_SPI 887 IRQ_TYPE_EDGE_RISING>, <&smp2p_nsp1_in 0 IRQ_TYPE_EDGE_RISING>, <&smp2p_nsp1_in 1 IRQ_TYPE_EDGE_RISING>, <&smp2p_nsp1_in 2 IRQ_TYPE_EDGE_RISING>, diff --git a/arch/arm64/boot/dts/qcom/sm6350.dtsi b/arch/arm64/boot/dts/qcom/sm6350.dtsi index 24bcec3366ef..0be053555602 100644 --- a/arch/arm64/boot/dts/qcom/sm6350.dtsi +++ b/arch/arm64/boot/dts/qcom/sm6350.dtsi @@ -1252,7 +1252,7 @@ adsp: remoteproc@3000000 { compatible = "qcom,sm6350-adsp-pas"; reg = <0 0x03000000 0 0x100>; - 
interrupts-extended = <&pdc 6 IRQ_TYPE_LEVEL_HIGH>, + interrupts-extended = <&pdc 6 IRQ_TYPE_EDGE_RISING>, <&smp2p_adsp_in 0 IRQ_TYPE_EDGE_RISING>, <&smp2p_adsp_in 1 IRQ_TYPE_EDGE_RISING>, <&smp2p_adsp_in 2 IRQ_TYPE_EDGE_RISING>, @@ -1511,7 +1511,7 @@ cdsp: remoteproc@8300000 { compatible = "qcom,sm6350-cdsp-pas"; reg = <0 0x08300000 0 0x10000>; - interrupts-extended = <&intc GIC_SPI 578 IRQ_TYPE_LEVEL_HIGH>, + interrupts-extended = <&intc GIC_SPI 578 IRQ_TYPE_EDGE_RISING>, <&smp2p_cdsp_in 0 IRQ_TYPE_EDGE_RISING>, <&smp2p_cdsp_in 1 IRQ_TYPE_EDGE_RISING>, <&smp2p_cdsp_in 2 IRQ_TYPE_EDGE_RISING>, diff --git a/arch/arm64/boot/dts/qcom/sm6375.dtsi b/arch/arm64/boot/dts/qcom/sm6375.dtsi index 4386f8a9c636..f40509d91bbd 100644 --- a/arch/arm64/boot/dts/qcom/sm6375.dtsi +++ b/arch/arm64/boot/dts/qcom/sm6375.dtsi @@ -1561,7 +1561,7 @@ remoteproc_adsp: remoteproc@a400000 { compatible = "qcom,sm6375-adsp-pas"; reg = <0 0x0a400000 0 0x100>; - interrupts-extended = <&intc GIC_SPI 282 IRQ_TYPE_LEVEL_HIGH>, + interrupts-extended = <&intc GIC_SPI 282 IRQ_TYPE_EDGE_RISING>, <&smp2p_adsp_in 0 IRQ_TYPE_EDGE_RISING>, <&smp2p_adsp_in 1 IRQ_TYPE_EDGE_RISING>, <&smp2p_adsp_in 2 IRQ_TYPE_EDGE_RISING>, diff --git a/arch/arm64/boot/dts/qcom/sm8250.dtsi b/arch/arm64/boot/dts/qcom/sm8250.dtsi index 39bd8f0eba1e..7f2333c9d17d 100644 --- a/arch/arm64/boot/dts/qcom/sm8250.dtsi +++ b/arch/arm64/boot/dts/qcom/sm8250.dtsi @@ -3062,7 +3062,7 @@ slpi: remoteproc@5c00000 { compatible = "qcom,sm8250-slpi-pas"; reg = <0 0x05c00000 0 0x4000>; - interrupts-extended = <&pdc 9 IRQ_TYPE_LEVEL_HIGH>, + interrupts-extended = <&pdc 9 IRQ_TYPE_EDGE_RISING>, <&smp2p_slpi_in 0 IRQ_TYPE_EDGE_RISING>, <&smp2p_slpi_in 1 IRQ_TYPE_EDGE_RISING>, <&smp2p_slpi_in 2 IRQ_TYPE_EDGE_RISING>, @@ -3766,7 +3766,7 @@ cdsp: remoteproc@8300000 { compatible = "qcom,sm8250-cdsp-pas"; reg = <0 0x08300000 0 0x10000>; - interrupts-extended = <&intc GIC_SPI 578 IRQ_TYPE_LEVEL_HIGH>, + interrupts-extended = <&intc GIC_SPI 578 
IRQ_TYPE_EDGE_RISING>, <&smp2p_cdsp_in 0 IRQ_TYPE_EDGE_RISING>, <&smp2p_cdsp_in 1 IRQ_TYPE_EDGE_RISING>, <&smp2p_cdsp_in 2 IRQ_TYPE_EDGE_RISING>, @@ -5928,7 +5928,7 @@ adsp: remoteproc@17300000 { compatible = "qcom,sm8250-adsp-pas"; reg = <0 0x17300000 0 0x100>; - interrupts-extended = <&pdc 6 IRQ_TYPE_LEVEL_HIGH>, + interrupts-extended = <&pdc 6 IRQ_TYPE_EDGE_RISING>, <&smp2p_adsp_in 0 IRQ_TYPE_EDGE_RISING>, <&smp2p_adsp_in 1 IRQ_TYPE_EDGE_RISING>, <&smp2p_adsp_in 2 IRQ_TYPE_EDGE_RISING>, From cb939b9b35426852896790aba2f18f46df34e596 Mon Sep 17 00:00:00 2001 From: Rajendra Nayak Date: Sun, 17 Mar 2024 18:59:18 +0530 Subject: [PATCH 088/313] arm64: dts: qcom: x1e80100: Fix the compatible for cluster idle states The compatibles for the cluster/domain idle states of x1e80100 are wrong; fix them. Fixes: af16b00578a7 ("arm64: dts: qcom: Add base X1E80100 dtsi and the QCP dts") Signed-off-by: Rajendra Nayak Reviewed-by: Abel Vesa Reviewed-by: Konrad Dybcio Link: https://lore.kernel.org/r/20240317132918.1068817-1-quic_rjendra@quicinc.com Signed-off-by: Bjorn Andersson --- arch/arm64/boot/dts/qcom/x1e80100.dtsi | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/arch/arm64/boot/dts/qcom/x1e80100.dtsi b/arch/arm64/boot/dts/qcom/x1e80100.dtsi index 8e517f76189e..6b40082bac68 100644 --- a/arch/arm64/boot/dts/qcom/x1e80100.dtsi +++ b/arch/arm64/boot/dts/qcom/x1e80100.dtsi @@ -284,7 +284,7 @@ CLUSTER_C4: cpu-sleep-0 { domain-idle-states { CLUSTER_CL4: cluster-sleep-0 { - compatible = "arm,idle-state"; + compatible = "domain-idle-state"; idle-state-name = "l2-ret"; arm,psci-suspend-param = <0x01000044>; entry-latency-us = <350>; @@ -293,7 +293,7 @@ CLUSTER_CL4: cluster-sleep-0 { }; CLUSTER_CL5: cluster-sleep-1 { - compatible = "arm,idle-state"; + compatible = "domain-idle-state"; idle-state-name = "ret-pll-off"; arm,psci-suspend-param = <0x01000054>; entry-latency-us = <2200>; From 8b8ec83a1d7d3b6605d9163d2e306971295a4ce8 Mon Sep 17 00:00:00 2001 From: Johan
Hovold Date: Wed, 6 Mar 2024 10:56:50 +0100 Subject: [PATCH 089/313] arm64: dts: qcom: sc8280xp: add missing PCIe minimum OPP Add the missing PCIe CX performance level votes to avoid relying on other drivers (e.g. USB or UFS) to maintain the nominal performance level required for Gen3 speeds. Fixes: 813e83157001 ("arm64: dts: qcom: sc8280xp/sa8540p: add PCIe2-4 nodes") Cc: stable@vger.kernel.org # 6.2 Reviewed-by: Konrad Dybcio Reviewed-by: Manivannan Sadhasivam Signed-off-by: Johan Hovold Link: https://lore.kernel.org/r/20240306095651.4551-5-johan+linaro@kernel.org Signed-off-by: Bjorn Andersson --- arch/arm64/boot/dts/qcom/sc8280xp.dtsi | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/arch/arm64/boot/dts/qcom/sc8280xp.dtsi b/arch/arm64/boot/dts/qcom/sc8280xp.dtsi index c9058c7fc1a3..d0f82e12289e 100644 --- a/arch/arm64/boot/dts/qcom/sc8280xp.dtsi +++ b/arch/arm64/boot/dts/qcom/sc8280xp.dtsi @@ -1774,6 +1774,7 @@ pcie4: pcie@1c00000 { reset-names = "pci"; power-domains = <&gcc PCIE_4_GDSC>; + required-opps = <&rpmhpd_opp_nom>; phys = <&pcie4_phy>; phy-names = "pciephy"; @@ -1872,6 +1873,7 @@ pcie3b: pcie@1c08000 { reset-names = "pci"; power-domains = <&gcc PCIE_3B_GDSC>; + required-opps = <&rpmhpd_opp_nom>; phys = <&pcie3b_phy>; phy-names = "pciephy"; @@ -1970,6 +1972,7 @@ pcie3a: pcie@1c10000 { reset-names = "pci"; power-domains = <&gcc PCIE_3A_GDSC>; + required-opps = <&rpmhpd_opp_nom>; phys = <&pcie3a_phy>; phy-names = "pciephy"; @@ -2071,6 +2074,7 @@ pcie2b: pcie@1c18000 { reset-names = "pci"; power-domains = <&gcc PCIE_2B_GDSC>; + required-opps = <&rpmhpd_opp_nom>; phys = <&pcie2b_phy>; phy-names = "pciephy"; @@ -2169,6 +2173,7 @@ pcie2a: pcie@1c20000 { reset-names = "pci"; power-domains = <&gcc PCIE_2A_GDSC>; + required-opps = <&rpmhpd_opp_nom>; phys = <&pcie2a_phy>; phy-names = "pciephy"; From ecc3ac293ed15ac2536e9fde2810154486f84010 Mon Sep 17 00:00:00 2001 From: Manivannan Sadhasivam Date: Mon, 18 Mar 2024 12:49:03 +0530 Subject: [PATCH 090/313] 
arm64: dts: qcom: sm8450: Fix the msi-map entries While adding the GIC ITS MSI support, it was found that the msi-map entries needed to be swapped to receive MSIs from the endpoint. But later it was identified that the swapping was needed due to a bug in the Qualcomm PCIe controller driver. And since the bug is now fixed with commit bf79e33cdd89 ("PCI: qcom: Enable BDF to SID translation properly"), let's fix the msi-map entries also to reflect the actual mapping in the hardware. Cc: stable@vger.kernel.org # 6.3: bf79e33cdd89 ("PCI: qcom: Enable BDF to SID translation properly") Fixes: ff384ab56f16 ("arm64: dts: qcom: sm8450: Use GIC-ITS for PCIe0 and PCIe1") Signed-off-by: Manivannan Sadhasivam Reviewed-by: Neil Armstrong Link: https://lore.kernel.org/r/20240318-pci-bdf-sid-fix-v1-1-acca6c5d9cf1@linaro.org Signed-off-by: Bjorn Andersson --- arch/arm64/boot/dts/qcom/sm8450.dtsi | 16 ++++------------ 1 file changed, 4 insertions(+), 12 deletions(-) diff --git a/arch/arm64/boot/dts/qcom/sm8450.dtsi b/arch/arm64/boot/dts/qcom/sm8450.dtsi index b86be34a912b..024d2653cc30 100644 --- a/arch/arm64/boot/dts/qcom/sm8450.dtsi +++ b/arch/arm64/boot/dts/qcom/sm8450.dtsi @@ -1777,12 +1777,8 @@ pcie0: pcie@1c00000 { ranges = <0x01000000 0x0 0x00000000 0x0 0x60200000 0x0 0x100000>, <0x02000000 0x0 0x60300000 0x0 0x60300000 0x0 0x3d00000>; - /* - * MSIs for BDF (1:0.0) only works with Device ID 0x5980. - * Hence, the IDs are swapped. - */ - msi-map = <0x0 &gic_its 0x5981 0x1>, - <0x100 &gic_its 0x5980 0x1>; + msi-map = <0x0 &gic_its 0x5980 0x1>, + <0x100 &gic_its 0x5981 0x1>; msi-map-mask = <0xff00>; interrupts = , , @@ -1900,12 +1896,8 @@ pcie1: pcie@1c08000 { ranges = <0x01000000 0x0 0x00000000 0x0 0x40200000 0x0 0x100000>, <0x02000000 0x0 0x40300000 0x0 0x40300000 0x0 0x1fd00000>; - /* - * MSIs for BDF (1:0.0) only works with Device ID 0x5a00. - * Hence, the IDs are swapped. 
- */ - msi-map = <0x0 &gic_its 0x5a01 0x1>, - <0x100 &gic_its 0x5a00 0x1>; + msi-map = <0x0 &gic_its 0x5a00 0x1>, + <0x100 &gic_its 0x5a01 0x1>; msi-map-mask = <0xff00>; interrupts = , , From 98a953fa2f4095b9777dbf59a3ed2ac3c0bf55cb Mon Sep 17 00:00:00 2001 From: Manivannan Sadhasivam Date: Mon, 18 Mar 2024 12:49:04 +0530 Subject: [PATCH 091/313] arm64: dts: qcom: sm8550: Fix the msi-map entries While adding the GIC ITS MSI support, it was found that the msi-map entries needed to be swapped to receive MSIs from the endpoint. But later it was identified that the swapping was needed due to a bug in the Qualcomm PCIe controller driver. And since the bug is now fixed with commit bf79e33cdd89 ("PCI: qcom: Enable BDF to SID translation properly"), let's fix the msi-map entries also to reflect the actual mapping in the hardware. Fixes: 114990ce3edf ("arm64: dts: qcom: sm8550: Use GIC-ITS for PCIe0 and PCIe1") Signed-off-by: Manivannan Sadhasivam Acked-by: Neil Armstrong Tested-by: Neil Armstrong # on SM8550-QRD Link: https://lore.kernel.org/r/20240318-pci-bdf-sid-fix-v1-2-acca6c5d9cf1@linaro.org Signed-off-by: Bjorn Andersson --- arch/arm64/boot/dts/qcom/sm8550.dtsi | 10 ++++------ 1 file changed, 4 insertions(+), 6 deletions(-) diff --git a/arch/arm64/boot/dts/qcom/sm8550.dtsi b/arch/arm64/boot/dts/qcom/sm8550.dtsi index 3904348075f6..3348bc06db48 100644 --- a/arch/arm64/boot/dts/qcom/sm8550.dtsi +++ b/arch/arm64/boot/dts/qcom/sm8550.dtsi @@ -1755,9 +1755,8 @@ pcie0: pcie@1c00000 { <&gem_noc MASTER_APPSS_PROC 0 &cnoc_main SLAVE_PCIE_0 0>; interconnect-names = "pcie-mem", "cpu-pcie"; - /* Entries are reversed due to the unusual ITS DeviceID encoding */ - msi-map = <0x0 &gic_its 0x1401 0x1>, - <0x100 &gic_its 0x1400 0x1>; + msi-map = <0x0 &gic_its 0x1400 0x1>, + <0x100 &gic_its 0x1401 0x1>; iommu-map = <0x0 &apps_smmu 0x1400 0x1>, <0x100 &apps_smmu 0x1401 0x1>; @@ -1867,9 +1866,8 @@ pcie1: pcie@1c08000 { <&gem_noc MASTER_APPSS_PROC 0 &cnoc_main SLAVE_PCIE_1 0>; 
interconnect-names = "pcie-mem", "cpu-pcie"; - /* Entries are reversed due to the unusual ITS DeviceID encoding */ - msi-map = <0x0 &gic_its 0x1481 0x1>, - <0x100 &gic_its 0x1480 0x1>; + msi-map = <0x0 &gic_its 0x1480 0x1>, + <0x100 &gic_its 0x1481 0x1>; iommu-map = <0x0 &apps_smmu 0x1480 0x1>, <0x100 &apps_smmu 0x1481 0x1>; From 6d3bd106ad60383e156f85401c44bf0e56ed6bfc Mon Sep 17 00:00:00 2001 From: Manivannan Sadhasivam Date: Mon, 18 Mar 2024 12:49:05 +0530 Subject: [PATCH 092/313] arm64: dts: qcom: sm8650: Fix the msi-map entries While adding the GIC ITS MSI support, it was found that the msi-map entries needed to be swapped to receive MSIs from the endpoint. But later it was identified that the swapping was needed due to a bug in the Qualcomm PCIe controller driver. And since the bug is now fixed with commit bf79e33cdd89 ("PCI: qcom: Enable BDF to SID translation properly"), let's fix the msi-map entries also to reflect the actual mapping in the hardware. Fixes: a33a532b3b1e ("arm64: dts: qcom: sm8650: Use GIC-ITS for PCIe0 and PCIe1") Signed-off-by: Manivannan Sadhasivam Acked-by: Neil Armstrong Tested-by: Neil Armstrong # on SM8650-QRD Link: https://lore.kernel.org/r/20240318-pci-bdf-sid-fix-v1-3-acca6c5d9cf1@linaro.org Signed-off-by: Bjorn Andersson --- arch/arm64/boot/dts/qcom/sm8650.dtsi | 10 ++++------ 1 file changed, 4 insertions(+), 6 deletions(-) diff --git a/arch/arm64/boot/dts/qcom/sm8650.dtsi b/arch/arm64/boot/dts/qcom/sm8650.dtsi index ba72d8f38420..eb117866e59f 100644 --- a/arch/arm64/boot/dts/qcom/sm8650.dtsi +++ b/arch/arm64/boot/dts/qcom/sm8650.dtsi @@ -2274,9 +2274,8 @@ &mc_virt SLAVE_EBI1 QCOM_ICC_TAG_ALWAYS>, interrupt-map-mask = <0 0 0 0x7>; #interrupt-cells = <1>; - /* Entries are reversed due to the unusual ITS DeviceID encoding */ - msi-map = <0x0 &gic_its 0x1401 0x1>, - <0x100 &gic_its 0x1400 0x1>; + msi-map = <0x0 &gic_its 0x1400 0x1>, + <0x100 &gic_its 0x1401 0x1>; msi-map-mask = <0xff00>; linux,pci-domain = <0>; @@ -2402,9 +2401,8 @@ 
&mc_virt SLAVE_EBI1 QCOM_ICC_TAG_ALWAYS>, interrupt-map-mask = <0 0 0 0x7>; #interrupt-cells = <1>; - /* Entries are reversed due to the unusual ITS DeviceID encoding */ - msi-map = <0x0 &gic_its 0x1481 0x1>, - <0x100 &gic_its 0x1480 0x1>; + msi-map = <0x0 &gic_its 0x1480 0x1>, + <0x100 &gic_its 0x1481 0x1>; msi-map-mask = <0xff00>; linux,pci-domain = <1>; From ecda8309098402f878c96184f29a1b7ec682d772 Mon Sep 17 00:00:00 2001 From: Maximilian Luz Date: Thu, 28 Mar 2024 03:21:57 +0100 Subject: [PATCH 093/313] arm64: dts: qcom: sc8180x: Fix ss_phy_irq for secondary USB controller The ACPI DSDT of the Surface Pro X (SQ2) specifies the interrupts for the secondary USB controller as Name (_CRS, ResourceTemplate () { Interrupt (ResourceConsumer, Level, ActiveHigh, Shared, ,, ) { 0x000000AA, } Interrupt (ResourceConsumer, Level, ActiveHigh, SharedAndWake, ,, ) { 0x000000A7, // hs_phy_irq: &intc GIC_SPI 136 } Interrupt (ResourceConsumer, Level, ActiveHigh, SharedAndWake, ,, ) { 0x00000228, // ss_phy_irq: &pdc 40 } Interrupt (ResourceConsumer, Edge, ActiveHigh, SharedAndWake, ,, ) { 0x0000020A, // dm_hs_phy_irq: &pdc 10 } Interrupt (ResourceConsumer, Edge, ActiveHigh, SharedAndWake, ,, ) { 0x0000020B, // dp_hs_phy_irq: &pdc 11 } }) Generally, the interrupts above 0x200 map to the PDC interrupts (as used in the devicetree) as ACPI_NUMBER - 0x200. Note that this lines up with dm_hs_phy_irq and dp_hs_phy_irq (as well as the interrupts for the primary USB controller). Based on the snippet above, ss_phy_irq should therefore be PDC 40 (= 0x28) and not PDC 7. (The latter is, according to ACPI, instead used as ss_phy_irq for port 0 of the multiport USB controller.) Fix this by setting ss_phy_irq to '&pdc 40'.
Fixes: b080f53a8f44 ("arm64: dts: qcom: sc8180x: Add remoteprocs, wifi and usb nodes") Signed-off-by: Maximilian Luz Reviewed-by: Bjorn Andersson Link: https://lore.kernel.org/r/20240328022224.336938-1-luzmaximilian@gmail.com Signed-off-by: Bjorn Andersson --- arch/arm64/boot/dts/qcom/sc8180x.dtsi | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/arm64/boot/dts/qcom/sc8180x.dtsi b/arch/arm64/boot/dts/qcom/sc8180x.dtsi index 32afc78d5b76..053f7861c3ce 100644 --- a/arch/arm64/boot/dts/qcom/sc8180x.dtsi +++ b/arch/arm64/boot/dts/qcom/sc8180x.dtsi @@ -2701,7 +2701,7 @@ usb_sec: usb@a8f8800 { resets = <&gcc GCC_USB30_SEC_BCR>; power-domains = <&gcc USB30_SEC_GDSC>; interrupts-extended = <&intc GIC_SPI 136 IRQ_TYPE_LEVEL_HIGH>, - <&pdc 7 IRQ_TYPE_LEVEL_HIGH>, + <&pdc 40 IRQ_TYPE_LEVEL_HIGH>, <&pdc 10 IRQ_TYPE_EDGE_BOTH>, <&pdc 11 IRQ_TYPE_EDGE_BOTH>; interrupt-names = "hs_phy_irq", "ss_phy_irq", From 7d045025a24b6336d444d359bd4312f351d017f9 Mon Sep 17 00:00:00 2001 From: Andy Shevchenko Date: Wed, 20 Mar 2024 21:43:03 +0200 Subject: [PATCH 094/313] gpio: tangier: Use correct type for the IRQ chip data IRQ chip data contains a pointer to the GPIO chip. Luckily the two pointers happen to be the same, but strictly speaking that is not guaranteed. Even so, it is better to fix this.
Fixes: ccf6fd6dcc86 ("gpio: merrifield: Introduce GPIO driver to support Merrifield") Signed-off-by: Andy Shevchenko --- drivers/gpio/gpio-tangier.c | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/drivers/gpio/gpio-tangier.c b/drivers/gpio/gpio-tangier.c index b75e0b12087a..4b29abafecf6 100644 --- a/drivers/gpio/gpio-tangier.c +++ b/drivers/gpio/gpio-tangier.c @@ -195,7 +195,8 @@ static int tng_gpio_set_config(struct gpio_chip *chip, unsigned int offset, static void tng_irq_ack(struct irq_data *d) { - struct tng_gpio *priv = irq_data_get_irq_chip_data(d); + struct gpio_chip *gc = irq_data_get_irq_chip_data(d); + struct tng_gpio *priv = gpiochip_get_data(gc); irq_hw_number_t gpio = irqd_to_hwirq(d); void __iomem *gisr; u8 shift; @@ -227,7 +228,8 @@ static void tng_irq_unmask_mask(struct tng_gpio *priv, u32 gpio, bool unmask) static void tng_irq_mask(struct irq_data *d) { - struct tng_gpio *priv = irq_data_get_irq_chip_data(d); + struct gpio_chip *gc = irq_data_get_irq_chip_data(d); + struct tng_gpio *priv = gpiochip_get_data(gc); irq_hw_number_t gpio = irqd_to_hwirq(d); tng_irq_unmask_mask(priv, gpio, false); @@ -236,7 +238,8 @@ static void tng_irq_mask(struct irq_data *d) static void tng_irq_unmask(struct irq_data *d) { - struct tng_gpio *priv = irq_data_get_irq_chip_data(d); + struct gpio_chip *gc = irq_data_get_irq_chip_data(d); + struct tng_gpio *priv = gpiochip_get_data(gc); irq_hw_number_t gpio = irqd_to_hwirq(d); gpiochip_enable_irq(&priv->chip, gpio); From d2d73a6dd17365c43e109263841f7c26da55cfb0 Mon Sep 17 00:00:00 2001 From: Christian Marangi Date: Fri, 12 Apr 2024 12:50:26 +0200 Subject: [PATCH 095/313] mtd: limit OTP NVMEM cell parse to non-NAND devices MTD OTP logic is very fragile when parsing NVMEM cells and can be problematic with some specific kinds of devices. The problem was discovered by e87161321a40 ("mtd: rawnand: macronix: OTP access for MX30LFxG18AC") where OTP support was added to a NAND device.
In the case of NAND devices, a node is required where the ECC info and all the fixed partitions are declared, and this causes the OTP codepath to parse this node as OTP NVMEM cells, making probe fail and the NAND device registration fail. MTD OTP parsing should have been limited to always using a compatible to prevent this error, by using a node with compatible "otp-user" or "otp-factory". NVMEM has, across the years, had various iterations on how cells could be declared in DT; in some old implementations, no_of_node should have been enabled, but now add_legacy_fixed_of_cells should be used to stop NVMEM from parsing child nodes as NVMEM cells. To fix this and limit any regression with other MTD devices that make use of declaring OTP as a direct child of the dev node, disable add_legacy_fixed_of_cells if we detect the MTD type is NAND. With this logic, the OTP NVMEM entry is correctly created with no cells, the MTD NAND is correctly probed, and partitions are correctly exposed. Fixes: 4b361cfa8624 ("mtd: core: add OTP nvmem provider support") Cc: # v6.7+ Signed-off-by: Christian Marangi Signed-off-by: Miquel Raynal Link: https://lore.kernel.org/linux-mtd/20240412105030.1598-1-ansuelsmth@gmail.com --- drivers/mtd/mtdcore.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/mtd/mtdcore.c b/drivers/mtd/mtdcore.c index 5887feb347a4..0de87bc63840 100644 --- a/drivers/mtd/mtdcore.c +++ b/drivers/mtd/mtdcore.c @@ -900,7 +900,7 @@ static struct nvmem_device *mtd_otp_nvmem_register(struct mtd_info *mtd, config.name = compatible; config.id = NVMEM_DEVID_AUTO; config.owner = THIS_MODULE; - config.add_legacy_fixed_of_cells = true; + config.add_legacy_fixed_of_cells = !mtd_type_is_nand(mtd); config.type = NVMEM_TYPE_OTP; config.root_only = true; config.ignore_wp = true; From abbb99301e9d2c91567e1893dbe34f2f8b52ea9a Mon Sep 17 00:00:00 2001 From: Thorsten Leemhuis Date: Tue, 9 Apr 2024 09:30:44 +0200 Subject: [PATCH 096/313] docs: verify/bisect: use git switch, tag
kernel, and various fixes Various small improvements and fixes: * Use the more modern 'git switch' instead of 'git checkout', which makes it more obvious what's happening (among others due to the --discard-changes parameter that is more clear than --force). * Provide a hint how a mainline version number and one from a stable series look like. * When trying to validate the bisection result with a revert, add a special tag to facilitate the identification. * Sync version numbers used in various examples for consistency: stick to 6.0.13, 6.0.15, and 6.1.5. * Fix a few typos and oddities. Signed-off-by: Thorsten Leemhuis Signed-off-by: Jonathan Corbet Link: https://lore.kernel.org/r/85029aa004447b0eeb5043fb014630f2acafacec.1712647788.git.linux@leemhuis.info --- .../verify-bugs-and-bisect-regressions.rst | 117 ++++++++++-------- 1 file changed, 67 insertions(+), 50 deletions(-) diff --git a/Documentation/admin-guide/verify-bugs-and-bisect-regressions.rst b/Documentation/admin-guide/verify-bugs-and-bisect-regressions.rst index d3504826f401..c999e40c79ab 100644 --- a/Documentation/admin-guide/verify-bugs-and-bisect-regressions.rst +++ b/Documentation/admin-guide/verify-bugs-and-bisect-regressions.rst @@ -38,8 +38,8 @@ aspects, all of which might be essential in your present case.]* **In case you want to check if a bug is present in code currently supported by developers**, execute just the *preparations* and *segment 1*; while doing so, consider the newest Linux kernel you regularly use to be the 'working' kernel. -In the following example that's assumed to be 6.0.13, which is why the sources -of 6.0 will be used to prepare the .config file. +In the following example that's assumed to be 6.0, which is why its sources +will be used to prepare the .config file. **In case you face a regression**, follow the steps at least till the end of *segment 2*. 
Then you can submit a preliminary report -- or continue with @@ -61,7 +61,7 @@ will be considered the 'good' release and used to prepare the .config file. cd ~/linux/ git remote add -t master stable \ https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git - git checkout --detach v6.0 + git switch --detach v6.0 # * Hint: if you used an existing clone, ensure no stale .config is around. make olddefconfig # * Ensure the former command picked the .config of the 'working' kernel. @@ -87,7 +87,7 @@ will be considered the 'good' release and used to prepare the .config file. a) Checking out latest mainline code:: cd ~/linux/ - git checkout --force --detach mainline/master + git switch --discard-changes --detach mainline/master b) Build, install, and boot a kernel:: @@ -125,7 +125,7 @@ will be considered the 'good' release and used to prepare the .config file. a) Start by checking out the sources of the 'good' version:: cd ~/linux/ - git checkout --force --detach v6.0 + git switch --discard-changes --detach v6.0 b) Build, install, and boot a kernel as described earlier in *segment 1, section b* -- just feel free to skip the 'du' commands, as you have a rough @@ -157,11 +157,12 @@ will be considered the 'good' release and used to prepare the .config file. works with the newly built kernel. If it does, tell Git by executing ``git bisect good``; if it does not, run ``git bisect bad`` instead. - All three commands will make Git checkout another commit; then re-execute + All three commands will make Git check out another commit; then re-execute this step (e.g. build, install, boot, and test a kernel to then tell Git the outcome). Do so again and again until Git shows which commit broke things. If you run short of disk space during this process, check the - "Supplementary tasks" section below. + section 'Supplementary tasks: cleanup during and after the process' + below. 
d) Once your finished the bisection, put a few things away:: @@ -172,12 +173,15 @@ will be considered the 'good' release and used to prepare the .config file. e) Try to verify the bisection result:: - git checkout --force --detach mainline/master + git switch --discard-changes --detach mainline/master git revert --no-edit cafec0cacaca0 + cp ~/kernel-config-working .config + ./scripts/config --set-str CONFIG_LOCALVERSION '-local-cafec0cacaca0-reverted' This is optional, as some commits are impossible to revert. But if the second command worked flawlessly, build, install, and boot one more kernel - kernel, which should not show the regression. + kernel; just this time skip the first command copying the base .config file + over, as that already has been taken care of. * **Supplementary tasks**: cleanup during and after the process. @@ -208,7 +212,7 @@ Step-by-step guide on how to verify bugs and bisect regressions =============================================================== This guide describes how to set up your own Linux kernels for investigating bugs -or regressions you intent to report. How far you want to follow the instructions +or regressions you intend to report. How far you want to follow the instructions depends on your issue: Execute all steps till the end of *segment 1* to **verify if your kernel problem @@ -240,12 +244,17 @@ to get things rolling again. For further details on how to report Linux kernel issues or regressions check out Documentation/admin-guide/reporting-issues.rst, which works in conjunction with this document. It among others explains why you need to verify bugs with -the latest 'mainline' kernel, even if you face a problem with a kernel from a -'stable/longterm' series; for users facing a regression it also explains that -sending a preliminary report after finishing segment 2 might be wise, as the -regression and its culprit might be known already. 
For further details on -what actually qualifies as a regression check out -Documentation/admin-guide/reporting-regressions.rst. +the latest 'mainline' kernel (e.g. versions like 6.0, 6.1-rc1, or 6.1-rc6), +even if you face a problem with a kernel from a 'stable/longterm' series +(say 6.0.13). + +For users facing a regression that document also explains why sending a +preliminary report after segment 2 might be wise, as the regression and its +culprit might be known already. For further details on what actually qualifies +as a regression check out Documentation/admin-guide/reporting-regressions.rst. + +If you run into any problems while following this guide or have ideas how to +improve it, :ref:`please let the kernel developers know `. .. _introprep_bissbs: @@ -286,7 +295,7 @@ Preparations: set up everything to build your own kernels Do you follow this guide to verify if a bug is present in the code developers care for? Then consider the mainline release your 'working' kernel (the newest one you regularly use) is based on to be the 'good' version; if your 'working' - kernel for example is 6.0.11, then your 'good' kernel is 6.0. + kernel for example is 6.0.13, then your 'good' kernel is 6.0. In case you face a regression, it depends on the version range where the regression was introduced: @@ -295,14 +304,14 @@ Preparations: set up everything to build your own kernels 6.1-rc1? Then henceforth regard 6.0 as the last known 'good' version and 6.1-rc1 as the first 'bad' one. - * Some function stopped working when updating from 6.0.11 to 6.1.4? Then for - the time being consider 6.0 as the last 'good' version and 6.1.4 as + * Some function stopped working when updating from 6.0.13 to 6.1.5? Then for + the time being consider 6.0 as the last 'good' version and 6.1.5 as the 'bad' one. Note, at this point it is merely assumed that 6.0 is fine; this assumption will be checked in segment 2. - * A feature you used in 6.0.11 does not work at all or worse in 6.1.13? 
In + * A feature you used in 6.0.13 does not work at all or worse in 6.0.15? In that case you want to bisect within a stable/longterm series: consider - 6.0.11 as the last known 'good' version and 6.0.13 as the first 'bad' + 6.0.13 as the last known 'good' version and 6.0.15 as the first 'bad' one. Note, in this case you still want to compile and test a mainline kernel as explained in segment 1: the outcome will determine if you need to report your issue to the regular developers or the stable team. @@ -367,7 +376,7 @@ Preparations: set up everything to build your own kernels * Start preparing a kernel build configuration (the '.config' file). Before doing so, ensure you are still running the 'working' kernel an earlier - step told you to boot; if you are unsure, check the current kernel release + step told you to boot; if you are unsure, check the current kernelrelease identifier using ``uname -r``. Afterwards check out the source code for the version earlier established as @@ -375,7 +384,7 @@ Preparations: set up everything to build your own kernels the version number in this and all later Git commands needs to be prefixed with a 'v':: - git checkout --detach v6.0 + git switch --discard-changes --detach v6.0 Now create a build configuration file:: @@ -505,7 +514,7 @@ be a waste of time. [:ref:`details`] * Check out the latest Linux codebase:: cd ~/linux/ - git checkout --force --detach mainline/master + git switch --discard-changes --detach mainline/master [:ref:`details`] @@ -617,7 +626,7 @@ be a waste of time. [:ref:`details`] cd ~/linux/ git remote set-branches --add stable linux-6.0.y git fetch stable - git checkout --force --detach linux-6.0.y + git switch --discard-changes --detach linux-6.0.y Now use the checked out code to build and install another kernel using the commands the earlier steps already described in more detail:: @@ -669,7 +678,7 @@ otherwise would be a waste of time. 
[:ref:`details`] 'good' (once again assumed to be 6.0 here):: cd ~/linux/ - git checkout --detach v6.0 + git switch --discard-changes --detach v6.0 Now use the checked out code to configure, build, and install another kernel using the commands the previous subsection explained in more detail:: @@ -703,7 +712,7 @@ Segment 3: perform the bisection and validate the result With all the preparations and precaution builds taken care of, you are now ready to begin the bisection. This will make you build quite a few kernels -- usually about 15 in case you encountered a regression when updating to a newer series -(say from 6.0.11 to 6.1.3). But do not worry, due to the trimmed build +(say from 6.0.13 to 6.1.5). But do not worry, due to the trimmed build configuration created earlier this works a lot faster than many people assume: overall on average it will often just take about 10 to 15 minutes to compile each kernel on commodity x86 machines. @@ -745,7 +754,7 @@ each kernel on commodity x86 machines. If compilation fails for some reason, run ``git bisect skip`` and restart executing the stack of commands from the beginning. - In case you skipped the "test latest codebase" step in the guide, check its + In case you skipped the 'test latest codebase' step in the guide, check its description as for why the 'df [...]' and 'make -s kernelrelease [...]' commands are here. @@ -823,16 +832,16 @@ each kernel on commodity x86 machines. Begin by checking out the latest codebase depending on the range you bisected: * Did you face a regression within a stable/longterm series (say between - 6.0.11 and 6.0.13) that does not happen in mainline? Then check out the + 6.0.13 and 6.0.15) that does not happen in mainline? 
Then check out the latest codebase for the affected series like this:: git fetch stable - git checkout --force --detach linux-6.0.y + git switch --discard-changes --detach linux-6.0.y * In all other cases check out latest mainline:: git fetch mainline - git checkout --force --detach mainline/master + git switch --discard-changes --detach mainline/master If you bisected a regression within a stable/longterm series that also happens in mainline, there is one more thing to do: look up the mainline @@ -846,21 +855,27 @@ each kernel on commodity x86 machines. git revert --no-edit cafec0cacaca0 - If that fails, give up trying and move on to the next step. But if it works, - build a kernel again using the familiar command sequence:: + If that fails, give up trying and move on to the next step; if it works, + adjust the tag to facilitate the identification and prevent accidentally + overwriting another kernel:: cp ~/kernel-config-working .config + ./scripts/config --set-str CONFIG_LOCALVERSION '-local-cafec0cacaca0-reverted' + + Build a kernel using the familiar command sequence, just without copying the + base .config over:: + make olddefconfig && - make -j $(nproc --all) && + make -j $(nproc --all) # * Check if the free space suffices holding another kernel: df -h /boot/ /lib/modules/ sudo make modules_install command -v installkernel && sudo make install - Make -s kernelrelease | tee -a ~/kernels-built + make -s kernelrelease | tee -a ~/kernels-built reboot - Now check one last time if the feature that made you perform a bisection work - with that kernel. + Now check one last time if the feature that made you perform a bisection works + with that kernel: if everything went well, it should not show the regression. [:ref:`details`] @@ -934,10 +949,12 @@ This concludes the step-by-step guide. Did you run into trouble following any of the above steps not cleared up by the reference section below? Did you spot errors? Or do you have ideas how to -improve the guide? 
Then please take a moment and let the maintainer of this +improve the guide? + +If any of that applies, please take a moment and let the maintainer of this document know by email (Thorsten Leemhuis ), ideally while CCing the Linux docs mailing list (linux-doc@vger.kernel.org). Such feedback is -vital to improve this document further, which is in everybody's interest, as it +vital to improve this text further, which is in everybody's interest, as it will enable more people to master the task described here -- and hopefully also improve similar guides inspired by this one. @@ -1059,18 +1076,18 @@ Bisection range Establishing the range of commits to be checked is mostly straightforward, except when a regression occurred when switching from a release of one stable -series to a release of a later series (e.g. from 6.0.11 to 6.1.4). In that case +series to a release of a later series (e.g. from 6.0.13 to 6.1.5). In that case Git will need some hand holding, as there is no straight line of descent. That's because with the release of 6.0 mainline carried on to 6.1 while the stable series 6.0.y branched to the side. It's therefore theoretically possible -that the issue you face with 6.1.4 only worked in 6.0.11, as it was fixed by a +that the issue you face with 6.1.5 only worked in 6.0.13, as it was fixed by a commit that went into one of the 6.0.y releases, but never hit mainline or the 6.1.y series. Thankfully that normally should not happen due to the way the stable/longterm maintainers maintain the code. It's thus pretty safe to assume 6.0 as a 'good' kernel. That assumption will be tested anyway, as that kernel will be built and tested in the segment '2' of this guide; Git would force you -to do this as well, if you tried bisecting between 6.0.11 and 6.1.13. +to do this as well, if you tried bisecting between 6.0.13 and 6.1.5. [:ref:`back to step-by-step guide `] @@ -1117,7 +1134,7 @@ These commands install a few packages that are often, but not always needed. 
You for example might want to skip installing the development headers for ncurses, which you will only need in case you later might want to adjust the kernel build configuration using make the targets 'menuconfig' or 'nconfig'; likewise omit -the headers of Qt6 is you do not plan to adjust the .config using 'xconfig'. +the headers of Qt6 if you do not plan to adjust the .config using 'xconfig'. You furthermore might need additional libraries and their development headers for tasks not covered in this guide -- for example when building utilities from @@ -1184,7 +1201,7 @@ First, execute the following command to retrieve the latest mainline codebase:: https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git Now deepen your clone's history to the second predecessor of the mainline -release of your 'good' version. In case the latter are 6.0 or 6.0.11, 5.19 would +release of your 'good' version. In case the latter are 6.0 or 6.0.13, 5.19 would be the first predecessor and 5.18 the second -- hence deepen the history up to that version:: @@ -1490,7 +1507,7 @@ highly recommended for these reasons: Your report might be ignored if you send it to the wrong party -- and even when you get a reply there is a decent chance that developers tell you to - evaluate which of the two cases it is before they take a closer look. + evaluate which of the two cases it is before they take a closer look. [:ref:`back to step-by-step guide `] @@ -1552,8 +1569,8 @@ by modifying your search terms or using another line from the error messages. In the end, most issues you run into have likely been encountered and reported by others already. That includes issues where the cause is not your -system, but lies in the code. If you run into one of those, you might thus find a -solution (e.g. a patch) or workaround for your issue, too. +system, but lies in the code. If you run into one of those, you might thus find +a solution (e.g. a patch) or workaround for your issue, too. 
Package your kernel up ~~~~~~~~~~~~~~~~~~~~~~ @@ -1767,8 +1784,8 @@ multitude of reasons why this might happen. Some ideas where to look: Note, if you found and fixed problems with the .config file, you want to use it to build another kernel from the latest codebase, as your earlier tests with -mainline and the latest version from an affected stable/longterm series were most -likely flawed. +mainline and the latest version from an affected stable/longterm series were +most likely flawed. [:ref:`back to step-by-step guide `] @@ -1911,7 +1928,7 @@ Now remove the boot entry for the kernel from your bootloader's configuration; the steps to do that vary quite a bit between Linux distributions. Note, be careful with wildcards like '*' when deleting files or directories -for kernels manually: you might accidentally remove files of a 6.0.11 kernel +for kernels manually: you might accidentally remove files of a 6.0.13 kernel when all you want is to remove 6.0 or 6.0.1. [:ref:`back to step-by-step guide `] From 932c9a5398a7b41cb8e7a0264e5470133b373e11 Mon Sep 17 00:00:00 2001 From: Thorsten Leemhuis Date: Tue, 9 Apr 2024 09:30:45 +0200 Subject: [PATCH 097/313] docs: verify/bisect: add and fetch stable branches ahead of time Add and fetch all required stable branches ahead of time. This fixes a bug, as readers that wanted to bisect a regression within a stable or longterm series otherwise did not have them available at the right time. This way also matches the flow somewhat better and avoids some "if you haven't already added it" phrases that otherwise become necessary in future changes. 
Signed-off-by: Thorsten Leemhuis Signed-off-by: Jonathan Corbet Link: https://lore.kernel.org/r/57dcf312959476abe6151bf3d35eb79e3e9a83d1.1712647788.git.linux@leemhuis.info --- .../verify-bugs-and-bisect-regressions.rst | 31 +++++++++++-------- 1 file changed, 18 insertions(+), 13 deletions(-) diff --git a/Documentation/admin-guide/verify-bugs-and-bisect-regressions.rst b/Documentation/admin-guide/verify-bugs-and-bisect-regressions.rst index c999e40c79ab..06278501a4bd 100644 --- a/Documentation/admin-guide/verify-bugs-and-bisect-regressions.rst +++ b/Documentation/admin-guide/verify-bugs-and-bisect-regressions.rst @@ -136,8 +136,7 @@ will be considered the 'good' release and used to prepare the .config file. * **Segment 3**: perform and validate the bisection. - a) In case your 'broken' version is a stable/longterm release, add the Git - branch holding it:: + a) Retrieve the sources for your 'bad' version:: git remote set-branches --add stable linux-6.1.y git fetch stable @@ -371,6 +370,21 @@ Preparations: set up everything to build your own kernels [:ref:`details`] +.. _stablesources_bissbs: + +* Retrieve the sources for any stable or longterm series you might need. + + Is the version you earlier established as 'bad' a stable or longterm release? + Then download the code for the series it belongs to ('linux-6.1.y' in this + example):: + + git remote set-branches --add stable linux-6.1.y + git fetch stable + + If the version earlier established as 'good' is from a different stable or + longterm series (say 6.0.13), repeat the previous step, but this time for the + branch holding the series the 'good' version belongs to (e.g. linux-6.0.y). + .. _oldconfig_bissbs: * Start preparing a kernel build configuration (the '.config' file). @@ -620,12 +634,10 @@ be a waste of time. [:ref:`details`] reproduce it with the mainline kernel you just built? One that according to the `front page of kernel.org `_ is still supported? 
Then check if the latest codebase for the particular series might already fix the - problem. To do so, add the stable series Git branch for your 'good' kernel - (again, this here is assumed to be 6.0) and check out the latest version:: + problem. To do so, check out that series latest version (again, this here is + assumed to be 6.0):: cd ~/linux/ - git remote set-branches --add stable linux-6.0.y - git fetch stable git switch --discard-changes --detach linux-6.0.y Now use the checked out code to build and install another kernel using the @@ -717,13 +729,6 @@ configuration created earlier this works a lot faster than many people assume: overall on average it will often just take about 10 to 15 minutes to compile each kernel on commodity x86 machines. -* In case your 'bad' version is a stable/longterm release (say 6.1.5), add its - stable branch, unless you already did so earlier:: - - cd ~/linux/ - git remote set-branches --add stable linux-6.1.y - git fetch stable - .. _bisectstart_bissbs: * Start the bisection and tell Git about the versions earlier established as From 453de3207ff3534dd7165a32a73dd28cc9e8f14f Mon Sep 17 00:00:00 2001 From: Thorsten Leemhuis Date: Tue, 9 Apr 2024 09:30:46 +0200 Subject: [PATCH 098/313] docs: verify/bisect: proper headlines and more spacing Various small improvements and fixes: * Separate ref links from their target with a space for better readability. * Add a proper heading for the note at the end of the step-by-step guide. * Use proper 3rd and 4th level headlines in the reference section and add short intros for the 2nd level headlines that lacked one. 
Signed-off-by: Thorsten Leemhuis Signed-off-by: Jonathan Corbet Link: https://lore.kernel.org/r/f59f0f235a2192ed93899a7338153e4cb71075f0.1712647788.git.linux@leemhuis.info --- .../verify-bugs-and-bisect-regressions.rst | 194 ++++++++++-------- 1 file changed, 113 insertions(+), 81 deletions(-) diff --git a/Documentation/admin-guide/verify-bugs-and-bisect-regressions.rst b/Documentation/admin-guide/verify-bugs-and-bisect-regressions.rst index 06278501a4bd..355c2cea5230 100644 --- a/Documentation/admin-guide/verify-bugs-and-bisect-regressions.rst +++ b/Documentation/admin-guide/verify-bugs-and-bisect-regressions.rst @@ -29,7 +29,7 @@ The essence of the process (aka 'TL;DR') ======================================== *[If you are new to building or bisecting Linux, ignore this section and head -over to the* ":ref:`step-by-step guide`" *below. It utilizes +over to the* ':ref:`step-by-step guide `' *below. It utilizes the same commands as this section while describing them in brief fashion. The steps are nevertheless easy to follow and together with accompanying entries in a reference section mention many alternatives, pitfalls, and additional @@ -224,15 +224,15 @@ report; instead of the latter your could also head straight on and follow *segment 3* to **perform a bisection** for a full-fledged regression report developers are obliged to act upon. - :ref:`Preparations: set up everything to build your own kernels.` + :ref:`Preparations: set up everything to build your own kernels `. - :ref:`Segment 1: try to reproduce the problem with the latest codebase.` + :ref:`Segment 1: try to reproduce the problem with the latest codebase `. - :ref:`Segment 2: check if the kernels you build work fine.` + :ref:`Segment 2: check if the kernels you build work fine `. - :ref:`Segment 3: perform a bisection and validate the result.` + :ref:`Segment 3: perform a bisection and validate the result `. 
- :ref:`Supplementary tasks: cleanup during and after following this guide.` + :ref:`Supplementary tasks: cleanup during and after following this guide `. The steps in each segment illustrate the important aspects of the process, while a comprehensive reference section holds additional details for almost all of the @@ -260,12 +260,14 @@ improve it, :ref:`please let the kernel developers know `. Preparations: set up everything to build your own kernels --------------------------------------------------------- +The following steps lay the groundwork for all further tasks. + .. _backup_bissbs: * Create a fresh backup and put system repair and restore tools at hand, just to be prepared for the unlikely case of something going sideways. - [:ref:`details`] + [:ref:`details `] .. _vanilla_bissbs: @@ -273,7 +275,7 @@ Preparations: set up everything to build your own kernels builds them automatically. That includes but is not limited to DKMS, openZFS, VirtualBox, and Nvidia's graphics drivers (including the GPLed kernel module). - [:ref:`details`] + [:ref:`details `] .. _secureboot_bissbs: @@ -284,7 +286,7 @@ Preparations: set up everything to build your own kernels their restrictions through a process initiated by ``mokutil --disable-validation``. - [:ref:`details`] + [:ref:`details `] .. _rangecheck_bissbs: @@ -319,13 +321,13 @@ Preparations: set up everything to build your own kernels throughout this guide will refer to the last kernel that has been working fine.* - [:ref:`details`] + [:ref:`details `] .. _bootworking_bissbs: * Boot into the 'working' kernel and briefly use the apparently broken feature. - [:ref:`details`] + [:ref:`details `] .. _diskspace_bissbs: @@ -335,7 +337,7 @@ Preparations: set up everything to build your own kernels debug symbols: both explain approaches reducing the amount of space, which should allow you to master these tasks with about 4 Gigabytes free space. - [:ref:`details`] + [:ref:`details `] .. 
_buildrequires_bissbs: @@ -345,7 +347,7 @@ Preparations: set up everything to build your own kernels reference section shows how to quickly install those on various popular Linux distributions. - [:ref:`details`] + [:ref:`details `] .. _sources_bissbs: @@ -368,7 +370,7 @@ Preparations: set up everything to build your own kernels git remote add -t master stable \ https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git - [:ref:`details`] + [:ref:`details `] .. _stablesources_bissbs: @@ -421,7 +423,7 @@ Preparations: set up everything to build your own kernels 'make olddefconfig' again and check if it now picked up the right config file as base. - [:ref:`details`] + [:ref:`details `] .. _localmodconfig_bissbs: @@ -455,7 +457,7 @@ Preparations: set up everything to build your own kernels spending much effort on, as long as it boots and allows to properly test the feature that causes trouble. - [:ref:`details`] + [:ref:`details `] .. _tagging_bissbs: @@ -465,7 +467,7 @@ Preparations: set up everything to build your own kernels ./scripts/config --set-str CONFIG_LOCALVERSION '-local' ./scripts/config -e CONFIG_LOCALVERSION_AUTO - [:ref:`details`] + [:ref:`details `] .. _debugsymbols_bissbs: @@ -484,7 +486,7 @@ Preparations: set up everything to build your own kernels ./scripts/config -d DEBUG_INFO -d DEBUG_INFO_DWARF_TOOLCHAIN_DEFAULT \ -d DEBUG_INFO_DWARF4 -d DEBUG_INFO_DWARF5 -e CONFIG_DEBUG_INFO_NONE - [:ref:`details`] + [:ref:`details `] .. _configmods_bissbs: @@ -494,14 +496,14 @@ Preparations: set up everything to build your own kernels * Are you running Debian? Then you want to avoid known problems by performing additional adjustments explained in the reference section. - [:ref:`details`]. + [:ref:`details `]. * If you want to influence other aspects of the configuration, do so now using your preferred tool. 
Note, to use make targets like 'menuconfig' or 'nconfig', you will need to install the development files of ncurses; for 'xconfig' you likewise need the Qt5 or Qt6 headers. - [:ref:`details`]. + [:ref:`details `]. .. _saveconfig_bissbs: @@ -511,7 +513,7 @@ Preparations: set up everything to build your own kernels make olddefconfig cp .config ~/kernel-config-working - [:ref:`details`] + [:ref:`details `] .. _introlatestcheck_bissbs: @@ -521,7 +523,7 @@ Segment 1: try to reproduce the problem with the latest codebase The following steps verify if the problem occurs with the code currently supported by developers. In case you face a regression, it also checks that the problem is not caused by some .config change, as reporting the issue then would -be a waste of time. [:ref:`details`] +be a waste of time. [:ref:`details `] .. _checkoutmaster_bissbs: @@ -530,7 +532,7 @@ be a waste of time. [:ref:`details`] cd ~/linux/ git switch --discard-changes --detach mainline/master - [:ref:`details`] + [:ref:`details `] .. _build_bissbs: @@ -545,7 +547,7 @@ be a waste of time. [:ref:`details`] reference section for alternatives, which obviously will require other steps to install as well. - [:ref:`details`] + [:ref:`details `] .. _install_bissbs: @@ -578,7 +580,7 @@ be a waste of time. [:ref:`details`] down: if you will build more kernels as described in segment 2 and 3, you will have to perform those again after executing ``command -v installkernel [...]``. - [:ref:`details`] + [:ref:`details `] .. _storagespace_bissbs: @@ -591,7 +593,7 @@ be a waste of time. [:ref:`details`] Write down or remember those two values for later: they enable you to prevent running out of disk space accidentally during a bisection. - [:ref:`details`] + [:ref:`details `] .. _kernelrelease_bissbs: @@ -618,7 +620,7 @@ be a waste of time. [:ref:`details`] If that command does not return '0', check the reference section, as the cause for this might interfere with your testing. 
- [:ref:`details`] + [:ref:`details `] .. _recheckbroken_bissbs: @@ -626,7 +628,7 @@ be a waste of time. [:ref:`details`] out the instructions in the reference section to ensure nothing went sideways during your tests. - [:ref:`details`] + [:ref:`details `] .. _recheckstablebroken_bissbs: @@ -662,12 +664,12 @@ be a waste of time. [:ref:`details`] Now verify if this kernel is showing the problem. - [:ref:`details`] + [:ref:`details `] Do you follow this guide to verify if a problem is present in the code currently supported by Linux kernel developers? Then you are done at this point. If you later want to remove the kernel you just built, check out -:ref:`Supplementary tasks: cleanup during and after following this guide`. +:ref:`Supplementary tasks: cleanup during and after following this guide `. In case you face a regression, move on and execute at least the next segment as well. @@ -679,7 +681,7 @@ Segment 2: check if the kernels you build work fine In case of a regression, you now want to ensure the trimmed configuration file you created earlier works as expected; a bisection with the .config file -otherwise would be a waste of time. [:ref:`details`] +otherwise would be a waste of time. [:ref:`details `] .. _recheckworking_bissbs: @@ -714,7 +716,7 @@ otherwise would be a waste of time. [:ref:`details`] Now check if this kernel works as expected; if not, consult the reference section for further instructions. - [:ref:`details`] + [:ref:`details `] .. _introbisect_bissbs: @@ -739,7 +741,7 @@ each kernel on commodity x86 machines. git bisect good v6.0 git bisect bad v6.1.5 - [:ref:`details`] + [:ref:`details `] .. _bisectbuild_bissbs: @@ -768,7 +770,7 @@ each kernel on commodity x86 machines. totally normal to see release identifiers like '6.0-rc1-local-gcafec0cacaca0' if you bisect between versions 6.1 and 6.2 for example. - [:ref:`details`] + [:ref:`details `] .. _bisecttest_bissbs: @@ -808,7 +810,7 @@ each kernel on commodity x86 machines. 
might need to scroll up to see the message mentioning the culprit; alternatively, run ``git bisect log > ~/bisection-log``. - [:ref:`details`] + [:ref:`details `] .. _bisectlog_bissbs: @@ -820,7 +822,7 @@ each kernel on commodity x86 machines. cp .config ~/bisection-config-culprit git bisect reset - [:ref:`details`] + [:ref:`details `] .. _revert_bissbs: @@ -882,7 +884,7 @@ each kernel on commodity x86 machines. Now check one last time if the feature that made you perform a bisection works with that kernel: if everything went well, it should not show the regression. - [:ref:`details`] + [:ref:`details `] .. _introclosure_bissbs: @@ -923,7 +925,7 @@ space might run out. kernel image and related files behind; in that case remove them as described in the reference section. - [:ref:`details`] + [:ref:`details `] .. _finishingtouch_bissbs: @@ -946,11 +948,15 @@ space might run out. the version considered 'good', and the last three or four you compiled during the actual bisection process. - [:ref:`details`] + [:ref:`details `] + .. _submit_improvements: -This concludes the step-by-step guide. +Conclusion +---------- + +You have reached the end of the step-by-step guide. Did you run into trouble following any of the above steps not cleared up by the reference section below? Did you spot errors? Or do you have ideas how to @@ -970,10 +976,20 @@ Reference section for the step-by-step guide This section holds additional information for almost all the items in the above step-by-step guide. +Preparations for building your own kernels +------------------------------------------ + + *The steps in this section lay the groundwork for all further tests.* + [:ref:`... `] + +The steps in all later sections of this guide depend on those described here. + +[:ref:`back to step-by-step guide `]. + .. _backup_bisref: Prepare for emergencies ------------------------ +~~~~~~~~~~~~~~~~~~~~~~~ *Create a fresh backup and put system repair and restore tools at hand.* [:ref:`... 
`] @@ -988,7 +1004,7 @@ for something going sideways, even if that should not happen. .. _vanilla_bisref: Remove anything related to externally maintained kernel modules ---------------------------------------------------------------- +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ *Remove all software that depends on externally developed kernel drivers or builds them automatically.* [:ref:`...`] @@ -1006,7 +1022,7 @@ explains in more detail. .. _secureboot_bisref: Deal with techniques like Secure Boot -------------------------------------- +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ *On platforms with 'Secure Boot' or similar techniques, prepare everything to ensure the system will permit your self-compiled kernel to boot later.* @@ -1043,7 +1059,7 @@ Afterwards, permit MokManager to reboot the machine. .. _bootworking_bisref: Boot the last kernel that was working -------------------------------------- +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ *Boot into the last working kernel and briefly recheck if the feature that regressed really works.* [:ref:`...`] @@ -1056,7 +1072,7 @@ the right thing. .. _diskspace_bisref: Space requirements ------------------- +~~~~~~~~~~~~~~~~~~ *Ensure to have enough free space for building Linux.* [:ref:`... `] @@ -1074,7 +1090,7 @@ space by quite a few gigabytes. .. _rangecheck_bisref: Bisection range ---------------- +~~~~~~~~~~~~~~~ *Determine the kernel versions considered 'good' and 'bad' throughout this guide.* [:ref:`...`] @@ -1099,7 +1115,7 @@ to do this as well, if you tried bisecting between 6.0.13 and 6.1.15. .. _buildrequires_bisref: Install build requirements --------------------------- +~~~~~~~~~~~~~~~~~~~~~~~~~~ *Install all software required to build a Linux kernel.* [:ref:`...`] @@ -1150,7 +1166,7 @@ the kernel's tools/ directory. .. 
_sources_bisref: Download the sources using Git ------------------------------- +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ *Retrieve the Linux mainline sources.* [:ref:`...`] @@ -1170,7 +1186,7 @@ work better for you: .. _sources_bundle_bisref: Downloading Linux mainline sources using a bundle -~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +""""""""""""""""""""""""""""""""""""""""""""""""" Use the following commands to retrieve the Linux mainline sources using a bundle:: @@ -1241,7 +1257,7 @@ Note, shallow clones have a few peculiar characteristics: .. _oldconfig_bisref: Start defining the build configuration for your kernel ------------------------------------------------------- +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ *Start preparing a kernel build configuration (the '.config' file).* [:ref:`... `] @@ -1301,7 +1317,7 @@ that file to the build machine and store it as ~/linux/.config; afterwards run .. _localmodconfig_bisref: Trim the build configuration for your kernel --------------------------------------------- +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ *Disable any kernel modules apparently superfluous for your setup.* [:ref:`... `] @@ -1350,7 +1366,7 @@ step-by-step guide mentions:: .. _tagging_bisref: Tag the kernels about to be build ---------------------------------- +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ *Ensure all the kernels you will build are clearly identifiable using a special tag and a unique version identifier.* [:ref:`... `] @@ -1366,7 +1382,7 @@ confusing during the bisection. .. _debugsymbols_bisref: Decide to enable or disable debug symbols ------------------------------------------ +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ *Decide how to handle debug symbols.* [:ref:`... `] @@ -1395,7 +1411,7 @@ explains this process in more detail. .. 
_configmods_bisref: Adjust build configuration --------------------------- +~~~~~~~~~~~~~~~~~~~~~~~~~~ *Check if you may want or need to adjust some other kernel configuration options:* @@ -1406,7 +1422,7 @@ kernel configuration options. .. _configmods_distros_bisref: Distro specific adjustments -~~~~~~~~~~~~~~~~~~~~~~~~~~~ +""""""""""""""""""""""""""" *Are you running* [:ref:`... `] @@ -1431,7 +1447,7 @@ when following this guide on a few commodity distributions. .. _configmods_individual_bisref: Individual adjustments -~~~~~~~~~~~~~~~~~~~~~~ +"""""""""""""""""""""" *If you want to influence the other aspects of the configuration, do so now.* [:ref:`... `] @@ -1448,13 +1464,13 @@ is missing. .. _saveconfig_bisref: Put the .config file aside --------------------------- +~~~~~~~~~~~~~~~~~~~~~~~~~~ *Reprocess the .config after the latest changes and store it in a safe place.* [:ref:`... `] Put the .config you prepared aside, as you want to copy it back to the build -directory every time during this guide before you start building another +directory every time during this guide before you start building another kernel. That's because going back and forth between different versions can alter .config files in odd ways; those occasionally cause side effects that could confuse testing or in some cases render the result of your bisection @@ -1464,8 +1480,8 @@ meaningless. .. _introlatestcheck_bisref: -Try to reproduce the regression ------------------------------------------ +Try to reproduce the problem with the latest codebase +----------------------------------------------------- *Verify the regression is not caused by some .config change and check if it still occurs with the latest codebase.* [:ref:`... `] @@ -1519,21 +1535,21 @@ highly recommended for these reasons: .. _checkoutmaster_bisref: Check out the latest Linux codebase ------------------------------------ +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ *Check out the latest Linux codebase.* - [:ref:`... `] + [:ref:`... 
`] In case you later want to recheck if an ever newer codebase might fix the problem, remember to run that ``git fetch --shallow-exclude [...]`` command again mentioned earlier to update your local Git repository. -[:ref:`back to step-by-step guide `] +[:ref:`back to step-by-step guide `] .. _build_bisref: Build your kernel ------------------ +~~~~~~~~~~~~~~~~~ *Build the image and the modules of your first kernel using the config file you prepared.* [:ref:`... `] @@ -1543,7 +1559,7 @@ yourself. Another subsection explains how to directly package your kernel up as deb, rpm or tar file. Dealing with build errors -~~~~~~~~~~~~~~~~~~~~~~~~~ +""""""""""""""""""""""""" When a build error occurs, it might be caused by some aspect of your machine's setup that often can be fixed quickly; other times though the problem lies in @@ -1578,7 +1594,7 @@ system, but lies in the code. If you run into one of those, you might thus find a solution (e.g. a patch) or workaround for your issue, too. Package your kernel up -~~~~~~~~~~~~~~~~~~~~~~ +"""""""""""""""""""""" The step-by-step guide uses the default make targets (e.g. 'bzImage' and 'modules' on x86) to build the image and the modules of your kernel, which later @@ -1609,7 +1625,7 @@ distribution's kernel packages. .. _install_bisref: Put the kernel in place ------------------------ +~~~~~~~~~~~~~~~~~~~~~~~ *Install the kernel you just built.* [:ref:`... `] @@ -1652,7 +1668,7 @@ process. Afterwards add your kernel to your bootloader configuration and reboot. .. _storagespace_bisref: Storage requirements per kernel -------------------------------- +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ *Check how much storage space the kernel, its modules, and other related files like the initramfs consume.* [:ref:`... `] @@ -1673,7 +1689,7 @@ need to look in different places. .. 
_tainted_bisref: Check if your newly built kernel considers itself 'tainted' ------------------------------------------------------------ +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ *Check if the kernel marked itself as 'tainted'.* [:ref:`... `] @@ -1692,7 +1708,7 @@ interest, as your testing might be flawed otherwise. .. _recheckbroken_bisref: Check the kernel built from a recent mainline codebase ------------------------------------------------------- +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ *Verify if your bug occurs with the newly built kernel.* [:ref:`... `] @@ -1718,7 +1734,7 @@ the kernel you built from the latest codebase. These are the most frequent: .. _recheckstablebroken_bisref: Check the kernel built from the latest stable/longterm codebase ---------------------------------------------------------------- +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ *Are you facing a regression within a stable/longterm release, but failed to reproduce it with the kernel you just built using the latest mainline sources? @@ -1763,7 +1779,7 @@ ensure the kernel version you assumed to be 'good' earlier in the process (e.g. .. _recheckworking_bisref: Build your own version of the 'good' kernel -------------------------------------------- +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ *Build your own variant of the working kernel and check if the feature that regressed works as expected with it.* [:ref:`... `] @@ -1794,10 +1810,20 @@ most likely flawed. [:ref:`back to step-by-step guide `] +Perform a bisection and validate the result +------------------------------------------- + + *With all the preparations and precaution builds taken care of, you are now + ready to begin the bisection.* [:ref:`... `] + +The steps in this segment perform and validate the bisection. + +[:ref:`back to step-by-step guide `]. + .. 
_bisectstart_bisref: Start the bisection -------------------- +~~~~~~~~~~~~~~~~~~~ *Start the bisection and tell Git about the versions earlier established as 'good' and 'bad'.* [:ref:`... `] @@ -1811,7 +1837,7 @@ for you to test. .. _bisectbuild_bisref: Build a kernel from the bisection point ---------------------------------------- +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ *Build, install, and boot a kernel from the code Git checked out using the same commands you used earlier.* [:ref:`... `] @@ -1839,7 +1865,7 @@ There are two things worth of note here: .. _bisecttest_bisref: Bisection checkpoint --------------------- +~~~~~~~~~~~~~~~~~~~~ *Check if the feature that regressed works in the kernel you just built.* [:ref:`... `] @@ -1853,7 +1879,7 @@ will be for nothing. .. _bisectlog_bisref: Put the bisection log away --------------------------- +~~~~~~~~~~~~~~~~~~~~~~~~~~ *Store Git's bisection log and the current .config file in a safe place.* [:ref:`... `] @@ -1873,7 +1899,7 @@ ask for it after you report the regression. .. _revert_bisref: Try reverting the culprit -------------------------- +~~~~~~~~~~~~~~~~~~~~~~~~~ *Try reverting the culprit on top of the latest codebase to see if this fixes your regression.* [:ref:`... `] @@ -1891,14 +1917,20 @@ succeeds, test that kernel version instead. [:ref:`back to step-by-step guide `] +Cleanup steps during and after following this guide +--------------------------------------------------- -Supplementary tasks: cleanup during and after the bisection ------------------------------------------------------------ + *During and after following this guide you might want or need to remove some + of the kernels you installed.* [:ref:`... `] + +The steps in this section describe clean-up procedures. + +[:ref:`back to step-by-step guide `]. .. 
_makeroom_bisref: Cleaning up during the bisection --------------------------------- +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ *To remove one of the kernels you installed, look up its 'kernelrelease' identifier.* [:ref:`... `] @@ -1939,7 +1971,7 @@ when all you want is to remove 6.0 or 6.0.1. [:ref:`back to step-by-step guide `] Cleaning up after the bisection -------------------------------- +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ .. _finishingtouch_bisref: From a421835a2a327f2b3472dcb755adb57d0f82e478 Mon Sep 17 00:00:00 2001 From: Thorsten Leemhuis Date: Tue, 9 Apr 2024 09:30:47 +0200 Subject: [PATCH 099/313] docs: verify/bisect: explain testing reverts, patches and newer code Rename 'Supplementary tasks' to 'Complementary tasks' while introducing a section 'Optional tasks: test reverts, patches, or later versions': the latter is something readers occasionally will have to do after reporting a bug and thus is best covered here. Signed-off-by: Thorsten Leemhuis Signed-off-by: Jonathan Corbet Link: https://lore.kernel.org/r/dacf26a4c48e9e8f04ecbc77e0a74c9b2a6a1103.1712647788.git.linux@leemhuis.info --- .../verify-bugs-and-bisect-regressions.rst | 128 ++++++++++++++++-- 1 file changed, 115 insertions(+), 13 deletions(-) diff --git a/Documentation/admin-guide/verify-bugs-and-bisect-regressions.rst b/Documentation/admin-guide/verify-bugs-and-bisect-regressions.rst index 355c2cea5230..1987c827211f 100644 --- a/Documentation/admin-guide/verify-bugs-and-bisect-regressions.rst +++ b/Documentation/admin-guide/verify-bugs-and-bisect-regressions.rst @@ -160,7 +160,7 @@ will be considered the 'good' release and used to prepare the .config file. this step (e.g. build, install, boot, and test a kernel to then tell Git the outcome). Do so again and again until Git shows which commit broke things. 
If you run short of disk space during this process, check the - section 'Supplementary tasks: cleanup during and after the process' + section 'Complementary tasks: cleanup during and after the process' below. d) Once your finished the bisection, put a few things away:: @@ -182,7 +182,7 @@ will be considered the 'good' release and used to prepare the .config file. kernel; just this time skip the first command copying the base .config file over, as that already has been taken care off. -* **Supplementary tasks**: cleanup during and after the process. +* **Complementary tasks**: cleanup during and after the process. a) To avoid running out of disk space during a bisection, you might need to remove some kernels you built earlier. You most likely want to keep those @@ -205,6 +205,18 @@ will be considered the 'good' release and used to prepare the .config file. the kernels you built earlier and later you might want to keep around for a week or two. +* **Optional task**: test a debug patch or a proposed fix later:: + + git fetch mainline + git switch --discard-changes --detach mainline/master + git apply /tmp/foobars-proposed-fix-v1.patch + cp ~/kernel-config-working .config + ./scripts/config --set-str CONFIG_LOCALVERSION '-local-foobars-fix-v1' + + Build, install, and boot a kernel as described in *segment 1, section b* -- + but this time omit the first command copying the build configuration over, + as that has been taken care of already. + .. _introguide_bissbs: Step-by-step guide on how to verify bugs and bisect regressions @@ -232,7 +244,9 @@ developers are obliged to act upon. :ref:`Segment 3: perform a bisection and validate the result `. - :ref:`Supplementary tasks: cleanup during and after following this guide `. + :ref:`Complementary tasks: cleanup during and after following this guide `. + + :ref:`Optional tasks: test reverts, patches, or later versions `. 
The steps in each segment illustrate the important aspects of the process, while a comprehensive reference section holds additional details for almost all of the @@ -669,7 +683,7 @@ be a waste of time. [:ref:`details `] Do you follow this guide to verify if a problem is present in the code currently supported by Linux kernel developers? Then you are done at this point. If you later want to remove the kernel you just built, check out -:ref:`Supplementary tasks: cleanup during and after following this guide `. +:ref:`Complementary tasks: cleanup during and after following this guide `. In case you face a regression, move on and execute at least the next segment as well. @@ -888,7 +902,7 @@ each kernel on commodity x86 machines. .. _introclosure_bissbs: -Supplementary tasks: cleanup during and after the bisection +Complementary tasks: cleanup during and after the bisection ----------------------------------------------------------- During and after following this guide you might want or need to remove some of @@ -950,6 +964,81 @@ space might run out. [:ref:`details `] +.. _introoptional_bissbs: + +Optional: test reverts, patches, or later versions +-------------------------------------------------- + +While or after reporting a bug, you might want or potentially will be asked to +test reverts, debug patches, proposed fixes, or other versions. In that case +follow these instructions. + +* Update your Git clone and check out the latest code. 
+ * In case you want to test mainline, fetch its latest changes before checking + its code out:: + + git fetch mainline + git switch --discard-changes --detach mainline/master + + * In case you want to test a stable or longterm kernel, first add the branch + holding the series you are interested in (6.2 in the example), unless you + already did so earlier:: + + git remote set-branches --add stable linux-6.2.y + + Then fetch the latest changes and check out the latest version from the + series:: + + git fetch stable + git switch --discard-changes --detach stable/linux-6.2.y + +* Copy your kernel build configuration over:: + + cp ~/kernel-config-working .config + +* Your next step depends on what you want to do: + + * In case you just want to test the latest codebase, head to the next step, + you are already all set. + + * In case you want to test if a revert fixes an issue, revert one or multiple + changes by specifying their commit ids:: + + git revert --no-edit cafec0cacaca0 + + Now give that kernel a special tag to facilitate its identification and + prevent accidentally overwriting another kernel:: + + ./scripts/config --set-str CONFIG_LOCALVERSION '-local-cafec0cacaca0-reverted' + + * In case you want to test a patch, store the patch in a file like + '/tmp/foobars-proposed-fix-v1.patch' and apply it like this:: + + git apply /tmp/foobars-proposed-fix-v1.patch + + In case of multiple patches, repeat this step with the others.
+ + Now give that kernel a special tag to facilitate its identification and + prevent accidentally overwriting another kernel:: + + ./scripts/config --set-str CONFIG_LOCALVERSION '-local-foobars-fix-v1' + +* Build a kernel using the familiar commands, just without copying the kernel + build configuration over, as that has been taken care of already:: + + make olddefconfig && + make -j $(nproc --all) + # * Check if the free space suffices holding another kernel: + df -h /boot/ /lib/modules/ + sudo make modules_install + command -v installkernel && sudo make install + make -s kernelrelease | tee -a ~/kernels-built + reboot + +* Now verify you booted the newly built kernel and check it. + +[:ref:`details `] .. _submit_improvements: @@ -1986,20 +2075,33 @@ build artifacts and the Linux sources, but will leave the Git repository (~/linux/.git/) behind -- a simple ``git reset --hard`` thus will bring the sources back. -Removing the repository as well would likely be unwise at this point: there is a -decent chance developers will ask you to build another kernel to perform -additional tests. This is often required to debug an issue or check proposed -fixes. Before doing so you want to run the ``git fetch mainline`` command again -followed by ``git checkout mainline/master`` to bring your clone up to date and -checkout the latest codebase. Then apply the patch using ``git apply -`` or ``git am `` and build yet another kernel using the -familiar commands. +Removing the repository as well would likely be unwise at this point: there +is a decent chance developers will ask you to build another kernel to +perform additional tests -- like testing a debug patch or a proposed fix. +Details on how to perform those can be found in the section :ref:`Optional +tasks: test reverts, patches, or later versions `. Additional tests are also the reason why you want to keep the ~/kernel-config-working file around for a few weeks. [:ref:`back to step-by-step guide `] +..
_introoptional_bisref: + +Test reverts, patches, or later versions +---------------------------------------- + + *While or after reporting a bug, you might want or potentially will be asked + to test reverts, patches, proposed fixes, or other versions.* + [:ref:`... `] + +All the commands used in this section should be pretty straightforward, so +there is not much to add except one thing: when setting a kernel tag as +instructed, ensure it is not much longer than the one used in the example, as +problems will arise if the kernelrelease identifier exceeds 63 characters. + +[:ref:`back to step-by-step guide `]. + Additional reading material =========================== From 2bcfd71e8dfca5047f9fbcc2e2ba62c5bb39aa3a Mon Sep 17 00:00:00 2001 From: Thorsten Leemhuis Date: Tue, 9 Apr 2024 09:30:48 +0200 Subject: [PATCH 100/313] docs: verify/bisect: describe how to use a build host Describe how to build kernels on another system (with and without cross-compiling), as building locally can be quite painful on some slow systems. This is done in an add-on section, as it would make the step-by-step guide too complicated if this special case were described there. Signed-off-by: Thorsten Leemhuis Signed-off-by: Jonathan Corbet Link: https://lore.kernel.org/r/288160cb4769e46a3280250ca71da0abc4aa002d.1712647788.git.linux@leemhuis.info --- .../verify-bugs-and-bisect-regressions.rst | 78 ++++++++++++++++++- 1 file changed, 74 insertions(+), 4 deletions(-) diff --git a/Documentation/admin-guide/verify-bugs-and-bisect-regressions.rst b/Documentation/admin-guide/verify-bugs-and-bisect-regressions.rst index 1987c827211f..6193c7976427 100644 --- a/Documentation/admin-guide/verify-bugs-and-bisect-regressions.rst +++ b/Documentation/admin-guide/verify-bugs-and-bisect-regressions.rst @@ -276,6 +276,10 @@ Preparations: set up everything to build your own kernels The following steps lay the groundwork for all further tasks.
+Note: the instructions assume you are building and testing on the same +machine; if you want to compile the kernel on another system, check +:ref:`Build kernels on a different machine ` below. + .. _backup_bissbs: * Create a fresh backup and put system repair and restore tools at hand, just @@ -2103,11 +2107,77 @@ problems will arise if the kernelrelease identifier exceeds 63 characters. [:ref:`back to step-by-step guide `]. -Additional reading material -=========================== +Additional information +====================== -Further sources ---------------- +.. _buildhost_bis: + +Build kernels on a different machine +------------------------------------ + +To compile kernels on another system, slightly alter the step-by-step guide's +instructions: + +* Start following the guide on the machine where you want to install and test + the kernels later. + +* After executing ':ref:`Boot into the working kernel and briefly use the + apparently broken feature `', save the list of loaded + modules to a file using ``lsmod > ~/test-machine-lsmod``. Then locate the + build configuration for the running kernel (see ':ref:`Start defining the + build configuration for your kernel `' for hints on where + to find it) and store it as '~/test-machine-config-working'. Transfer both + files to the home directory of your build host. + +* Continue the guide on the build host (e.g. with ':ref:`Ensure to have enough + free space for building [...] `'). + +* When you reach ':ref:`Start preparing a kernel build configuration[...] 
+ `': before running ``make olddefconfig`` for the first time, + execute the following command to base your configuration on the one from the + test machine's 'working' kernel:: + + cp ~/test-machine-config-working ~/linux/.config + +* During the next step to ':ref:`disable any apparently superfluous kernel + modules `' use the following command instead:: + + yes '' | make LSMOD=~/test-machine-lsmod localmodconfig + +* Continue the guide, but ignore the instructions outlining how to compile, + install, and reboot into a kernel every time they come up. Instead build + like this:: + + cp ~/kernel-config-working .config + make olddefconfig && + make -j $(nproc --all) targz-pkg + + This will generate a gzipped tar file whose name is printed in the last + line shown; for example, a kernel with the kernelrelease identifier + '6.0.0-rc1-local-g928a87efa423' built for x86 machines usually will + be stored as '~/linux/linux-6.0.0-rc1-local-g928a87efa423-x86.tar.gz'. + + Copy that file to your test machine's home directory. + +* Switch to the test machine to check if you have enough space to hold another + kernel. Then extract the file you transferred:: + + sudo tar -xvzf ~/linux-6.0.0-rc1-local-g928a87efa423-x86.tar.gz -C / + + Afterwards :ref:`generate the initramfs and add the kernel to your boot + loader's configuration `; on some distributions the following + command will take care of both these tasks:: + + sudo /sbin/installkernel 6.0.0-rc1-local-g928a87efa423 /boot/vmlinuz-6.0.0-rc1-local-g928a87efa423 + + Now reboot and ensure you started the intended kernel. + +This approach even works when building for another architecture: just install +cross-compilers and add the appropriate parameters to every invocation of make +(e.g. ``make ARCH=arm64 CROSS_COMPILE=aarch64-linux-gnu- [...]``).
+ +Additional reading material +--------------------------- * The `man page for 'git bisect' `_ and `fighting regressions with 'git bisect' `_ From 8d939ae349343b55984ea821164e2be526d48cd1 Mon Sep 17 00:00:00 2001 From: Thorsten Leemhuis Date: Tue, 9 Apr 2024 09:30:49 +0200 Subject: [PATCH 101/313] docs: verify/bisect: stable regressions: first stable, then mainline Rearrange the instructions so that readers facing a regression within a stable or longterm series first test its latest release before testing mainline. This is less scary for some people. It also reduces the chance that something goes sideways for readers that compile their first kernel, as mainline can cause slightly more trouble. Signed-off-by: Thorsten Leemhuis Signed-off-by: Jonathan Corbet Link: https://lore.kernel.org/r/efd3cb9c68db450091021326bf9c334553df0ec2.1712647788.git.linux@leemhuis.info --- .../verify-bugs-and-bisect-regressions.rst | 91 +++++++++++-------- 1 file changed, 51 insertions(+), 40 deletions(-) diff --git a/Documentation/admin-guide/verify-bugs-and-bisect-regressions.rst b/Documentation/admin-guide/verify-bugs-and-bisect-regressions.rst index 6193c7976427..c389d4fd7599 100644 --- a/Documentation/admin-guide/verify-bugs-and-bisect-regressions.rst +++ b/Documentation/admin-guide/verify-bugs-and-bisect-regressions.rst @@ -309,31 +309,32 @@ machine; if you want to compile the kernel on another system, check .. _rangecheck_bissbs: * Determine the kernel versions considered 'good' and 'bad' throughout this - guide. + guide: - Do you follow this guide to verify if a bug is present in the code developers - care for? Then consider the mainline release your 'working' kernel (the newest - one you regularly use) is based on to be the 'good' version; if your 'working' - kernel for example is 6.0.13, then your 'good' kernel is 6.0. + * Do you follow this guide to verify if a bug is present in the code the + primary developers care for? 
Then consider the version of the newest kernel + you regularly use currently as 'good' (e.g. 6.0, 6.0.13, or 6.1-rc2). - In case you face a regression, it depends on the version range where the - regression was introduced: + * Do you face a regression, e.g. something broke or works worse after + switching to a newer kernel version? In that case it depends on the version + range during which the problem appeared: - * Something which used to work in Linux 6.0 broke when switching to Linux - 6.1-rc1? Then henceforth regard 6.0 as the last known 'good' version - and 6.1-rc1 as the first 'bad' one. + * Something regressed when updating from a stable/longterm release + (say 6.0.13) to a newer mainline series (like 6.1-rc7 or 6.1) or a + stable/longterm version based on one (say 6.1.5)? Then consider the + mainline release your working kernel is based on to be the 'good' + version (e.g. 6.0) and the first version to be broken as the 'bad' one + (e.g. 6.1-rc7, 6.1, or 6.1.5). Note, at this point it is merely assumed + that 6.0 is fine; this hypothesis will be checked in segment 2. - * Some function stopped working when updating from 6.0.13 to 6.1.5? Then for - the time being consider 6.0 as the last 'good' version and 6.1.5 as - the 'bad' one. Note, at this point it is merely assumed that 6.0 is fine; - this assumption will be checked in segment 2. + * Something regressed when switching from one mainline version (say 6.0) to + a later one (like 6.1-rc1) or a stable/longterm release based on it + (say 6.1.5)? Then regard the last working version (e.g. 6.0) as 'good' and + the first broken (e.g. 6.1-rc1 or 6.1.5) as 'bad'. - * A feature you used in 6.0.13 does not work at all or worse in 6.1.15? In - that case you want to bisect within a stable/longterm series: consider - 6.0.13 as the last known 'good' version and 6.0.15 as the first 'bad' - one. 
Note, in this case you still want to compile and test a mainline kernel - as explained in segment 1: the outcome will determine if you need to report - your issue to the regular developers or the stable team. + * Something regressed when updating within a stable/longterm series (say + from 6.0.13 to 6.0.15)? Then consider those versions as 'good' and 'bad' + (e.g. 6.0.13 and 6.0.15), as you need to bisect within that series. *Note, do not confuse 'good' version with 'working' kernel; the latter term throughout this guide will refer to the last kernel that has been working @@ -392,19 +393,13 @@ machine; if you want to compile the kernel on another system, check .. _stablesources_bissbs: -* Retrieve the sources for any stable or longterm series you might need. - - Is the version you earlier established as 'bad' a stable or longterm release? - Then download the code for the series it belongs to ('linux-6.1.y' in this - example):: +* Is one of the versions you earlier established as 'good' or 'bad' a stable or + longterm release (say 6.1.5)? Then download the code for the series it belongs + to ('linux-6.1.y' in this example):: git remote set-branches --add stable linux-6.1.y git fetch stable - If the version earlier established as 'good' is from a different stable or - longterm series (say 6.0.13), repeat the previous step, but this time for the - branch holding the series the 'good' version belongs to (e.g. linux-6.0.y). - .. _oldconfig_bissbs: * Start preparing a kernel build configuration (the '.config' file). @@ -545,10 +540,24 @@ be a waste of time. [:ref:`details `] .. _checkoutmaster_bissbs: -* Check out the latest Linux codebase:: +* Check out the latest Linux codebase. - cd ~/linux/ - git switch --discard-changes --detach mainline/master + * Are your 'good' and 'bad' versions from the same stable or longterm series? 
+ Then check the `front page of kernel.org `_: if it + lists a release from that series without an '[EOL]' tag, check out the series' + latest version ('linux-6.1.y' in the following example):: + + cd ~/linux/ + git switch --discard-changes --detach stable/linux-6.1.y + + Your series is unsupported if it is not listed or carries an 'end of life' + tag. In that case you might want to check if a successor series (say + linux-6.2.y) or mainline (see next point) fix the bug. + + * In all other cases, run:: + + cd ~/linux/ + git switch --discard-changes --detach mainline/master [:ref:`details `] @@ -650,15 +659,15 @@ be a waste of time. [:ref:`details `] .. _recheckstablebroken_bissbs: -* Are you facing a problem within a stable/longterm series, but failed to - reproduce it with the mainline kernel you just built? One that according to - the `front page of kernel.org `_ is still supported? Then - check if the latest codebase for the particular series might already fix the - problem. To do so, check out that series latest version (again, this here is - assumed to be 6.0):: +* Did you just build a stable or longterm kernel? And were you able to reproduce + the regression with it? Then you should test the latest mainline codebase as + well, because the result determines which developers the bug must be submitted + to. + + To prepare that test, check out current mainline:: cd ~/linux/ - git switch --discard-changes --detach linux-6.0.y + git switch --discard-changes --detach mainline/master Now use the checked out code to build and install another kernel using the commands the earlier steps already described in more detail:: @@ -680,7 +689,9 @@ be a waste of time. [:ref:`details `] uname -r cat /proc/sys/kernel/tainted - Now verify if this kernel is showing the problem. + Now verify if this kernel is showing the problem. If it does, then you need + to report the bug to the primary developers; if it does not, report it to the + stable team.
See Documentation/admin-guide/reporting-issues.rst for details. [:ref:`details `] From 74871791ffa9562d43567c5ff2ae93def3f39f65 Mon Sep 17 00:00:00 2001 From: Christian Brauner Date: Mon, 25 Mar 2024 09:34:36 +0100 Subject: [PATCH 102/313] ntfs3: serve as alias for the legacy ntfs driver Johan Hovold reported that removing the legacy ntfs driver broke boot for him since his fstab uses the legacy ntfs driver to access firmware from the original Windows partition. Use ntfs3 as an alias for legacy ntfs if CONFIG_NTFS_FS is selected. This is similar to how ext3 is treated. Link: https://lore.kernel.org/r/Zf2zPf5TO5oYt3I3@hovoldconsulting.com Link: https://lore.kernel.org/r/20240325-hinkriegen-zuziehen-d7e2c490427a@brauner Fixes: 7ffa8f3d3023 ("fs: Remove NTFS classic") Tested-by: Johan Hovold Cc: Matthew Wilcox (Oracle) Cc: Johan Hovold Signed-off-by: Christian Brauner --- fs/ntfs3/Kconfig | 9 +++++++++ fs/ntfs3/super.c | 31 +++++++++++++++++++++++++++++++ 2 files changed, 40 insertions(+) diff --git a/fs/ntfs3/Kconfig b/fs/ntfs3/Kconfig index cdfdf51e55d7..7bc31d69f680 100644 --- a/fs/ntfs3/Kconfig +++ b/fs/ntfs3/Kconfig @@ -46,3 +46,12 @@ config NTFS3_FS_POSIX_ACL NOTE: this is linux only feature. Windows will ignore these ACLs. If you don't know what Access Control Lists are, say N. + +config NTFS_FS + tristate "NTFS file system support" + select NTFS3_FS + select BUFFER_HEAD + select NLS + help + This config option is here only for backward compatibility. NTFS + filesystem is now handled by the NTFS3 driver. 
diff --git a/fs/ntfs3/super.c b/fs/ntfs3/super.c index 9df7c20d066f..8d2e51bae2cb 100644 --- a/fs/ntfs3/super.c +++ b/fs/ntfs3/super.c @@ -1798,6 +1798,35 @@ static struct file_system_type ntfs_fs_type = { .kill_sb = ntfs3_kill_sb, .fs_flags = FS_REQUIRES_DEV | FS_ALLOW_IDMAP, }; + +#if IS_ENABLED(CONFIG_NTFS_FS) +static struct file_system_type ntfs_legacy_fs_type = { + .owner = THIS_MODULE, + .name = "ntfs", + .init_fs_context = ntfs_init_fs_context, + .parameters = ntfs_fs_parameters, + .kill_sb = ntfs3_kill_sb, + .fs_flags = FS_REQUIRES_DEV | FS_ALLOW_IDMAP, +}; +MODULE_ALIAS_FS("ntfs"); + +static inline void register_as_ntfs_legacy(void) +{ + int err = register_filesystem(&ntfs_legacy_fs_type); + if (err) + pr_warn("ntfs3: Failed to register legacy ntfs filesystem driver: %d\n", err); +} + +static inline void unregister_as_ntfs_legacy(void) +{ + unregister_filesystem(&ntfs_legacy_fs_type); +} +#else +static inline void register_as_ntfs_legacy(void) {} +static inline void unregister_as_ntfs_legacy(void) {} +#endif + + // clang-format on static int __init init_ntfs_fs(void) @@ -1832,6 +1861,7 @@ static int __init init_ntfs_fs(void) goto out1; } + register_as_ntfs_legacy(); err = register_filesystem(&ntfs_fs_type); if (err) goto out; @@ -1849,6 +1879,7 @@ static void __exit exit_ntfs_fs(void) rcu_barrier(); kmem_cache_destroy(ntfs_inode_cachep); unregister_filesystem(&ntfs_fs_type); + unregister_as_ntfs_legacy(); ntfs3_exit_bitmap(); #ifdef CONFIG_PROC_FS From feafe59c897500e11becd238a30be1c33eb188a2 Mon Sep 17 00:00:00 2001 From: Kalle Valo Date: Thu, 11 Apr 2024 19:55:15 +0300 Subject: [PATCH 103/313] wifi: ath11k: use RCU when accessing struct inet6_dev::ac_list Commit c3718936ec47 ("ipv6: anycast: complete RCU handling of struct ifacaddr6") converted struct inet6_dev::ac_list to use RCU but missed that ath11k also accesses this list. 
Now sparse warns: drivers/net/wireless/ath/ath11k/mac.c:9145:21: warning: incorrect type in assignment (different address spaces) drivers/net/wireless/ath/ath11k/mac.c:9145:21: expected struct ifacaddr6 *ifaca6 drivers/net/wireless/ath/ath11k/mac.c:9145:21: got struct ifacaddr6 [noderef] __rcu *ac_list drivers/net/wireless/ath/ath11k/mac.c:9145:53: warning: incorrect type in assignment (different address spaces) drivers/net/wireless/ath/ath11k/mac.c:9145:53: expected struct ifacaddr6 *ifaca6 drivers/net/wireless/ath/ath11k/mac.c:9145:53: got struct ifacaddr6 [noderef] __rcu *aca_next Fix it by using rcu_dereference(). Also add a note that read_lock_bh() calls rcu_read_lock() which I was not aware of. Tested-on: WCN6855 hw2.0 PCI WLAN.HSP.1.1-03125-QCAHSPSWPL_V1_V2_SILICONZ_LITE-3.6510.37 Fixes: c3718936ec47 ("ipv6: anycast: complete RCU handling of struct ifacaddr6") Signed-off-by: Kalle Valo Acked-by: Jeff Johnson Signed-off-by: Kalle Valo Link: https://msgid.link/20240411165516.4070649-2-kvalo@kernel.org --- drivers/net/wireless/ath/ath11k/mac.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/drivers/net/wireless/ath/ath11k/mac.c b/drivers/net/wireless/ath/ath11k/mac.c index a6a37d67a50a..9f4bf41a3d41 100644 --- a/drivers/net/wireless/ath/ath11k/mac.c +++ b/drivers/net/wireless/ath/ath11k/mac.c @@ -9020,6 +9020,7 @@ static void ath11k_mac_op_ipv6_changed(struct ieee80211_hw *hw, offload = &arvif->arp_ns_offload; count = 0; + /* Note: read_lock_bh() calls rcu_read_lock() */ read_lock_bh(&idev->lock); memset(offload->ipv6_addr, 0, sizeof(offload->ipv6_addr)); @@ -9050,7 +9051,8 @@ static void ath11k_mac_op_ipv6_changed(struct ieee80211_hw *hw, } /* get anycast address */ - for (ifaca6 = idev->ac_list; ifaca6; ifaca6 = ifaca6->aca_next) { + for (ifaca6 = rcu_dereference(idev->ac_list); ifaca6; + ifaca6 = rcu_dereference(ifaca6->aca_next)) { if (count >= ATH11K_IPV6_MAX_COUNT) goto generate; From 48ef0ba12e6b77a1ce5d09c580c38855b090ae7c Mon Sep
17 00:00:00 2001 From: Ming Lei Date: Tue, 16 Apr 2024 08:56:33 +0800 Subject: [PATCH 104/313] dm: restore synchronous close of device mapper block device 'dmsetup remove' and 'dmsetup remove_all' require synchronous bdev release. Otherwise dm_lock_for_deletion() may return -EBUSY if the open count is > 0, because the open count is dropped in dm_blk_close() which occurs after fput() completes. So if dm_blk_close() is delayed because of asynchronous fput(), this device mapper device is skipped during remove, which is a regression. Fix the issue by using __fput_sync(). Also, DM device removal has long supported being made asynchronous by setting the DMF_DEFERRED_REMOVE flag on the DM device. So leverage using async fput() in close_table_device() if DMF_DEFERRED_REMOVE flag is set. Reported-by: Zhong Changhui Fixes: a28d893eb327 ("md: port block device access to file") Suggested-by: Christian Brauner Signed-off-by: Ming Lei [snitzer: edited commit header, use fput() if DMF_DEFERRED_REMOVE set] Signed-off-by: Mike Snitzer --- drivers/md/dm.c | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) diff --git a/drivers/md/dm.c b/drivers/md/dm.c index 56aa2a8b9d71..7d0746b37c8e 100644 --- a/drivers/md/dm.c +++ b/drivers/md/dm.c @@ -765,7 +765,7 @@ static struct table_device *open_table_device(struct mapped_device *md, return td; out_blkdev_put: - fput(bdev_file); + __fput_sync(bdev_file); out_free_td: kfree(td); return ERR_PTR(r); @@ -778,7 +778,13 @@ static void close_table_device(struct table_device *td, struct mapped_device *md { if (md->disk->slave_dir) bd_unlink_disk_holder(td->dm_dev.bdev, md->disk); - fput(td->dm_dev.bdev_file); + + /* Leverage async fput() if DMF_DEFERRED_REMOVE set */ + if (unlikely(test_bit(DMF_DEFERRED_REMOVE, &md->flags))) + fput(td->dm_dev.bdev_file); + else + __fput_sync(td->dm_dev.bdev_file); + put_dax(td->dm_dev.dax_dev); list_del(&td->list); kfree(td); From 61231eb8113ce47991f35024f9c20810b37996bf Mon Sep 17 00:00:00 2001 From:
Alexey Brodkin Date: Fri, 29 Mar 2024 10:36:50 +0000 Subject: [PATCH 105/313] ARC: [plat-hsdk]: Remove misplaced interrupt-cells property "gmac" node stands for just an ordinary Ethernet controller, which is by no means a provider of interrupts, i.e. it doesn't serve as an interrupt controller, thus "#interrupt-cells" property doesn't belong to it and so we remove it. Fixes: ------------>8------------ DTC arch/arc/boot/dts/hsdk.dtb arch/arc/boot/dts/hsdk.dts:207.23-235.5: Warning (interrupt_provider): /soc/ethernet@8000: '#interrupt-cells' found, but node is not an interrupt provider arch/arc/boot/dts/hsdk.dtb: Warning (interrupt_map): Failed prerequisite 'interrupt_provider' ------------>8------------ Reported-by: Vineet Gupta Signed-off-by: Alexey Brodkin Signed-off-by: Vineet Gupta --- arch/arc/boot/dts/hsdk.dts | 1 - 1 file changed, 1 deletion(-) diff --git a/arch/arc/boot/dts/hsdk.dts b/arch/arc/boot/dts/hsdk.dts index 6691f4255077..41b980df862b 100644 --- a/arch/arc/boot/dts/hsdk.dts +++ b/arch/arc/boot/dts/hsdk.dts @@ -205,7 +205,6 @@ dmac_cfg_clk: dmac-gpu-cfg-clk { }; gmac: ethernet@8000 { - #interrupt-cells = <1>; compatible = "snps,dwmac"; reg = <0x8000 0x2000>; interrupts = <10>; From 7044dcff8301b29269016ebd17df27c4736140d2 Mon Sep 17 00:00:00 2001 From: Benno Lossin Date: Mon, 1 Apr 2024 18:52:50 +0000 Subject: [PATCH 106/313] rust: macros: fix soundness issue in `module!` macro MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The `module!` macro creates glue code that are called by C to initialize the Rust modules using the `Module::init` function. Part of this glue code are the local functions `__init` and `__exit` that are used to initialize/destroy the Rust module. These functions are safe and also visible to the Rust mod in which the `module!` macro is invoked. This means that they can be called by other safe Rust code. 
But since they contain `unsafe` blocks that rely on only being called at the right time, this is a soundness issue. Wrap these generated functions inside of two private modules, this guarantees that the public functions cannot be called from the outside. Make the safe functions `unsafe` and add SAFETY comments. Cc: stable@vger.kernel.org Reported-by: Björn Roy Baron Closes: https://github.com/Rust-for-Linux/linux/issues/629 Fixes: 1fbde52bde73 ("rust: add `macros` crate") Signed-off-by: Benno Lossin Reviewed-by: Wedson Almeida Filho Link: https://lore.kernel.org/r/20240401185222.12015-1-benno.lossin@proton.me [ Moved `THIS_MODULE` out of the private-in-private modules since it should remain public, as Dirk Behme noticed [1]. Capitalized comments, avoided newline in non-list SAFETY comments and reworded to add Reported-by and newline. ] Link: https://rust-for-linux.zulipchat.com/#narrow/stream/291565-Help/topic/x/near/433512583 [1] Signed-off-by: Miguel Ojeda --- rust/macros/module.rs | 190 +++++++++++++++++++++++++----------------- 1 file changed, 115 insertions(+), 75 deletions(-) diff --git a/rust/macros/module.rs b/rust/macros/module.rs index 27979e582e4b..acd0393b5095 100644 --- a/rust/macros/module.rs +++ b/rust/macros/module.rs @@ -199,17 +199,6 @@ pub(crate) fn module(ts: TokenStream) -> TokenStream { /// Used by the printing macros, e.g. [`info!`]. const __LOG_PREFIX: &[u8] = b\"{name}\\0\"; - /// The \"Rust loadable module\" mark. - // - // This may be best done another way later on, e.g. as a new modinfo - // key or a new section. For the moment, keep it simple. - #[cfg(MODULE)] - #[doc(hidden)] - #[used] - static __IS_RUST_MODULE: () = (); - - static mut __MOD: Option<{type_}> = None; - // SAFETY: `__this_module` is constructed by the kernel at load time and will not be // freed until the module is unloaded. 
#[cfg(MODULE)] @@ -221,81 +210,132 @@ pub(crate) fn module(ts: TokenStream) -> TokenStream { kernel::ThisModule::from_ptr(core::ptr::null_mut()) }}; - // Loadable modules need to export the `{{init,cleanup}}_module` identifiers. - /// # Safety - /// - /// This function must not be called after module initialization, because it may be - /// freed after that completes. - #[cfg(MODULE)] - #[doc(hidden)] - #[no_mangle] - #[link_section = \".init.text\"] - pub unsafe extern \"C\" fn init_module() -> core::ffi::c_int {{ - __init() - }} + // Double nested modules, since then nobody can access the public items inside. + mod __module_init {{ + mod __module_init {{ + use super::super::{type_}; - #[cfg(MODULE)] - #[doc(hidden)] - #[no_mangle] - pub extern \"C\" fn cleanup_module() {{ - __exit() - }} + /// The \"Rust loadable module\" mark. + // + // This may be best done another way later on, e.g. as a new modinfo + // key or a new section. For the moment, keep it simple. + #[cfg(MODULE)] + #[doc(hidden)] + #[used] + static __IS_RUST_MODULE: () = (); - // Built-in modules are initialized through an initcall pointer - // and the identifiers need to be unique. - #[cfg(not(MODULE))] - #[cfg(not(CONFIG_HAVE_ARCH_PREL32_RELOCATIONS))] - #[doc(hidden)] - #[link_section = \"{initcall_section}\"] - #[used] - pub static __{name}_initcall: extern \"C\" fn() -> core::ffi::c_int = __{name}_init; + static mut __MOD: Option<{type_}> = None; - #[cfg(not(MODULE))] - #[cfg(CONFIG_HAVE_ARCH_PREL32_RELOCATIONS)] - core::arch::global_asm!( - r#\".section \"{initcall_section}\", \"a\" - __{name}_initcall: - .long __{name}_init - . - .previous - \"# - ); + // Loadable modules need to export the `{{init,cleanup}}_module` identifiers. + /// # Safety + /// + /// This function must not be called after module initialization, because it may be + /// freed after that completes. 
+ #[cfg(MODULE)] + #[doc(hidden)] + #[no_mangle] + #[link_section = \".init.text\"] + pub unsafe extern \"C\" fn init_module() -> core::ffi::c_int {{ + // SAFETY: This function is inaccessible to the outside due to the double + // module wrapping it. It is called exactly once by the C side via its + // unique name. + unsafe {{ __init() }} + }} - #[cfg(not(MODULE))] - #[doc(hidden)] - #[no_mangle] - pub extern \"C\" fn __{name}_init() -> core::ffi::c_int {{ - __init() - }} + #[cfg(MODULE)] + #[doc(hidden)] + #[no_mangle] + pub extern \"C\" fn cleanup_module() {{ + // SAFETY: + // - This function is inaccessible to the outside due to the double + // module wrapping it. It is called exactly once by the C side via its + // unique name, + // - furthermore it is only called after `init_module` has returned `0` + // (which delegates to `__init`). + unsafe {{ __exit() }} + }} - #[cfg(not(MODULE))] - #[doc(hidden)] - #[no_mangle] - pub extern \"C\" fn __{name}_exit() {{ - __exit() - }} + // Built-in modules are initialized through an initcall pointer + // and the identifiers need to be unique. + #[cfg(not(MODULE))] + #[cfg(not(CONFIG_HAVE_ARCH_PREL32_RELOCATIONS))] + #[doc(hidden)] + #[link_section = \"{initcall_section}\"] + #[used] + pub static __{name}_initcall: extern \"C\" fn() -> core::ffi::c_int = __{name}_init; - fn __init() -> core::ffi::c_int {{ - match <{type_} as kernel::Module>::init(&THIS_MODULE) {{ - Ok(m) => {{ - unsafe {{ - __MOD = Some(m); + #[cfg(not(MODULE))] + #[cfg(CONFIG_HAVE_ARCH_PREL32_RELOCATIONS)] + core::arch::global_asm!( + r#\".section \"{initcall_section}\", \"a\" + __{name}_initcall: + .long __{name}_init - . + .previous + \"# + ); + + #[cfg(not(MODULE))] + #[doc(hidden)] + #[no_mangle] + pub extern \"C\" fn __{name}_init() -> core::ffi::c_int {{ + // SAFETY: This function is inaccessible to the outside due to the double + // module wrapping it. It is called exactly once by the C side via its + // placement above in the initcall section. 
+ unsafe {{ __init() }} + }} + + #[cfg(not(MODULE))] + #[doc(hidden)] + #[no_mangle] + pub extern \"C\" fn __{name}_exit() {{ + // SAFETY: + // - This function is inaccessible to the outside due to the double + // module wrapping it. It is called exactly once by the C side via its + // unique name, + // - furthermore it is only called after `__{name}_init` has returned `0` + // (which delegates to `__init`). + unsafe {{ __exit() }} + }} + + /// # Safety + /// + /// This function must only be called once. + unsafe fn __init() -> core::ffi::c_int {{ + match <{type_} as kernel::Module>::init(&super::super::THIS_MODULE) {{ + Ok(m) => {{ + // SAFETY: No data race, since `__MOD` can only be accessed by this + // module and there only `__init` and `__exit` access it. These + // functions are only called once and `__exit` cannot be called + // before or during `__init`. + unsafe {{ + __MOD = Some(m); + }} + return 0; + }} + Err(e) => {{ + return e.to_errno(); + }} }} - return 0; }} - Err(e) => {{ - return e.to_errno(); + + /// # Safety + /// + /// This function must + /// - only be called once, + /// - be called after `__init` has been called and returned `0`. + unsafe fn __exit() {{ + // SAFETY: No data race, since `__MOD` can only be accessed by this module + // and there only `__init` and `__exit` access it. These functions are only + // called once and `__init` was already called. + unsafe {{ + // Invokes `drop()` on `__MOD`, which should be used for cleanup. + __MOD = None; + }} }} + + {modinfo} }} }} - - fn __exit() {{ - unsafe {{ - // Invokes `drop()` on `__MOD`, which should be used for cleanup. 
- __MOD = None; - }} - }} - - {modinfo} ", type_ = info.type_, name = info.name, From 8933cf4651e02853ca679be7b2d978dfcdcc5e0c Mon Sep 17 00:00:00 2001 From: Conor Dooley Date: Thu, 4 Apr 2024 15:17:02 +0100 Subject: [PATCH 107/313] rust: make mutually exclusive with CFI_CLANG On RISC-V and arm64, and presumably x86, if CFI_CLANG is enabled, loading a rust module will trigger a kernel panic. Support for sanitisers, including kcfi (CFI_CLANG), is in the works, but for now they're nightly-only options in rustc. Make RUST depend on !CFI_CLANG to prevent configuring a kernel without symmetrical support for kfi. [ Matthew Maurer writes [1]: This patch is fine by me - the last patch needed for KCFI to be functional in Rust just landed upstream last night, so we should revisit this (in the form of enabling it) once we move to `rustc-1.79.0` or later. Ramon de C Valle also gave feedback [2] on the status of KCFI for Rust and created a tracking issue [3] in upstream Rust. - Miguel ] Fixes: 2f7ab1267dc9 ("Kbuild: add Rust support") Cc: stable@vger.kernel.org Signed-off-by: Conor Dooley Acked-by: Nathan Chancellor Link: https://lore.kernel.org/rust-for-linux/CAGSQo024u1gHJgzsO38Xg3c4or+JupoPABQx_+0BLEpPg0cOEA@mail.gmail.com/ [1] Link: https://lore.kernel.org/rust-for-linux/CAOcBZOS2kPyH0Dm7Fuh4GC3=v7nZhyzBj_-dKu3PfAnrHZvaxg@mail.gmail.com/ [2] Link: https://github.com/rust-lang/rust/issues/123479 [3] Link: https://lore.kernel.org/r/20240404-providing-emporium-e652e359c711@spud [ Added feedback from the list, links, and used Cc for the tag. 
] Signed-off-by: Miguel Ojeda --- init/Kconfig | 1 + 1 file changed, 1 insertion(+) diff --git a/init/Kconfig b/init/Kconfig index b9a336a3d7d8..664bedb9a71f 100644 --- a/init/Kconfig +++ b/init/Kconfig @@ -1899,6 +1899,7 @@ config RUST bool "Rust support" depends on HAVE_RUST depends on RUST_IS_AVAILABLE + depends on !CFI_CLANG depends on !MODVERSIONS depends on !GCC_PLUGINS depends on !RANDSTRUCT From b3de7b433a323bb80303d77e69f1281bfab0a70b Mon Sep 17 00:00:00 2001 From: Rob Herring Date: Mon, 15 Apr 2024 08:11:03 -0500 Subject: [PATCH 108/313] dt-bindings: eeprom: at24: Fix ST M24C64-D compatible schema The schema for the ST M24C64-D compatible string doesn't work. Validation fails as the 'd-wl' suffix is not added to the preceding schema which defines the entries and vendors. The actual users are incorrect as well because the vendor is listed as Atmel whereas the part is made by ST. As this part doesn't appear to have multiple vendors, move it to its own entry. Fixes: 0997ff1fc143 ("dt-bindings: at24: add ST M24C64-D Additional Write lockable page") Fixes: c761068f484c ("dt-bindings: at24: add ST M24C32-D Additional Write lockable page") Signed-off-by: Rob Herring Reviewed-by: Marek Vasut Acked-by: Conor Dooley Signed-off-by: Bartosz Golaszewski --- Documentation/devicetree/bindings/eeprom/at24.yaml | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) diff --git a/Documentation/devicetree/bindings/eeprom/at24.yaml b/Documentation/devicetree/bindings/eeprom/at24.yaml index 1812ef31d5f1..3c36cd0510de 100644 --- a/Documentation/devicetree/bindings/eeprom/at24.yaml +++ b/Documentation/devicetree/bindings/eeprom/at24.yaml @@ -68,14 +68,10 @@ properties: pattern: cs16$ - items: pattern: c32$ - - items: - pattern: c32d-wl$ - items: pattern: cs32$ - items: pattern: c64$ - - items: - pattern: c64d-wl$ - items: pattern: cs64$ - items: @@ -136,6 +132,7 @@ properties: - renesas,r1ex24128 - samsung,s524ad0xd1 - const: atmel,24c128 + - pattern: '^atmel,24c(32|64)d-wl$'
# Actual vendor is st label: description: Descriptive name of the EEPROM. From 02bed83d59e37da30b745e30129511b1cc595c92 Mon Sep 17 00:00:00 2001 From: Kent Overstreet Date: Tue, 16 Apr 2024 17:55:02 -0400 Subject: [PATCH 109/313] bcachefs: Fix null ptr deref in twf from BCH_IOCTL_FSCK_OFFLINE We need to initialize the stdio redirects before they're used. Signed-off-by: Kent Overstreet --- fs/bcachefs/chardev.c | 4 +++- fs/bcachefs/thread_with_file.c | 15 +++++++++++++-- fs/bcachefs/thread_with_file.h | 3 +++ 3 files changed, 19 insertions(+), 3 deletions(-) diff --git a/fs/bcachefs/chardev.c b/fs/bcachefs/chardev.c index 72781aad6ba7..4d14f19f5185 100644 --- a/fs/bcachefs/chardev.c +++ b/fs/bcachefs/chardev.c @@ -232,13 +232,15 @@ static long bch2_ioctl_fsck_offline(struct bch_ioctl_fsck_offline __user *user_a /* We need request_key() to be called before we punt to kthread: */ opt_set(thr->opts, nostart, true); + bch2_thread_with_stdio_init(&thr->thr, &bch2_offline_fsck_ops); + thr->c = bch2_fs_open(devs.data, arg.nr_devs, thr->opts); if (!IS_ERR(thr->c) && thr->c->opts.errors == BCH_ON_ERROR_panic) thr->c->opts.errors = BCH_ON_ERROR_ro; - ret = bch2_run_thread_with_stdio(&thr->thr, &bch2_offline_fsck_ops); + ret = __bch2_run_thread_with_stdio(&thr->thr); out: darray_for_each(devs, i) kfree(*i); diff --git a/fs/bcachefs/thread_with_file.c b/fs/bcachefs/thread_with_file.c index 940db15d6a93..b1af7ac430f6 100644 --- a/fs/bcachefs/thread_with_file.c +++ b/fs/bcachefs/thread_with_file.c @@ -294,16 +294,27 @@ static int thread_with_stdio_fn(void *arg) return 0; } -int bch2_run_thread_with_stdio(struct thread_with_stdio *thr, - const struct thread_with_stdio_ops *ops) +void bch2_thread_with_stdio_init(struct thread_with_stdio *thr, + const struct thread_with_stdio_ops *ops) { stdio_buf_init(&thr->stdio.input); stdio_buf_init(&thr->stdio.output); thr->ops = ops; +} +int __bch2_run_thread_with_stdio(struct thread_with_stdio *thr) +{ return 
bch2_run_thread_with_file(&thr->thr, &thread_with_stdio_fops, thread_with_stdio_fn); } +int bch2_run_thread_with_stdio(struct thread_with_stdio *thr, + const struct thread_with_stdio_ops *ops) +{ + bch2_thread_with_stdio_init(thr, ops); + + return __bch2_run_thread_with_stdio(thr); +} + int bch2_run_thread_with_stdout(struct thread_with_stdio *thr, const struct thread_with_stdio_ops *ops) { diff --git a/fs/bcachefs/thread_with_file.h b/fs/bcachefs/thread_with_file.h index af54ea8f5b0f..1d63d14d7dca 100644 --- a/fs/bcachefs/thread_with_file.h +++ b/fs/bcachefs/thread_with_file.h @@ -63,6 +63,9 @@ struct thread_with_stdio { const struct thread_with_stdio_ops *ops; }; +void bch2_thread_with_stdio_init(struct thread_with_stdio *, + const struct thread_with_stdio_ops *); +int __bch2_run_thread_with_stdio(struct thread_with_stdio *); int bch2_run_thread_with_stdio(struct thread_with_stdio *, const struct thread_with_stdio_ops *); int bch2_run_thread_with_stdout(struct thread_with_stdio *, From 9fd5a48a1e3ff79ed54922668279ccb4d7190a62 Mon Sep 17 00:00:00 2001 From: Nathan Chancellor Date: Tue, 16 Apr 2024 08:16:02 -0700 Subject: [PATCH 110/313] bcachefs: Fix format specifier in validate_bset_keys() When building for 32-bit platforms, for which size_t is 'unsigned int', there is a warning from a format string in validate_bset_keys(): fs/bcachefs/btree_io.c: In function 'validate_bset_keys': fs/bcachefs/btree_io.c:891:34: error: format '%lu' expects argument of type 'long unsigned int', but argument 12 has type 'unsigned int' [-Werror=format=] 891 | "bad k->u64s %u (min %u max %lu)", k->u64s, | ^~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ fs/bcachefs/btree_io.c:603:32: note: in definition of macro 'btree_err' 603 | msg, ##__VA_ARGS__); \ | ^~~ fs/bcachefs/btree_io.c:887:21: note: in expansion of macro 'btree_err_on' 887 | if (btree_err_on(!bkeyp_u64s_valid(&b->format, k), | ^~~~~~~~~~~~ fs/bcachefs/btree_io.c:891:64: note: format string is defined here 891 | "bad k->u64s %u (min %u 
max %lu)", k->u64s, | ~~^ | | | long unsigned int | %u cc1: all warnings being treated as errors BKEY_U64s is size_t so the entire expression is promoted to size_t. Use the '%zu' specifier so that there is no warning regardless of the width of size_t. Fixes: 031ad9e7dbd1 ("bcachefs: Check for packed bkeys that are too big") Reported-by: kernel test robot Closes: https://lore.kernel.org/oe-kbuild-all/202404130747.wH6Dd23p-lkp@intel.com/ Closes: https://lore.kernel.org/oe-kbuild-all/202404131536.HdAMBOVc-lkp@intel.com/ Signed-off-by: Nathan Chancellor Signed-off-by: Kent Overstreet --- fs/bcachefs/btree_io.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fs/bcachefs/btree_io.c b/fs/bcachefs/btree_io.c index 9678b2375bed..debb0edc3455 100644 --- a/fs/bcachefs/btree_io.c +++ b/fs/bcachefs/btree_io.c @@ -888,7 +888,7 @@ static int validate_bset_keys(struct bch_fs *c, struct btree *b, -BCH_ERR_btree_node_read_err_fixable, c, NULL, b, i, btree_node_bkey_bad_u64s, - "bad k->u64s %u (min %u max %lu)", k->u64s, + "bad k->u64s %u (min %u max %zu)", k->u64s, bkeyp_key_u64s(&b->format, k), U8_MAX - BKEY_U64s + bkeyp_key_u64s(&b->format, k))) goto drop_this_key; From fabb4d49854281027454b0fa305d33f6c9ec4b47 Mon Sep 17 00:00:00 2001 From: Kent Overstreet Date: Mon, 15 Apr 2024 22:54:10 -0400 Subject: [PATCH 111/313] bcachefs: node scan: ignore multiple nodes with same seq if interior Interior nodes are not really needed, when we have to scan - but if this pops up for leaf nodes we'll need a real heuristic. 
Signed-off-by: Kent Overstreet --- fs/bcachefs/btree_node_scan.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/fs/bcachefs/btree_node_scan.c b/fs/bcachefs/btree_node_scan.c index 866bd278439f..c60794264da2 100644 --- a/fs/bcachefs/btree_node_scan.c +++ b/fs/bcachefs/btree_node_scan.c @@ -302,6 +302,8 @@ static int handle_overwrites(struct bch_fs *c, start->max_key = bpos_predecessor(n->min_key); start->range_updated = true; + } else if (n->level) { + n->overwritten = true; } else { struct printbuf buf = PRINTBUF; From 79055f50a65fe5eb58e9da1f79fb0a4f4bc82fff Mon Sep 17 00:00:00 2001 From: Kent Overstreet Date: Mon, 15 Apr 2024 23:53:12 -0400 Subject: [PATCH 112/313] bcachefs: make sure to release last journal pin in replay This fixes a deadlock when journal replay has many keys to insert that were from fsck, not the journal. Signed-off-by: Kent Overstreet --- fs/bcachefs/recovery.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/fs/bcachefs/recovery.c b/fs/bcachefs/recovery.c index 0f328aba9760..be5b47619327 100644 --- a/fs/bcachefs/recovery.c +++ b/fs/bcachefs/recovery.c @@ -249,7 +249,10 @@ int bch2_journal_replay(struct bch_fs *c) struct journal_key *k = *kp; - replay_now_at(j, k->journal_seq); + if (k->journal_seq) + replay_now_at(j, k->journal_seq); + else + replay_now_at(j, j->replay_journal_seq_end); ret = commit_do(trans, NULL, NULL, BCH_TRANS_COMMIT_no_enospc| From 8c75cdcdf869acabfdc7858827099dcde9f24e6c Mon Sep 17 00:00:00 2001 From: Felix Fietkau Date: Mon, 15 Apr 2024 14:18:11 +0200 Subject: [PATCH 113/313] wifi: mac80211: split mesh fast tx cache into local/proxied/forwarded Depending on the origin of the packets (and their SA), 802.11 + mesh headers could be filled in differently. In order to properly deal with that, add a new field to the lookup key, indicating the type (local, proxied or forwarded). This can fix spurious packet drop issues that depend on the order in which nodes/hosts communicate with each other. 
Fixes: d5edb9ae8d56 ("wifi: mac80211: mesh fast xmit support") Signed-off-by: Felix Fietkau Link: https://msgid.link/20240415121811.13391-1-nbd@nbd.name [use sizeof_field() for key_len] Signed-off-by: Johannes Berg --- net/mac80211/mesh.c | 8 +++++++- net/mac80211/mesh.h | 36 +++++++++++++++++++++++++++++++++--- net/mac80211/mesh_pathtbl.c | 31 ++++++++++++++++++++++--------- net/mac80211/rx.c | 13 ++++++++++--- 4 files changed, 72 insertions(+), 16 deletions(-) diff --git a/net/mac80211/mesh.c b/net/mac80211/mesh.c index 32475da98d73..cbc9b5e40cb3 100644 --- a/net/mac80211/mesh.c +++ b/net/mac80211/mesh.c @@ -747,6 +747,9 @@ bool ieee80211_mesh_xmit_fast(struct ieee80211_sub_if_data *sdata, struct sk_buff *skb, u32 ctrl_flags) { struct ieee80211_if_mesh *ifmsh = &sdata->u.mesh; + struct ieee80211_mesh_fast_tx_key key = { + .type = MESH_FAST_TX_TYPE_LOCAL + }; struct ieee80211_mesh_fast_tx *entry; struct ieee80211s_hdr *meshhdr; u8 sa[ETH_ALEN] __aligned(2); @@ -782,7 +785,10 @@ bool ieee80211_mesh_xmit_fast(struct ieee80211_sub_if_data *sdata, return false; } - entry = mesh_fast_tx_get(sdata, skb->data); + ether_addr_copy(key.addr, skb->data); + if (!ether_addr_equal(skb->data + ETH_ALEN, sdata->vif.addr)) + key.type = MESH_FAST_TX_TYPE_PROXIED; + entry = mesh_fast_tx_get(sdata, &key); if (!entry) return false; diff --git a/net/mac80211/mesh.h b/net/mac80211/mesh.h index d913ce7ba72e..3f9664e4e00c 100644 --- a/net/mac80211/mesh.h +++ b/net/mac80211/mesh.h @@ -134,10 +134,39 @@ struct mesh_path { #define MESH_FAST_TX_CACHE_THRESHOLD_SIZE 384 #define MESH_FAST_TX_CACHE_TIMEOUT 8000 /* msecs */ +/** + * enum ieee80211_mesh_fast_tx_type - cached mesh fast tx entry type + * + * @MESH_FAST_TX_TYPE_LOCAL: tx from the local vif address as SA + * @MESH_FAST_TX_TYPE_PROXIED: local tx with a different SA (e.g. 
bridged) + * @MESH_FAST_TX_TYPE_FORWARDED: forwarded from a different mesh point + * @NUM_MESH_FAST_TX_TYPE: number of entry types + */ +enum ieee80211_mesh_fast_tx_type { + MESH_FAST_TX_TYPE_LOCAL, + MESH_FAST_TX_TYPE_PROXIED, + MESH_FAST_TX_TYPE_FORWARDED, + + /* must be last */ + NUM_MESH_FAST_TX_TYPE +}; + + +/** + * struct ieee80211_mesh_fast_tx_key - cached mesh fast tx entry key + * + * @addr: The Ethernet DA for this entry + * @type: cache entry type + */ +struct ieee80211_mesh_fast_tx_key { + u8 addr[ETH_ALEN] __aligned(2); + u16 type; +}; + /** * struct ieee80211_mesh_fast_tx - cached mesh fast tx entry * @rhash: rhashtable pointer - * @addr_key: The Ethernet DA which is the key for this entry + * @key: the lookup key for this cache entry * @fast_tx: base fast_tx data * @hdr: cached mesh and rfc1042 headers * @hdrlen: length of mesh + rfc1042 @@ -148,7 +177,7 @@ struct mesh_path { */ struct ieee80211_mesh_fast_tx { struct rhash_head rhash; - u8 addr_key[ETH_ALEN] __aligned(2); + struct ieee80211_mesh_fast_tx_key key; struct ieee80211_fast_tx fast_tx; u8 hdr[sizeof(struct ieee80211s_hdr) + sizeof(rfc1042_header)]; @@ -334,7 +363,8 @@ void mesh_path_tx_root_frame(struct ieee80211_sub_if_data *sdata); bool mesh_action_is_path_sel(struct ieee80211_mgmt *mgmt); struct ieee80211_mesh_fast_tx * -mesh_fast_tx_get(struct ieee80211_sub_if_data *sdata, const u8 *addr); +mesh_fast_tx_get(struct ieee80211_sub_if_data *sdata, + struct ieee80211_mesh_fast_tx_key *key); bool ieee80211_mesh_xmit_fast(struct ieee80211_sub_if_data *sdata, struct sk_buff *skb, u32 ctrl_flags); void mesh_fast_tx_cache(struct ieee80211_sub_if_data *sdata, diff --git a/net/mac80211/mesh_pathtbl.c b/net/mac80211/mesh_pathtbl.c index 91b55d6a68b9..a6b62169f084 100644 --- a/net/mac80211/mesh_pathtbl.c +++ b/net/mac80211/mesh_pathtbl.c @@ -37,8 +37,8 @@ static const struct rhashtable_params mesh_rht_params = { static const struct rhashtable_params fast_tx_rht_params = { .nelem_hint = 10, 
.automatic_shrinking = true, - .key_len = ETH_ALEN, - .key_offset = offsetof(struct ieee80211_mesh_fast_tx, addr_key), + .key_len = sizeof_field(struct ieee80211_mesh_fast_tx, key), + .key_offset = offsetof(struct ieee80211_mesh_fast_tx, key), .head_offset = offsetof(struct ieee80211_mesh_fast_tx, rhash), .hashfn = mesh_table_hash, }; @@ -431,20 +431,21 @@ static void mesh_fast_tx_entry_free(struct mesh_tx_cache *cache, } struct ieee80211_mesh_fast_tx * -mesh_fast_tx_get(struct ieee80211_sub_if_data *sdata, const u8 *addr) +mesh_fast_tx_get(struct ieee80211_sub_if_data *sdata, + struct ieee80211_mesh_fast_tx_key *key) { struct ieee80211_mesh_fast_tx *entry; struct mesh_tx_cache *cache; cache = &sdata->u.mesh.tx_cache; - entry = rhashtable_lookup(&cache->rht, addr, fast_tx_rht_params); + entry = rhashtable_lookup(&cache->rht, key, fast_tx_rht_params); if (!entry) return NULL; if (!(entry->mpath->flags & MESH_PATH_ACTIVE) || mpath_expired(entry->mpath)) { spin_lock_bh(&cache->walk_lock); - entry = rhashtable_lookup(&cache->rht, addr, fast_tx_rht_params); + entry = rhashtable_lookup(&cache->rht, key, fast_tx_rht_params); if (entry) mesh_fast_tx_entry_free(cache, entry); spin_unlock_bh(&cache->walk_lock); @@ -489,18 +490,24 @@ void mesh_fast_tx_cache(struct ieee80211_sub_if_data *sdata, if (!sta) return; + build.key.type = MESH_FAST_TX_TYPE_LOCAL; if ((meshhdr->flags & MESH_FLAGS_AE) == MESH_FLAGS_AE_A5_A6) { /* This is required to keep the mppath alive */ mppath = mpp_path_lookup(sdata, meshhdr->eaddr1); if (!mppath) return; build.mppath = mppath; + if (!ether_addr_equal(meshhdr->eaddr2, sdata->vif.addr)) + build.key.type = MESH_FAST_TX_TYPE_PROXIED; } else if (ieee80211_has_a4(hdr->frame_control)) { mppath = mpath; } else { return; } + if (!ether_addr_equal(hdr->addr4, sdata->vif.addr)) + build.key.type = MESH_FAST_TX_TYPE_FORWARDED; + /* rate limit, in case fast xmit can't be enabled */ if (mppath->fast_tx_check == jiffies) return; @@ -547,7 +554,7 @@ void 
mesh_fast_tx_cache(struct ieee80211_sub_if_data *sdata, } } - memcpy(build.addr_key, mppath->dst, ETH_ALEN); + memcpy(build.key.addr, mppath->dst, ETH_ALEN); build.timestamp = jiffies; build.fast_tx.band = info->band; build.fast_tx.da_offs = offsetof(struct ieee80211_hdr, addr3); @@ -646,12 +653,18 @@ void mesh_fast_tx_flush_addr(struct ieee80211_sub_if_data *sdata, const u8 *addr) { struct mesh_tx_cache *cache = &sdata->u.mesh.tx_cache; + struct ieee80211_mesh_fast_tx_key key = {}; struct ieee80211_mesh_fast_tx *entry; + int i; + ether_addr_copy(key.addr, addr); spin_lock_bh(&cache->walk_lock); - entry = rhashtable_lookup_fast(&cache->rht, addr, fast_tx_rht_params); - if (entry) - mesh_fast_tx_entry_free(cache, entry); + for (i = 0; i < NUM_MESH_FAST_TX_TYPE; i++) { + key.type = i; + entry = rhashtable_lookup_fast(&cache->rht, &key, fast_tx_rht_params); + if (entry) + mesh_fast_tx_entry_free(cache, entry); + } spin_unlock_bh(&cache->walk_lock); } diff --git a/net/mac80211/rx.c b/net/mac80211/rx.c index 685185dc04f9..6e24864f9a40 100644 --- a/net/mac80211/rx.c +++ b/net/mac80211/rx.c @@ -2763,7 +2763,10 @@ ieee80211_rx_mesh_fast_forward(struct ieee80211_sub_if_data *sdata, struct sk_buff *skb, int hdrlen) { struct ieee80211_if_mesh *ifmsh = &sdata->u.mesh; - struct ieee80211_mesh_fast_tx *entry = NULL; + struct ieee80211_mesh_fast_tx_key key = { + .type = MESH_FAST_TX_TYPE_FORWARDED + }; + struct ieee80211_mesh_fast_tx *entry; struct ieee80211s_hdr *mesh_hdr; struct tid_ampdu_tx *tid_tx; struct sta_info *sta; @@ -2772,9 +2775,13 @@ ieee80211_rx_mesh_fast_forward(struct ieee80211_sub_if_data *sdata, mesh_hdr = (struct ieee80211s_hdr *)(skb->data + sizeof(eth)); if ((mesh_hdr->flags & MESH_FLAGS_AE) == MESH_FLAGS_AE_A5_A6) - entry = mesh_fast_tx_get(sdata, mesh_hdr->eaddr1); + ether_addr_copy(key.addr, mesh_hdr->eaddr1); else if (!(mesh_hdr->flags & MESH_FLAGS_AE)) - entry = mesh_fast_tx_get(sdata, skb->data); + ether_addr_copy(key.addr, skb->data); + else + return 
false; + + entry = mesh_fast_tx_get(sdata, &key); if (!entry) return false; From dbfff5bf9292714f02ace002fea8ce6599ea1145 Mon Sep 17 00:00:00 2001 From: Avraham Stern Date: Mon, 15 Apr 2024 11:54:43 +0300 Subject: [PATCH 114/313] wifi: iwlwifi: mvm: remove old PASN station when adding a new one If a PASN station is added, and an old PASN station already exists for the same mac address, remove the old station before adding the new one. Keeping the old station causes old security context to be used in measurements. Fixes: 0739a7d70e00 ("iwlwifi: mvm: initiator: add option for adding a PASN responder") Signed-off-by: Avraham Stern Signed-off-by: Miri Korenblit Link: https://msgid.link/20240415114847.ef3544a416f2.I4e8c7c8ca22737f4f908ae5cd4fc0b920c703dd3@changeid Signed-off-by: Johannes Berg --- drivers/net/wireless/intel/iwlwifi/mvm/ftm-initiator.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/net/wireless/intel/iwlwifi/mvm/ftm-initiator.c b/drivers/net/wireless/intel/iwlwifi/mvm/ftm-initiator.c index 4863a3c74640..d84d7e955bb0 100644 --- a/drivers/net/wireless/intel/iwlwifi/mvm/ftm-initiator.c +++ b/drivers/net/wireless/intel/iwlwifi/mvm/ftm-initiator.c @@ -53,6 +53,8 @@ int iwl_mvm_ftm_add_pasn_sta(struct iwl_mvm *mvm, struct ieee80211_vif *vif, if (!pasn) return -ENOBUFS; + iwl_mvm_ftm_remove_pasn_sta(mvm, addr); + pasn->cipher = iwl_mvm_cipher_to_location_cipher(cipher); switch (pasn->cipher) { From bada85a3f584763deadd201147778c3e791d279c Mon Sep 17 00:00:00 2001 From: Miri Korenblit Date: Mon, 15 Apr 2024 11:54:44 +0300 Subject: [PATCH 115/313] wifi: iwlwifi: mvm: return uid from iwl_mvm_build_scan_cmd This function is supposed to return a uid on success, and an errno on failure. But it currently returns the return value of the specific cmd version handler, which in turn returns 0 on success and errno otherwise. This means that on success, iwl_mvm_build_scan_cmd will return 0 regardless of the actual uid.
Fix this by returning the uid if the handler succeeded. Fixes: 687db6ff5b70 ("iwlwifi: scan: make new scan req versioning flow") Signed-off-by: Miri Korenblit Reviewed-by: Ilan Peer Link: https://msgid.link/20240415114847.5e2d602b3190.I4c4931021be74a67a869384c8f8ee7463e0c7857@changeid Signed-off-by: Johannes Berg --- drivers/net/wireless/intel/iwlwifi/mvm/scan.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/net/wireless/intel/iwlwifi/mvm/scan.c b/drivers/net/wireless/intel/iwlwifi/mvm/scan.c index f3e3986b4c72..11559563ae38 100644 --- a/drivers/net/wireless/intel/iwlwifi/mvm/scan.c +++ b/drivers/net/wireless/intel/iwlwifi/mvm/scan.c @@ -2813,7 +2813,8 @@ static int iwl_mvm_build_scan_cmd(struct iwl_mvm *mvm, if (ver_handler->version != scan_ver) continue; - return ver_handler->handler(mvm, vif, params, type, uid); + err = ver_handler->handler(mvm, vif, params, type, uid); + return err ? : uid; } err = iwl_mvm_scan_umac(mvm, vif, params, type, uid); From 93b36e1d3748c352a70c69aa378715e6572e51d1 Mon Sep 17 00:00:00 2001 From: Rob Herring Date: Fri, 12 Apr 2024 15:44:04 -0500 Subject: [PATCH 116/313] arm64: dts: rockchip: Fix USB interface compatible string on kobol-helios64 The correct compatible string for a USB interface node begins with "usbif", not "usb". Fix the Rockchip RK3399 based Kobol Helios64 board. 
Signed-off-by: Rob Herring Link: https://lore.kernel.org/r/20240412204405.3703638-1-robh@kernel.org Signed-off-by: Heiko Stuebner --- arch/arm64/boot/dts/rockchip/rk3399-kobol-helios64.dts | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/arm64/boot/dts/rockchip/rk3399-kobol-helios64.dts b/arch/arm64/boot/dts/rockchip/rk3399-kobol-helios64.dts index dfb2a0bdea5b..9586bb12a5d8 100644 --- a/arch/arm64/boot/dts/rockchip/rk3399-kobol-helios64.dts +++ b/arch/arm64/boot/dts/rockchip/rk3399-kobol-helios64.dts @@ -611,7 +611,7 @@ device@4 { #size-cells = <0>; interface@0 { /* interface 0 of configuration 1 */ - compatible = "usbbda,8156.config1.0"; + compatible = "usbifbda,8156.config1.0"; reg = <0 1>; }; }; From fa845c73497f5e9d2f6f1cf48c3aad05c2fdacb8 Mon Sep 17 00:00:00 2001 From: Kent Overstreet Date: Tue, 16 Apr 2024 19:16:45 -0400 Subject: [PATCH 117/313] bcachefs: Fix bch2_dev_btree_bitmap_marked_sectors() shift Fixes: 27c15ed297cb bcachefs: bch_member.btree_allocated_bitmap Signed-off-by: Kent Overstreet --- fs/bcachefs/sb-members.c | 4 ++-- fs/bcachefs/sb-members.h | 6 +++--- 2 files changed, 5 insertions(+), 5 deletions(-) diff --git a/fs/bcachefs/sb-members.c b/fs/bcachefs/sb-members.c index 522a969345e5..5b8e621ac5eb 100644 --- a/fs/bcachefs/sb-members.c +++ b/fs/bcachefs/sb-members.c @@ -463,8 +463,8 @@ static void __bch2_dev_btree_bitmap_mark(struct bch_sb_field_members_v2 *mi, uns m->btree_bitmap_shift += resize; } - for (unsigned bit = sectors >> m->btree_bitmap_shift; - bit << m->btree_bitmap_shift < end; + for (unsigned bit = start >> m->btree_bitmap_shift; + (u64) bit << m->btree_bitmap_shift < end; bit++) bitmap |= BIT_ULL(bit); diff --git a/fs/bcachefs/sb-members.h b/fs/bcachefs/sb-members.h index b27c3e4467cf..5efa64eca5f8 100644 --- a/fs/bcachefs/sb-members.h +++ b/fs/bcachefs/sb-members.h @@ -235,11 +235,11 @@ static inline bool bch2_dev_btree_bitmap_marked_sectors(struct bch_dev *ca, u64 { u64 end = start + sectors; - if (end > 
64 << ca->mi.btree_bitmap_shift) + if (end > 64ULL << ca->mi.btree_bitmap_shift) return false; - for (unsigned bit = sectors >> ca->mi.btree_bitmap_shift; - bit << ca->mi.btree_bitmap_shift < end; + for (unsigned bit = start >> ca->mi.btree_bitmap_shift; + (u64) bit << ca->mi.btree_bitmap_shift < end; bit++) if (!(ca->mi.btree_allocated_bitmap & BIT_ULL(bit))) return false; From 605109ff5e43addefdf92d1cfa2a693114430024 Mon Sep 17 00:00:00 2001 From: Kent Overstreet Date: Wed, 17 Apr 2024 02:04:23 -0400 Subject: [PATCH 118/313] bcachefs: KEY_TYPE_error is allowed for reflink KEY_TYPE_error is left behind when we have to delete all pointers in an extent in fsck; it allows errors to be correctly returned by reads later. Signed-off-by: Kent Overstreet --- fs/bcachefs/bcachefs_format.h | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/fs/bcachefs/bcachefs_format.h b/fs/bcachefs/bcachefs_format.h index 085987435a5e..f7fbfccd2b1e 100644 --- a/fs/bcachefs/bcachefs_format.h +++ b/fs/bcachefs/bcachefs_format.h @@ -1504,7 +1504,8 @@ enum btree_id_flags { BIT_ULL(KEY_TYPE_stripe)) \ x(reflink, 7, BTREE_ID_EXTENTS|BTREE_ID_DATA, \ BIT_ULL(KEY_TYPE_reflink_v)| \ - BIT_ULL(KEY_TYPE_indirect_inline_data)) \ + BIT_ULL(KEY_TYPE_indirect_inline_data)| \ + BIT_ULL(KEY_TYPE_error)) \ x(subvolumes, 8, 0, \ BIT_ULL(KEY_TYPE_subvolume)) \ x(snapshots, 9, 0, \ From 719aec84b106ba3bd3639eddb2be46c510ef683a Mon Sep 17 00:00:00 2001 From: Kent Overstreet Date: Wed, 17 Apr 2024 02:17:21 -0400 Subject: [PATCH 119/313] bcachefs: fix leak in bch2_gc_write_reflink_key Signed-off-by: Kent Overstreet --- fs/bcachefs/btree_gc.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/fs/bcachefs/btree_gc.c b/fs/bcachefs/btree_gc.c index ecbd9598f69f..791470b0c654 100644 --- a/fs/bcachefs/btree_gc.c +++ b/fs/bcachefs/btree_gc.c @@ -1587,7 +1587,7 @@ static int bch2_gc_write_reflink_key(struct btree_trans *trans, struct bkey_i *new = bch2_bkey_make_mut_noupdate(trans, k); 
ret = PTR_ERR_OR_ZERO(new); if (ret) - return ret; + goto out; if (!r->refcount) new->k.type = KEY_TYPE_deleted; @@ -1595,6 +1595,7 @@ static int bch2_gc_write_reflink_key(struct btree_trans *trans, *bkey_refcount(bkey_i_to_s(new)) = cpu_to_le64(r->refcount); ret = bch2_trans_update(trans, iter, new, 0); } +out: fsck_err: printbuf_exit(&buf); return ret; From 0389c09b2fb702ca7924ddf550ce0c8af708b8be Mon Sep 17 00:00:00 2001 From: Kent Overstreet Date: Wed, 17 Apr 2024 17:27:43 -0400 Subject: [PATCH 120/313] bcachefs: Fix bio alloc in check_extent_checksum() if the buffer is virtually mapped it won't be a single bvec Signed-off-by: Kent Overstreet --- fs/bcachefs/backpointers.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fs/bcachefs/backpointers.c b/fs/bcachefs/backpointers.c index fadb1078903d..a20044201002 100644 --- a/fs/bcachefs/backpointers.c +++ b/fs/bcachefs/backpointers.c @@ -470,7 +470,7 @@ static int check_extent_checksum(struct btree_trans *trans, goto err; } - bio = bio_alloc(ca->disk_sb.bdev, 1, REQ_OP_READ, GFP_KERNEL); + bio = bio_alloc(ca->disk_sb.bdev, buf_pages(data_buf, bytes), REQ_OP_READ, GFP_KERNEL); bio->bi_iter.bi_sector = p.ptr.offset; bch2_bio_map(bio, data_buf, bytes); ret = submit_bio_wait(bio); From e5a78fdec0114266d3c47df413d2d7955807fad9 Mon Sep 17 00:00:00 2001 From: David Sterba Date: Tue, 24 Jan 2023 20:44:38 +0100 Subject: [PATCH 121/313] btrfs: remove colon from messages with state The message format in syslog is usually made of two parts: prefix ":" message Various tools parse the prefix up to the first ":". When there's an additional status of a btrfs filesystem like [5.199782] BTRFS info (device nvme1n1p1: state M): use zstd compression, level 9 where 'M' is for remount, there's one more ":" that does not conform to the format. Remove it. 
Reviewed-by: Anand Jain Signed-off-by: David Sterba --- fs/btrfs/messages.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fs/btrfs/messages.c b/fs/btrfs/messages.c index c96dd66fd0f7..210d9c82e2ae 100644 --- a/fs/btrfs/messages.c +++ b/fs/btrfs/messages.c @@ -7,7 +7,7 @@ #ifdef CONFIG_PRINTK -#define STATE_STRING_PREFACE ": state " +#define STATE_STRING_PREFACE " state " #define STATE_STRING_BUF_LEN (sizeof(STATE_STRING_PREFACE) + BTRFS_FS_STATE_COUNT + 1) /* From 7192833c4e55b26e8f15ef58577867a1bc808036 Mon Sep 17 00:00:00 2001 From: Naohiro Aota Date: Tue, 9 Apr 2024 23:18:52 +0900 Subject: [PATCH 122/313] btrfs: scrub: run relocation repair when/only needed When btrfs scrub finds an error, it reads mirrors to find correct data. If all the errors are fixed, sctx->error_bitmap is cleared for the stripe range. However, in the zoned mode, it runs relocation to repair scrub errors when the bitmap is *not* empty, which is a flipped condition. Also, it runs the relocation even if the scrub is read-only. This was missed by a fix in commit 1f2030ff6e49 ("btrfs: scrub: respect the read-only flag during repair"). The repair is only necessary when there is a repaired sector and should be done on read-write scrub. So, tweak the condition for both regular and zoned case. 
Fixes: 54765392a1b9 ("btrfs: scrub: introduce helper to queue a stripe for scrub") Fixes: 1f2030ff6e49 ("btrfs: scrub: respect the read-only flag during repair") CC: stable@vger.kernel.org # 6.6+ Reviewed-by: Qu Wenruo Reviewed-by: Johannes Thumshirn Signed-off-by: Naohiro Aota Signed-off-by: David Sterba --- fs/btrfs/scrub.c | 18 +++++++++--------- 1 file changed, 9 insertions(+), 9 deletions(-) diff --git a/fs/btrfs/scrub.c b/fs/btrfs/scrub.c index fa25004ab04e..4b22cfe9a98c 100644 --- a/fs/btrfs/scrub.c +++ b/fs/btrfs/scrub.c @@ -1012,6 +1012,7 @@ static void scrub_stripe_read_repair_worker(struct work_struct *work) struct btrfs_fs_info *fs_info = sctx->fs_info; int num_copies = btrfs_num_copies(fs_info, stripe->bg->start, stripe->bg->length); + unsigned long repaired; int mirror; int i; @@ -1078,16 +1079,15 @@ static void scrub_stripe_read_repair_worker(struct work_struct *work) * Submit the repaired sectors. For zoned case, we cannot do repair * in-place, but queue the bg to be relocated. 
*/ - if (btrfs_is_zoned(fs_info)) { - if (!bitmap_empty(&stripe->error_bitmap, stripe->nr_sectors)) + bitmap_andnot(&repaired, &stripe->init_error_bitmap, &stripe->error_bitmap, + stripe->nr_sectors); + if (!sctx->readonly && !bitmap_empty(&repaired, stripe->nr_sectors)) { + if (btrfs_is_zoned(fs_info)) { btrfs_repair_one_zone(fs_info, sctx->stripes[0].bg->start); - } else if (!sctx->readonly) { - unsigned long repaired; - - bitmap_andnot(&repaired, &stripe->init_error_bitmap, - &stripe->error_bitmap, stripe->nr_sectors); - scrub_write_sectors(sctx, stripe, repaired, false); - wait_scrub_stripe_io(stripe); + } else { + scrub_write_sectors(sctx, stripe, repaired, false); + wait_scrub_stripe_io(stripe); + } } scrub_stripe_report_errors(sctx, stripe); From 131a821a243f89be312ced9e62ccc37b2cf3846c Mon Sep 17 00:00:00 2001 From: Sweet Tea Dorminy Date: Sat, 6 Apr 2024 04:45:02 -0400 Subject: [PATCH 123/313] btrfs: fallback if compressed IO fails for ENOSPC In commit b4ccace878f4 ("btrfs: refactor submit_compressed_extents()"), if an async extent compressed but failed to find enough space, we changed from falling back to an uncompressed write to just failing the write altogether. The principle was that if there's not enough space to write the compressed version of the data, there can't possibly be enough space to write the larger, uncompressed version of the data. However, this isn't necessarily true: due to fragmentation, there could be enough discontiguous free blocks to write the uncompressed version, but not enough contiguous free blocks to write the smaller but unsplittable compressed version. This has occurred to an internal workload which relied on write()'s return value indicating there was space. While rare, it has happened a few times. Thus, in order to prevent early ENOSPC, re-add a fallback to uncompressed writing. 
Fixes: b4ccace878f4 ("btrfs: refactor submit_compressed_extents()") CC: stable@vger.kernel.org # 6.1+ Reviewed-by: Qu Wenruo Co-developed-by: Neal Gompa Signed-off-by: Neal Gompa Signed-off-by: Sweet Tea Dorminy Reviewed-by: David Sterba Signed-off-by: David Sterba --- fs/btrfs/inode.c | 13 ++++++------- 1 file changed, 6 insertions(+), 7 deletions(-) diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c index c65fe5de4022..7fed887e700c 100644 --- a/fs/btrfs/inode.c +++ b/fs/btrfs/inode.c @@ -1145,13 +1145,13 @@ static void submit_one_async_extent(struct async_chunk *async_chunk, 0, *alloc_hint, &ins, 1, 1); if (ret) { /* - * Here we used to try again by going back to non-compressed - * path for ENOSPC. But we can't reserve space even for - * compressed size, how could it work for uncompressed size - * which requires larger size? So here we directly go error - * path. + * We can't reserve contiguous space for the compressed size. + * Unlikely, but it's possible that we could have enough + * non-contiguous space for the uncompressed size instead. So + * fall back to uncompressed. */ - goto out_free; + submit_uncompressed_range(inode, async_extent, locked_page); + goto done; } /* Here we're doing allocation and writeback of the compressed pages */ @@ -1203,7 +1203,6 @@ static void submit_one_async_extent(struct async_chunk *async_chunk, out_free_reserve: btrfs_dec_block_group_reservations(fs_info, ins.objectid); btrfs_free_reserved_extent(fs_info, ins.objectid, ins.offset, 1); -out_free: mapping_set_error(inode->vfs_inode.i_mapping, -EIO); extent_clear_unlock_delalloc(inode, start, end, NULL, EXTENT_LOCKED | EXTENT_DELALLOC | From 6aff4c26ed677b1f464f721fbd3e7767f24a684d Mon Sep 17 00:00:00 2001 From: Patrik Jakobsson Date: Mon, 15 Apr 2024 13:27:31 +0200 Subject: [PATCH 124/313] drm/gma500: Remove lid code Due to a change in the order of initialization, the lid timer got started before proper setup was made. This resulted in a crash during boot. 
The lid switch is handled by gma500 through a timer that periodically polls the opregion for changes. These types of ACPI events shouldn't be handled by the graphics driver so let's get rid of the lid code. This fixes the crash during boot. Reported-by: Enrico Bartky Fixes: 8f1aaccb04b7 ("drm/gma500: Implement client-based fbdev emulation") Tested-by: Enrico Bartky Signed-off-by: Patrik Jakobsson Reviewed-by: Thomas Zimmermann Signed-off-by: Thomas Zimmermann Link: https://patchwork.freedesktop.org/patch/msgid/20240415112731.31841-1-patrik.r.jakobsson@gmail.com --- drivers/gpu/drm/gma500/Makefile | 1 - drivers/gpu/drm/gma500/psb_device.c | 5 +- drivers/gpu/drm/gma500/psb_drv.h | 9 ---- drivers/gpu/drm/gma500/psb_lid.c | 80 ----------------------------- 4 files changed, 1 insertion(+), 94 deletions(-) delete mode 100644 drivers/gpu/drm/gma500/psb_lid.c diff --git a/drivers/gpu/drm/gma500/Makefile b/drivers/gpu/drm/gma500/Makefile index 4f302cd5e1a6..58fed80c7392 100644 --- a/drivers/gpu/drm/gma500/Makefile +++ b/drivers/gpu/drm/gma500/Makefile @@ -34,7 +34,6 @@ gma500_gfx-y += \ psb_intel_lvds.o \ psb_intel_modes.o \ psb_intel_sdvo.o \ - psb_lid.o \ psb_irq.o gma500_gfx-$(CONFIG_ACPI) += opregion.o diff --git a/drivers/gpu/drm/gma500/psb_device.c b/drivers/gpu/drm/gma500/psb_device.c index dcfcd7b89d4a..6dece8f0e380 100644 --- a/drivers/gpu/drm/gma500/psb_device.c +++ b/drivers/gpu/drm/gma500/psb_device.c @@ -73,8 +73,7 @@ static int psb_backlight_setup(struct drm_device *dev) } psb_intel_lvds_set_brightness(dev, PSB_MAX_BRIGHTNESS); - /* This must occur after the backlight is properly initialised */ - psb_lid_timer_init(dev_priv); + return 0; } @@ -259,8 +258,6 @@ static int psb_chip_setup(struct drm_device *dev) static void psb_chip_teardown(struct drm_device *dev) { - struct drm_psb_private *dev_priv = to_drm_psb_private(dev); - psb_lid_timer_takedown(dev_priv); gma_intel_teardown_gmbus(dev); } diff --git a/drivers/gpu/drm/gma500/psb_drv.h 
b/drivers/gpu/drm/gma500/psb_drv.h index c5edfa4aa4cc..83c17689c454 100644 --- a/drivers/gpu/drm/gma500/psb_drv.h +++ b/drivers/gpu/drm/gma500/psb_drv.h @@ -162,7 +162,6 @@ #define PSB_NUM_VBLANKS 2 #define PSB_WATCHDOG_DELAY (HZ * 2) -#define PSB_LID_DELAY (HZ / 10) #define PSB_MAX_BRIGHTNESS 100 @@ -491,11 +490,7 @@ struct drm_psb_private { /* Hotplug handling */ struct work_struct hotplug_work; - /* LID-Switch */ - spinlock_t lid_lock; - struct timer_list lid_timer; struct psb_intel_opregion opregion; - u32 lid_last_state; /* Watchdog */ uint32_t apm_reg; @@ -591,10 +586,6 @@ struct psb_ops { int i2c_bus; /* I2C bus identifier for Moorestown */ }; -/* psb_lid.c */ -extern void psb_lid_timer_init(struct drm_psb_private *dev_priv); -extern void psb_lid_timer_takedown(struct drm_psb_private *dev_priv); - /* modesetting */ extern void psb_modeset_init(struct drm_device *dev); extern void psb_modeset_cleanup(struct drm_device *dev); diff --git a/drivers/gpu/drm/gma500/psb_lid.c b/drivers/gpu/drm/gma500/psb_lid.c deleted file mode 100644 index 58a7fe392636..000000000000 --- a/drivers/gpu/drm/gma500/psb_lid.c +++ /dev/null @@ -1,80 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0-only -/************************************************************************** - * Copyright (c) 2007, Intel Corporation. 
- * - * Authors: Thomas Hellstrom - **************************************************************************/ - -#include - -#include "psb_drv.h" -#include "psb_intel_reg.h" -#include "psb_reg.h" - -static void psb_lid_timer_func(struct timer_list *t) -{ - struct drm_psb_private *dev_priv = from_timer(dev_priv, t, lid_timer); - struct drm_device *dev = (struct drm_device *)&dev_priv->dev; - struct timer_list *lid_timer = &dev_priv->lid_timer; - unsigned long irq_flags; - u32 __iomem *lid_state = dev_priv->opregion.lid_state; - u32 pp_status; - - if (readl(lid_state) == dev_priv->lid_last_state) - goto lid_timer_schedule; - - if ((readl(lid_state)) & 0x01) { - /*lid state is open*/ - REG_WRITE(PP_CONTROL, REG_READ(PP_CONTROL) | POWER_TARGET_ON); - do { - pp_status = REG_READ(PP_STATUS); - } while ((pp_status & PP_ON) == 0 && - (pp_status & PP_SEQUENCE_MASK) != 0); - - if (REG_READ(PP_STATUS) & PP_ON) { - /*FIXME: should be backlight level before*/ - psb_intel_lvds_set_brightness(dev, 100); - } else { - DRM_DEBUG("LVDS panel never powered up"); - return; - } - } else { - psb_intel_lvds_set_brightness(dev, 0); - - REG_WRITE(PP_CONTROL, REG_READ(PP_CONTROL) & ~POWER_TARGET_ON); - do { - pp_status = REG_READ(PP_STATUS); - } while ((pp_status & PP_ON) == 0); - } - dev_priv->lid_last_state = readl(lid_state); - -lid_timer_schedule: - spin_lock_irqsave(&dev_priv->lid_lock, irq_flags); - if (!timer_pending(lid_timer)) { - lid_timer->expires = jiffies + PSB_LID_DELAY; - add_timer(lid_timer); - } - spin_unlock_irqrestore(&dev_priv->lid_lock, irq_flags); -} - -void psb_lid_timer_init(struct drm_psb_private *dev_priv) -{ - struct timer_list *lid_timer = &dev_priv->lid_timer; - unsigned long irq_flags; - - spin_lock_init(&dev_priv->lid_lock); - spin_lock_irqsave(&dev_priv->lid_lock, irq_flags); - - timer_setup(lid_timer, psb_lid_timer_func, 0); - - lid_timer->expires = jiffies + PSB_LID_DELAY; - - add_timer(lid_timer); - spin_unlock_irqrestore(&dev_priv->lid_lock, irq_flags); 
-} - -void psb_lid_timer_takedown(struct drm_psb_private *dev_priv) -{ - del_timer_sync(&dev_priv->lid_timer); -} - From 2f7ef5bb4a2f3e481ef05fab946edb97c84f67cf Mon Sep 17 00:00:00 2001 From: Johannes Thumshirn Date: Wed, 17 Apr 2024 10:45:47 +0200 Subject: [PATCH 125/313] btrfs: fix information leak in btrfs_ioctl_logical_to_ino() Syzbot reported the following information leak in btrfs_ioctl_logical_to_ino(): BUG: KMSAN: kernel-infoleak in instrument_copy_to_user include/linux/instrumented.h:114 [inline] BUG: KMSAN: kernel-infoleak in _copy_to_user+0xbc/0x110 lib/usercopy.c:40 instrument_copy_to_user include/linux/instrumented.h:114 [inline] _copy_to_user+0xbc/0x110 lib/usercopy.c:40 copy_to_user include/linux/uaccess.h:191 [inline] btrfs_ioctl_logical_to_ino+0x440/0x750 fs/btrfs/ioctl.c:3499 btrfs_ioctl+0x714/0x1260 vfs_ioctl fs/ioctl.c:51 [inline] __do_sys_ioctl fs/ioctl.c:904 [inline] __se_sys_ioctl+0x261/0x450 fs/ioctl.c:890 __x64_sys_ioctl+0x96/0xe0 fs/ioctl.c:890 x64_sys_call+0x1883/0x3b50 arch/x86/include/generated/asm/syscalls_64.h:17 do_syscall_x64 arch/x86/entry/common.c:52 [inline] do_syscall_64+0xcf/0x1e0 arch/x86/entry/common.c:83 entry_SYSCALL_64_after_hwframe+0x77/0x7f Uninit was created at: __kmalloc_large_node+0x231/0x370 mm/slub.c:3921 __do_kmalloc_node mm/slub.c:3954 [inline] __kmalloc_node+0xb07/0x1060 mm/slub.c:3973 kmalloc_node include/linux/slab.h:648 [inline] kvmalloc_node+0xc0/0x2d0 mm/util.c:634 kvmalloc include/linux/slab.h:766 [inline] init_data_container+0x49/0x1e0 fs/btrfs/backref.c:2779 btrfs_ioctl_logical_to_ino+0x17c/0x750 fs/btrfs/ioctl.c:3480 btrfs_ioctl+0x714/0x1260 vfs_ioctl fs/ioctl.c:51 [inline] __do_sys_ioctl fs/ioctl.c:904 [inline] __se_sys_ioctl+0x261/0x450 fs/ioctl.c:890 __x64_sys_ioctl+0x96/0xe0 fs/ioctl.c:890 x64_sys_call+0x1883/0x3b50 arch/x86/include/generated/asm/syscalls_64.h:17 do_syscall_x64 arch/x86/entry/common.c:52 [inline] do_syscall_64+0xcf/0x1e0 arch/x86/entry/common.c:83
entry_SYSCALL_64_after_hwframe+0x77/0x7f Bytes 40-65535 of 65536 are uninitialized Memory access of size 65536 starts at ffff888045a40000 This happens, because we're copying a 'struct btrfs_data_container' back to user-space. This btrfs_data_container is allocated in 'init_data_container()' via kvmalloc(), which does not zero-fill the memory. Fix this by using kvzalloc() which zeroes out the memory on allocation. CC: stable@vger.kernel.org # 4.14+ Reported-by: Reviewed-by: Qu Wenruo Reviewed-by: Filipe Manana Signed-off-by: Johannes Thumshirn Reviewed-by: David Sterba Signed-off-by: David Sterba --- fs/btrfs/backref.c | 12 +++--------- 1 file changed, 3 insertions(+), 9 deletions(-) diff --git a/fs/btrfs/backref.c b/fs/btrfs/backref.c index c1e6a5bbeeaf..58110c968667 100644 --- a/fs/btrfs/backref.c +++ b/fs/btrfs/backref.c @@ -2776,20 +2776,14 @@ struct btrfs_data_container *init_data_container(u32 total_bytes) size_t alloc_bytes; alloc_bytes = max_t(size_t, total_bytes, sizeof(*data)); - data = kvmalloc(alloc_bytes, GFP_KERNEL); + data = kvzalloc(alloc_bytes, GFP_KERNEL); if (!data) return ERR_PTR(-ENOMEM); - if (total_bytes >= sizeof(*data)) { + if (total_bytes >= sizeof(*data)) data->bytes_left = total_bytes - sizeof(*data); - data->bytes_missing = 0; - } else { + else data->bytes_missing = sizeof(*data) - total_bytes; - data->bytes_left = 0; - } - - data->elem_cnt = 0; - data->elem_missed = 0; return data; } From fe1c6c7acce10baf9521d6dccc17268d91ee2305 Mon Sep 17 00:00:00 2001 From: Qu Wenruo Date: Tue, 9 Apr 2024 20:32:34 +0930 Subject: [PATCH 126/313] btrfs: fix wrong block_start calculation for btrfs_drop_extent_map_range() [BUG] During my extent_map cleanup/refactor, with extra sanity checks, extent-map-tests::test_case_7() would not pass the checks. The problem is, after btrfs_drop_extent_map_range(), the resulted extent_map has a @block_start way too large. 
Meanwhile my btrfs_file_extent_item based members are returning a correct @disk_bytenr/@offset combination. The extent map layout looks like this: 0 16K 32K 48K | PINNED | | Regular | The regular em at [32K, 48K) also has 32K @block_start. Then drop range [0, 36K), which should shrink the regular one to be [36K, 48K). However the @block_start is incorrect, we expect 32K + 4K, but got 52K. [CAUSE] Inside btrfs_drop_extent_map_range() function, if we hit an extent_map that covers the target range but is still beyond it, we need to split that extent map into half: |<-- drop range -->| |<----- existing extent_map --->| And if the extent map is not compressed, we need to forward extent_map::block_start by the difference between the end of drop range and the extent map start. However in that particular case, the difference is calculated using (start + len - em->start). The problem is @start can be modified if the drop range covers any pinned extent. This leads to wrong calculation, and would be caught by my later extent_map sanity checks, which checks the em::block_start against btrfs_file_extent_item::disk_bytenr + btrfs_file_extent_item::offset. This is a regression caused by commit c962098ca4af ("btrfs: fix incorrect splitting in btrfs_drop_extent_map_range"), which removed the @len update for pinned extents. [FIX] Fix it by avoiding using @start completely, and use @end - em->start instead, which @end is exclusive bytenr number. And update the test case to verify the @block_start to prevent such problem from happening. Thankfully this is not going to lead to any data corruption, as IO path does not utilize btrfs_drop_extent_map_range() with @skip_pinned set. So this fix is only here for the sake of consistency/correctness. 
CC: stable@vger.kernel.org # 6.5+ Fixes: c962098ca4af ("btrfs: fix incorrect splitting in btrfs_drop_extent_map_range") Reviewed-by: Filipe Manana Signed-off-by: Qu Wenruo Signed-off-by: David Sterba --- fs/btrfs/extent_map.c | 2 +- fs/btrfs/tests/extent-map-tests.c | 5 +++++ 2 files changed, 6 insertions(+), 1 deletion(-) diff --git a/fs/btrfs/extent_map.c b/fs/btrfs/extent_map.c index 445f7716f1e2..24a048210b15 100644 --- a/fs/btrfs/extent_map.c +++ b/fs/btrfs/extent_map.c @@ -817,7 +817,7 @@ void btrfs_drop_extent_map_range(struct btrfs_inode *inode, u64 start, u64 end, split->block_len = em->block_len; split->orig_start = em->orig_start; } else { - const u64 diff = start + len - em->start; + const u64 diff = end - em->start; split->block_len = split->len; split->block_start += diff; diff --git a/fs/btrfs/tests/extent-map-tests.c b/fs/btrfs/tests/extent-map-tests.c index 253cce7ffecf..47b5d301038e 100644 --- a/fs/btrfs/tests/extent-map-tests.c +++ b/fs/btrfs/tests/extent-map-tests.c @@ -847,6 +847,11 @@ static int test_case_7(struct btrfs_fs_info *fs_info) goto out; } + if (em->block_start != SZ_32K + SZ_4K) { + test_err("em->block_start is %llu, expected 36K", em->block_start); + goto out; + } + free_extent_map(em); read_lock(&em_tree->lock); From 6a94cf996f104633bfb8d260eedf96a0dbebb384 Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Fri, 19 Apr 2024 08:51:47 +0200 Subject: [PATCH 127/313] Revert "wifi: iwlwifi: bump FW API to 90 for BZ/SC devices" Revert the API version bump, the kernel doesn't actually have all the code to deal with that version yet. 
Fixes: 653a90f6b226 ("wifi: iwlwifi: bump FW API to 90 for BZ/SC devices") Link: https://msgid.link/20240419085147.cd756fadab03.Ibccbb65be8e05b516cae1b9fb27a959662f9f51a@changeid Signed-off-by: Johannes Berg --- drivers/net/wireless/intel/iwlwifi/cfg/bz.c | 2 +- drivers/net/wireless/intel/iwlwifi/cfg/sc.c | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/net/wireless/intel/iwlwifi/cfg/bz.c b/drivers/net/wireless/intel/iwlwifi/cfg/bz.c index 072b0a5827d1..eca1457caa0c 100644 --- a/drivers/net/wireless/intel/iwlwifi/cfg/bz.c +++ b/drivers/net/wireless/intel/iwlwifi/cfg/bz.c @@ -10,7 +10,7 @@ #include "fw/api/txq.h" /* Highest firmware API version supported */ -#define IWL_BZ_UCODE_API_MAX 90 +#define IWL_BZ_UCODE_API_MAX 89 /* Lowest firmware API version supported */ #define IWL_BZ_UCODE_API_MIN 80 diff --git a/drivers/net/wireless/intel/iwlwifi/cfg/sc.c b/drivers/net/wireless/intel/iwlwifi/cfg/sc.c index 9b79279fd76c..dbbcb2d0968c 100644 --- a/drivers/net/wireless/intel/iwlwifi/cfg/sc.c +++ b/drivers/net/wireless/intel/iwlwifi/cfg/sc.c @@ -10,7 +10,7 @@ #include "fw/api/txq.h" /* Highest firmware API version supported */ -#define IWL_SC_UCODE_API_MAX 90 +#define IWL_SC_UCODE_API_MAX 89 /* Lowest firmware API version supported */ #define IWL_SC_UCODE_API_MIN 82 From 89884459a0b9e6ecd62a1ddfdb7708b34ee33649 Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Thu, 18 Apr 2024 10:52:19 +0200 Subject: [PATCH 128/313] wifi: mac80211: fix idle calculation with multi-link The vif's idle state doesn't automatically go to true when any link removes the channel context, it's only idle when _all_ links no longer have a channel context. Fix that. 
Reviewed-by: Miriam Rachel Korenblit Link: https://msgid.link/20240418105220.90df97557702.I05d2228ce85c203b9f2d6da8538cc16dce46752a@changeid Signed-off-by: Johannes Berg --- net/mac80211/chan.c | 27 ++++++++++++++++++++++----- 1 file changed, 22 insertions(+), 5 deletions(-) diff --git a/net/mac80211/chan.c b/net/mac80211/chan.c index 80e4b9784131..ccacaed32817 100644 --- a/net/mac80211/chan.c +++ b/net/mac80211/chan.c @@ -797,6 +797,7 @@ static int ieee80211_assign_link_chanctx(struct ieee80211_link_data *link, struct ieee80211_local *local = sdata->local; struct ieee80211_chanctx_conf *conf; struct ieee80211_chanctx *curr_ctx = NULL; + bool new_idle; int ret = 0; if (WARN_ON(sdata->vif.type == NL80211_IFTYPE_NAN)) @@ -829,8 +830,6 @@ static int ieee80211_assign_link_chanctx(struct ieee80211_link_data *link, out: rcu_assign_pointer(link->conf->chanctx_conf, conf); - sdata->vif.cfg.idle = !conf; - if (curr_ctx && ieee80211_chanctx_num_assigned(local, curr_ctx) > 0) { ieee80211_recalc_chanctx_chantype(local, curr_ctx); ieee80211_recalc_smps_chanctx(local, curr_ctx); @@ -843,9 +842,27 @@ static int ieee80211_assign_link_chanctx(struct ieee80211_link_data *link, ieee80211_recalc_chanctx_min_def(local, new_ctx, NULL); } - if (sdata->vif.type != NL80211_IFTYPE_P2P_DEVICE && - sdata->vif.type != NL80211_IFTYPE_MONITOR) - ieee80211_vif_cfg_change_notify(sdata, BSS_CHANGED_IDLE); + if (conf) { + new_idle = false; + } else { + struct ieee80211_link_data *tmp; + + new_idle = true; + for_each_sdata_link(local, tmp) { + if (rcu_access_pointer(tmp->conf->chanctx_conf)) { + new_idle = false; + break; + } + } + } + + if (new_idle != sdata->vif.cfg.idle) { + sdata->vif.cfg.idle = new_idle; + + if (sdata->vif.type != NL80211_IFTYPE_P2P_DEVICE && + sdata->vif.type != NL80211_IFTYPE_MONITOR) + ieee80211_vif_cfg_change_notify(sdata, BSS_CHANGED_IDLE); + } ieee80211_check_fast_xmit_iface(sdata); From 645acc6f55918feacc4572dd80acbb152b2208d9 Mon Sep 17 00:00:00 2001 From: Johannes Berg 
Date: Thu, 18 Apr 2024 10:52:20 +0200 Subject: [PATCH 129/313] wifi: mac80211: mlme: re-parse with correct mode When doing re-parsing in ieee80211_determine_chan_mode(), the conn->mode is changed, and the whole point of doing the parsing again was to parse as the downgraded mode. However, that didn't actually work, because the setting was copied before and never changed again. Fix that. Fixes: 310c8387c638 ("wifi: mac80211: clean up connection process") Reviewed-by: Miriam Rachel Korenblit Link: https://msgid.link/20240418105220.5e0d1fcb5622.Ib0673e0bc90033fd6d387b6a5f107c040eb907cf@changeid Signed-off-by: Johannes Berg --- net/mac80211/mlme.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/mac80211/mlme.c b/net/mac80211/mlme.c index db7128f6c901..98cb475a14c8 100644 --- a/net/mac80211/mlme.c +++ b/net/mac80211/mlme.c @@ -616,7 +616,6 @@ ieee80211_determine_chan_mode(struct ieee80211_sub_if_data *sdata, .from_ap = true, .start = ies->data, .len = ies->len, - .mode = conn->mode, }; struct ieee802_11_elems *elems; struct ieee80211_supported_band *sband; @@ -625,6 +624,7 @@ ieee80211_determine_chan_mode(struct ieee80211_sub_if_data *sdata, int ret; again: + parse_params.mode = conn->mode; elems = ieee802_11_parse_elems_full(&parse_params); if (!elems) return ERR_PTR(-ENOMEM); From 1ac6f60aab36ae3f0520cc7ace02ad32240b8a1f Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Thu, 18 Apr 2024 10:52:21 +0200 Subject: [PATCH 130/313] wifi: mac80211: mlme: fix memory leak When re-parsing the elements here (with changed mode), free the original ones first to avoid leaking memory. 
Fixes: 310c8387c638 ("wifi: mac80211: clean up connection process") Reviewed-by: Miriam Rachel Korenblit Link: https://msgid.link/20240418105220.458421e3bbff.Icb5b84cba3ea420794cf009cf18ec3d76e434736@changeid Signed-off-by: Johannes Berg --- net/mac80211/mlme.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/net/mac80211/mlme.c b/net/mac80211/mlme.c index 98cb475a14c8..6fa3752b740e 100644 --- a/net/mac80211/mlme.c +++ b/net/mac80211/mlme.c @@ -753,8 +753,10 @@ ieee80211_determine_chan_mode(struct ieee80211_sub_if_data *sdata, } /* the mode can only decrease, so this must terminate */ - if (ap_mode != conn->mode) + if (ap_mode != conn->mode) { + kfree(elems); goto again; + } mlme_link_id_dbg(sdata, link_id, "connecting with %s mode, max bandwidth %d MHz\n", From 2fb5dfe18e8255dbec4d0f8e81297de8e3490285 Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Thu, 18 Apr 2024 10:52:22 +0200 Subject: [PATCH 131/313] wifi: mac80211: mlme: re-parse if AP mode is less than client If the AP mode ends up being determined less than the client mode, there may be different reasons for this, e.g. AP misconfiguration. If this happens in a way that causes e.g. EHT to be rejected, the elements need to be re-parsed since we'll connect as HE, but not reparsing means that we'll still think it's OK to use multi-link, so we can connect in a non-sensical configuration of advertising only HE on a secondary link. This normally won't happen for the assoc link because that reuses the mode from authentication, and if that's not EHT, multi-link association is rejected. Fix this inconsistency by parsing the elements again if the mode was different from the first parsing attempt. Print the message a bit later to avoid printing "determined AP ... to be HE" twice in cases where ieee80211_determine_ap_chan() returned a lesser mode, rather than the regulatory downgrades below changing it. 
Fixes: 310c8387c638 ("wifi: mac80211: clean up connection process") Reviewed-by: Miriam Rachel Korenblit Link: https://msgid.link/20240418105220.d1f25d92cfe7.Ia21eff6cdcae2f5aca13cf8e742a986af5e70f89@changeid Signed-off-by: Johannes Berg --- net/mac80211/mlme.c | 13 +++++++++---- 1 file changed, 9 insertions(+), 4 deletions(-) diff --git a/net/mac80211/mlme.c b/net/mac80211/mlme.c index 6fa3752b740e..502c34d52fbe 100644 --- a/net/mac80211/mlme.c +++ b/net/mac80211/mlme.c @@ -632,15 +632,21 @@ ieee80211_determine_chan_mode(struct ieee80211_sub_if_data *sdata, ap_mode = ieee80211_determine_ap_chan(sdata, channel, bss->vht_cap_info, elems, false, conn, &ap_chandef); - mlme_link_id_dbg(sdata, link_id, "determined AP %pM to be %s\n", - cbss->bssid, ieee80211_conn_mode_str(ap_mode)); - /* this should be impossible since parsing depends on our mode */ if (WARN_ON(ap_mode > conn->mode)) { ret = -EINVAL; goto free; } + if (conn->mode != ap_mode) { + conn->mode = ap_mode; + kfree(elems); + goto again; + } + + mlme_link_id_dbg(sdata, link_id, "determined AP %pM to be %s\n", + cbss->bssid, ieee80211_conn_mode_str(ap_mode)); + sband = sdata->local->hw.wiphy->bands[channel->band]; switch (channel->band) { @@ -691,7 +697,6 @@ ieee80211_determine_chan_mode(struct ieee80211_sub_if_data *sdata, break; } - conn->mode = ap_mode; chanreq->oper = ap_chandef; /* wider-bandwidth OFDMA is only done in EHT */ From 801ea33ae82d6a9d954074fbcf8ea9d18f1543a7 Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Thu, 18 Apr 2024 10:52:23 +0200 Subject: [PATCH 132/313] wifi: nl80211: don't free NULL coalescing rule If the parsing fails, we can dereference a NULL pointer here. 
Cc: stable@vger.kernel.org Fixes: be29b99a9b51 ("cfg80211/nl80211: Add packet coalesce support") Reviewed-by: Miriam Rachel Korenblit Link: https://msgid.link/20240418105220.b328f80406e7.Id75d961050deb05b3e4e354e024866f350c68103@changeid Signed-off-by: Johannes Berg --- net/wireless/nl80211.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/net/wireless/nl80211.c b/net/wireless/nl80211.c index b4edba6b0b7b..30ff9a470813 100644 --- a/net/wireless/nl80211.c +++ b/net/wireless/nl80211.c @@ -14030,6 +14030,8 @@ static int nl80211_set_coalesce(struct sk_buff *skb, struct genl_info *info) error: for (i = 0; i < new_coalesce.n_rules; i++) { tmp_rule = &new_coalesce.rules[i]; + if (!tmp_rule) + continue; for (j = 0; j < tmp_rule->n_patterns; j++) kfree(tmp_rule->patterns[j].mask); kfree(tmp_rule->patterns); From 2a4e01e5270b9fa9f6e6e0a4c24ac51a758636f9 Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Thu, 18 Apr 2024 10:52:24 +0200 Subject: [PATCH 133/313] wifi: mac80211_hwsim: init peer measurement result If we don't get all the values here, we might pass them to cfg80211 uninitialized. Fix that, even if the input might then not make much sense. 
Fixes: 2af3b2a631b1 ("mac80211_hwsim: add PMSR report support via virtio") Reviewed-by: Miriam Rachel Korenblit Link: https://msgid.link/20240418105220.e1317621c1f9.If7dd447de24d7493d133284db5e9e482e4e299f8@changeid Signed-off-by: Johannes Berg --- drivers/net/wireless/virtual/mac80211_hwsim.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/wireless/virtual/mac80211_hwsim.c b/drivers/net/wireless/virtual/mac80211_hwsim.c index b55fe320633c..59e1fc0018df 100644 --- a/drivers/net/wireless/virtual/mac80211_hwsim.c +++ b/drivers/net/wireless/virtual/mac80211_hwsim.c @@ -3899,7 +3899,7 @@ static int hwsim_pmsr_report_nl(struct sk_buff *msg, struct genl_info *info) } nla_for_each_nested(peer, peers, rem) { - struct cfg80211_pmsr_result result; + struct cfg80211_pmsr_result result = {}; err = mac80211_hwsim_parse_pmsr_result(peer, &result, info); if (err) From cb55e08dba3526796e35d24a6d5db4ed6dcb8a4b Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Thu, 18 Apr 2024 10:52:25 +0200 Subject: [PATCH 134/313] wifi: mac80211: remove link before AP If the AP removal timer is long, we don't really want to remove the link immediately. However, we really should do it _before_ the AP removes it (which happens at or after count reaches 0), so subtract 1 from the countdown when scheduling the timer. This causes the link removal work to run just after the beacon with value 1 is received. If the counter is already zero, do it immediately. This fixes an issue where we do the removal too late and receive a beacon from the AP that's no longer associated with the MLD, but thus removed EHT and ML elements, and then we disconnect instead from the whole MLD, since one of the associated APs changed mode from EHT to HE. 
Fixes: 8eb8dd2ffbbb ("wifi: mac80211: Support link removal using Reconfiguration ML element") Reviewed-by: Ilan Peer Reviewed-by: Miriam Rachel Korenblit Link: https://msgid.link/20240418105220.03ac4a09fa74.Ifb8c8d38e3402721a81ce5981568f47b5c5889cb@changeid Signed-off-by: Johannes Berg --- net/mac80211/mlme.c | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/net/mac80211/mlme.c b/net/mac80211/mlme.c index 502c34d52fbe..6f0880ec89da 100644 --- a/net/mac80211/mlme.c +++ b/net/mac80211/mlme.c @@ -5844,8 +5844,11 @@ static void ieee80211_ml_reconfiguration(struct ieee80211_sub_if_data *sdata, continue; } - link_delay = link_conf->beacon_int * - link_removal_timeout[link_id]; + if (link_removal_timeout[link_id] < 1) + link_delay = 0; + else + link_delay = link_conf->beacon_int * + (link_removal_timeout[link_id] - 1); if (!delay) delay = link_delay; From c53d8a59351e4347452e263e2e5d7446ec93da83 Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Thu, 18 Apr 2024 10:52:26 +0200 Subject: [PATCH 135/313] wifi: mac80211: fix unaligned le16 access The AP removal timer field need not be aligned, so the code shouldn't access it directly, but use unaligned loads. Use get_unaligned_le16(), which even is shorter than the current code since it doesn't need a cast. 
Fixes: 8eb8dd2ffbbb ("wifi: mac80211: Support link removal using Reconfiguration ML element") Reviewed-by: Ilan Peer Reviewed-by: Miriam Rachel Korenblit Link: https://msgid.link/20240418105220.356788ba0045.I2b3cdb3644e205d5bb10322c345c0499171cf5d2@changeid Signed-off-by: Johannes Berg --- net/mac80211/mlme.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/mac80211/mlme.c b/net/mac80211/mlme.c index 6f0880ec89da..3bbb216a0fc8 100644 --- a/net/mac80211/mlme.c +++ b/net/mac80211/mlme.c @@ -5819,7 +5819,7 @@ static void ieee80211_ml_reconfiguration(struct ieee80211_sub_if_data *sdata, */ if (control & IEEE80211_MLE_STA_RECONF_CONTROL_AP_REM_TIMER_PRESENT) - link_removal_timeout[link_id] = le16_to_cpu(*(__le16 *)pos); + link_removal_timeout[link_id] = get_unaligned_le16(pos); } removed_links &= sdata->vif.valid_links; From 69197dfc64007b5292cc960581548f41ccd44828 Mon Sep 17 00:00:00 2001 From: Duanqiang Wen Date: Thu, 18 Apr 2024 10:15:56 +0800 Subject: [PATCH 136/313] net: libwx: fix alloc msix vectors failed driver needs queue msix vectors and one misc irq vector, but only queue vectors need irq affinity. when num_online_cpus is less than chip max msix vectors, driver will acquire (num_online_cpus + 1) vectors, and call pci_alloc_irq_vectors_affinity functions with affinity params without setting pre_vectors or post_vectors, it will cause return error code -ENOSPC. Misc irq vector is vector 0, driver need to set affinity params .pre_vectors = 1. Fixes: 3f703186113f ("net: libwx: Add irq flow functions") Signed-off-by: Duanqiang Wen Signed-off-by: David S. 
Miller --- drivers/net/ethernet/wangxun/libwx/wx_lib.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/ethernet/wangxun/libwx/wx_lib.c b/drivers/net/ethernet/wangxun/libwx/wx_lib.c index 6dff2c85682d..6fae161cbcb8 100644 --- a/drivers/net/ethernet/wangxun/libwx/wx_lib.c +++ b/drivers/net/ethernet/wangxun/libwx/wx_lib.c @@ -1598,7 +1598,7 @@ static void wx_set_num_queues(struct wx *wx) */ static int wx_acquire_msix_vectors(struct wx *wx) { - struct irq_affinity affd = {0, }; + struct irq_affinity affd = { .pre_vectors = 1 }; int nvecs, i; /* We start by asking for one vector per queue pair */ From f58f45c1e5b92975e91754f5407250085a6ae7cf Mon Sep 17 00:00:00 2001 From: David Bauer Date: Thu, 18 Apr 2024 15:29:08 +0200 Subject: [PATCH 137/313] vxlan: drop packets from invalid src-address The VXLAN driver currently does not check if the inner layer2 source-address is valid. In case source-address snooping/learning is enabled, an entry in the FDB for the invalid address is created with the layer3 address of the tunnel endpoint. If the frame happens to have a non-unicast address set, all this non-unicast traffic is subsequently not flooded to the tunnel network but sent to the learnt host in the FDB. To make matters worse, this FDB entry does not expire. Apply the same filtering for packets as it is done for bridges. This not only drops these invalid packets but prevents them from being learnt into the FDB. Fixes: d342894c5d2f ("vxlan: virtual extensible lan") Suggested-by: Ido Schimmel Signed-off-by: David Bauer Signed-off-by: David S. 
Miller --- drivers/net/vxlan/vxlan_core.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/drivers/net/vxlan/vxlan_core.c b/drivers/net/vxlan/vxlan_core.c index 3495591a5c29..ba319fc21957 100644 --- a/drivers/net/vxlan/vxlan_core.c +++ b/drivers/net/vxlan/vxlan_core.c @@ -1615,6 +1615,10 @@ static bool vxlan_set_mac(struct vxlan_dev *vxlan, if (ether_addr_equal(eth_hdr(skb)->h_source, vxlan->dev->dev_addr)) return false; + /* Ignore packets from invalid src-address */ + if (!is_valid_ether_addr(eth_hdr(skb)->h_source)) + return false; + /* Get address from the outer IP header */ if (vxlan_get_sk_family(vs) == AF_INET) { saddr.sin.sin_addr.s_addr = ip_hdr(skb)->saddr; From a386c30410450ea87cd38070f9feaca49dadce29 Mon Sep 17 00:00:00 2001 From: Lucas Stach Date: Thu, 4 Apr 2024 10:17:56 +0200 Subject: [PATCH 138/313] drm/atomic-helper: fix parameter order in drm_format_conv_state_copy() call MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Old and new state parameters are swapped, so the old state was cleared instead of the new duplicated state. 
Fixes: 903674588a48 ("drm/atomic-helper: Add format-conversion state to shadow-plane state") Signed-off-by: Lucas Stach Tested-by: Leonard Göhrs Reviewed-by: Thomas Zimmermann Cc: # v6.8+ Signed-off-by: Thomas Zimmermann Link: https://patchwork.freedesktop.org/patch/msgid/20240404081756.2714424-1-l.stach@pengutronix.de --- drivers/gpu/drm/drm_gem_atomic_helper.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/drm_gem_atomic_helper.c b/drivers/gpu/drm/drm_gem_atomic_helper.c index e440f458b663..93337543aac3 100644 --- a/drivers/gpu/drm/drm_gem_atomic_helper.c +++ b/drivers/gpu/drm/drm_gem_atomic_helper.c @@ -224,8 +224,8 @@ __drm_gem_duplicate_shadow_plane_state(struct drm_plane *plane, __drm_atomic_helper_plane_duplicate_state(plane, &new_shadow_plane_state->base); - drm_format_conv_state_copy(&shadow_plane_state->fmtcnv_state, - &new_shadow_plane_state->fmtcnv_state); + drm_format_conv_state_copy(&new_shadow_plane_state->fmtcnv_state, + &shadow_plane_state->fmtcnv_state); } EXPORT_SYMBOL(__drm_gem_duplicate_shadow_plane_state); From dad80c6bff770d25f67ec25fe011730e4a463008 Mon Sep 17 00:00:00 2001 From: David Howells Date: Thu, 4 Apr 2024 16:05:19 +0100 Subject: [PATCH 139/313] cifs: Fix reacquisition of volume cookie on still-live connection During mount, cifs_mount_get_tcon() gets a tcon resource connection record and then attaches an fscache volume cookie to it. However, it does this irrespective of whether or not the tcon returned from cifs_get_tcon() is a new record or one that's already in use. This leads to a warning about a volume cookie collision and a leaked volume cookie because tcon->fscache gets reset. Fix this by adding a mutex and a "we've already tried this" flag and only doing it once for the lifetime of the tcon. [!] Note: Looking at cifs_mount_get_tcon(), a more general solution may actually be required. 
Reacquiring the volume cookie isn't the only thing that function does: it also partially reinitialises the tcon record without any locking - which may cause live filesystem ops already using the tcon through a previous mount to malfunction. This can be reproduced simply by something like: mount //example.com/test /xfstest.test -o user=shares,pass=xxx,fsc mount //example.com/test /mnt -o user=shares,pass=xxx,fsc Fixes: 70431bfd825d ("cifs: Support fscache indexing rewrite") Signed-off-by: David Howells Acked-by: Paulo Alcantara (Red Hat) cc: Shyam Prasad N cc: linux-cifs@vger.kernel.org cc: linux-fsdevel@vger.kernel.org Signed-off-by: Steve French --- fs/smb/client/cifsglob.h | 2 ++ fs/smb/client/fscache.c | 13 +++++++++++++ fs/smb/client/misc.c | 3 +++ 3 files changed, 18 insertions(+) diff --git a/fs/smb/client/cifsglob.h b/fs/smb/client/cifsglob.h index d6669ce4ae87..fc09d1c0ee07 100644 --- a/fs/smb/client/cifsglob.h +++ b/fs/smb/client/cifsglob.h @@ -1276,7 +1276,9 @@ struct cifs_tcon { __u32 max_cached_dirs; #ifdef CONFIG_CIFS_FSCACHE u64 resource_id; /* server resource id */ + bool fscache_acquired; /* T if we've tried acquiring a cookie */ struct fscache_volume *fscache; /* cookie for share */ + struct mutex fscache_lock; /* Prevent regetting a cookie */ #endif struct list_head pending_opens; /* list of incomplete opens */ struct cached_fids *cfids; diff --git a/fs/smb/client/fscache.c b/fs/smb/client/fscache.c index 340efce8f052..113bde8f1e61 100644 --- a/fs/smb/client/fscache.c +++ b/fs/smb/client/fscache.c @@ -43,12 +43,23 @@ int cifs_fscache_get_super_cookie(struct cifs_tcon *tcon) char *key; int ret = -ENOMEM; + if (tcon->fscache_acquired) + return 0; + + mutex_lock(&tcon->fscache_lock); + if (tcon->fscache_acquired) { + mutex_unlock(&tcon->fscache_lock); + return 0; + } + tcon->fscache_acquired = true; + tcon->fscache = NULL; switch (sa->sa_family) { case AF_INET: case AF_INET6: break; default: + mutex_unlock(&tcon->fscache_lock); cifs_dbg(VFS, "Unknown 
network family '%d'\n", sa->sa_family); return -EINVAL; } @@ -57,6 +68,7 @@ int cifs_fscache_get_super_cookie(struct cifs_tcon *tcon) sharename = extract_sharename(tcon->tree_name); if (IS_ERR(sharename)) { + mutex_unlock(&tcon->fscache_lock); cifs_dbg(FYI, "%s: couldn't extract sharename\n", __func__); return PTR_ERR(sharename); } @@ -90,6 +102,7 @@ int cifs_fscache_get_super_cookie(struct cifs_tcon *tcon) kfree(key); out: kfree(sharename); + mutex_unlock(&tcon->fscache_lock); return ret; } diff --git a/fs/smb/client/misc.c b/fs/smb/client/misc.c index 7d15a1969b81..ad44f8d66b37 100644 --- a/fs/smb/client/misc.c +++ b/fs/smb/client/misc.c @@ -139,6 +139,9 @@ tcon_info_alloc(bool dir_leases_enabled) atomic_set(&ret_buf->num_local_opens, 0); atomic_set(&ret_buf->num_remote_opens, 0); ret_buf->stats_from_time = ktime_get_real_seconds(); +#ifdef CONFIG_CIFS_FSCACHE + mutex_init(&ret_buf->fscache_lock); +#endif return ret_buf; } From afc23febd51c7e24361e3a9c09f3e892eb0a41ea Mon Sep 17 00:00:00 2001 From: David Howells Date: Thu, 4 Apr 2024 13:51:36 +0100 Subject: [PATCH 140/313] cifs: Add tracing for the cifs_tcon struct refcounting Add tracing for the refcounting/lifecycle of the cifs_tcon struct, marking different events with different labels and giving each tcon its own debug ID so that the tracelines corresponding to individual tcons can be distinguished. 
This can be enabled with: echo 1 >/sys/kernel/debug/tracing/events/cifs/smb3_tcon_ref/enable Signed-off-by: David Howells Acked-by: Paulo Alcantara (Red Hat) cc: Shyam Prasad N cc: linux-cifs@vger.kernel.org cc: linux-fsdevel@vger.kernel.org Signed-off-by: Steve French --- fs/smb/client/cifsfs.c | 2 + fs/smb/client/cifsglob.h | 1 + fs/smb/client/cifsproto.h | 9 ++-- fs/smb/client/connect.c | 21 ++++---- fs/smb/client/fscache.c | 7 +++ fs/smb/client/misc.c | 10 ++-- fs/smb/client/smb2misc.c | 10 ++-- fs/smb/client/smb2ops.c | 7 ++- fs/smb/client/smb2pdu.c | 8 +-- fs/smb/client/smb2transport.c | 2 + fs/smb/client/trace.h | 92 ++++++++++++++++++++++++++++++++++- 11 files changed, 143 insertions(+), 26 deletions(-) diff --git a/fs/smb/client/cifsfs.c b/fs/smb/client/cifsfs.c index d41eedbff674..30781789dfd9 100644 --- a/fs/smb/client/cifsfs.c +++ b/fs/smb/client/cifsfs.c @@ -739,6 +739,8 @@ static void cifs_umount_begin(struct super_block *sb) spin_lock(&cifs_tcp_ses_lock); spin_lock(&tcon->tc_lock); + trace_smb3_tcon_ref(tcon->debug_id, tcon->tc_count, + netfs_trace_tcon_ref_see_umount); if ((tcon->tc_count > 1) || (tcon->status == TID_EXITING)) { /* we have other mounts to same share or we have already tried to umount this and woken up diff --git a/fs/smb/client/cifsglob.h b/fs/smb/client/cifsglob.h index fc09d1c0ee07..6ff35570db81 100644 --- a/fs/smb/client/cifsglob.h +++ b/fs/smb/client/cifsglob.h @@ -1190,6 +1190,7 @@ struct cifs_fattr { */ struct cifs_tcon { struct list_head tcon_list; + int debug_id; /* Debugging for tracing */ int tc_count; struct list_head rlist; /* reconnect list */ spinlock_t tc_lock; /* protect anything here that is not protected */ diff --git a/fs/smb/client/cifsproto.h b/fs/smb/client/cifsproto.h index 8e0a348f1f66..fbc358c09da3 100644 --- a/fs/smb/client/cifsproto.h +++ b/fs/smb/client/cifsproto.h @@ -303,7 +303,7 @@ cifs_get_tcp_session(struct smb3_fs_context *ctx, struct TCP_Server_Info *primary_server); extern void 
cifs_put_tcp_session(struct TCP_Server_Info *server, int from_reconnect); -extern void cifs_put_tcon(struct cifs_tcon *tcon); +extern void cifs_put_tcon(struct cifs_tcon *tcon, enum smb3_tcon_ref_trace trace); extern void cifs_release_automount_timer(void); @@ -530,8 +530,9 @@ extern int CIFSSMBLogoff(const unsigned int xid, struct cifs_ses *ses); extern struct cifs_ses *sesInfoAlloc(void); extern void sesInfoFree(struct cifs_ses *); -extern struct cifs_tcon *tcon_info_alloc(bool dir_leases_enabled); -extern void tconInfoFree(struct cifs_tcon *); +extern struct cifs_tcon *tcon_info_alloc(bool dir_leases_enabled, + enum smb3_tcon_ref_trace trace); +extern void tconInfoFree(struct cifs_tcon *tcon, enum smb3_tcon_ref_trace trace); extern int cifs_sign_rqst(struct smb_rqst *rqst, struct TCP_Server_Info *server, __u32 *pexpected_response_sequence_number); @@ -721,8 +722,6 @@ static inline int cifs_create_options(struct cifs_sb_info *cifs_sb, int options) return options; } -struct super_block *cifs_get_tcon_super(struct cifs_tcon *tcon); -void cifs_put_tcon_super(struct super_block *sb); int cifs_wait_for_server_reconnect(struct TCP_Server_Info *server, bool retry); /* Put references of @ses and its children */ diff --git a/fs/smb/client/connect.c b/fs/smb/client/connect.c index 4e35970681bf..7a16e12f5da8 100644 --- a/fs/smb/client/connect.c +++ b/fs/smb/client/connect.c @@ -1943,7 +1943,7 @@ cifs_setup_ipc(struct cifs_ses *ses, struct smb3_fs_context *ctx) } /* no need to setup directory caching on IPC share, so pass in false */ - tcon = tcon_info_alloc(false); + tcon = tcon_info_alloc(false, netfs_trace_tcon_ref_new_ipc); if (tcon == NULL) return -ENOMEM; @@ -1960,7 +1960,7 @@ cifs_setup_ipc(struct cifs_ses *ses, struct smb3_fs_context *ctx) if (rc) { cifs_server_dbg(VFS, "failed to connect to IPC (rc=%d)\n", rc); - tconInfoFree(tcon); + tconInfoFree(tcon, netfs_trace_tcon_ref_free_ipc_fail); goto out; } @@ -2043,7 +2043,7 @@ void __cifs_put_smb_ses(struct cifs_ses 
*ses) * files on session close, as specified in MS-SMB2 3.3.5.6 Receiving an * SMB2 LOGOFF Request. */ - tconInfoFree(tcon); + tconInfoFree(tcon, netfs_trace_tcon_ref_free_ipc); if (do_logoff) { xid = get_xid(); rc = server->ops->logoff(xid, ses); @@ -2432,6 +2432,8 @@ cifs_find_tcon(struct cifs_ses *ses, struct smb3_fs_context *ctx) continue; } ++tcon->tc_count; + trace_smb3_tcon_ref(tcon->debug_id, tcon->tc_count, + netfs_trace_tcon_ref_get_find); spin_unlock(&tcon->tc_lock); spin_unlock(&cifs_tcp_ses_lock); return tcon; @@ -2441,7 +2443,7 @@ cifs_find_tcon(struct cifs_ses *ses, struct smb3_fs_context *ctx) } void -cifs_put_tcon(struct cifs_tcon *tcon) +cifs_put_tcon(struct cifs_tcon *tcon, enum smb3_tcon_ref_trace trace) { unsigned int xid; struct cifs_ses *ses; @@ -2457,6 +2459,7 @@ cifs_put_tcon(struct cifs_tcon *tcon) cifs_dbg(FYI, "%s: tc_count=%d\n", __func__, tcon->tc_count); spin_lock(&cifs_tcp_ses_lock); spin_lock(&tcon->tc_lock); + trace_smb3_tcon_ref(tcon->debug_id, tcon->tc_count - 1, trace); if (--tcon->tc_count > 0) { spin_unlock(&tcon->tc_lock); spin_unlock(&cifs_tcp_ses_lock); @@ -2493,7 +2496,7 @@ cifs_put_tcon(struct cifs_tcon *tcon) _free_xid(xid); cifs_fscache_release_super_cookie(tcon); - tconInfoFree(tcon); + tconInfoFree(tcon, netfs_trace_tcon_ref_free); cifs_put_smb_ses(ses); } @@ -2547,7 +2550,7 @@ cifs_get_tcon(struct cifs_ses *ses, struct smb3_fs_context *ctx) nohandlecache = ctx->nohandlecache; else nohandlecache = true; - tcon = tcon_info_alloc(!nohandlecache); + tcon = tcon_info_alloc(!nohandlecache, netfs_trace_tcon_ref_new); if (tcon == NULL) { rc = -ENOMEM; goto out_fail; @@ -2737,7 +2740,7 @@ cifs_get_tcon(struct cifs_ses *ses, struct smb3_fs_context *ctx) return tcon; out_fail: - tconInfoFree(tcon); + tconInfoFree(tcon, netfs_trace_tcon_ref_free_fail); return ERR_PTR(rc); } @@ -2754,7 +2757,7 @@ cifs_put_tlink(struct tcon_link *tlink) } if (!IS_ERR(tlink_tcon(tlink))) - cifs_put_tcon(tlink_tcon(tlink)); + 
cifs_put_tcon(tlink_tcon(tlink), netfs_trace_tcon_ref_put_tlink); kfree(tlink); } @@ -3319,7 +3322,7 @@ void cifs_mount_put_conns(struct cifs_mount_ctx *mnt_ctx) int rc = 0; if (mnt_ctx->tcon) - cifs_put_tcon(mnt_ctx->tcon); + cifs_put_tcon(mnt_ctx->tcon, netfs_trace_tcon_ref_put_mnt_ctx); else if (mnt_ctx->ses) cifs_put_smb_ses(mnt_ctx->ses); else if (mnt_ctx->server) diff --git a/fs/smb/client/fscache.c b/fs/smb/client/fscache.c index 113bde8f1e61..1a895e6243ee 100644 --- a/fs/smb/client/fscache.c +++ b/fs/smb/client/fscache.c @@ -94,6 +94,11 @@ int cifs_fscache_get_super_cookie(struct cifs_tcon *tcon) } pr_err("Cache volume key already in use (%s)\n", key); vcookie = NULL; + trace_smb3_tcon_ref(tcon->debug_id, tcon->tc_count, + netfs_trace_tcon_ref_see_fscache_collision); + } else { + trace_smb3_tcon_ref(tcon->debug_id, tcon->tc_count, + netfs_trace_tcon_ref_see_fscache_okay); } tcon->fscache = vcookie; @@ -115,6 +120,8 @@ void cifs_fscache_release_super_cookie(struct cifs_tcon *tcon) cifs_fscache_fill_volume_coherency(tcon, &cd); fscache_relinquish_volume(tcon->fscache, &cd, false); tcon->fscache = NULL; + trace_smb3_tcon_ref(tcon->debug_id, tcon->tc_count, + netfs_trace_tcon_ref_see_fscache_relinq); } void cifs_fscache_get_inode_cookie(struct inode *inode) diff --git a/fs/smb/client/misc.c b/fs/smb/client/misc.c index ad44f8d66b37..07c468ddb88a 100644 --- a/fs/smb/client/misc.c +++ b/fs/smb/client/misc.c @@ -111,9 +111,10 @@ sesInfoFree(struct cifs_ses *buf_to_free) } struct cifs_tcon * -tcon_info_alloc(bool dir_leases_enabled) +tcon_info_alloc(bool dir_leases_enabled, enum smb3_tcon_ref_trace trace) { struct cifs_tcon *ret_buf; + static atomic_t tcon_debug_id; ret_buf = kzalloc(sizeof(*ret_buf), GFP_KERNEL); if (!ret_buf) @@ -130,7 +131,8 @@ tcon_info_alloc(bool dir_leases_enabled) atomic_inc(&tconInfoAllocCount); ret_buf->status = TID_NEW; - ++ret_buf->tc_count; + ret_buf->debug_id = atomic_inc_return(&tcon_debug_id); + ret_buf->tc_count = 1; 
spin_lock_init(&ret_buf->tc_lock); INIT_LIST_HEAD(&ret_buf->openFileList); INIT_LIST_HEAD(&ret_buf->tcon_list); @@ -142,17 +144,19 @@ tcon_info_alloc(bool dir_leases_enabled) #ifdef CONFIG_CIFS_FSCACHE mutex_init(&ret_buf->fscache_lock); #endif + trace_smb3_tcon_ref(ret_buf->debug_id, ret_buf->tc_count, trace); return ret_buf; } void -tconInfoFree(struct cifs_tcon *tcon) +tconInfoFree(struct cifs_tcon *tcon, enum smb3_tcon_ref_trace trace) { if (tcon == NULL) { cifs_dbg(FYI, "Null buffer passed to tconInfoFree\n"); return; } + trace_smb3_tcon_ref(tcon->debug_id, tcon->tc_count, trace); free_cached_dirs(tcon->cfids); atomic_dec(&tconInfoAllocCount); kfree(tcon->nativeFileSystem); diff --git a/fs/smb/client/smb2misc.c b/fs/smb/client/smb2misc.c index cc72be5a93a9..677ef6f99a5b 100644 --- a/fs/smb/client/smb2misc.c +++ b/fs/smb/client/smb2misc.c @@ -767,7 +767,7 @@ smb2_cancelled_close_fid(struct work_struct *work) if (rc) cifs_tcon_dbg(VFS, "Close cancelled mid failed rc:%d\n", rc); - cifs_put_tcon(tcon); + cifs_put_tcon(tcon, netfs_trace_tcon_ref_put_cancelled_close_fid); kfree(cancelled); } @@ -811,6 +811,8 @@ smb2_handle_cancelled_close(struct cifs_tcon *tcon, __u64 persistent_fid, if (tcon->tc_count <= 0) { struct TCP_Server_Info *server = NULL; + trace_smb3_tcon_ref(tcon->debug_id, tcon->tc_count, + netfs_trace_tcon_ref_see_cancelled_close); WARN_ONCE(tcon->tc_count < 0, "tcon refcount is negative"); spin_unlock(&cifs_tcp_ses_lock); @@ -823,12 +825,14 @@ smb2_handle_cancelled_close(struct cifs_tcon *tcon, __u64 persistent_fid, return 0; } tcon->tc_count++; + trace_smb3_tcon_ref(tcon->debug_id, tcon->tc_count, + netfs_trace_tcon_ref_get_cancelled_close); spin_unlock(&cifs_tcp_ses_lock); rc = __smb2_handle_cancelled_cmd(tcon, SMB2_CLOSE_HE, 0, persistent_fid, volatile_fid); if (rc) - cifs_put_tcon(tcon); + cifs_put_tcon(tcon, netfs_trace_tcon_ref_put_cancelled_close); return rc; } @@ -856,7 +860,7 @@ smb2_handle_cancelled_mid(struct mid_q_entry *mid, struct 
TCP_Server_Info *serve rsp->PersistentFileId, rsp->VolatileFileId); if (rc) - cifs_put_tcon(tcon); + cifs_put_tcon(tcon, netfs_trace_tcon_ref_put_cancelled_mid); return rc; } diff --git a/fs/smb/client/smb2ops.c b/fs/smb/client/smb2ops.c index 78c94d0350fe..28f0b7d19d53 100644 --- a/fs/smb/client/smb2ops.c +++ b/fs/smb/client/smb2ops.c @@ -2915,8 +2915,11 @@ smb2_get_dfs_refer(const unsigned int xid, struct cifs_ses *ses, tcon = list_first_entry_or_null(&ses->tcon_list, struct cifs_tcon, tcon_list); - if (tcon) + if (tcon) { tcon->tc_count++; + trace_smb3_tcon_ref(tcon->debug_id, tcon->tc_count, + netfs_trace_tcon_ref_get_dfs_refer); + } spin_unlock(&cifs_tcp_ses_lock); } @@ -2980,6 +2983,8 @@ smb2_get_dfs_refer(const unsigned int xid, struct cifs_ses *ses, /* ipc tcons are not refcounted */ spin_lock(&cifs_tcp_ses_lock); tcon->tc_count--; + trace_smb3_tcon_ref(tcon->debug_id, tcon->tc_count, + netfs_trace_tcon_ref_dec_dfs_refer); /* tc_count can never go negative */ WARN_ON(tcon->tc_count < 0); spin_unlock(&cifs_tcp_ses_lock); diff --git a/fs/smb/client/smb2pdu.c b/fs/smb/client/smb2pdu.c index 86c647a947cc..a5efce03cb58 100644 --- a/fs/smb/client/smb2pdu.c +++ b/fs/smb/client/smb2pdu.c @@ -4138,6 +4138,8 @@ void smb2_reconnect_server(struct work_struct *work) list_for_each_entry(tcon, &ses->tcon_list, tcon_list) { if (tcon->need_reconnect || tcon->need_reopen_files) { tcon->tc_count++; + trace_smb3_tcon_ref(tcon->debug_id, tcon->tc_count, + netfs_trace_tcon_ref_get_reconnect_server); list_add_tail(&tcon->rlist, &tmp_list); tcon_selected = true; } @@ -4176,14 +4178,14 @@ void smb2_reconnect_server(struct work_struct *work) if (tcon->ipc) cifs_put_smb_ses(tcon->ses); else - cifs_put_tcon(tcon); + cifs_put_tcon(tcon, netfs_trace_tcon_ref_put_reconnect_server); } if (!ses_exist) goto done; /* allocate a dummy tcon struct used for reconnect */ - tcon = tcon_info_alloc(false); + tcon = tcon_info_alloc(false, netfs_trace_tcon_ref_new_reconnect_server); if (!tcon) { 
resched = true; list_for_each_entry_safe(ses, ses2, &tmp_ses_list, rlist) { @@ -4206,7 +4208,7 @@ void smb2_reconnect_server(struct work_struct *work) list_del_init(&ses->rlist); cifs_put_smb_ses(ses); } - tconInfoFree(tcon); + tconInfoFree(tcon, netfs_trace_tcon_ref_free_reconnect_server); done: cifs_dbg(FYI, "Reconnecting tcons and channels finished\n"); diff --git a/fs/smb/client/smb2transport.c b/fs/smb/client/smb2transport.c index 1d6e54f7879e..02135a605305 100644 --- a/fs/smb/client/smb2transport.c +++ b/fs/smb/client/smb2transport.c @@ -189,6 +189,8 @@ smb2_find_smb_sess_tcon_unlocked(struct cifs_ses *ses, __u32 tid) if (tcon->tid != tid) continue; ++tcon->tc_count; + trace_smb3_tcon_ref(tcon->debug_id, tcon->tc_count, + netfs_trace_tcon_ref_get_find_sess_tcon); return tcon; } diff --git a/fs/smb/client/trace.h b/fs/smb/client/trace.h index 5e83cb9da902..604e52876cd2 100644 --- a/fs/smb/client/trace.h +++ b/fs/smb/client/trace.h @@ -3,6 +3,9 @@ * Copyright (C) 2018, Microsoft Corporation. * * Author(s): Steve French + * + * Please use this 3-part article as a reference for writing new tracepoints: + * https://lwn.net/Articles/379903/ */ #undef TRACE_SYSTEM #define TRACE_SYSTEM cifs @@ -15,9 +18,70 @@ #include /* - * Please use this 3-part article as a reference for writing new tracepoints: - * https://lwn.net/Articles/379903/ + * Specify enums for tracing information. 
*/ +#define smb3_tcon_ref_traces \ + EM(netfs_trace_tcon_ref_dec_dfs_refer, "DEC DfsRef") \ + EM(netfs_trace_tcon_ref_free, "FRE ") \ + EM(netfs_trace_tcon_ref_free_fail, "FRE Fail ") \ + EM(netfs_trace_tcon_ref_free_ipc, "FRE Ipc ") \ + EM(netfs_trace_tcon_ref_free_ipc_fail, "FRE Ipc-F ") \ + EM(netfs_trace_tcon_ref_free_reconnect_server, "FRE Reconn") \ + EM(netfs_trace_tcon_ref_get_cancelled_close, "GET Cn-Cls") \ + EM(netfs_trace_tcon_ref_get_dfs_refer, "GET DfsRef") \ + EM(netfs_trace_tcon_ref_get_find, "GET Find ") \ + EM(netfs_trace_tcon_ref_get_find_sess_tcon, "GET FndSes") \ + EM(netfs_trace_tcon_ref_get_reconnect_server, "GET Reconn") \ + EM(netfs_trace_tcon_ref_new, "NEW ") \ + EM(netfs_trace_tcon_ref_new_ipc, "NEW Ipc ") \ + EM(netfs_trace_tcon_ref_new_reconnect_server, "NEW Reconn") \ + EM(netfs_trace_tcon_ref_put_cancelled_close, "PUT Cn-Cls") \ + EM(netfs_trace_tcon_ref_put_cancelled_close_fid, "PUT Cn-Fid") \ + EM(netfs_trace_tcon_ref_put_cancelled_mid, "PUT Cn-Mid") \ + EM(netfs_trace_tcon_ref_put_mnt_ctx, "PUT MntCtx") \ + EM(netfs_trace_tcon_ref_put_reconnect_server, "PUT Reconn") \ + EM(netfs_trace_tcon_ref_put_tlink, "PUT Tlink ") \ + EM(netfs_trace_tcon_ref_see_cancelled_close, "SEE Cn-Cls") \ + EM(netfs_trace_tcon_ref_see_fscache_collision, "SEE FV-CO!") \ + EM(netfs_trace_tcon_ref_see_fscache_okay, "SEE FV-Ok ") \ + EM(netfs_trace_tcon_ref_see_fscache_relinq, "SEE FV-Rlq") \ + E_(netfs_trace_tcon_ref_see_umount, "SEE Umount") + +#undef EM +#undef E_ + +/* + * Define those tracing enums. + */ +#ifndef __SMB3_DECLARE_TRACE_ENUMS_ONCE_ONLY +#define __SMB3_DECLARE_TRACE_ENUMS_ONCE_ONLY + +#define EM(a, b) a, +#define E_(a, b) a + +enum smb3_tcon_ref_trace { smb3_tcon_ref_traces } __mode(byte); + +#undef EM +#undef E_ +#endif + +/* + * Export enum symbols via userspace. 
+ */ +#define EM(a, b) TRACE_DEFINE_ENUM(a); +#define E_(a, b) TRACE_DEFINE_ENUM(a); + +smb3_tcon_ref_traces; + +#undef EM +#undef E_ + +/* + * Now redefine the EM() and E_() macros to map the enums to the strings that + * will be printed in the output. + */ +#define EM(a, b) { a, b }, +#define E_(a, b) { a, b } /* For logging errors in read or write */ DECLARE_EVENT_CLASS(smb3_rw_err_class, @@ -1125,6 +1189,30 @@ DEFINE_SMB3_CREDIT_EVENT(waitff_credits); DEFINE_SMB3_CREDIT_EVENT(overflow_credits); DEFINE_SMB3_CREDIT_EVENT(set_credits); + +TRACE_EVENT(smb3_tcon_ref, + TP_PROTO(unsigned int tcon_debug_id, int ref, + enum smb3_tcon_ref_trace trace), + TP_ARGS(tcon_debug_id, ref, trace), + TP_STRUCT__entry( + __field(unsigned int, tcon) + __field(int, ref) + __field(enum smb3_tcon_ref_trace, trace) + ), + TP_fast_assign( + __entry->tcon = tcon_debug_id; + __entry->ref = ref; + __entry->trace = trace; + ), + TP_printk("TC=%08x %s r=%u", + __entry->tcon, + __print_symbolic(__entry->trace, smb3_tcon_ref_traces), + __entry->ref) + ); + + +#undef EM +#undef E_ #endif /* _CIFS_TRACE_H */ #undef TRACE_INCLUDE_PATH From 18d86965e31f9be4d477da0744a7cdc9815858de Mon Sep 17 00:00:00 2001 From: Paulo Alcantara Date: Fri, 19 Apr 2024 12:05:07 -0300 Subject: [PATCH 141/313] smb: client: fix rename(2) regression against samba After commit 2c7d399e551c ("smb: client: reuse file lease key in compound operations") the client started reusing lease keys for rename, unlink and set path size operations to prevent it from breaking its own leases and thus causing unnecessary lease breaks to same connection. The implementation relies on positive dentries and cifsInodeInfo::lease_granted to decide whether reusing lease keys for the compound requests. 
cifsInodeInfo::lease_granted was introduced by commit 0ab95c2510b6 ("Defer close only when lease is enabled.") to indicate whether lease caching is granted for a specific file, but that can only happen once the file is open, so cifsInodeInfo::lease_granted was left uninitialised in ->alloc_inode and then the client started sending random lease keys for files that had no leases. This fixes the following test case against samba: mount.cifs //srv/share /mnt/1 -o ...,nosharesock mount.cifs //srv/share /mnt/2 -o ...,nosharesock touch /mnt/1/foo; tail -f /mnt/1/foo & pid=$! mv /mnt/2/foo /mnt/2/bar # fails with -EIO kill $pid Fixes: 0ab95c2510b6 ("Defer close only when lease is enabled.") Signed-off-by: Paulo Alcantara (Red Hat) Signed-off-by: Steve French --- fs/smb/client/cifsfs.c | 1 + 1 file changed, 1 insertion(+) diff --git a/fs/smb/client/cifsfs.c b/fs/smb/client/cifsfs.c index 30781789dfd9..39277c37185c 100644 --- a/fs/smb/client/cifsfs.c +++ b/fs/smb/client/cifsfs.c @@ -389,6 +389,7 @@ cifs_alloc_inode(struct super_block *sb) * server, can not assume caching of file data or metadata. */ cifs_set_oplock_level(cifs_inode, 0); + cifs_inode->lease_granted = false; cifs_inode->flags = 0; spin_lock_init(&cifs_inode->writers_lock); cifs_inode->writers = 0; From c119f4ede3fa90a9463f50831761c28f989bfb20 Mon Sep 17 00:00:00 2001 From: Namjae Jeon Date: Thu, 11 Apr 2024 23:02:15 +0900 Subject: [PATCH 142/313] ksmbd: fix slab-out-of-bounds in smb2_allocate_rsp_buf If ->ProtocolId is SMB2_TRANSFORM_PROTO_NUM, smb2 request size validation could be skipped. If the request size is smaller than sizeof(struct smb2_query_info_req), a slab-out-of-bounds read can happen in smb2_allocate_rsp_buf(). This patch allocates the response buffer after decrypting the transform request. smb3_decrypt_req() will validate the transform request size and avoid the slab-out-of-bounds read in smb2_allocate_rsp_buf().
Reported-by: Norbert Szetei Cc: stable@vger.kernel.org Signed-off-by: Namjae Jeon Signed-off-by: Steve French --- fs/smb/server/server.c | 13 +++++-------- 1 file changed, 5 insertions(+), 8 deletions(-) diff --git a/fs/smb/server/server.c b/fs/smb/server/server.c index c0788188aa82..c67fbc8d6683 100644 --- a/fs/smb/server/server.c +++ b/fs/smb/server/server.c @@ -167,20 +167,17 @@ static void __handle_ksmbd_work(struct ksmbd_work *work, int rc; bool is_chained = false; - if (conn->ops->allocate_rsp_buf(work)) - return; - if (conn->ops->is_transform_hdr && conn->ops->is_transform_hdr(work->request_buf)) { rc = conn->ops->decrypt_req(work); - if (rc < 0) { - conn->ops->set_rsp_status(work, STATUS_DATA_ERROR); - goto send; - } - + if (rc < 0) + return; work->encrypted = true; } + if (conn->ops->allocate_rsp_buf(work)) + return; + rc = conn->ops->init_rsp_hdr(work); if (rc) { /* either uid or tid is not correct */ From 17cf0c2794bdb6f39671265aa18aea5c22ee8c4a Mon Sep 17 00:00:00 2001 From: Namjae Jeon Date: Fri, 12 Apr 2024 09:45:00 +0900 Subject: [PATCH 143/313] ksmbd: validate request buffer size in smb2_allocate_rsp_buf() The response buffer should be allocated in smb2_allocate_rsp_buf() before validating the request. But fields in the payload as well as the smb2 header are used in smb2_allocate_rsp_buf(). This patch adds a simple buffer size validation to avoid a potential out-of-bounds access in the request buffer.
Cc: stable@vger.kernel.org Signed-off-by: Namjae Jeon Signed-off-by: Steve French --- fs/smb/server/smb2pdu.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/fs/smb/server/smb2pdu.c b/fs/smb/server/smb2pdu.c index 5723bbf372d7..ee4b2875a021 100644 --- a/fs/smb/server/smb2pdu.c +++ b/fs/smb/server/smb2pdu.c @@ -535,6 +535,10 @@ int smb2_allocate_rsp_buf(struct ksmbd_work *work) if (cmd == SMB2_QUERY_INFO_HE) { struct smb2_query_info_req *req; + if (get_rfc1002_len(work->request_buf) < + offsetof(struct smb2_query_info_req, OutputBufferLength)) + return -EINVAL; + req = smb2_get_msg(work->request_buf); if ((req->InfoType == SMB2_O_INFO_FILE && (req->FileInfoClass == FILE_FULL_EA_INFORMATION || From 4973b04d3ea577db80c501c5f14e68ec69fe1794 Mon Sep 17 00:00:00 2001 From: Marios Makassikis Date: Mon, 15 Apr 2024 15:12:48 +0200 Subject: [PATCH 144/313] ksmbd: clear RENAME_NOREPLACE before calling vfs_rename File overwrite case is explicitly handled, so it is not necessary to pass RENAME_NOREPLACE to vfs_rename. Clearing the flag fixes rename operations when the share is a ntfs-3g mount. The latter uses an older version of fuse with no support for flags in the ->rename op. 
Cc: stable@vger.kernel.org Signed-off-by: Marios Makassikis Acked-by: Namjae Jeon Signed-off-by: Steve French --- fs/smb/server/vfs.c | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/fs/smb/server/vfs.c b/fs/smb/server/vfs.c index 22f0f3db3ac9..51b1b0bed616 100644 --- a/fs/smb/server/vfs.c +++ b/fs/smb/server/vfs.c @@ -754,10 +754,15 @@ int ksmbd_vfs_rename(struct ksmbd_work *work, const struct path *old_path, goto out4; } + /* + * explicitly handle file overwrite case, for compatibility with + * filesystems that may not support rename flags (e.g: fuse) + */ if ((flags & RENAME_NOREPLACE) && d_is_positive(new_dentry)) { err = -EEXIST; goto out4; } + flags &= ~(RENAME_NOREPLACE); if (old_child == trap) { err = -EINVAL; From 0268a7cc7fdc47d90b6c18859de7718d5059f6f1 Mon Sep 17 00:00:00 2001 From: Namjae Jeon Date: Fri, 19 Apr 2024 23:46:34 +0900 Subject: [PATCH 145/313] ksmbd: common: use struct_group_attr instead of struct_group for network_open_info A 4-byte padding causes a connection issue with macOS applications. The smb2_close response size increases by 4 bytes because of the padding, and the macOS smb client checks it and stops the connection. This patch uses struct_group_attr instead of struct_group for network_open_info so that __packed can be applied to avoid the padding.
Fixes: 0015eb6e1238 ("smb: client, common: fix fortify warnings") Cc: stable@vger.kernel.org Signed-off-by: Namjae Jeon Signed-off-by: Steve French --- fs/smb/common/smb2pdu.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fs/smb/common/smb2pdu.h b/fs/smb/common/smb2pdu.h index 1b594307c9d5..202ff9128156 100644 --- a/fs/smb/common/smb2pdu.h +++ b/fs/smb/common/smb2pdu.h @@ -711,7 +711,7 @@ struct smb2_close_rsp { __le16 StructureSize; /* 60 */ __le16 Flags; __le32 Reserved; - struct_group(network_open_info, + struct_group_attr(network_open_info, __packed, __le64 CreationTime; __le64 LastAccessTime; __le64 LastWriteTime; From e9d8c2f95ab8acaf3f4d4a53682a4afa3c263692 Mon Sep 17 00:00:00 2001 From: Namjae Jeon Date: Sat, 20 Apr 2024 09:17:58 +0900 Subject: [PATCH 146/313] ksmbd: add continuous availability share parameter If capabilities of the share is not SMB2_SHARE_CAP_CONTINUOUS_AVAILABILITY, ksmbd should not grant a persistent handle to the client. This patch add continuous availability share parameter to control it. Signed-off-by: Namjae Jeon Signed-off-by: Steve French --- fs/smb/server/ksmbd_netlink.h | 35 ++++++++++++++++++----------------- fs/smb/server/smb2pdu.c | 11 +++++++++-- 2 files changed, 27 insertions(+), 19 deletions(-) diff --git a/fs/smb/server/ksmbd_netlink.h b/fs/smb/server/ksmbd_netlink.h index 686b321c5a8b..f4e55199938d 100644 --- a/fs/smb/server/ksmbd_netlink.h +++ b/fs/smb/server/ksmbd_netlink.h @@ -340,23 +340,24 @@ enum KSMBD_TREE_CONN_STATUS { /* * Share config flags. 
*/ -#define KSMBD_SHARE_FLAG_INVALID (0) -#define KSMBD_SHARE_FLAG_AVAILABLE BIT(0) -#define KSMBD_SHARE_FLAG_BROWSEABLE BIT(1) -#define KSMBD_SHARE_FLAG_WRITEABLE BIT(2) -#define KSMBD_SHARE_FLAG_READONLY BIT(3) -#define KSMBD_SHARE_FLAG_GUEST_OK BIT(4) -#define KSMBD_SHARE_FLAG_GUEST_ONLY BIT(5) -#define KSMBD_SHARE_FLAG_STORE_DOS_ATTRS BIT(6) -#define KSMBD_SHARE_FLAG_OPLOCKS BIT(7) -#define KSMBD_SHARE_FLAG_PIPE BIT(8) -#define KSMBD_SHARE_FLAG_HIDE_DOT_FILES BIT(9) -#define KSMBD_SHARE_FLAG_INHERIT_OWNER BIT(10) -#define KSMBD_SHARE_FLAG_STREAMS BIT(11) -#define KSMBD_SHARE_FLAG_FOLLOW_SYMLINKS BIT(12) -#define KSMBD_SHARE_FLAG_ACL_XATTR BIT(13) -#define KSMBD_SHARE_FLAG_UPDATE BIT(14) -#define KSMBD_SHARE_FLAG_CROSSMNT BIT(15) +#define KSMBD_SHARE_FLAG_INVALID (0) +#define KSMBD_SHARE_FLAG_AVAILABLE BIT(0) +#define KSMBD_SHARE_FLAG_BROWSEABLE BIT(1) +#define KSMBD_SHARE_FLAG_WRITEABLE BIT(2) +#define KSMBD_SHARE_FLAG_READONLY BIT(3) +#define KSMBD_SHARE_FLAG_GUEST_OK BIT(4) +#define KSMBD_SHARE_FLAG_GUEST_ONLY BIT(5) +#define KSMBD_SHARE_FLAG_STORE_DOS_ATTRS BIT(6) +#define KSMBD_SHARE_FLAG_OPLOCKS BIT(7) +#define KSMBD_SHARE_FLAG_PIPE BIT(8) +#define KSMBD_SHARE_FLAG_HIDE_DOT_FILES BIT(9) +#define KSMBD_SHARE_FLAG_INHERIT_OWNER BIT(10) +#define KSMBD_SHARE_FLAG_STREAMS BIT(11) +#define KSMBD_SHARE_FLAG_FOLLOW_SYMLINKS BIT(12) +#define KSMBD_SHARE_FLAG_ACL_XATTR BIT(13) +#define KSMBD_SHARE_FLAG_UPDATE BIT(14) +#define KSMBD_SHARE_FLAG_CROSSMNT BIT(15) +#define KSMBD_SHARE_FLAG_CONTINUOUS_AVAILABILITY BIT(16) /* * Tree connect request flags. 
diff --git a/fs/smb/server/smb2pdu.c b/fs/smb/server/smb2pdu.c index ee4b2875a021..355824151c2d 100644 --- a/fs/smb/server/smb2pdu.c +++ b/fs/smb/server/smb2pdu.c @@ -1988,7 +1988,12 @@ int smb2_tree_connect(struct ksmbd_work *work) write_unlock(&sess->tree_conns_lock); rsp->StructureSize = cpu_to_le16(16); out_err1: - rsp->Capabilities = 0; + if (server_conf.flags & KSMBD_GLOBAL_FLAG_DURABLE_HANDLE && + test_share_config_flag(share, + KSMBD_SHARE_FLAG_CONTINUOUS_AVAILABILITY)) + rsp->Capabilities = SMB2_SHARE_CAP_CONTINUOUS_AVAILABILITY; + else + rsp->Capabilities = 0; rsp->Reserved = 0; /* default manual caching */ rsp->ShareFlags = SMB2_SHAREFLAG_MANUAL_CACHING; @@ -3502,7 +3507,9 @@ int smb2_open(struct ksmbd_work *work) memcpy(fp->client_guid, conn->ClientGUID, SMB2_CLIENT_GUID_SIZE); if (dh_info.type == DURABLE_REQ_V2 || dh_info.type == DURABLE_REQ) { - if (dh_info.type == DURABLE_REQ_V2 && dh_info.persistent) + if (dh_info.type == DURABLE_REQ_V2 && dh_info.persistent && + test_share_config_flag(work->tcon->share_conf, + KSMBD_SHARE_FLAG_CONTINUOUS_AVAILABILITY)) fp->is_persistent = true; else fp->is_durable = true; From 680d11f6e5427b6af1321932286722d24a8b16c1 Mon Sep 17 00:00:00 2001 From: Yick Xie Date: Fri, 19 Apr 2024 01:06:10 +0800 Subject: [PATCH 147/313] udp: preserve the connected status if only UDP cmsg If "udp_cmsg_send()" returned 0 (i.e. only UDP cmsg), "connected" should not be set to 0. Otherwise it stops the connected socket from using the cached route. 
Fixes: 2e8de8576343 ("udp: add gso segment cmsg") Signed-off-by: Yick Xie Cc: stable@vger.kernel.org Reviewed-by: Willem de Bruijn Link: https://lore.kernel.org/r/20240418170610.867084-1-yick.xie@gmail.com Signed-off-by: Jakub Kicinski --- net/ipv4/udp.c | 5 +++-- net/ipv6/udp.c | 5 +++-- 2 files changed, 6 insertions(+), 4 deletions(-) diff --git a/net/ipv4/udp.c b/net/ipv4/udp.c index c02bf011d4a6..420905be5f30 100644 --- a/net/ipv4/udp.c +++ b/net/ipv4/udp.c @@ -1123,16 +1123,17 @@ int udp_sendmsg(struct sock *sk, struct msghdr *msg, size_t len) if (msg->msg_controllen) { err = udp_cmsg_send(sk, msg, &ipc.gso_size); - if (err > 0) + if (err > 0) { err = ip_cmsg_send(sk, msg, &ipc, sk->sk_family == AF_INET6); + connected = 0; + } if (unlikely(err < 0)) { kfree(ipc.opt); return err; } if (ipc.opt) free = 1; - connected = 0; } if (!ipc.opt) { struct ip_options_rcu *inet_opt; diff --git a/net/ipv6/udp.c b/net/ipv6/udp.c index 8b1dd7f51249..1a4cccdd40c9 100644 --- a/net/ipv6/udp.c +++ b/net/ipv6/udp.c @@ -1474,9 +1474,11 @@ int udpv6_sendmsg(struct sock *sk, struct msghdr *msg, size_t len) ipc6.opt = opt; err = udp_cmsg_send(sk, msg, &ipc6.gso_size); - if (err > 0) + if (err > 0) { err = ip6_datagram_send_ctl(sock_net(sk), sk, msg, fl6, &ipc6); + connected = false; + } if (err < 0) { fl6_sock_release(flowlabel); return err; @@ -1488,7 +1490,6 @@ int udpv6_sendmsg(struct sock *sk, struct msghdr *msg, size_t len) } if (!(opt->opt_nflen|opt->opt_flen)) opt = NULL; - connected = false; } if (!opt) { opt = txopt_get(np); From 9f898fc2c31fbf0ac5ecd289f528a716464cb005 Mon Sep 17 00:00:00 2001 From: Justin Chen Date: Thu, 18 Apr 2024 11:05:41 -0700 Subject: [PATCH 148/313] net: bcmasp: fix memory leak when bringing down interface When bringing down the TX rings we flush the rings but forget to reclaim the flushed packets. This leads to a memory leak since we do not free the DMA-mapped buffers.
This also leads to tx control block corruption when bringing down the interface for power management. Fixes: 490cb412007d ("net: bcmasp: Add support for ASP2.0 Ethernet controller") Signed-off-by: Justin Chen Acked-by: Florian Fainelli Reviewed-by: Simon Horman Link: https://lore.kernel.org/r/20240418180541.2271719-1-justin.chen@broadcom.com Signed-off-by: Jakub Kicinski --- .../net/ethernet/broadcom/asp2/bcmasp_intf.c | 21 ++++++++++++------- 1 file changed, 14 insertions(+), 7 deletions(-) diff --git a/drivers/net/ethernet/broadcom/asp2/bcmasp_intf.c b/drivers/net/ethernet/broadcom/asp2/bcmasp_intf.c index 72ea97c5d5d4..82768b0e9026 100644 --- a/drivers/net/ethernet/broadcom/asp2/bcmasp_intf.c +++ b/drivers/net/ethernet/broadcom/asp2/bcmasp_intf.c @@ -436,10 +436,8 @@ static void umac_init(struct bcmasp_intf *intf) umac_wl(intf, 0x800, UMC_RX_MAX_PKT_SZ); } -static int bcmasp_tx_poll(struct napi_struct *napi, int budget) +static int bcmasp_tx_reclaim(struct bcmasp_intf *intf) { - struct bcmasp_intf *intf = - container_of(napi, struct bcmasp_intf, tx_napi); struct bcmasp_intf_stats64 *stats = &intf->stats64; struct device *kdev = &intf->parent->pdev->dev; unsigned long read, released = 0; @@ -482,10 +480,16 @@ static int bcmasp_tx_poll(struct napi_struct *napi, int budget) DESC_RING_COUNT); } - /* Ensure all descriptors have been written to DRAM for the hardware - * to see updated contents. 
- */ - wmb(); + return released; +} + +static int bcmasp_tx_poll(struct napi_struct *napi, int budget) +{ + struct bcmasp_intf *intf = + container_of(napi, struct bcmasp_intf, tx_napi); + int released = 0; + + released = bcmasp_tx_reclaim(intf); napi_complete(&intf->tx_napi); @@ -797,6 +801,7 @@ static void bcmasp_init_tx(struct bcmasp_intf *intf) intf->tx_spb_dma_read = intf->tx_spb_dma_addr; intf->tx_spb_index = 0; intf->tx_spb_clean_index = 0; + memset(intf->tx_cbs, 0, sizeof(struct bcmasp_tx_cb) * DESC_RING_COUNT); /* Make sure channels are disabled */ tx_spb_ctrl_wl(intf, 0x0, TX_SPB_CTRL_ENABLE); @@ -885,6 +890,8 @@ static void bcmasp_netif_deinit(struct net_device *dev) } while (timeout-- > 0); tx_spb_dma_wl(intf, 0x0, TX_SPB_DMA_FIFO_CTRL); + bcmasp_tx_reclaim(intf); + umac_enable_set(intf, UMC_CMD_TX_EN, 0); phy_stop(dev->phydev); From 976c44af48141cd8595601c0af2a19a43c5b228b Mon Sep 17 00:00:00 2001 From: Ido Schimmel Date: Thu, 18 Apr 2024 15:46:06 +0200 Subject: [PATCH 149/313] mlxsw: core: Unregister EMAD trap using FORWARD action The device's manual (PRM - Programmer's Reference Manual) classifies the trap that is used to deliver EMAD responses as an "event trap". Among other things, it means that the only actions that can be associated with the trap are TRAP and FORWARD (NOP). Currently, during driver de-initialization the driver unregisters the trap by setting its action to DISCARD, which violates the above guideline. Future firmware versions will prevent such misuses by returning an error. This does not prevent the driver from working, but an error will be printed to the kernel log during module removal / devlink reload: mlxsw_spectrum 0000:03:00.0: Reg cmd access status failed (status=7(bad parameter)) mlxsw_spectrum 0000:03:00.0: Reg cmd access failed (reg_id=7003(hpkt),type=write) Suppress the error message by aligning the driver to the manual and use a FORWARD (NOP) action when unregistering the trap. 
Fixes: 4ec14b7634b2 ("mlxsw: Add interface to access registers and process events") Cc: Jiri Pirko Cc: Amit Cohen Signed-off-by: Ido Schimmel Reviewed-by: Petr Machata Reviewed-by: Simon Horman Signed-off-by: Petr Machata Link: https://lore.kernel.org/r/753a89e14008fde08cb4a2c1e5f537b81d8eb2d6.1713446092.git.petrm@nvidia.com Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/mellanox/mlxsw/core.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/ethernet/mellanox/mlxsw/core.c b/drivers/net/ethernet/mellanox/mlxsw/core.c index e4d7739bd7c8..4a79c0d7e7ad 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/core.c +++ b/drivers/net/ethernet/mellanox/mlxsw/core.c @@ -849,7 +849,7 @@ static void mlxsw_emad_rx_listener_func(struct sk_buff *skb, u16 local_port, static const struct mlxsw_listener mlxsw_emad_rx_listener = MLXSW_RXL(mlxsw_emad_rx_listener_func, ETHEMAD, TRAP_TO_CPU, false, - EMAD, DISCARD); + EMAD, FORWARD); static int mlxsw_emad_tlv_enable(struct mlxsw_core *mlxsw_core) { From 7e2050a8366315aeaf0316b3d362e67cf58f3ea8 Mon Sep 17 00:00:00 2001 From: Ido Schimmel Date: Thu, 18 Apr 2024 15:46:07 +0200 Subject: [PATCH 150/313] mlxsw: core_env: Fix driver initialization with old firmware The driver queries the Management Capabilities Mask (MCAM) register during initialization to understand if it can read up to 128 bytes from transceiver modules. However, not all firmware versions support this register, leading to the driver failing to load. Fix by treating an error in the register query as an indication that the feature is not supported. 
Fixes: 1f4aea1f72da ("mlxsw: core_env: Read transceiver module EEPROM in 128 bytes chunks") Reported-by: Tim 'mithro' Ansell Signed-off-by: Ido Schimmel Reviewed-by: Petr Machata Signed-off-by: Petr Machata Reviewed-by: Simon Horman Link: https://lore.kernel.org/r/0afa8b2e8bac178f5f88211344429176dcc72281.1713446092.git.petrm@nvidia.com Signed-off-by: Jakub Kicinski --- .../net/ethernet/mellanox/mlxsw/core_env.c | 20 ++++++------------- 1 file changed, 6 insertions(+), 14 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlxsw/core_env.c b/drivers/net/ethernet/mellanox/mlxsw/core_env.c index 53b150b7ae4e..6c06b0592760 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/core_env.c +++ b/drivers/net/ethernet/mellanox/mlxsw/core_env.c @@ -1357,24 +1357,20 @@ static struct mlxsw_linecards_event_ops mlxsw_env_event_ops = { .got_inactive = mlxsw_env_got_inactive, }; -static int mlxsw_env_max_module_eeprom_len_query(struct mlxsw_env *mlxsw_env) +static void mlxsw_env_max_module_eeprom_len_query(struct mlxsw_env *mlxsw_env) { char mcam_pl[MLXSW_REG_MCAM_LEN]; - bool mcia_128b_supported; + bool mcia_128b_supported = false; int err; mlxsw_reg_mcam_pack(mcam_pl, MLXSW_REG_MCAM_FEATURE_GROUP_ENHANCED_FEATURES); err = mlxsw_reg_query(mlxsw_env->core, MLXSW_REG(mcam), mcam_pl); - if (err) - return err; - - mlxsw_reg_mcam_unpack(mcam_pl, MLXSW_REG_MCAM_MCIA_128B, - &mcia_128b_supported); + if (!err) + mlxsw_reg_mcam_unpack(mcam_pl, MLXSW_REG_MCAM_MCIA_128B, + &mcia_128b_supported); mlxsw_env->max_eeprom_len = mcia_128b_supported ? 
128 : 48; - - return 0; } int mlxsw_env_init(struct mlxsw_core *mlxsw_core, @@ -1445,15 +1441,11 @@ int mlxsw_env_init(struct mlxsw_core *mlxsw_core, if (err) goto err_type_set; - err = mlxsw_env_max_module_eeprom_len_query(env); - if (err) - goto err_eeprom_len_query; - + mlxsw_env_max_module_eeprom_len_query(env); env->line_cards[0]->active = true; return 0; -err_eeprom_len_query: err_type_set: mlxsw_env_module_event_disable(env, 0); err_mlxsw_env_module_event_enable: From 773501d01e6bc3f2557882a25679392d982d5f3e Mon Sep 17 00:00:00 2001 From: Ido Schimmel Date: Thu, 18 Apr 2024 15:46:08 +0200 Subject: [PATCH 151/313] mlxsw: pci: Fix driver initialization with old firmware The driver queries the Management Capabilities Mask (MCAM) register during initialization to understand if a new and deeper reset flow is supported. However, not all firmware versions support this register, leading to the driver failing to load. Fix by treating an error in the register query as an indication that the feature is not supported. 
Fixes: f257c73e5356 ("mlxsw: pci: Add support for new reset flow") Reported-by: Tim 'mithro' Ansell Signed-off-by: Ido Schimmel Reviewed-by: Petr Machata Reviewed-by: Simon Horman Reviewed-by: Kalesh AP Signed-off-by: Petr Machata Link: https://lore.kernel.org/r/ee968c49d53bac96a4c66d1b09ebbd097d81aca5.1713446092.git.petrm@nvidia.com Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/mellanox/mlxsw/pci.c | 10 ++++------ 1 file changed, 4 insertions(+), 6 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlxsw/pci.c b/drivers/net/ethernet/mellanox/mlxsw/pci.c index af99bf17eb36..f42a1b1c9368 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/pci.c +++ b/drivers/net/ethernet/mellanox/mlxsw/pci.c @@ -1530,7 +1530,7 @@ mlxsw_pci_reset(struct mlxsw_pci *mlxsw_pci, const struct pci_device_id *id) { struct pci_dev *pdev = mlxsw_pci->pdev; char mcam_pl[MLXSW_REG_MCAM_LEN]; - bool pci_reset_supported; + bool pci_reset_supported = false; u32 sys_status; int err; @@ -1548,11 +1548,9 @@ mlxsw_pci_reset(struct mlxsw_pci *mlxsw_pci, const struct pci_device_id *id) mlxsw_reg_mcam_pack(mcam_pl, MLXSW_REG_MCAM_FEATURE_GROUP_ENHANCED_FEATURES); err = mlxsw_reg_query(mlxsw_pci->core, MLXSW_REG(mcam), mcam_pl); - if (err) - return err; - - mlxsw_reg_mcam_unpack(mcam_pl, MLXSW_REG_MCAM_PCI_RESET, - &pci_reset_supported); + if (!err) + mlxsw_reg_mcam_unpack(mcam_pl, MLXSW_REG_MCAM_PCI_RESET, + &pci_reset_supported); if (pci_reset_supported) { pci_dbg(pdev, "Starting PCI reset flow\n"); From fcdbc1d7a4b638e5d5668de461f320386f3002aa Mon Sep 17 00:00:00 2001 From: Kent Overstreet Date: Wed, 17 Apr 2024 15:19:50 -0400 Subject: [PATCH 152/313] bcachefs: Check for journal entries overruning end of sb clean section Fix a missing bounds check in superblock validation. Note that we don't yet have repair code for this case - repair code for individual items is generally low priority, since the whole superblock is checksummed, validated prior to write, and we have backups. 
Reported-by: lei lu Signed-off-by: Kent Overstreet --- fs/bcachefs/sb-clean.c | 8 ++++++++ fs/bcachefs/sb-errors_types.h | 3 ++- 2 files changed, 10 insertions(+), 1 deletion(-) diff --git a/fs/bcachefs/sb-clean.c b/fs/bcachefs/sb-clean.c index 5980ba2563fe..35ca3f138de6 100644 --- a/fs/bcachefs/sb-clean.c +++ b/fs/bcachefs/sb-clean.c @@ -29,6 +29,14 @@ int bch2_sb_clean_validate_late(struct bch_fs *c, struct bch_sb_field_clean *cle for (entry = clean->start; entry < (struct jset_entry *) vstruct_end(&clean->field); entry = vstruct_next(entry)) { + if (vstruct_end(entry) > vstruct_end(&clean->field)) { + bch_err(c, "journal entry (u64s %u) overran end of superblock clean section (u64s %u) by %zu", + le16_to_cpu(entry->u64s), le32_to_cpu(clean->field.u64s), + (u64 *) vstruct_end(entry) - (u64 *) vstruct_end(&clean->field)); + bch2_sb_error_count(c, BCH_FSCK_ERR_sb_clean_entry_overrun); + return -BCH_ERR_fsck_repair_unimplemented; + } + ret = bch2_journal_entry_validate(c, NULL, entry, le16_to_cpu(c->disk_sb.sb->version), BCH_SB_BIG_ENDIAN(c->disk_sb.sb), diff --git a/fs/bcachefs/sb-errors_types.h b/fs/bcachefs/sb-errors_types.h index 4ca6e7b0d8aa..06c7a644f4a4 100644 --- a/fs/bcachefs/sb-errors_types.h +++ b/fs/bcachefs/sb-errors_types.h @@ -271,7 +271,8 @@ x(btree_root_unreadable_and_scan_found_nothing, 263) \ x(snapshot_node_missing, 264) \ x(dup_backpointer_to_bad_csum_extent, 265) \ - x(btree_bitmap_not_marked, 266) + x(btree_bitmap_not_marked, 266) \ + x(sb_clean_entry_overrun, 267) enum bch_sb_error_id { #define x(t, n) BCH_FSCK_ERR_##t = n, From ec438ac59d7a8bd7e76d3e1201d55071be484626 Mon Sep 17 00:00:00 2001 From: Kent Overstreet Date: Sat, 20 Apr 2024 00:31:32 -0400 Subject: [PATCH 153/313] bcachefs: Fix missing call to bch2_fs_allocator_background_exit() Signed-off-by: Kent Overstreet --- fs/bcachefs/super.c | 1 + 1 file changed, 1 insertion(+) diff --git a/fs/bcachefs/super.c b/fs/bcachefs/super.c index 8daf80a38d60..88e214c609bb 100644 --- 
a/fs/bcachefs/super.c +++ b/fs/bcachefs/super.c @@ -544,6 +544,7 @@ static void __bch2_fs_free(struct bch_fs *c) bch2_find_btree_nodes_exit(&c->found_btree_nodes); bch2_free_pending_node_rewrites(c); + bch2_fs_allocator_background_exit(c); bch2_fs_sb_errors_exit(c); bch2_fs_counters_exit(c); bch2_fs_snapshots_exit(c); From 32cf5a4eda464d76d553ee3f1b06c4d33d796c52 Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Fri, 19 Apr 2024 11:51:12 -0400 Subject: [PATCH 154/313] Revert "svcrdma: Add Write chunk WRs to the RPC's Send WR chain" Performance regression reported with NFS/RDMA using Omnipath, bisected to commit e084ee673c77 ("svcrdma: Add Write chunk WRs to the RPC's Send WR chain"). Tracing on the server reports: nfsd-7771 [060] 1758.891809: svcrdma_sq_post_err: cq.id=205 cid=226 sc_sq_avail=13643/851 status=-12 sq_post_err reports ENOMEM, and the rdma->sc_sq_avail (13643) is larger than rdma->sc_sq_depth (851). The number of available Send Queue entries is always supposed to be smaller than the Send Queue depth. That seems like a Send Queue accounting bug in svcrdma. As it's getting to be late in the 6.9-rc cycle, revert this commit. It can be revisited in a subsequent kernel release. 
Link: https://bugzilla.kernel.org/show_bug.cgi?id=218743 Fixes: e084ee673c77 ("svcrdma: Add Write chunk WRs to the RPC's Send WR chain") Signed-off-by: Chuck Lever --- include/linux/sunrpc/svc_rdma.h | 13 +--- net/sunrpc/xprtrdma/svc_rdma_rw.c | 86 +++++++-------------------- net/sunrpc/xprtrdma/svc_rdma_sendto.c | 5 +- 3 files changed, 26 insertions(+), 78 deletions(-) diff --git a/include/linux/sunrpc/svc_rdma.h b/include/linux/sunrpc/svc_rdma.h index 24cd199dd6f3..d33bab33099a 100644 --- a/include/linux/sunrpc/svc_rdma.h +++ b/include/linux/sunrpc/svc_rdma.h @@ -210,7 +210,6 @@ struct svc_rdma_recv_ctxt { */ struct svc_rdma_write_info { struct svcxprt_rdma *wi_rdma; - struct list_head wi_list; const struct svc_rdma_chunk *wi_chunk; @@ -239,10 +238,7 @@ struct svc_rdma_send_ctxt { struct ib_cqe sc_cqe; struct xdr_buf sc_hdrbuf; struct xdr_stream sc_stream; - - struct list_head sc_write_info_list; struct svc_rdma_write_info sc_reply_info; - void *sc_xprt_buf; int sc_page_count; int sc_cur_sge_no; @@ -274,14 +270,11 @@ extern void svc_rdma_cc_init(struct svcxprt_rdma *rdma, extern void svc_rdma_cc_release(struct svcxprt_rdma *rdma, struct svc_rdma_chunk_ctxt *cc, enum dma_data_direction dir); -extern void svc_rdma_write_chunk_release(struct svcxprt_rdma *rdma, - struct svc_rdma_send_ctxt *ctxt); extern void svc_rdma_reply_chunk_release(struct svcxprt_rdma *rdma, struct svc_rdma_send_ctxt *ctxt); -extern int svc_rdma_prepare_write_list(struct svcxprt_rdma *rdma, - const struct svc_rdma_pcl *write_pcl, - struct svc_rdma_send_ctxt *sctxt, - const struct xdr_buf *xdr); +extern int svc_rdma_send_write_list(struct svcxprt_rdma *rdma, + const struct svc_rdma_recv_ctxt *rctxt, + const struct xdr_buf *xdr); extern int svc_rdma_prepare_reply_chunk(struct svcxprt_rdma *rdma, const struct svc_rdma_pcl *write_pcl, const struct svc_rdma_pcl *reply_pcl, diff --git a/net/sunrpc/xprtrdma/svc_rdma_rw.c b/net/sunrpc/xprtrdma/svc_rdma_rw.c index f2a100c4c81f..40797114d50a 100644 --- 
a/net/sunrpc/xprtrdma/svc_rdma_rw.c +++ b/net/sunrpc/xprtrdma/svc_rdma_rw.c @@ -230,28 +230,6 @@ static void svc_rdma_write_info_free(struct svc_rdma_write_info *info) queue_work(svcrdma_wq, &info->wi_work); } -/** - * svc_rdma_write_chunk_release - Release Write chunk I/O resources - * @rdma: controlling transport - * @ctxt: Send context that is being released - */ -void svc_rdma_write_chunk_release(struct svcxprt_rdma *rdma, - struct svc_rdma_send_ctxt *ctxt) -{ - struct svc_rdma_write_info *info; - struct svc_rdma_chunk_ctxt *cc; - - while (!list_empty(&ctxt->sc_write_info_list)) { - info = list_first_entry(&ctxt->sc_write_info_list, - struct svc_rdma_write_info, wi_list); - list_del(&info->wi_list); - - cc = &info->wi_cc; - svc_rdma_wake_send_waiters(rdma, cc->cc_sqecount); - svc_rdma_write_info_free(info); - } -} - /** * svc_rdma_reply_chunk_release - Release Reply chunk I/O resources * @rdma: controlling transport @@ -308,11 +286,13 @@ static void svc_rdma_write_done(struct ib_cq *cq, struct ib_wc *wc) struct ib_cqe *cqe = wc->wr_cqe; struct svc_rdma_chunk_ctxt *cc = container_of(cqe, struct svc_rdma_chunk_ctxt, cc_cqe); + struct svc_rdma_write_info *info = + container_of(cc, struct svc_rdma_write_info, wi_cc); switch (wc->status) { case IB_WC_SUCCESS: trace_svcrdma_wc_write(&cc->cc_cid); - return; + break; case IB_WC_WR_FLUSH_ERR: trace_svcrdma_wc_write_flush(wc, &cc->cc_cid); break; @@ -320,11 +300,12 @@ static void svc_rdma_write_done(struct ib_cq *cq, struct ib_wc *wc) trace_svcrdma_wc_write_err(wc, &cc->cc_cid); } - /* The RDMA Write has flushed, so the client won't get - * some of the outgoing RPC message. Signal the loss - * to the client by closing the connection. 
- */ - svc_xprt_deferred_close(&rdma->sc_xprt); + svc_rdma_wake_send_waiters(rdma, cc->cc_sqecount); + + if (unlikely(wc->status != IB_WC_SUCCESS)) + svc_xprt_deferred_close(&rdma->sc_xprt); + + svc_rdma_write_info_free(info); } /** @@ -620,19 +601,13 @@ static int svc_rdma_xb_write(const struct xdr_buf *xdr, void *data) return xdr->len; } -/* Link Write WRs for @chunk onto @sctxt's WR chain. - */ -static int svc_rdma_prepare_write_chunk(struct svcxprt_rdma *rdma, - struct svc_rdma_send_ctxt *sctxt, - const struct svc_rdma_chunk *chunk, - const struct xdr_buf *xdr) +static int svc_rdma_send_write_chunk(struct svcxprt_rdma *rdma, + const struct svc_rdma_chunk *chunk, + const struct xdr_buf *xdr) { struct svc_rdma_write_info *info; struct svc_rdma_chunk_ctxt *cc; - struct ib_send_wr *first_wr; struct xdr_buf payload; - struct list_head *pos; - struct ib_cqe *cqe; int ret; if (xdr_buf_subsegment(xdr, &payload, chunk->ch_position, @@ -648,25 +623,10 @@ static int svc_rdma_prepare_write_chunk(struct svcxprt_rdma *rdma, if (ret != payload.len) goto out_err; - ret = -EINVAL; - if (unlikely(cc->cc_sqecount > rdma->sc_sq_depth)) - goto out_err; - - first_wr = sctxt->sc_wr_chain; - cqe = &cc->cc_cqe; - list_for_each(pos, &cc->cc_rwctxts) { - struct svc_rdma_rw_ctxt *rwc; - - rwc = list_entry(pos, struct svc_rdma_rw_ctxt, rw_list); - first_wr = rdma_rw_ctx_wrs(&rwc->rw_ctx, rdma->sc_qp, - rdma->sc_port_num, cqe, first_wr); - cqe = NULL; - } - sctxt->sc_wr_chain = first_wr; - sctxt->sc_sqecount += cc->cc_sqecount; - list_add(&info->wi_list, &sctxt->sc_write_info_list); - trace_svcrdma_post_write_chunk(&cc->cc_cid, cc->cc_sqecount); + ret = svc_rdma_post_chunk_ctxt(rdma, cc); + if (ret < 0) + goto out_err; return 0; out_err: @@ -675,27 +635,25 @@ static int svc_rdma_prepare_write_chunk(struct svcxprt_rdma *rdma, } /** - * svc_rdma_prepare_write_list - Construct WR chain for sending Write list + * svc_rdma_send_write_list - Send all chunks on the Write list * @rdma: controlling 
RDMA transport - * @write_pcl: Write list provisioned by the client - * @sctxt: Send WR resources + * @rctxt: Write list provisioned by the client * @xdr: xdr_buf containing an RPC Reply message * * Returns zero on success, or a negative errno if one or more * Write chunks could not be sent. */ -int svc_rdma_prepare_write_list(struct svcxprt_rdma *rdma, - const struct svc_rdma_pcl *write_pcl, - struct svc_rdma_send_ctxt *sctxt, - const struct xdr_buf *xdr) +int svc_rdma_send_write_list(struct svcxprt_rdma *rdma, + const struct svc_rdma_recv_ctxt *rctxt, + const struct xdr_buf *xdr) { struct svc_rdma_chunk *chunk; int ret; - pcl_for_each_chunk(chunk, write_pcl) { + pcl_for_each_chunk(chunk, &rctxt->rc_write_pcl) { if (!chunk->ch_payload_length) break; - ret = svc_rdma_prepare_write_chunk(rdma, sctxt, chunk, xdr); + ret = svc_rdma_send_write_chunk(rdma, chunk, xdr); if (ret < 0) return ret; } diff --git a/net/sunrpc/xprtrdma/svc_rdma_sendto.c b/net/sunrpc/xprtrdma/svc_rdma_sendto.c index dfca39abd16c..bb5436b719e0 100644 --- a/net/sunrpc/xprtrdma/svc_rdma_sendto.c +++ b/net/sunrpc/xprtrdma/svc_rdma_sendto.c @@ -142,7 +142,6 @@ svc_rdma_send_ctxt_alloc(struct svcxprt_rdma *rdma) ctxt->sc_send_wr.sg_list = ctxt->sc_sges; ctxt->sc_send_wr.send_flags = IB_SEND_SIGNALED; ctxt->sc_cqe.done = svc_rdma_wc_send; - INIT_LIST_HEAD(&ctxt->sc_write_info_list); ctxt->sc_xprt_buf = buffer; xdr_buf_init(&ctxt->sc_hdrbuf, ctxt->sc_xprt_buf, rdma->sc_max_req_size); @@ -228,7 +227,6 @@ static void svc_rdma_send_ctxt_release(struct svcxprt_rdma *rdma, struct ib_device *device = rdma->sc_cm_id->device; unsigned int i; - svc_rdma_write_chunk_release(rdma, ctxt); svc_rdma_reply_chunk_release(rdma, ctxt); if (ctxt->sc_page_count) @@ -1015,8 +1013,7 @@ int svc_rdma_sendto(struct svc_rqst *rqstp) if (!p) goto put_ctxt; - ret = svc_rdma_prepare_write_list(rdma, &rctxt->rc_write_pcl, sctxt, - &rqstp->rq_res); + ret = svc_rdma_send_write_list(rdma, rctxt, &rqstp->rq_res); if (ret < 0) goto 
put_ctxt; From 6e4d9bd110e293513c3c2a3ff2dfa0a0735699e0 Mon Sep 17 00:00:00 2001 From: Kent Overstreet Date: Sat, 20 Apr 2024 15:13:20 -0400 Subject: [PATCH 155/313] bcachefs: bkey_cached.btree_trans_barrier_seq needs to be a ulong this stores the SRCU sequence number, which we use to check if an SRCU barrier has elapsed; this is a partial fix for the key cache shrinker not actually freeing. Signed-off-by: Kent Overstreet --- fs/bcachefs/btree_types.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fs/bcachefs/btree_types.h b/fs/bcachefs/btree_types.h index e0c982a4195c..c69b233c41bb 100644 --- a/fs/bcachefs/btree_types.h +++ b/fs/bcachefs/btree_types.h @@ -321,9 +321,9 @@ struct bkey_cached { struct btree_bkey_cached_common c; unsigned long flags; + unsigned long btree_trans_barrier_seq; u16 u64s; bool valid; - u32 btree_trans_barrier_seq; struct bkey_cached_key key; struct rhash_head hash; From adfe9357c39e251ffe22ceaa1edb4b7662ed76e6 Mon Sep 17 00:00:00 2001 From: Kent Overstreet Date: Sat, 20 Apr 2024 15:35:40 -0400 Subject: [PATCH 156/313] bcachefs: Tweak btree key cache shrinker so it actually frees Freeing key cache items is a multi stage process; we need to wait for an SRCU grace period to elapse, and we handle this ourselves - partially to avoid callback overhead, but primarily so that when allocating we can first allocate from the freed items waiting for an SRCU grace period. Previously, the shrinker was counting the items on the 'waiting for SRCU grace period' lists as items being scanned, but this meant that too many items waiting for an SRCU grace period could prevent it from doing any work at all. 
After this, we're seeing that items skipped due to the accessed bit are the main cause of the shrinker not making any progress, and we actually want the key cache shrinker to run quite aggressively because reclaimed items will still generally be found (more compactly) in the btree node cache - so we also tweak the shrinker to not count those against nr_to_scan. Signed-off-by: Kent Overstreet --- fs/bcachefs/btree_key_cache.c | 19 ++++--------------- 1 file changed, 4 insertions(+), 15 deletions(-) diff --git a/fs/bcachefs/btree_key_cache.c b/fs/bcachefs/btree_key_cache.c index 88a3582a3275..e8c1c530cd95 100644 --- a/fs/bcachefs/btree_key_cache.c +++ b/fs/bcachefs/btree_key_cache.c @@ -842,8 +842,6 @@ static unsigned long bch2_btree_key_cache_scan(struct shrinker *shrink, * Newest freed entries are at the end of the list - once we hit one * that's too new to be freed, we can bail out: */ - scanned += bc->nr_freed_nonpcpu; - list_for_each_entry_safe(ck, t, &bc->freed_nonpcpu, list) { if (!poll_state_synchronize_srcu(&c->btree_trans_barrier, ck->btree_trans_barrier_seq)) @@ -857,11 +855,6 @@ static unsigned long bch2_btree_key_cache_scan(struct shrinker *shrink, bc->nr_freed_nonpcpu--; } - if (scanned >= nr) - goto out; - - scanned += bc->nr_freed_pcpu; - list_for_each_entry_safe(ck, t, &bc->freed_pcpu, list) { if (!poll_state_synchronize_srcu(&c->btree_trans_barrier, ck->btree_trans_barrier_seq)) @@ -875,9 +868,6 @@ static unsigned long bch2_btree_key_cache_scan(struct shrinker *shrink, bc->nr_freed_pcpu--; } - if (scanned >= nr) - goto out; - rcu_read_lock(); tbl = rht_dereference_rcu(bc->table.tbl, &bc->table); if (bc->shrink_iter >= tbl->size) @@ -893,12 +883,12 @@ static unsigned long bch2_btree_key_cache_scan(struct shrinker *shrink, next = rht_dereference_bucket_rcu(pos->next, tbl, bc->shrink_iter); ck = container_of(pos, struct bkey_cached, hash); - if (test_bit(BKEY_CACHED_DIRTY, &ck->flags)) + if (test_bit(BKEY_CACHED_DIRTY, &ck->flags)) { goto next; - - if 
(test_bit(BKEY_CACHED_ACCESSED, &ck->flags)) + } else if (test_bit(BKEY_CACHED_ACCESSED, &ck->flags)) { clear_bit(BKEY_CACHED_ACCESSED, &ck->flags); - else if (bkey_cached_lock_for_evict(ck)) { + goto next; + } else if (bkey_cached_lock_for_evict(ck)) { bkey_cached_evict(bc, ck); bkey_cached_free(bc, ck); } @@ -916,7 +906,6 @@ static unsigned long bch2_btree_key_cache_scan(struct shrinker *shrink, } while (scanned < nr && bc->shrink_iter != start); rcu_read_unlock(); -out: memalloc_nofs_restore(flags); srcu_read_unlock(&c->btree_trans_barrier, srcu_idx); mutex_unlock(&bc->lock); From 85ab365f7cdf2b2a713823a93e7e5e94f0529627 Mon Sep 17 00:00:00 2001 From: Kent Overstreet Date: Fri, 19 Apr 2024 21:54:32 -0400 Subject: [PATCH 157/313] bcachefs: Fix deadlock in journal write path bch2_journal_write() was incorrectly waiting on earlier journal writes synchronously; this usually worked because most of the time we'd be running in the context of a thread that did a journal_buf_put(), but sometimes we'd be running out of the same workqueue that completes those prior journal writes. Additionally, this makes sure to punt to a workqueue before submitting preflushes - we really don't want to be calling submit_bio() in the main transaction commit path. 
Signed-off-by: Kent Overstreet --- fs/bcachefs/journal_io.c | 60 ++++++++++++++++++++++++++++------------ 1 file changed, 42 insertions(+), 18 deletions(-) diff --git a/fs/bcachefs/journal_io.c b/fs/bcachefs/journal_io.c index 9aa28b52ab92..eb1f9d6f5a19 100644 --- a/fs/bcachefs/journal_io.c +++ b/fs/bcachefs/journal_io.c @@ -1723,7 +1723,7 @@ static void journal_write_endio(struct bio *bio) percpu_ref_put(&ca->io_ref); } -static CLOSURE_CALLBACK(do_journal_write) +static CLOSURE_CALLBACK(journal_write_submit) { closure_type(w, struct journal_buf, io); struct journal *j = container_of(w, struct journal, buf[w->idx]); @@ -1768,6 +1768,44 @@ static CLOSURE_CALLBACK(do_journal_write) continue_at(cl, journal_write_done, j->wq); } +static CLOSURE_CALLBACK(journal_write_preflush) +{ + closure_type(w, struct journal_buf, io); + struct journal *j = container_of(w, struct journal, buf[w->idx]); + struct bch_fs *c = container_of(j, struct bch_fs, journal); + + if (j->seq_ondisk + 1 != le64_to_cpu(w->data->seq)) { + spin_lock(&j->lock); + closure_wait(&j->async_wait, cl); + spin_unlock(&j->lock); + + continue_at(cl, journal_write_preflush, j->wq); + return; + } + + if (w->separate_flush) { + for_each_rw_member(c, ca) { + percpu_ref_get(&ca->io_ref); + + struct journal_device *ja = &ca->journal; + struct bio *bio = &ja->bio[w->idx]->bio; + bio_reset(bio, ca->disk_sb.bdev, + REQ_OP_WRITE|REQ_SYNC|REQ_META|REQ_PREFLUSH); + bio->bi_end_io = journal_write_endio; + bio->bi_private = ca; + closure_bio_submit(bio, cl); + } + + continue_at(cl, journal_write_submit, j->wq); + } else { + /* + * no need to punt to another work item if we're not waiting on + * preflushes + */ + journal_write_submit(&cl->work); + } +} + static int bch2_journal_write_prep(struct journal *j, struct journal_buf *w) { struct bch_fs *c = container_of(j, struct bch_fs, journal); @@ -2033,23 +2071,9 @@ CLOSURE_CALLBACK(bch2_journal_write) goto err; if (!JSET_NO_FLUSH(w->data)) - closure_wait_event(&j->async_wait, 
j->seq_ondisk + 1 == le64_to_cpu(w->data->seq)); - - if (!JSET_NO_FLUSH(w->data) && w->separate_flush) { - for_each_rw_member(c, ca) { - percpu_ref_get(&ca->io_ref); - - struct journal_device *ja = &ca->journal; - struct bio *bio = &ja->bio[w->idx]->bio; - bio_reset(bio, ca->disk_sb.bdev, - REQ_OP_WRITE|REQ_SYNC|REQ_META|REQ_PREFLUSH); - bio->bi_end_io = journal_write_endio; - bio->bi_private = ca; - closure_bio_submit(bio, cl); - } - } - - continue_at(cl, do_journal_write, j->wq); + continue_at(cl, journal_write_preflush, j->wq); + else + continue_at(cl, journal_write_submit, j->wq); return; no_io: continue_at(cl, journal_write_done, j->wq); From 0e42f381193d7f9b47922f1c4308e7729a45ba13 Mon Sep 17 00:00:00 2001 From: Kent Overstreet Date: Sat, 20 Apr 2024 22:26:47 -0400 Subject: [PATCH 158/313] bcachefs: Fix inode early destruction path discard_new_inode() is the wrong interface to use when we need to free an inode that was never inserted into the inode hash table; we can bypass the whole iput() -> evict() path and replace it with __destroy_inode(); kmem_cache_free() - this fixes a WARN_ON() about I_NEW. Signed-off-by: Kent Overstreet --- fs/bcachefs/fs.c | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/fs/bcachefs/fs.c b/fs/bcachefs/fs.c index b5ea9fa1259d..fce690007edf 100644 --- a/fs/bcachefs/fs.c +++ b/fs/bcachefs/fs.c @@ -188,7 +188,8 @@ static struct bch_inode_info *bch2_inode_insert(struct bch_fs *c, struct bch_ino BUG_ON(!old); if (unlikely(old != inode)) { - discard_new_inode(&inode->v); + __destroy_inode(&inode->v); + kmem_cache_free(bch2_inode_cache, inode); inode = old; } else { mutex_lock(&c->vfs_inodes_lock); @@ -225,8 +226,10 @@ static struct bch_inode_info *bch2_new_inode(struct btree_trans *trans) if (unlikely(!inode)) { int ret = drop_locks_do(trans, (inode = to_bch_ei(new_inode(c->vfs_sb))) ? 
0 : -ENOMEM); - if (ret && inode) - discard_new_inode(&inode->v); + if (ret && inode) { + __destroy_inode(&inode->v); + kmem_cache_free(bch2_inode_cache, inode); + } if (ret) return ERR_PTR(ret); } From e027b71762e84ee9d4ba9ad5401b956b9e83ed2a Mon Sep 17 00:00:00 2001 From: Andrei Simion Date: Thu, 4 Apr 2024 15:38:23 +0300 Subject: [PATCH 159/313] ARM: dts: microchip: at91-sama7g5ek: Replace regulator-suspend-voltage with the valid property By checking the pmic node with microchip,mcp16502.yaml# 'regulator-suspend-voltage' does not match any of the regexes 'pinctrl-[0-9]+' from schema microchip,mcp16502.yaml# which inherits regulator.yaml#. So replace regulator-suspend-voltage with regulator-suspend-microvolt to avoid the inconsistency. Fixes: 85b1304b9daa ("ARM: dts: at91: sama7g5ek: set regulator voltages for standby state") Signed-off-by: Andrei Simion Acked-by: Nicolas Ferre Link: https://lore.kernel.org/r/20240404123824.19182-2-andrei.simion@microchip.com [claudiu.beznea: added a dot before starting the last sentence in commit description] Signed-off-by: Claudiu Beznea --- arch/arm/boot/dts/microchip/at91-sama7g5ek.dts | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/arch/arm/boot/dts/microchip/at91-sama7g5ek.dts b/arch/arm/boot/dts/microchip/at91-sama7g5ek.dts index 217e9b96c61e..20b2497657ae 100644 --- a/arch/arm/boot/dts/microchip/at91-sama7g5ek.dts +++ b/arch/arm/boot/dts/microchip/at91-sama7g5ek.dts @@ -293,7 +293,7 @@ vddcore: VDD_CORE { regulator-state-standby { regulator-on-in-suspend; - regulator-suspend-voltage = <1150000>; + regulator-suspend-microvolt = <1150000>; regulator-mode = <4>; }; @@ -314,7 +314,7 @@ vddcpu: VDD_OTHER { regulator-state-standby { regulator-on-in-suspend; - regulator-suspend-voltage = <1050000>; + regulator-suspend-microvolt = <1050000>; regulator-mode = <4>; }; @@ -331,7 +331,7 @@ vldo1: LDO1 { regulator-always-on; regulator-state-standby { - regulator-suspend-voltage = <1800000>; +
regulator-suspend-microvolt = <1800000>; regulator-on-in-suspend; }; @@ -346,7 +346,7 @@ vldo2: LDO2 { regulator-max-microvolt = <3700000>; regulator-state-standby { - regulator-suspend-voltage = <1800000>; + regulator-suspend-microvolt = <1800000>; regulator-on-in-suspend; }; From 1fe5e0a31e6202025a100fc08cd5902f6abbaaba Mon Sep 17 00:00:00 2001 From: Andrei Simion Date: Thu, 4 Apr 2024 15:38:24 +0300 Subject: [PATCH 160/313] ARM: dts: microchip: at91-sama7g54_curiosity: Replace regulator-suspend-voltage with the valid property By checking the pmic node with microchip,mcp16502.yaml# 'regulator-suspend-voltage' does not match any of the regexes 'pinctrl-[0-9]+' from schema microchip,mcp16502.yaml# which inherits regulator.yaml#. So replace regulator-suspend-voltage with regulator-suspend-microvolt to avoid the inconsistency. Fixes: ebd6591f8ddb ("ARM: dts: microchip: sama7g54_curiosity: Add initial device tree of the board") Signed-off-by: Andrei Simion Acked-by: Nicolas Ferre Link: https://lore.kernel.org/r/20240404123824.19182-3-andrei.simion@microchip.com Signed-off-by: Claudiu Beznea --- arch/arm/boot/dts/microchip/at91-sama7g54_curiosity.dts | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/arch/arm/boot/dts/microchip/at91-sama7g54_curiosity.dts b/arch/arm/boot/dts/microchip/at91-sama7g54_curiosity.dts index 4f609e9e510e..009d2c832421 100644 --- a/arch/arm/boot/dts/microchip/at91-sama7g54_curiosity.dts +++ b/arch/arm/boot/dts/microchip/at91-sama7g54_curiosity.dts @@ -242,7 +242,7 @@ vddcore: VDD_CORE { regulator-state-standby { regulator-on-in-suspend; - regulator-suspend-voltage = <1150000>; + regulator-suspend-microvolt = <1150000>; regulator-mode = <4>; }; @@ -263,7 +263,7 @@ vddcpu: VDD_OTHER { regulator-state-standby { regulator-on-in-suspend; - regulator-suspend-voltage = <1050000>; + regulator-suspend-microvolt = <1050000>; regulator-mode = <4>; }; @@ -280,7 +280,7 @@ vldo1: LDO1 { regulator-always-on; regulator-state-standby { -
regulator-suspend-voltage = <1800000>; + regulator-suspend-microvolt = <1800000>; regulator-on-in-suspend; }; @@ -296,7 +296,7 @@ vldo2: LDO2 { regulator-always-on; regulator-state-standby { - regulator-suspend-voltage = <3300000>; + regulator-suspend-microvolt = <3300000>; regulator-on-in-suspend; }; From 10947b276b90df38e60aa3efd6b4b7a4b3c92fab Mon Sep 17 00:00:00 2001 From: Marek Vasut Date: Fri, 5 Apr 2024 22:21:53 +0200 Subject: [PATCH 161/313] arm64: dts: imx8mp: Fix assigned-clocks for second CSI2 The first CSI2 pixel clock are supplied from IMX8MP_CLK_MEDIA_CAM1_PIX_ROOT, the second CSI2 pixel clock are supplied from IMX8MP_CLK_MEDIA_CAM2_PIX_ROOT, both clock are supplied from SYS_PLL2 and configured using assigned-clock DT properties. Each CSI2 DT node configures its IMX8MP_CLK_MEDIA_CAMn_PIX_ROOT clock. This used to be the case until likely a copy-paste error in commit f78835d1e616 ("arm64: dts: imx8mp: reparent MEDIA_MIPI_PHY1_REF to CLK_24M") which changed the second CSI2 node to configure IMX8MP_CLK_MEDIA_CAM1_PIX_ROOT using its assigned-clocks property. Fix the second CSI2 assigned-clock property back to the original correct IMX8MP_CLK_MEDIA_CAM2_PIX_ROOT . 
Fixes: f78835d1e616 ("arm64: dts: imx8mp: reparent MEDIA_MIPI_PHY1_REF to CLK_24M") Signed-off-by: Marek Vasut Signed-off-by: Shawn Guo --- arch/arm64/boot/dts/freescale/imx8mp.dtsi | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/arm64/boot/dts/freescale/imx8mp.dtsi b/arch/arm64/boot/dts/freescale/imx8mp.dtsi index bfc5c81a5bd4..8141926e4ef1 100644 --- a/arch/arm64/boot/dts/freescale/imx8mp.dtsi +++ b/arch/arm64/boot/dts/freescale/imx8mp.dtsi @@ -1672,7 +1672,7 @@ mipi_csi_1: csi@32e50000 { <&clk IMX8MP_CLK_MEDIA_MIPI_PHY1_REF_ROOT>, <&clk IMX8MP_CLK_MEDIA_AXI_ROOT>; clock-names = "pclk", "wrap", "phy", "axi"; - assigned-clocks = <&clk IMX8MP_CLK_MEDIA_CAM1_PIX>, + assigned-clocks = <&clk IMX8MP_CLK_MEDIA_CAM2_PIX>, <&clk IMX8MP_CLK_MEDIA_MIPI_PHY1_REF>; assigned-clock-parents = <&clk IMX8MP_SYS_PLL2_1000M>, <&clk IMX8MP_CLK_24M>; From e858beeddfa3a400844c0e22d2118b3b52f1ea5e Mon Sep 17 00:00:00 2001 From: Kent Overstreet Date: Sun, 21 Apr 2024 23:32:18 -0400 Subject: [PATCH 162/313] bcachefs: If we run merges at a lower watermark, they must be nonblocking Fix another deadlock related to the merge path; previously, we switched to always running merges at a lower watermark (because they are noncritical); but when we run at a lower watermark we also need to run nonblocking or we've introduced a new deadlock. 
Signed-off-by: Kent Overstreet Reported-and-tested-by: s@m-h.ug --- fs/bcachefs/btree_update_interior.c | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/fs/bcachefs/btree_update_interior.c b/fs/bcachefs/btree_update_interior.c index 6030c396754f..b4efd8cc4d1a 100644 --- a/fs/bcachefs/btree_update_interior.c +++ b/fs/bcachefs/btree_update_interior.c @@ -1960,7 +1960,11 @@ int __bch2_foreground_maybe_merge(struct btree_trans *trans, if ((flags & BCH_WATERMARK_MASK) == BCH_WATERMARK_interior_updates) return 0; - flags &= ~BCH_WATERMARK_MASK; + if ((flags & BCH_WATERMARK_MASK) <= BCH_WATERMARK_reclaim) { + flags &= ~BCH_WATERMARK_MASK; + flags |= BCH_WATERMARK_btree; + flags |= BCH_TRANS_COMMIT_journal_reclaim; + } b = trans->paths[path].l[level].b; From 91112fc6212a9be6f3be636d885df9c17395e1a4 Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Sat, 20 Apr 2024 15:44:36 +0200 Subject: [PATCH 163/313] wifi: iwlwifi: mvm: fix link ID management On older (pre-MLD API) devices, we started also calling iwl_mvm_set_link_mapping()/iwl_mvm_unset_link_mapping(), but of course not also iwl_mvm_remove_link(). Since the link ID was only released in iwl_mvm_remove_link() this causes us to run out of FW link IDs very quickly. Fix it by releasing the link ID correctly. 
Fixes: a8b5d4809b50 ("wifi: iwlwifi: mvm: Configure the link mapping for non-MLD FW") Link: https://msgid.link/20240420154435.dce72db5d5e3.Ic40b454b24f1c7b380a1eedf67455d9cf2f58541@changeid Signed-off-by: Johannes Berg --- drivers/net/wireless/intel/iwlwifi/mvm/link.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/wireless/intel/iwlwifi/mvm/link.c b/drivers/net/wireless/intel/iwlwifi/mvm/link.c index 9f69e04594e4..fe5bba8561d0 100644 --- a/drivers/net/wireless/intel/iwlwifi/mvm/link.c +++ b/drivers/net/wireless/intel/iwlwifi/mvm/link.c @@ -279,6 +279,7 @@ int iwl_mvm_unset_link_mapping(struct iwl_mvm *mvm, struct ieee80211_vif *vif, RCU_INIT_POINTER(mvm->link_id_to_link_conf[link_info->fw_link_id], NULL); + iwl_mvm_release_fw_link_id(mvm, link_info->fw_link_id); return 0; } @@ -296,7 +297,6 @@ int iwl_mvm_remove_link(struct iwl_mvm *mvm, struct ieee80211_vif *vif, return 0; cmd.link_id = cpu_to_le32(link_info->fw_link_id); - iwl_mvm_release_fw_link_id(mvm, link_info->fw_link_id); link_info->fw_link_id = IWL_MVM_FW_LINK_ID_INVALID; cmd.spec_link_id = link_conf->link_id; cmd.phy_id = cpu_to_le32(FW_CTXT_INVALID); From 0b8fe5bd73249dc20be2e88a12041f8920797b59 Mon Sep 17 00:00:00 2001 From: Daniele Palmas Date: Thu, 18 Apr 2024 13:12:07 +0200 Subject: [PATCH 164/313] net: usb: qmi_wwan: add Telit FN920C04 compositions Add the following Telit FN920C04 compositions: 0x10a0: rmnet + tty (AT/NMEA) + tty (AT) + tty (diag) T: Bus=03 Lev=01 Prnt=03 Port=06 Cnt=01 Dev#= 5 Spd=480 MxCh= 0 D: Ver= 2.01 Cls=00(>ifc ) Sub=00 Prot=00 MxPS=64 #Cfgs= 1 P: Vendor=1bc7 ProdID=10a0 Rev=05.15 S: Manufacturer=Telit Cinterion S: Product=FN920 S: SerialNumber=92c4c4d8 C: #Ifs= 4 Cfg#= 1 Atr=e0 MxPwr=500mA I: If#= 0 Alt= 0 #EPs= 3 Cls=ff(vend.) Sub=ff Prot=50 Driver=qmi_wwan E: Ad=01(O) Atr=02(Bulk) MxPS= 512 Ivl=0ms E: Ad=81(I) Atr=02(Bulk) MxPS= 512 Ivl=0ms E: Ad=82(I) Atr=03(Int.) MxPS= 8 Ivl=32ms I: If#= 1 Alt= 0 #EPs= 3 Cls=ff(vend.) 
Sub=ff Prot=60 Driver=option E: Ad=02(O) Atr=02(Bulk) MxPS= 512 Ivl=0ms E: Ad=83(I) Atr=02(Bulk) MxPS= 512 Ivl=0ms E: Ad=84(I) Atr=03(Int.) MxPS= 10 Ivl=32ms I: If#= 2 Alt= 0 #EPs= 3 Cls=ff(vend.) Sub=ff Prot=40 Driver=option E: Ad=03(O) Atr=02(Bulk) MxPS= 512 Ivl=0ms E: Ad=85(I) Atr=02(Bulk) MxPS= 512 Ivl=0ms E: Ad=86(I) Atr=03(Int.) MxPS= 10 Ivl=32ms I: If#= 3 Alt= 0 #EPs= 2 Cls=ff(vend.) Sub=ff Prot=30 Driver=option E: Ad=04(O) Atr=02(Bulk) MxPS= 512 Ivl=0ms E: Ad=87(I) Atr=02(Bulk) MxPS= 512 Ivl=0ms 0x10a4: rmnet + tty (AT) + tty (AT) + tty (diag) T: Bus=03 Lev=01 Prnt=03 Port=06 Cnt=01 Dev#= 8 Spd=480 MxCh= 0 D: Ver= 2.01 Cls=00(>ifc ) Sub=00 Prot=00 MxPS=64 #Cfgs= 1 P: Vendor=1bc7 ProdID=10a4 Rev=05.15 S: Manufacturer=Telit Cinterion S: Product=FN920 S: SerialNumber=92c4c4d8 C: #Ifs= 4 Cfg#= 1 Atr=e0 MxPwr=500mA I: If#= 0 Alt= 0 #EPs= 3 Cls=ff(vend.) Sub=ff Prot=50 Driver=qmi_wwan E: Ad=01(O) Atr=02(Bulk) MxPS= 512 Ivl=0ms E: Ad=81(I) Atr=02(Bulk) MxPS= 512 Ivl=0ms E: Ad=82(I) Atr=03(Int.) MxPS= 8 Ivl=32ms I: If#= 1 Alt= 0 #EPs= 3 Cls=ff(vend.) Sub=ff Prot=40 Driver=option E: Ad=02(O) Atr=02(Bulk) MxPS= 512 Ivl=0ms E: Ad=83(I) Atr=02(Bulk) MxPS= 512 Ivl=0ms E: Ad=84(I) Atr=03(Int.) MxPS= 10 Ivl=32ms I: If#= 2 Alt= 0 #EPs= 3 Cls=ff(vend.) Sub=ff Prot=40 Driver=option E: Ad=03(O) Atr=02(Bulk) MxPS= 512 Ivl=0ms E: Ad=85(I) Atr=02(Bulk) MxPS= 512 Ivl=0ms E: Ad=86(I) Atr=03(Int.) MxPS= 10 Ivl=32ms I: If#= 3 Alt= 0 #EPs= 2 Cls=ff(vend.) Sub=ff Prot=30 Driver=option E: Ad=04(O) Atr=02(Bulk) MxPS= 512 Ivl=0ms E: Ad=87(I) Atr=02(Bulk) MxPS= 512 Ivl=0ms 0x10a9: rmnet + tty (AT) + tty (diag) + DPL (data packet logging) + adb T: Bus=03 Lev=01 Prnt=03 Port=06 Cnt=01 Dev#= 9 Spd=480 MxCh= 0 D: Ver= 2.01 Cls=00(>ifc ) Sub=00 Prot=00 MxPS=64 #Cfgs= 1 P: Vendor=1bc7 ProdID=10a9 Rev=05.15 S: Manufacturer=Telit Cinterion S: Product=FN920 S: SerialNumber=92c4c4d8 C: #Ifs= 5 Cfg#= 1 Atr=e0 MxPwr=500mA I: If#= 0 Alt= 0 #EPs= 3 Cls=ff(vend.) 
Sub=ff Prot=50 Driver=qmi_wwan E: Ad=01(O) Atr=02(Bulk) MxPS= 512 Ivl=0ms E: Ad=81(I) Atr=02(Bulk) MxPS= 512 Ivl=0ms E: Ad=82(I) Atr=03(Int.) MxPS= 8 Ivl=32ms I: If#= 1 Alt= 0 #EPs= 3 Cls=ff(vend.) Sub=ff Prot=40 Driver=option E: Ad=02(O) Atr=02(Bulk) MxPS= 512 Ivl=0ms E: Ad=83(I) Atr=02(Bulk) MxPS= 512 Ivl=0ms E: Ad=84(I) Atr=03(Int.) MxPS= 10 Ivl=32ms I: If#= 2 Alt= 0 #EPs= 2 Cls=ff(vend.) Sub=ff Prot=30 Driver=option E: Ad=03(O) Atr=02(Bulk) MxPS= 512 Ivl=0ms E: Ad=85(I) Atr=02(Bulk) MxPS= 512 Ivl=0ms I: If#= 3 Alt= 0 #EPs= 1 Cls=ff(vend.) Sub=ff Prot=80 Driver=(none) E: Ad=86(I) Atr=02(Bulk) MxPS= 512 Ivl=0ms I: If#= 4 Alt= 0 #EPs= 2 Cls=ff(vend.) Sub=42 Prot=01 Driver=(none) E: Ad=04(O) Atr=02(Bulk) MxPS= 512 Ivl=0ms E: Ad=87(I) Atr=02(Bulk) MxPS= 512 Ivl=0ms Signed-off-by: Daniele Palmas Signed-off-by: David S. Miller --- drivers/net/usb/qmi_wwan.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/net/usb/qmi_wwan.c b/drivers/net/usb/qmi_wwan.c index edc34402e787..a5469cf5cf67 100644 --- a/drivers/net/usb/qmi_wwan.c +++ b/drivers/net/usb/qmi_wwan.c @@ -1368,6 +1368,9 @@ static const struct usb_device_id products[] = { {QMI_QUIRK_SET_DTR(0x1bc7, 0x1060, 2)}, /* Telit LN920 */ {QMI_QUIRK_SET_DTR(0x1bc7, 0x1070, 2)}, /* Telit FN990 */ {QMI_QUIRK_SET_DTR(0x1bc7, 0x1080, 2)}, /* Telit FE990 */ + {QMI_QUIRK_SET_DTR(0x1bc7, 0x10a0, 0)}, /* Telit FN920C04 */ + {QMI_QUIRK_SET_DTR(0x1bc7, 0x10a4, 0)}, /* Telit FN920C04 */ + {QMI_QUIRK_SET_DTR(0x1bc7, 0x10a9, 0)}, /* Telit FN920C04 */ {QMI_FIXED_INTF(0x1bc7, 0x1100, 3)}, /* Telit ME910 */ {QMI_FIXED_INTF(0x1bc7, 0x1101, 3)}, /* Telit ME910 dual modem */ {QMI_FIXED_INTF(0x1bc7, 0x1200, 5)}, /* Telit LE920 */ From c58e88d49097bd12dfcfef4f075b43f5d5830941 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Sat, 20 Apr 2024 07:01:16 +0000 Subject: [PATCH 165/313] icmp: prevent possible NULL dereferences from icmp_build_probe() First problem is a double call to __in_dev_get_rcu(), because the second one could return 
NULL. if (__in_dev_get_rcu(dev) && __in_dev_get_rcu(dev)->ifa_list) Second problem is a read from dev->ip6_ptr with no NULL check: if (!list_empty(&rcu_dereference(dev->ip6_ptr)->addr_list)) Use the correct RCU API to fix these. v2: add missing include Fixes: d329ea5bd884 ("icmp: add response to RFC 8335 PROBE messages") Signed-off-by: Eric Dumazet Cc: Andreas Roeseler Reviewed-by: David Ahern Signed-off-by: David S. Miller --- net/ipv4/icmp.c | 12 ++++++++++-- 1 file changed, 10 insertions(+), 2 deletions(-) diff --git a/net/ipv4/icmp.c b/net/ipv4/icmp.c index e63a3bf99617..437e782b9663 100644 --- a/net/ipv4/icmp.c +++ b/net/ipv4/icmp.c @@ -92,6 +92,7 @@ #include #include #include +#include /* * Build xmit assembly blocks @@ -1032,6 +1033,8 @@ bool icmp_build_probe(struct sk_buff *skb, struct icmphdr *icmphdr) struct icmp_ext_hdr *ext_hdr, _ext_hdr; struct icmp_ext_echo_iio *iio, _iio; struct net *net = dev_net(skb->dev); + struct inet6_dev *in6_dev; + struct in_device *in_dev; struct net_device *dev; char buff[IFNAMSIZ]; u16 ident_len; @@ -1115,10 +1118,15 @@ bool icmp_build_probe(struct sk_buff *skb, struct icmphdr *icmphdr) /* Fill bits in reply message */ if (dev->flags & IFF_UP) status |= ICMP_EXT_ECHOREPLY_ACTIVE; - if (__in_dev_get_rcu(dev) && __in_dev_get_rcu(dev)->ifa_list) + + in_dev = __in_dev_get_rcu(dev); + if (in_dev && rcu_access_pointer(in_dev->ifa_list)) status |= ICMP_EXT_ECHOREPLY_IPV4; - if (!list_empty(&rcu_dereference(dev->ip6_ptr)->addr_list)) + + in6_dev = __in6_dev_get(dev); + if (in6_dev && !list_empty(&in6_dev->addr_list)) status |= ICMP_EXT_ECHOREPLY_IPV6; + dev_put(dev); icmphdr->un.echo.sequence |= htons(status); return true; From 70dcdf5f8c41ce2379d48d497db10af4a09ea075 Mon Sep 17 00:00:00 2001 From: Alex Elder Date: Sun, 21 Apr 2024 14:44:58 -0500 Subject: [PATCH 166/313] mailmap: add entries for Alex Elder Define my kernel.org address to be the canonical one, and add mailmap entries for the various addresses (including typos) that 
have been used over the years. Signed-off-by: Alex Elder Signed-off-by: David S. Miller --- .mailmap | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/.mailmap b/.mailmap index 8284692f9610..f932ce611898 100644 --- a/.mailmap +++ b/.mailmap @@ -38,6 +38,16 @@ Alexei Starovoitov Alexei Starovoitov Alexei Starovoitov Alexey Makhalov +Alex Elder +Alex Elder +Alex Elder +Alex Elder +Alex Elder +Alex Elder +Alex Elder +Alex Elder +Alex Elder +Alex Elder Alex Hung Alex Shi Alex Shi From 4fd1edcdf13c0d234543ecf502092be65c5177db Mon Sep 17 00:00:00 2001 From: Hangbin Liu Date: Fri, 19 Apr 2024 16:02:00 +0800 Subject: [PATCH 167/313] bridge/br_netlink.c: no need to return void function br_info_notify is a void function. There is no need to return. Fixes: b6d0425b816e ("bridge: cfm: Netlink Notifications.") Signed-off-by: Hangbin Liu Acked-by: Nikolay Aleksandrov Signed-off-by: David S. Miller --- net/bridge/br_netlink.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/bridge/br_netlink.c b/net/bridge/br_netlink.c index 2cf4fc756263..f17dbac7d828 100644 --- a/net/bridge/br_netlink.c +++ b/net/bridge/br_netlink.c @@ -667,7 +667,7 @@ void br_ifinfo_notify(int event, const struct net_bridge *br, { u32 filter = RTEXT_FILTER_BRVLAN_COMPRESSED; - return br_info_notify(event, br, port, filter); + br_info_notify(event, br, port, filter); } /* From 11b1b8bc2b98e21ddf47e08b56c21502c685b2c3 Mon Sep 17 00:00:00 2001 From: Tianchen Ding Date: Wed, 6 Mar 2024 10:21:32 +0800 Subject: [PATCH 168/313] sched/eevdf: Always update V if se->on_rq when reweighting reweight_eevdf() needs the latest V to do accurate calculation for new ve and vd. So update V unconditionally when se is runnable. 
Fixes: eab03c23c2a1 ("sched/eevdf: Fix vruntime adjustment on reweight") Suggested-by: Abel Wu Signed-off-by: Tianchen Ding Signed-off-by: Peter Zijlstra (Intel) Reviewed-by: Abel Wu Tested-by: K Prateek Nayak Tested-by: Chen Yu Link: https://lore.kernel.org/r/20240306022133.81008-2-dtcccc@linux.alibaba.com --- kernel/sched/fair.c | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c index 03be0d1330a6..5551ce2af73e 100644 --- a/kernel/sched/fair.c +++ b/kernel/sched/fair.c @@ -3790,9 +3790,8 @@ static void reweight_entity(struct cfs_rq *cfs_rq, struct sched_entity *se, if (se->on_rq) { /* commit outstanding execution time */ - if (curr) - update_curr(cfs_rq); - else + update_curr(cfs_rq); + if (!curr) __dequeue_entity(cfs_rq, se); update_load_sub(&cfs_rq->load, se->load.weight); } From afae8002b4fd3560c8f5f1567f3c3202c30a70fa Mon Sep 17 00:00:00 2001 From: Tianchen Ding Date: Wed, 6 Mar 2024 10:21:33 +0800 Subject: [PATCH 169/313] sched/eevdf: Fix miscalculation in reweight_entity() when se is not curr reweight_eevdf() only keeps V unchanged inside itself. When se != cfs_rq->curr, it would be dequeued from rb tree first. So that V is changed and the result is wrong. Pass the original V to reweight_eevdf() to fix this issue. 
Fixes: eab03c23c2a1 ("sched/eevdf: Fix vruntime adjustment on reweight") Signed-off-by: Tianchen Ding [peterz: flip if() condition for clarity] Signed-off-by: Peter Zijlstra (Intel) Reviewed-by: Abel Wu Link: https://lkml.kernel.org/r/20240306022133.81008-3-dtcccc@linux.alibaba.com --- kernel/sched/fair.c | 11 ++++++----- 1 file changed, 6 insertions(+), 5 deletions(-) diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c index 5551ce2af73e..6d266917d38d 100644 --- a/kernel/sched/fair.c +++ b/kernel/sched/fair.c @@ -3676,11 +3676,10 @@ static inline void dequeue_load_avg(struct cfs_rq *cfs_rq, struct sched_entity *se) { } #endif -static void reweight_eevdf(struct cfs_rq *cfs_rq, struct sched_entity *se, +static void reweight_eevdf(struct sched_entity *se, u64 avruntime, unsigned long weight) { unsigned long old_weight = se->load.weight; - u64 avruntime = avg_vruntime(cfs_rq); s64 vlag, vslice; /* @@ -3787,24 +3786,26 @@ static void reweight_entity(struct cfs_rq *cfs_rq, struct sched_entity *se, unsigned long weight) { bool curr = cfs_rq->curr == se; + u64 avruntime; if (se->on_rq) { /* commit outstanding execution time */ update_curr(cfs_rq); + avruntime = avg_vruntime(cfs_rq); if (!curr) __dequeue_entity(cfs_rq, se); update_load_sub(&cfs_rq->load, se->load.weight); } dequeue_load_avg(cfs_rq, se); - if (!se->on_rq) { + if (se->on_rq) { + reweight_eevdf(se, avruntime, weight); + } else { /* * Because we keep se->vlag = V - v_i, while: lag_i = w_i*(V - v_i), * we need to scale se->vlag when w_i changes. */ se->vlag = div_s64(se->vlag * se->load.weight, weight); - } else { - reweight_eevdf(cfs_rq, se, weight); } update_load_set(&se->load, weight); From 1560d1f6eb6b398bddd80c16676776c0325fe5fe Mon Sep 17 00:00:00 2001 From: Xuewen Yan Date: Mon, 22 Apr 2024 16:22:38 +0800 Subject: [PATCH 170/313] sched/eevdf: Prevent vlag from going out of bounds in reweight_eevdf() It was possible to have pick_eevdf() return NULL, which then causes a NULL-deref. 
This turned out to be due to entity_eligible() returning falsely negative because of a s64 multiplication overflow. Specifically, reweight_eevdf() computes the vlag without considering the limit placed upon vlag as update_entity_lag() does, and then the scaling multiplication (remember that weight is 20bit fixed point) can overflow. This then leads to the new vruntime being weird which then causes the above entity_eligible() to go side-ways and claim nothing is eligible. Thus limit the range of vlag accordingly. All this was quite rare, but fatal when it does happen. Closes: https://lore.kernel.org/all/ZhuYyrh3mweP_Kd8@nz.home/ Closes: https://lore.kernel.org/all/CA+9S74ih+45M_2TPUY_mPPVDhNvyYfy1J1ftSix+KjiTVxg8nw@mail.gmail.com/ Closes: https://lore.kernel.org/lkml/202401301012.2ed95df0-oliver.sang@intel.com/ Fixes: eab03c23c2a1 ("sched/eevdf: Fix vruntime adjustment on reweight") Reported-by: Sergei Trofimovich Reported-by: Igor Raits Reported-by: Breno Leitao Reported-by: kernel test robot Reported-by: Yujie Liu Signed-off-by: Xuewen Yan Reviewed-and-tested-by: Chen Yu Signed-off-by: Peter Zijlstra (Intel) Link: https://lore.kernel.org/r/20240422082238.5784-1-xuewen.yan@unisoc.com --- kernel/sched/fair.c | 18 ++++++++++++------ 1 file changed, 12 insertions(+), 6 deletions(-) diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c index 6d266917d38d..c62805dbd608 100644 --- a/kernel/sched/fair.c +++ b/kernel/sched/fair.c @@ -696,15 +696,21 @@ u64 avg_vruntime(struct cfs_rq *cfs_rq) * * XXX could add max_slice to the augmented data to track this.
*/ +static s64 entity_lag(u64 avruntime, struct sched_entity *se) +{ + s64 vlag, limit; + + vlag = avruntime - se->vruntime; + limit = calc_delta_fair(max_t(u64, 2*se->slice, TICK_NSEC), se); + + return clamp(vlag, -limit, limit); +} + static void update_entity_lag(struct cfs_rq *cfs_rq, struct sched_entity *se) { - s64 lag, limit; - SCHED_WARN_ON(!se->on_rq); - lag = avg_vruntime(cfs_rq) - se->vruntime; - limit = calc_delta_fair(max_t(u64, 2*se->slice, TICK_NSEC), se); - se->vlag = clamp(lag, -limit, limit); + se->vlag = entity_lag(avg_vruntime(cfs_rq), se); } /* @@ -3760,7 +3766,7 @@ static void reweight_eevdf(struct sched_entity *se, u64 avruntime, * = V - vl' */ if (avruntime != se->vruntime) { - vlag = (s64)(avruntime - se->vruntime); + vlag = entity_lag(avruntime, se); vlag = div_s64(vlag * old_weight, weight); se->vruntime = avruntime - vlag; } From 7474b1c82be3780692d537d331f9aa7fc1e5a368 Mon Sep 17 00:00:00 2001 From: Vikas Gupta Date: Fri, 19 Apr 2024 11:34:47 -0700 Subject: [PATCH 171/313] bnxt_en: refactor reset close code Introduce bnxt_fw_fatal_close() API which can be used to stop data path and disable device when firmware is in fatal state. Signed-off-by: Vikas Gupta Signed-off-by: Michael Chan Signed-off-by: David S. 
Miller --- drivers/net/ethernet/broadcom/bnxt/bnxt.c | 17 +++++++++++------ 1 file changed, 11 insertions(+), 6 deletions(-) diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt.c b/drivers/net/ethernet/broadcom/bnxt/bnxt.c index 57e61f963167..c852f87c842f 100644 --- a/drivers/net/ethernet/broadcom/bnxt/bnxt.c +++ b/drivers/net/ethernet/broadcom/bnxt/bnxt.c @@ -13037,6 +13037,16 @@ static void bnxt_rx_ring_reset(struct bnxt *bp) bnxt_rtnl_unlock_sp(bp); } +static void bnxt_fw_fatal_close(struct bnxt *bp) +{ + bnxt_tx_disable(bp); + bnxt_disable_napi(bp); + bnxt_disable_int_sync(bp); + bnxt_free_irq(bp); + bnxt_clear_int_mode(bp); + pci_disable_device(bp->pdev); +} + static void bnxt_fw_reset_close(struct bnxt *bp) { bnxt_ulp_stop(bp); @@ -13050,12 +13060,7 @@ static void bnxt_fw_reset_close(struct bnxt *bp) pci_read_config_word(bp->pdev, PCI_SUBSYSTEM_ID, &val); if (val == 0xffff) bp->fw_reset_min_dsecs = 0; - bnxt_tx_disable(bp); - bnxt_disable_napi(bp); - bnxt_disable_int_sync(bp); - bnxt_free_irq(bp); - bnxt_clear_int_mode(bp); - pci_disable_device(bp->pdev); + bnxt_fw_fatal_close(bp); } __bnxt_close_nic(bp, true, false); bnxt_vf_reps_free(bp); From a1acdc226baec331512f815d6ac9dd6f8435cc7f Mon Sep 17 00:00:00 2001 From: Vikas Gupta Date: Fri, 19 Apr 2024 11:34:48 -0700 Subject: [PATCH 172/313] bnxt_en: Fix the PCI-AER routines We do not support two simultaneous recoveries so check for reset flag, BNXT_STATE_IN_FW_RESET, and do not proceed with AER further. When the pci channel state is pci_channel_io_frozen, the PCIe link can not be trusted so we disable the traffic immediately and stop BAR access by calling bnxt_fw_fatal_close(). BAR access after AER fatal error can cause an NMI. Fixes: f75d9a0aa967 ("bnxt_en: Re-write PCI BARs after PCI fatal error.") Signed-off-by: Vikas Gupta Signed-off-by: Michael Chan Signed-off-by: David S. 
Miller --- drivers/net/ethernet/broadcom/bnxt/bnxt.c | 19 ++++++++++++++++--- 1 file changed, 16 insertions(+), 3 deletions(-) diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt.c b/drivers/net/ethernet/broadcom/bnxt/bnxt.c index c852f87c842f..86c1c30c70d5 100644 --- a/drivers/net/ethernet/broadcom/bnxt/bnxt.c +++ b/drivers/net/ethernet/broadcom/bnxt/bnxt.c @@ -15378,6 +15378,7 @@ static pci_ers_result_t bnxt_io_error_detected(struct pci_dev *pdev, { struct net_device *netdev = pci_get_drvdata(pdev); struct bnxt *bp = netdev_priv(netdev); + bool abort = false; netdev_info(netdev, "PCI I/O error detected\n"); @@ -15386,16 +15387,27 @@ static pci_ers_result_t bnxt_io_error_detected(struct pci_dev *pdev, bnxt_ulp_stop(bp); - if (state == pci_channel_io_perm_failure) { + if (test_and_set_bit(BNXT_STATE_IN_FW_RESET, &bp->state)) { + netdev_err(bp->dev, "Firmware reset already in progress\n"); + abort = true; + } + + if (abort || state == pci_channel_io_perm_failure) { rtnl_unlock(); return PCI_ERS_RESULT_DISCONNECT; } - if (state == pci_channel_io_frozen) + /* Link is not reliable anymore if state is pci_channel_io_frozen + * so we disable bus master to prevent any potential bad DMAs before + * freeing kernel memory. 
+ */ + if (state == pci_channel_io_frozen) { set_bit(BNXT_STATE_PCI_CHANNEL_IO_FROZEN, &bp->state); + bnxt_fw_fatal_close(bp); + } if (netif_running(netdev)) - bnxt_close(netdev); + __bnxt_close_nic(bp, true, true); if (pci_is_enabled(pdev)) pci_disable_device(pdev); @@ -15479,6 +15491,7 @@ static pci_ers_result_t bnxt_io_slot_reset(struct pci_dev *pdev) } reset_exit: + clear_bit(BNXT_STATE_IN_FW_RESET, &bp->state); bnxt_clear_reservations(bp, true); rtnl_unlock(); From 41e54045b741daf61e03c82d442227af3d12111f Mon Sep 17 00:00:00 2001 From: Michael Chan Date: Fri, 19 Apr 2024 11:34:49 -0700 Subject: [PATCH 173/313] bnxt_en: Fix error recovery for 5760X (P7) chips During error recovery, such as AER fatal error slot reset, we call bnxt_try_map_fw_health_reg() to try to get access to the health register to determine the firmware state. Fix bnxt_try_map_fw_health_reg() to recognize the P7 chip correctly and set up the health register. This fixes this type of AER slot reset failure: bnxt_en 0000:04:00.0: AER: PCIe Bus Error: severity=Uncorrectable (Fatal), type=Inaccessible, (Unregistered Agent ID) bnxt_en 0000:04:00.0 enp4s0f0np0: PCI I/O error detected bnxt_en 0000:04:00.0 bnxt_re0: Handle device suspend call bnxt_en 0000:04:00.1 enp4s0f1np1: PCI I/O error detected bnxt_en 0000:04:00.1 bnxt_re1: Handle device suspend call pcieport 0000:00:02.0: AER: Root Port link has been reset (0) bnxt_en 0000:04:00.0 enp4s0f0np0: PCI Slot Reset bnxt_en 0000:04:00.0: enabling device (0000 -> 0002) bnxt_en 0000:04:00.0: Firmware not ready bnxt_en 0000:04:00.1 enp4s0f1np1: PCI Slot Reset bnxt_en 0000:04:00.1: enabling device (0000 -> 0002) bnxt_en 0000:04:00.1: Firmware not ready pcieport 0000:00:02.0: AER: device recovery failed Fixes: a432a45bdba4 ("bnxt_en: Define basic P7 macros") Signed-off-by: Michael Chan Signed-off-by: David S. 
Miller --- drivers/net/ethernet/broadcom/bnxt/bnxt.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt.c b/drivers/net/ethernet/broadcom/bnxt/bnxt.c index 86c1c30c70d5..ed04a90a4fdd 100644 --- a/drivers/net/ethernet/broadcom/bnxt/bnxt.c +++ b/drivers/net/ethernet/broadcom/bnxt/bnxt.c @@ -9089,7 +9089,7 @@ static void bnxt_try_map_fw_health_reg(struct bnxt *bp) BNXT_FW_HEALTH_WIN_BASE + BNXT_GRC_REG_CHIP_NUM); } - if (!BNXT_CHIP_P5(bp)) + if (!BNXT_CHIP_P5_PLUS(bp)) return; status_loc = BNXT_GRC_REG_STATUS_P5 | From d05dcfdf5e1659b2949d13060284eff3888b644e Mon Sep 17 00:00:00 2001 From: Eric Van Hensbergen Date: Mon, 15 Apr 2024 20:24:37 +0000 Subject: [PATCH 174/313] fs/9p: mitigate inode collisions Detect and mitigate inode collisions that now occur since we fixed 9p generating duplicate inode structures. Underlying cause of these appears to be a race condition between reuse of inode numbers in underlying file system and cleanup of inode numbers in the client. Enabling caching makes this much more likely to happen as it increases cleanup latency due to writebacks.
Reported-by: Kent Overstreet Signed-off-by: Eric Van Hensbergen --- fs/9p/v9fs.h | 11 ++++++----- fs/9p/vfs_inode.c | 37 +++++++++++++++++++++++++++++-------- fs/9p/vfs_inode_dotl.c | 28 ++++++++++++++++++++-------- fs/9p/vfs_super.c | 2 +- 4 files changed, 56 insertions(+), 22 deletions(-) diff --git a/fs/9p/v9fs.h b/fs/9p/v9fs.h index 9defa12208f9..1775fcc7f0e8 100644 --- a/fs/9p/v9fs.h +++ b/fs/9p/v9fs.h @@ -179,13 +179,14 @@ extern int v9fs_vfs_rename(struct mnt_idmap *idmap, struct inode *old_dir, struct dentry *old_dentry, struct inode *new_dir, struct dentry *new_dentry, unsigned int flags); -extern struct inode *v9fs_fid_iget(struct super_block *sb, struct p9_fid *fid); +extern struct inode *v9fs_fid_iget(struct super_block *sb, struct p9_fid *fid, + bool new); extern const struct inode_operations v9fs_dir_inode_operations_dotl; extern const struct inode_operations v9fs_file_inode_operations_dotl; extern const struct inode_operations v9fs_symlink_inode_operations_dotl; extern const struct netfs_request_ops v9fs_req_ops; extern struct inode *v9fs_fid_iget_dotl(struct super_block *sb, - struct p9_fid *fid); + struct p9_fid *fid, bool new); /* other default globals */ #define V9FS_PORT 564 @@ -224,12 +225,12 @@ static inline int v9fs_proto_dotl(struct v9fs_session_info *v9ses) */ static inline struct inode * v9fs_get_inode_from_fid(struct v9fs_session_info *v9ses, struct p9_fid *fid, - struct super_block *sb) + struct super_block *sb, bool new) { if (v9fs_proto_dotl(v9ses)) - return v9fs_fid_iget_dotl(sb, fid); + return v9fs_fid_iget_dotl(sb, fid, new); else - return v9fs_fid_iget(sb, fid); + return v9fs_fid_iget(sb, fid, new); } #endif diff --git a/fs/9p/vfs_inode.c b/fs/9p/vfs_inode.c index 47bd77199e20..7a3308d77606 100644 --- a/fs/9p/vfs_inode.c +++ b/fs/9p/vfs_inode.c @@ -364,7 +364,8 @@ void v9fs_evict_inode(struct inode *inode) clear_inode(inode); } -struct inode *v9fs_fid_iget(struct super_block *sb, struct p9_fid *fid) +struct inode * 
+v9fs_fid_iget(struct super_block *sb, struct p9_fid *fid, bool new) { dev_t rdev; int retval; @@ -376,8 +377,18 @@ struct inode *v9fs_fid_iget(struct super_block *sb, struct p9_fid *fid) inode = iget_locked(sb, QID2INO(&fid->qid)); if (unlikely(!inode)) return ERR_PTR(-ENOMEM); - if (!(inode->i_state & I_NEW)) - return inode; + if (!(inode->i_state & I_NEW)) { + if (!new) { + goto done; + } else { + p9_debug(P9_DEBUG_VFS, "WARNING: Inode collision %ld\n", + inode->i_ino); + iput(inode); + remove_inode_hash(inode); + inode = iget_locked(sb, QID2INO(&fid->qid)); + WARN_ON(!(inode->i_state & I_NEW)); + } + } /* * initialize the inode with the stat info @@ -401,11 +412,11 @@ struct inode *v9fs_fid_iget(struct super_block *sb, struct p9_fid *fid) v9fs_set_netfs_context(inode); v9fs_cache_inode_get_cookie(inode); unlock_new_inode(inode); +done: return inode; error: iget_failed(inode); return ERR_PTR(retval); - } /** @@ -437,8 +448,15 @@ static int v9fs_at_to_dotl_flags(int flags) */ static void v9fs_dec_count(struct inode *inode) { - if (!S_ISDIR(inode->i_mode) || inode->i_nlink > 2) - drop_nlink(inode); + if (!S_ISDIR(inode->i_mode) || inode->i_nlink > 2) { + if (inode->i_nlink) { + drop_nlink(inode); + } else { + p9_debug(P9_DEBUG_VFS, + "WARNING: unexpected i_nlink zero %d inode %ld\n", + inode->i_nlink, inode->i_ino); + } + } } /** @@ -489,6 +507,9 @@ static int v9fs_remove(struct inode *dir, struct dentry *dentry, int flags) } else v9fs_dec_count(inode); + if (inode->i_nlink <= 0) /* no more refs unhash it */ + remove_inode_hash(inode); + v9fs_invalidate_inode_attr(inode); v9fs_invalidate_inode_attr(dir); @@ -554,7 +575,7 @@ v9fs_create(struct v9fs_session_info *v9ses, struct inode *dir, /* * instantiate inode and assign the unopened fid to the dentry */ - inode = v9fs_get_inode_from_fid(v9ses, fid, dir->i_sb); + inode = v9fs_get_inode_from_fid(v9ses, fid, dir->i_sb, true); if (IS_ERR(inode)) { err = PTR_ERR(inode); p9_debug(P9_DEBUG_VFS, @@ -683,7 +704,7 @@ struct 
dentry *v9fs_vfs_lookup(struct inode *dir, struct dentry *dentry, else if (IS_ERR(fid)) inode = ERR_CAST(fid); else - inode = v9fs_get_inode_from_fid(v9ses, fid, dir->i_sb); + inode = v9fs_get_inode_from_fid(v9ses, fid, dir->i_sb, false); /* * If we had a rename on the server and a parallel lookup * for the new name, then make sure we instantiate with diff --git a/fs/9p/vfs_inode_dotl.c b/fs/9p/vfs_inode_dotl.c index 55dde186041a..c61b97bd13b9 100644 --- a/fs/9p/vfs_inode_dotl.c +++ b/fs/9p/vfs_inode_dotl.c @@ -52,7 +52,10 @@ static kgid_t v9fs_get_fsgid_for_create(struct inode *dir_inode) return current_fsgid(); } -struct inode *v9fs_fid_iget_dotl(struct super_block *sb, struct p9_fid *fid) + + +struct inode * +v9fs_fid_iget_dotl(struct super_block *sb, struct p9_fid *fid, bool new) { int retval; struct inode *inode; @@ -62,8 +65,18 @@ struct inode *v9fs_fid_iget_dotl(struct super_block *sb, struct p9_fid *fid) inode = iget_locked(sb, QID2INO(&fid->qid)); if (unlikely(!inode)) return ERR_PTR(-ENOMEM); - if (!(inode->i_state & I_NEW)) - return inode; + if (!(inode->i_state & I_NEW)) { + if (!new) { + goto done; + } else { /* deal with race condition in inode number reuse */ + p9_debug(P9_DEBUG_ERROR, "WARNING: Inode collision %lx\n", + inode->i_ino); + iput(inode); + remove_inode_hash(inode); + inode = iget_locked(sb, QID2INO(&fid->qid)); + WARN_ON(!(inode->i_state & I_NEW)); + } + } /* * initialize the inode with the stat info @@ -90,12 +103,11 @@ struct inode *v9fs_fid_iget_dotl(struct super_block *sb, struct p9_fid *fid) goto error; unlock_new_inode(inode); - +done: return inode; error: iget_failed(inode); return ERR_PTR(retval); - } struct dotl_openflag_map { @@ -247,7 +259,7 @@ v9fs_vfs_atomic_open_dotl(struct inode *dir, struct dentry *dentry, p9_debug(P9_DEBUG_VFS, "p9_client_walk failed %d\n", err); goto out; } - inode = v9fs_fid_iget_dotl(dir->i_sb, fid); + inode = v9fs_fid_iget_dotl(dir->i_sb, fid, true); if (IS_ERR(inode)) { err = PTR_ERR(inode); 
p9_debug(P9_DEBUG_VFS, "inode creation failed %d\n", err); @@ -340,7 +352,7 @@ static int v9fs_vfs_mkdir_dotl(struct mnt_idmap *idmap, } /* instantiate inode and assign the unopened fid to the dentry */ - inode = v9fs_fid_iget_dotl(dir->i_sb, fid); + inode = v9fs_fid_iget_dotl(dir->i_sb, fid, true); if (IS_ERR(inode)) { err = PTR_ERR(inode); p9_debug(P9_DEBUG_VFS, "inode creation failed %d\n", @@ -776,7 +788,7 @@ v9fs_vfs_mknod_dotl(struct mnt_idmap *idmap, struct inode *dir, err); goto error; } - inode = v9fs_fid_iget_dotl(dir->i_sb, fid); + inode = v9fs_fid_iget_dotl(dir->i_sb, fid, true); if (IS_ERR(inode)) { err = PTR_ERR(inode); p9_debug(P9_DEBUG_VFS, "inode creation failed %d\n", diff --git a/fs/9p/vfs_super.c b/fs/9p/vfs_super.c index 55e67e36ae68..f52fdf42945c 100644 --- a/fs/9p/vfs_super.c +++ b/fs/9p/vfs_super.c @@ -139,7 +139,7 @@ static struct dentry *v9fs_mount(struct file_system_type *fs_type, int flags, else sb->s_d_op = &v9fs_dentry_operations; - inode = v9fs_get_inode_from_fid(v9ses, fid, sb); + inode = v9fs_get_inode_from_fid(v9ses, fid, sb, true); if (IS_ERR(inode)) { retval = PTR_ERR(inode); goto release_sb; From 77d8aa79ecfb209308e0644c02f655122b31def7 Mon Sep 17 00:00:00 2001 From: Takayuki Nagata Date: Mon, 15 Apr 2024 16:47:49 +0900 Subject: [PATCH 175/313] cifs: reinstate original behavior again for forceuid/forcegid forceuid/forcegid should be enabled by default when uid=/gid= options are specified, but commit 24e0a1eff9e2 ("cifs: switch to new mount api") changed the behavior. Due to the change, a mounted share does not show intentional uid/gid for files and directories even though uid=/gid= options are specified since forceuid/forcegid are not enabled. This patch reinstates original behavior that overrides uid/gid with specified uid/gid by the options. 
Fixes: 24e0a1eff9e2 ("cifs: switch to new mount api") Signed-off-by: Takayuki Nagata Acked-by: Paulo Alcantara (Red Hat) Acked-by: Ronnie Sahlberg Acked-by: Tom Talpey Signed-off-by: Steve French --- fs/smb/client/fs_context.c | 12 ++++++++++++ fs/smb/client/fs_context.h | 2 ++ 2 files changed, 14 insertions(+) diff --git a/fs/smb/client/fs_context.c b/fs/smb/client/fs_context.c index 6c727d8c31e8..3bbac925d076 100644 --- a/fs/smb/client/fs_context.c +++ b/fs/smb/client/fs_context.c @@ -748,6 +748,16 @@ static int smb3_fs_context_validate(struct fs_context *fc) /* set the port that we got earlier */ cifs_set_port((struct sockaddr *)&ctx->dstaddr, ctx->port); + if (ctx->uid_specified && !ctx->forceuid_specified) { + ctx->override_uid = 1; + pr_notice("enabling forceuid mount option implicitly because uid= option is specified\n"); + } + + if (ctx->gid_specified && !ctx->forcegid_specified) { + ctx->override_gid = 1; + pr_notice("enabling forcegid mount option implicitly because gid= option is specified\n"); + } + if (ctx->override_uid && !ctx->uid_specified) { ctx->override_uid = 0; pr_notice("ignoring forceuid mount option specified with no uid= option\n"); @@ -1019,12 +1029,14 @@ static int smb3_fs_context_parse_param(struct fs_context *fc, ctx->override_uid = 0; else ctx->override_uid = 1; + ctx->forceuid_specified = true; break; case Opt_forcegid: if (result.negated) ctx->override_gid = 0; else ctx->override_gid = 1; + ctx->forcegid_specified = true; break; case Opt_perm: if (result.negated) diff --git a/fs/smb/client/fs_context.h b/fs/smb/client/fs_context.h index a947bddeba27..cf577ec0dd0a 100644 --- a/fs/smb/client/fs_context.h +++ b/fs/smb/client/fs_context.h @@ -165,6 +165,8 @@ enum cifs_param { }; struct smb3_fs_context { + bool forceuid_specified; + bool forcegid_specified; bool uid_specified; bool cruid_specified; bool gid_specified; From 4b759dd5765503bd466defac7d93aca14c23a15d Mon Sep 17 00:00:00 2001 From: Dan Williams Date: Fri, 5 Apr 2024 15:00:16 
-0700 Subject: [PATCH 176/313] cxl/core: Fix potential payload size confusion in cxl_mem_get_poison() A recent change to cxl_mem_get_records_log() [1] highlighted a subtle nuance of looping calls to cxl_internal_send_cmd(), i.e. that cxl_internal_send_cmd() modifies the 'size_out' member of the @mbox_cmd argument. That mechanism is useful for communicating underflow, but it is unwanted when reusing @mbox_cmd for a subsequent submission. It turns out that cxl_xfer_log() avoids this scenario by always redefining @mbox_cmd each iteration. Update cxl_mem_get_records_log() and cxl_mem_get_poison() to follow the same style as cxl_xfer_log(), i.e. re-define @mbox_cmd each iteration. The cxl_mem_get_records_log() change is just a style fixup, but the cxl_mem_get_poison() change is a potential fix, per Alison [2]: Poison list retrieval can hit this case if the MORE flag is set and a follow on read of the list delivers more records than the previous read. ie. device gives one record, sets the _MORE flag, then gives 5. Not an urgent fix since this behavior has not been seen in the wild, but worth tracking as a fix. 
Cc: Kwangjin Ko Cc: Alison Schofield Fixes: ed83f7ca398b ("cxl/mbox: Add GET_POISON_LIST mailbox command") Link: http://lore.kernel.org/r/20240402081404.1106-2-kwangjin.ko@sk.com [1] Link: http://lore.kernel.org/r/ZhAhAL/GOaWFrauw@aschofie-mobl2 [2] Signed-off-by: Dan Williams Reviewed-by: Ira Weiny Reviewed-by: Alison Schofield Link: https://lore.kernel.org/r/171235441633.2716581.12330082428680958635.stgit@dwillia2-xfh.jf.intel.com Signed-off-by: Dave Jiang --- drivers/cxl/core/mbox.c | 38 +++++++++++++++++--------------------- 1 file changed, 17 insertions(+), 21 deletions(-) diff --git a/drivers/cxl/core/mbox.c b/drivers/cxl/core/mbox.c index f0f54aeccc87..65185c9fa001 100644 --- a/drivers/cxl/core/mbox.c +++ b/drivers/cxl/core/mbox.c @@ -946,25 +946,22 @@ static void cxl_mem_get_records_log(struct cxl_memdev_state *mds, struct cxl_memdev *cxlmd = mds->cxlds.cxlmd; struct device *dev = mds->cxlds.dev; struct cxl_get_event_payload *payload; - struct cxl_mbox_cmd mbox_cmd; u8 log_type = type; u16 nr_rec; mutex_lock(&mds->event.log_lock); payload = mds->event.buf; - mbox_cmd = (struct cxl_mbox_cmd) { - .opcode = CXL_MBOX_OP_GET_EVENT_RECORD, - .payload_in = &log_type, - .size_in = sizeof(log_type), - .payload_out = payload, - .min_out = struct_size(payload, records, 0), - }; - do { int rc, i; - - mbox_cmd.size_out = mds->payload_size; + struct cxl_mbox_cmd mbox_cmd = (struct cxl_mbox_cmd) { + .opcode = CXL_MBOX_OP_GET_EVENT_RECORD, + .payload_in = &log_type, + .size_in = sizeof(log_type), + .payload_out = payload, + .size_out = mds->payload_size, + .min_out = struct_size(payload, records, 0), + }; rc = cxl_internal_send_cmd(mds, &mbox_cmd); if (rc) { @@ -1297,7 +1294,6 @@ int cxl_mem_get_poison(struct cxl_memdev *cxlmd, u64 offset, u64 len, struct cxl_memdev_state *mds = to_cxl_memdev_state(cxlmd->cxlds); struct cxl_mbox_poison_out *po; struct cxl_mbox_poison_in pi; - struct cxl_mbox_cmd mbox_cmd; int nr_records = 0; int rc; @@ -1309,16 +1305,16 @@ int 
cxl_mem_get_poison(struct cxl_memdev *cxlmd, u64 offset, u64 len, pi.offset = cpu_to_le64(offset); pi.length = cpu_to_le64(len / CXL_POISON_LEN_MULT); - mbox_cmd = (struct cxl_mbox_cmd) { - .opcode = CXL_MBOX_OP_GET_POISON, - .size_in = sizeof(pi), - .payload_in = &pi, - .size_out = mds->payload_size, - .payload_out = po, - .min_out = struct_size(po, record, 0), - }; - do { + struct cxl_mbox_cmd mbox_cmd = (struct cxl_mbox_cmd){ + .opcode = CXL_MBOX_OP_GET_POISON, + .size_in = sizeof(pi), + .payload_in = &pi, + .size_out = mds->payload_size, + .payload_out = po, + .min_out = struct_size(po, record, 0), + }; + rc = cxl_internal_send_cmd(mds, &mbox_cmd); if (rc) break; From e70316d17f6ab49a6038ffd115397fd68f8c7be8 Mon Sep 17 00:00:00 2001 From: Tom Lendacky Date: Mon, 22 Apr 2024 08:39:21 -0500 Subject: [PATCH 177/313] x86/sev: Check for MWAITX and MONITORX opcodes in the #VC handler The MWAITX and MONITORX instructions generate the same #VC error code as the MWAIT and MONITOR instructions, respectively. Update the #VC handler opcode checking to also support the MWAITX and MONITORX opcodes. 
Fixes: e3ef461af35a ("x86/sev: Harden #VC instruction emulation somewhat") Signed-off-by: Tom Lendacky Signed-off-by: Borislav Petkov (AMD) Link: https://lore.kernel.org/r/453d5a7cfb4b9fe818b6fb67f93ae25468bc9e23.1713793161.git.thomas.lendacky@amd.com --- arch/x86/kernel/sev-shared.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/arch/x86/kernel/sev-shared.c b/arch/x86/kernel/sev-shared.c index 8b04958da5e7..b4f8fa0f722c 100644 --- a/arch/x86/kernel/sev-shared.c +++ b/arch/x86/kernel/sev-shared.c @@ -1203,12 +1203,14 @@ static enum es_result vc_check_opcode_bytes(struct es_em_ctxt *ctxt, break; case SVM_EXIT_MONITOR: - if (opcode == 0x010f && modrm == 0xc8) + /* MONITOR and MONITORX instructions generate the same error code */ + if (opcode == 0x010f && (modrm == 0xc8 || modrm == 0xfa)) return ES_OK; break; case SVM_EXIT_MWAIT: - if (opcode == 0x010f && modrm == 0xc9) + /* MWAIT and MWAITX instructions generate the same error code */ + if (opcode == 0x010f && (modrm == 0xc9 || modrm == 0xfb)) return ES_OK; break; From 05d92ee782eeb7b939bdd0189e6efcab9195bf95 Mon Sep 17 00:00:00 2001 From: Jarred White Date: Mon, 8 Apr 2024 22:23:09 -0700 Subject: [PATCH 178/313] ACPI: CPPC: Fix bit_offset shift in MASK_VAL() macro Commit 2f4a4d63a193 ("ACPI: CPPC: Use access_width over bit_width for system memory accesses") neglected to properly wrap the bit_offset shift when it comes to applying the mask. This may cause incorrect values to be read and may cause the cpufreq module not be loaded. [ 11.059751] cpu_capacity: CPU0 missing/invalid highest performance. [ 11.066005] cpu_capacity: partial information: fallback to 1024 for all CPUs Also, corrected the bitmask generation in GENMASK (extra bit being added). Fixes: 2f4a4d63a193 ("ACPI: CPPC: Use access_width over bit_width for system memory accesses") Signed-off-by: Jarred White Cc: 5.15+ # 5.15+ Reviewed-by: Vanshidhar Konda Signed-off-by: Rafael J. 
Wysocki --- drivers/acpi/cppc_acpi.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/acpi/cppc_acpi.c b/drivers/acpi/cppc_acpi.c index 4bfbe55553f4..00a30ca35e78 100644 --- a/drivers/acpi/cppc_acpi.c +++ b/drivers/acpi/cppc_acpi.c @@ -170,8 +170,8 @@ show_cppc_data(cppc_get_perf_ctrs, cppc_perf_fb_ctrs, wraparound_time); #define GET_BIT_WIDTH(reg) ((reg)->access_width ? (8 << ((reg)->access_width - 1)) : (reg)->bit_width) /* Shift and apply the mask for CPC reads/writes */ -#define MASK_VAL(reg, val) ((val) >> ((reg)->bit_offset & \ - GENMASK(((reg)->bit_width), 0))) +#define MASK_VAL(reg, val) (((val) >> (reg)->bit_offset) & \ + GENMASK(((reg)->bit_width) - 1, 0)) static ssize_t show_feedback_ctrs(struct kobject *kobj, struct kobj_attribute *attr, char *buf) From f489c948028b69cea235d9c0de1cc10eeb26a172 Mon Sep 17 00:00:00 2001 From: Vanshidhar Konda Date: Thu, 11 Apr 2024 16:18:44 -0700 Subject: [PATCH 179/313] ACPI: CPPC: Fix access width used for PCC registers commit 2f4a4d63a193 ("ACPI: CPPC: Use access_width over bit_width for system memory accesses") modified cpc_read()/cpc_write() to use access_width to read CPC registers. However, for PCC registers the access width field in the ACPI register macro specifies the PCC subspace ID. For non-zero PCC subspace ID it is incorrectly treated as access width. This causes errors when reading from PCC registers in the CPPC driver. For PCC registers, base the size of read/write on the bit width field. The debug message in cpc_read()/cpc_write() is updated to print relevant information for the address space type used to read the register. Fixes: 2f4a4d63a193 ("ACPI: CPPC: Use access_width over bit_width for system memory accesses") Signed-off-by: Vanshidhar Konda Tested-by: Jarred White Reviewed-by: Jarred White Reviewed-by: Easwar Hariharan Cc: 5.15+ # 5.15+ Signed-off-by: Rafael J. 
Wysocki --- drivers/acpi/cppc_acpi.c | 53 ++++++++++++++++++++++++++++------------ 1 file changed, 37 insertions(+), 16 deletions(-) diff --git a/drivers/acpi/cppc_acpi.c b/drivers/acpi/cppc_acpi.c index 00a30ca35e78..a40b6f3946ef 100644 --- a/drivers/acpi/cppc_acpi.c +++ b/drivers/acpi/cppc_acpi.c @@ -1002,14 +1002,14 @@ static int cpc_read(int cpu, struct cpc_register_resource *reg_res, u64 *val) } *val = 0; + size = GET_BIT_WIDTH(reg); if (reg->space_id == ACPI_ADR_SPACE_SYSTEM_IO) { - u32 width = GET_BIT_WIDTH(reg); u32 val_u32; acpi_status status; status = acpi_os_read_port((acpi_io_address)reg->address, - &val_u32, width); + &val_u32, size); if (ACPI_FAILURE(status)) { pr_debug("Error: Failed to read SystemIO port %llx\n", reg->address); @@ -1018,17 +1018,22 @@ static int cpc_read(int cpu, struct cpc_register_resource *reg_res, u64 *val) *val = val_u32; return 0; - } else if (reg->space_id == ACPI_ADR_SPACE_PLATFORM_COMM && pcc_ss_id >= 0) + } else if (reg->space_id == ACPI_ADR_SPACE_PLATFORM_COMM && pcc_ss_id >= 0) { + /* + * For registers in PCC space, the register size is determined + * by the bit width field; the access size is used to indicate + * the PCC subspace id. 
+ */ + size = reg->bit_width; vaddr = GET_PCC_VADDR(reg->address, pcc_ss_id); + } else if (reg->space_id == ACPI_ADR_SPACE_SYSTEM_MEMORY) vaddr = reg_res->sys_mem_vaddr; else if (reg->space_id == ACPI_ADR_SPACE_FIXED_HARDWARE) return cpc_read_ffh(cpu, reg, val); else return acpi_os_read_memory((acpi_physical_address)reg->address, - val, reg->bit_width); - - size = GET_BIT_WIDTH(reg); + val, size); switch (size) { case 8: @@ -1044,8 +1049,13 @@ static int cpc_read(int cpu, struct cpc_register_resource *reg_res, u64 *val) *val = readq_relaxed(vaddr); break; default: - pr_debug("Error: Cannot read %u bit width from PCC for ss: %d\n", - reg->bit_width, pcc_ss_id); + if (reg->space_id == ACPI_ADR_SPACE_SYSTEM_MEMORY) { + pr_debug("Error: Cannot read %u bit width from system memory: 0x%llx\n", + size, reg->address); + } else if (reg->space_id == ACPI_ADR_SPACE_PLATFORM_COMM) { + pr_debug("Error: Cannot read %u bit width from PCC for ss: %d\n", + size, pcc_ss_id); + } return -EFAULT; } @@ -1063,12 +1073,13 @@ static int cpc_write(int cpu, struct cpc_register_resource *reg_res, u64 val) int pcc_ss_id = per_cpu(cpu_pcc_subspace_idx, cpu); struct cpc_reg *reg = &reg_res->cpc_entry.reg; + size = GET_BIT_WIDTH(reg); + if (reg->space_id == ACPI_ADR_SPACE_SYSTEM_IO) { - u32 width = GET_BIT_WIDTH(reg); acpi_status status; status = acpi_os_write_port((acpi_io_address)reg->address, - (u32)val, width); + (u32)val, size); if (ACPI_FAILURE(status)) { pr_debug("Error: Failed to write SystemIO port %llx\n", reg->address); @@ -1076,17 +1087,22 @@ static int cpc_write(int cpu, struct cpc_register_resource *reg_res, u64 val) } return 0; - } else if (reg->space_id == ACPI_ADR_SPACE_PLATFORM_COMM && pcc_ss_id >= 0) + } else if (reg->space_id == ACPI_ADR_SPACE_PLATFORM_COMM && pcc_ss_id >= 0) { + /* + * For registers in PCC space, the register size is determined + * by the bit width field; the access size is used to indicate + * the PCC subspace id.
+ */ + size = reg->bit_width; vaddr = GET_PCC_VADDR(reg->address, pcc_ss_id); + } else if (reg->space_id == ACPI_ADR_SPACE_SYSTEM_MEMORY) vaddr = reg_res->sys_mem_vaddr; else if (reg->space_id == ACPI_ADR_SPACE_FIXED_HARDWARE) return cpc_write_ffh(cpu, reg, val); else return acpi_os_write_memory((acpi_physical_address)reg->address, - val, reg->bit_width); - - size = GET_BIT_WIDTH(reg); + val, size); if (reg->space_id == ACPI_ADR_SPACE_SYSTEM_MEMORY) val = MASK_VAL(reg, val); @@ -1105,8 +1121,13 @@ static int cpc_write(int cpu, struct cpc_register_resource *reg_res, u64 val) writeq_relaxed(val, vaddr); break; default: - pr_debug("Error: Cannot write %u bit width to PCC for ss: %d\n", - reg->bit_width, pcc_ss_id); + if (reg->space_id == ACPI_ADR_SPACE_SYSTEM_MEMORY) { + pr_debug("Error: Cannot write %u bit width to system memory: 0x%llx\n", + size, reg->address); + } else if (reg->space_id == ACPI_ADR_SPACE_PLATFORM_COMM) { + pr_debug("Error: Cannot write %u bit width to PCC for ss: %d\n", + size, pcc_ss_id); + } ret_val = -EFAULT; break; } From a4e3899065ffa87d49dc20e8c17501edbc189692 Mon Sep 17 00:00:00 2001 From: Matthias Schiffer Date: Wed, 17 Apr 2024 12:37:37 +0200 Subject: [PATCH 180/313] net: dsa: mv88e6xx: fix supported_interfaces setup in mv88e6250_phylink_get_caps() With the recent PHYLINK changes requiring supported_interfaces to be set, MV88E6250 family switches like the 88E6020 fail to probe - cmode is never initialized on these devices, so mv88e6250_phylink_get_caps() does not set any supported_interfaces flags. Instead of a cmode, on 88E6250 we have a read-only port mode value that encodes similar information. There is no reason to bother mapping port mode to the cmodes of other switch models; instead we introduce a mv88e6250_setup_supported_interfaces() that is called directly from mv88e6250_phylink_get_caps(). 
Fixes: de5c9bf40c45 ("net: phylink: require supported_interfaces to be filled") Signed-off-by: Matthias Schiffer Link: https://lore.kernel.org/r/20240417103737.166651-1-matthias.schiffer@ew.tq-group.com Signed-off-by: Jakub Kicinski --- drivers/net/dsa/mv88e6xxx/chip.c | 56 +++++++++++++++++++++++++++++--- drivers/net/dsa/mv88e6xxx/port.h | 23 ++++++++++--- 2 files changed, 71 insertions(+), 8 deletions(-) diff --git a/drivers/net/dsa/mv88e6xxx/chip.c b/drivers/net/dsa/mv88e6xxx/chip.c index c95787cb9086..59b5dd0e2f41 100644 --- a/drivers/net/dsa/mv88e6xxx/chip.c +++ b/drivers/net/dsa/mv88e6xxx/chip.c @@ -566,13 +566,61 @@ static void mv88e6xxx_translate_cmode(u8 cmode, unsigned long *supported) phy_interface_set_rgmii(supported); } +static void +mv88e6250_setup_supported_interfaces(struct mv88e6xxx_chip *chip, int port, + struct phylink_config *config) +{ + unsigned long *supported = config->supported_interfaces; + int err; + u16 reg; + + err = mv88e6xxx_port_read(chip, port, MV88E6XXX_PORT_STS, &reg); + if (err) { + dev_err(chip->dev, "p%d: failed to read port status\n", port); + return; + } + + switch (reg & MV88E6250_PORT_STS_PORTMODE_MASK) { + case MV88E6250_PORT_STS_PORTMODE_MII_10_HALF_PHY: + case MV88E6250_PORT_STS_PORTMODE_MII_100_HALF_PHY: + case MV88E6250_PORT_STS_PORTMODE_MII_10_FULL_PHY: + case MV88E6250_PORT_STS_PORTMODE_MII_100_FULL_PHY: + __set_bit(PHY_INTERFACE_MODE_REVMII, supported); + break; + + case MV88E6250_PORT_STS_PORTMODE_MII_HALF: + case MV88E6250_PORT_STS_PORTMODE_MII_FULL: + __set_bit(PHY_INTERFACE_MODE_MII, supported); + break; + + case MV88E6250_PORT_STS_PORTMODE_MII_DUAL_100_RMII_FULL_PHY: + case MV88E6250_PORT_STS_PORTMODE_MII_200_RMII_FULL_PHY: + case MV88E6250_PORT_STS_PORTMODE_MII_10_100_RMII_HALF_PHY: + case MV88E6250_PORT_STS_PORTMODE_MII_10_100_RMII_FULL_PHY: + __set_bit(PHY_INTERFACE_MODE_REVRMII, supported); + break; + + case MV88E6250_PORT_STS_PORTMODE_MII_DUAL_100_RMII_FULL: + case
MV88E6250_PORT_STS_PORTMODE_MII_10_100_RMII_FULL: + __set_bit(PHY_INTERFACE_MODE_RMII, supported); + break; + + case MV88E6250_PORT_STS_PORTMODE_MII_100_RGMII: + __set_bit(PHY_INTERFACE_MODE_RGMII, supported); + break; + + default: + dev_err(chip->dev, + "p%d: invalid port mode in status register: %04x\n", + port, reg); + } +} + static void mv88e6250_phylink_get_caps(struct mv88e6xxx_chip *chip, int port, struct phylink_config *config) { - unsigned long *supported = config->supported_interfaces; - - /* Translate the default cmode */ - mv88e6xxx_translate_cmode(chip->ports[port].cmode, supported); + if (!mv88e6xxx_phy_is_internal(chip, port)) + mv88e6250_setup_supported_interfaces(chip, port, config); config->mac_capabilities = MAC_SYM_PAUSE | MAC_10 | MAC_100; } diff --git a/drivers/net/dsa/mv88e6xxx/port.h b/drivers/net/dsa/mv88e6xxx/port.h index 86deeb347cbc..ddadeb9bfdae 100644 --- a/drivers/net/dsa/mv88e6xxx/port.h +++ b/drivers/net/dsa/mv88e6xxx/port.h @@ -25,10 +25,25 @@ #define MV88E6250_PORT_STS_PORTMODE_PHY_100_HALF 0x0900 #define MV88E6250_PORT_STS_PORTMODE_PHY_10_FULL 0x0a00 #define MV88E6250_PORT_STS_PORTMODE_PHY_100_FULL 0x0b00 -#define MV88E6250_PORT_STS_PORTMODE_MII_10_HALF 0x0c00 -#define MV88E6250_PORT_STS_PORTMODE_MII_100_HALF 0x0d00 -#define MV88E6250_PORT_STS_PORTMODE_MII_10_FULL 0x0e00 -#define MV88E6250_PORT_STS_PORTMODE_MII_100_FULL 0x0f00 +/* - Modes with PHY suffix use output instead of input clock + * - Modes without RMII or RGMII use MII + * - Modes without speed do not have a fixed speed specified in the manual + * ("DC to x MHz" - variable clock support?) 
+ */ +#define MV88E6250_PORT_STS_PORTMODE_MII_DISABLED 0x0000 +#define MV88E6250_PORT_STS_PORTMODE_MII_100_RGMII 0x0100 +#define MV88E6250_PORT_STS_PORTMODE_MII_DUAL_100_RMII_FULL_PHY 0x0200 +#define MV88E6250_PORT_STS_PORTMODE_MII_200_RMII_FULL_PHY 0x0400 +#define MV88E6250_PORT_STS_PORTMODE_MII_DUAL_100_RMII_FULL 0x0600 +#define MV88E6250_PORT_STS_PORTMODE_MII_10_100_RMII_FULL 0x0700 +#define MV88E6250_PORT_STS_PORTMODE_MII_HALF 0x0800 +#define MV88E6250_PORT_STS_PORTMODE_MII_10_100_RMII_HALF_PHY 0x0900 +#define MV88E6250_PORT_STS_PORTMODE_MII_FULL 0x0a00 +#define MV88E6250_PORT_STS_PORTMODE_MII_10_100_RMII_FULL_PHY 0x0b00 +#define MV88E6250_PORT_STS_PORTMODE_MII_10_HALF_PHY 0x0c00 +#define MV88E6250_PORT_STS_PORTMODE_MII_100_HALF_PHY 0x0d00 +#define MV88E6250_PORT_STS_PORTMODE_MII_10_FULL_PHY 0x0e00 +#define MV88E6250_PORT_STS_PORTMODE_MII_100_FULL_PHY 0x0f00 #define MV88E6XXX_PORT_STS_LINK 0x0800 #define MV88E6XXX_PORT_STS_DUPLEX 0x0400 #define MV88E6XXX_PORT_STS_SPEED_MASK 0x0300 From 98a821546b3919a10a58faa12ebe5e9a55cd638e Mon Sep 17 00:00:00 2001 From: Zhu Lingshan Date: Mon, 15 Apr 2024 19:10:47 +0800 Subject: [PATCH 181/313] vDPA: code clean for vhost_vdpa uapi This commit cleans up the uapi for vhost_vdpa by better naming some of the enums which report blk information to user space, and they are not in any official releases yet. Fixes: 1ac61ddfee93 ("vDPA: report virtio-blk flush info to user space") Fixes: ae1374b7f72c ("vDPA: report virtio-block read-only info to user space") Fixes: 330b8aea6924 ("vDPA: report virtio-block max segment size to user space") Signed-off-by: Zhu Lingshan Message-Id: <20240415111047.1047774-1-lingshan.zhu@intel.com> Signed-off-by: Michael S. 
Tsirkin --- drivers/vdpa/vdpa.c | 6 +++--- include/uapi/linux/vdpa.h | 6 +++--- 2 files changed, 6 insertions(+), 6 deletions(-) diff --git a/drivers/vdpa/vdpa.c b/drivers/vdpa/vdpa.c index b246067e074b..6cb96a1e8b7d 100644 --- a/drivers/vdpa/vdpa.c +++ b/drivers/vdpa/vdpa.c @@ -967,7 +967,7 @@ vdpa_dev_blk_seg_size_config_fill(struct sk_buff *msg, u64 features, val_u32 = __virtio32_to_cpu(true, config->size_max); - return nla_put_u32(msg, VDPA_ATTR_DEV_BLK_CFG_SEG_SIZE, val_u32); + return nla_put_u32(msg, VDPA_ATTR_DEV_BLK_CFG_SIZE_MAX, val_u32); } /* fill the block size*/ @@ -1089,7 +1089,7 @@ static int vdpa_dev_blk_ro_config_fill(struct sk_buff *msg, u64 features) u8 ro; ro = ((features & BIT_ULL(VIRTIO_BLK_F_RO)) == 0) ? 0 : 1; - if (nla_put_u8(msg, VDPA_ATTR_DEV_BLK_CFG_READ_ONLY, ro)) + if (nla_put_u8(msg, VDPA_ATTR_DEV_BLK_READ_ONLY, ro)) return -EMSGSIZE; return 0; @@ -1100,7 +1100,7 @@ static int vdpa_dev_blk_flush_config_fill(struct sk_buff *msg, u64 features) u8 flush; flush = ((features & BIT_ULL(VIRTIO_BLK_F_FLUSH)) == 0) ? 
0 : 1; - if (nla_put_u8(msg, VDPA_ATTR_DEV_BLK_CFG_FLUSH, flush)) + if (nla_put_u8(msg, VDPA_ATTR_DEV_BLK_FLUSH, flush)) return -EMSGSIZE; return 0; diff --git a/include/uapi/linux/vdpa.h b/include/uapi/linux/vdpa.h index 43c51698195c..842bf1201ac4 100644 --- a/include/uapi/linux/vdpa.h +++ b/include/uapi/linux/vdpa.h @@ -57,7 +57,7 @@ enum vdpa_attr { VDPA_ATTR_DEV_FEATURES, /* u64 */ VDPA_ATTR_DEV_BLK_CFG_CAPACITY, /* u64 */ - VDPA_ATTR_DEV_BLK_CFG_SEG_SIZE, /* u32 */ + VDPA_ATTR_DEV_BLK_CFG_SIZE_MAX, /* u32 */ VDPA_ATTR_DEV_BLK_CFG_BLK_SIZE, /* u32 */ VDPA_ATTR_DEV_BLK_CFG_SEG_MAX, /* u32 */ VDPA_ATTR_DEV_BLK_CFG_NUM_QUEUES, /* u16 */ @@ -70,8 +70,8 @@ enum vdpa_attr { VDPA_ATTR_DEV_BLK_CFG_DISCARD_SEC_ALIGN,/* u32 */ VDPA_ATTR_DEV_BLK_CFG_MAX_WRITE_ZEROES_SEC, /* u32 */ VDPA_ATTR_DEV_BLK_CFG_MAX_WRITE_ZEROES_SEG, /* u32 */ - VDPA_ATTR_DEV_BLK_CFG_READ_ONLY, /* u8 */ - VDPA_ATTR_DEV_BLK_CFG_FLUSH, /* u8 */ + VDPA_ATTR_DEV_BLK_READ_ONLY, /* u8 */ + VDPA_ATTR_DEV_BLK_FLUSH, /* u8 */ /* new attributes must be added above here */ VDPA_ATTR_MAX, From 97ec32b583bb08f72146eee2c1a1918e05760f8c Mon Sep 17 00:00:00 2001 From: David Christensen Date: Thu, 18 Apr 2024 15:55:17 -0400 Subject: [PATCH 182/313] MAINTAINERS: eth: mark IBM eHEA as an Orphan Current maintainer Douglas Miller has left IBM and no replacement has been assigned for the driver. The eHEA hardware was last used on IBM POWER7 systems, the last of which reached end-of-support at the end of 2020. 
Signed-off-by: David Christensen Reviewed-by: Pradeep Satyanarayana Acked-by: Michael Ellerman (powerpc) Link: https://lore.kernel.org/r/20240418195517.528577-1-drc@linux.ibm.com Signed-off-by: Jakub Kicinski --- MAINTAINERS | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/MAINTAINERS b/MAINTAINERS index c23fda1aa1f0..2d5acd6d98c4 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -7829,9 +7829,8 @@ W: http://aeschi.ch.eu.org/efs/ F: fs/efs/ EHEA (IBM pSeries eHEA 10Gb ethernet adapter) DRIVER -M: Douglas Miller L: netdev@vger.kernel.org -S: Maintained +S: Orphan F: drivers/net/ethernet/ibm/ehea/ ELM327 CAN NETWORK DRIVER From 6bea4f03c6a4e973ef369e15aac88f37981db49e Mon Sep 17 00:00:00 2001 From: Paul Geurts Date: Thu, 18 Apr 2024 21:25:38 +0200 Subject: [PATCH 183/313] NFC: trf7970a: disable all regulators on removal During module probe, regulator 'vin' and 'vdd-io' are used and enabled, but the vdd-io regulator overwrites the 'vin' regulator pointer. During remove, only the vdd-io is disabled, as the vin regulator pointer is not available anymore. When regulator_put() is called during resource cleanup a kernel warning is given, as the regulator is still enabled. Store the two regulators in separate pointers and disable both the regulators on module remove. 
Fixes: 49d22c70aaf0 ("NFC: trf7970a: Add device tree option of 1.8 Volt IO voltage") Signed-off-by: Paul Geurts Reviewed-by: Krzysztof Kozlowski Link: https://lore.kernel.org/r/DB7PR09MB26847A4EBF88D9EDFEB1DA0F950E2@DB7PR09MB2684.eurprd09.prod.outlook.com Signed-off-by: Jakub Kicinski --- drivers/nfc/trf7970a.c | 42 +++++++++++++++++++++++------------------- 1 file changed, 23 insertions(+), 19 deletions(-) diff --git a/drivers/nfc/trf7970a.c b/drivers/nfc/trf7970a.c index 7eb17f46a815..9e1a34e23af2 100644 --- a/drivers/nfc/trf7970a.c +++ b/drivers/nfc/trf7970a.c @@ -424,7 +424,8 @@ struct trf7970a { enum trf7970a_state state; struct device *dev; struct spi_device *spi; - struct regulator *regulator; + struct regulator *vin_regulator; + struct regulator *vddio_regulator; struct nfc_digital_dev *ddev; u32 quirks; bool is_initiator; @@ -1883,7 +1884,7 @@ static int trf7970a_power_up(struct trf7970a *trf) if (trf->state != TRF7970A_ST_PWR_OFF) return 0; - ret = regulator_enable(trf->regulator); + ret = regulator_enable(trf->vin_regulator); if (ret) { dev_err(trf->dev, "%s - Can't enable VIN: %d\n", __func__, ret); return ret; @@ -1926,7 +1927,7 @@ static int trf7970a_power_down(struct trf7970a *trf) if (trf->en2_gpiod && !(trf->quirks & TRF7970A_QUIRK_EN2_MUST_STAY_LOW)) gpiod_set_value_cansleep(trf->en2_gpiod, 0); - ret = regulator_disable(trf->regulator); + ret = regulator_disable(trf->vin_regulator); if (ret) dev_err(trf->dev, "%s - Can't disable VIN: %d\n", __func__, ret); @@ -2065,37 +2066,37 @@ static int trf7970a_probe(struct spi_device *spi) mutex_init(&trf->lock); INIT_DELAYED_WORK(&trf->timeout_work, trf7970a_timeout_work_handler); - trf->regulator = devm_regulator_get(&spi->dev, "vin"); - if (IS_ERR(trf->regulator)) { - ret = PTR_ERR(trf->regulator); + trf->vin_regulator = devm_regulator_get(&spi->dev, "vin"); + if (IS_ERR(trf->vin_regulator)) { + ret = PTR_ERR(trf->vin_regulator); dev_err(trf->dev, "Can't get VIN regulator: %d\n", ret); goto 
err_destroy_lock; } - ret = regulator_enable(trf->regulator); + ret = regulator_enable(trf->vin_regulator); if (ret) { dev_err(trf->dev, "Can't enable VIN: %d\n", ret); goto err_destroy_lock; } - uvolts = regulator_get_voltage(trf->regulator); + uvolts = regulator_get_voltage(trf->vin_regulator); if (uvolts > 4000000) trf->chip_status_ctrl = TRF7970A_CHIP_STATUS_VRS5_3; - trf->regulator = devm_regulator_get(&spi->dev, "vdd-io"); - if (IS_ERR(trf->regulator)) { - ret = PTR_ERR(trf->regulator); + trf->vddio_regulator = devm_regulator_get(&spi->dev, "vdd-io"); + if (IS_ERR(trf->vddio_regulator)) { + ret = PTR_ERR(trf->vddio_regulator); dev_err(trf->dev, "Can't get VDD_IO regulator: %d\n", ret); - goto err_destroy_lock; + goto err_disable_vin_regulator; } - ret = regulator_enable(trf->regulator); + ret = regulator_enable(trf->vddio_regulator); if (ret) { dev_err(trf->dev, "Can't enable VDD_IO: %d\n", ret); - goto err_destroy_lock; + goto err_disable_vin_regulator; } - if (regulator_get_voltage(trf->regulator) == 1800000) { + if (regulator_get_voltage(trf->vddio_regulator) == 1800000) { trf->io_ctrl = TRF7970A_REG_IO_CTRL_IO_LOW; dev_dbg(trf->dev, "trf7970a config vdd_io to 1.8V\n"); } @@ -2108,7 +2109,7 @@ static int trf7970a_probe(struct spi_device *spi) if (!trf->ddev) { dev_err(trf->dev, "Can't allocate NFC digital device\n"); ret = -ENOMEM; - goto err_disable_regulator; + goto err_disable_vddio_regulator; } nfc_digital_set_parent_dev(trf->ddev, trf->dev); @@ -2137,8 +2138,10 @@ static int trf7970a_probe(struct spi_device *spi) trf7970a_shutdown(trf); err_free_ddev: nfc_digital_free_device(trf->ddev); -err_disable_regulator: - regulator_disable(trf->regulator); +err_disable_vddio_regulator: + regulator_disable(trf->vddio_regulator); +err_disable_vin_regulator: + regulator_disable(trf->vin_regulator); err_destroy_lock: mutex_destroy(&trf->lock); return ret; @@ -2157,7 +2160,8 @@ static void trf7970a_remove(struct spi_device *spi) 
nfc_digital_unregister_device(trf->ddev); nfc_digital_free_device(trf->ddev); - regulator_disable(trf->regulator); + regulator_disable(trf->vddio_regulator); + regulator_disable(trf->vin_regulator); mutex_destroy(&trf->lock); } From df70d04d56975f527b9c965322cf56e245909071 Mon Sep 17 00:00:00 2001 From: Wedson Almeida Filho Date: Thu, 28 Mar 2024 16:54:53 -0300 Subject: [PATCH 184/313] rust: phy: implement `Send` for `Registration` In preparation for requiring `Send` for `Module` implementations in the next patch. Cc: FUJITA Tomonori Cc: Trevor Gross Cc: netdev@vger.kernel.org Signed-off-by: Wedson Almeida Filho Reviewed-by: Alice Ryhl Link: https://lore.kernel.org/r/20240328195457.225001-2-wedsonaf@gmail.com Signed-off-by: Miguel Ojeda --- rust/kernel/net/phy.rs | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/rust/kernel/net/phy.rs b/rust/kernel/net/phy.rs index 96e09c6e8530..265d0e1c1371 100644 --- a/rust/kernel/net/phy.rs +++ b/rust/kernel/net/phy.rs @@ -640,6 +640,10 @@ pub struct Registration { drivers: Pin<&'static mut [DriverVTable]>, } +// SAFETY: The only action allowed in a `Registration` instance is dropping it, which is safe to do +// from any thread because `phy_drivers_unregister` can be called from any thread context. +unsafe impl Send for Registration {} + impl Registration { /// Registers a PHY driver. pub fn register( From 323617f649c0966ad5e741e47e27e06d3a680d8f Mon Sep 17 00:00:00 2001 From: Wedson Almeida Filho Date: Thu, 28 Mar 2024 16:54:54 -0300 Subject: [PATCH 185/313] rust: kernel: require `Send` for `Module` implementations The thread that calls the module initialisation code when a module is loaded is not guaranteed [in fact, it is unlikely] to be the same one that calls the module cleanup code on module unload, therefore, `Module` implementations must be `Send` to account for them moving from one thread to another implicitly. 
Signed-off-by: Wedson Almeida Filho Reviewed-by: Alice Ryhl Reviewed-by: Benno Lossin Cc: stable@vger.kernel.org # 6.8.x: df70d04d5697: rust: phy: implement `Send` for `Registration` Cc: stable@vger.kernel.org Fixes: 247b365dc8dc ("rust: add `kernel` crate") Link: https://lore.kernel.org/r/20240328195457.225001-3-wedsonaf@gmail.com Signed-off-by: Miguel Ojeda --- rust/kernel/lib.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/rust/kernel/lib.rs b/rust/kernel/lib.rs index be68d5e567b1..6858e2f8a3ed 100644 --- a/rust/kernel/lib.rs +++ b/rust/kernel/lib.rs @@ -65,7 +65,7 @@ /// The top level entrypoint to implementing a kernel module. /// /// For any teardown or cleanup operations, your type may implement [`Drop`]. -pub trait Module: Sized + Sync { +pub trait Module: Sized + Sync + Send { /// Called at module initialization time. /// /// Use this method to perform whatever setup or registration your module From 50cfe93b01475ba36878b65d35d812e1bb48ac71 Mon Sep 17 00:00:00 2001 From: Miguel Ojeda Date: Mon, 22 Apr 2024 11:12:15 +0200 Subject: [PATCH 186/313] kbuild: rust: remove unneeded `@rustc_cfg` to avoid ICE When KUnit tests are enabled, under very big kernel configurations (e.g. `allyesconfig`), we can trigger a `rustdoc` ICE [1]: RUSTDOC TK rust/kernel/lib.rs error: the compiler unexpectedly panicked. this is a bug. The reason is that this build step has a duplicated `@rustc_cfg` argument, which contains the kernel configuration, and thus a lot of arguments. The factor 2 happens to be enough to reach the ICE. Thus remove the unneeded `@rustc_cfg`. By doing so, we clean up the command and workaround the ICE. The ICE has been fixed in the upcoming Rust 1.79 [2]. 
Cc: stable@vger.kernel.org Fixes: a66d733da801 ("rust: support running Rust documentation tests as KUnit ones") Link: https://github.com/rust-lang/rust/issues/122722 [1] Link: https://github.com/rust-lang/rust/pull/122840 [2] Reviewed-by: Alice Ryhl Link: https://lore.kernel.org/r/20240422091215.526688-1-ojeda@kernel.org Signed-off-by: Miguel Ojeda --- rust/Makefile | 1 - 1 file changed, 1 deletion(-) diff --git a/rust/Makefile b/rust/Makefile index 846e6ab9d5a9..86a125c4243c 100644 --- a/rust/Makefile +++ b/rust/Makefile @@ -175,7 +175,6 @@ quiet_cmd_rustdoc_test_kernel = RUSTDOC TK $< mkdir -p $(objtree)/$(obj)/test/doctests/kernel; \ OBJTREE=$(abspath $(objtree)) \ $(RUSTDOC) --test $(rust_flags) \ - @$(objtree)/include/generated/rustc_cfg \ -L$(objtree)/$(obj) --extern alloc --extern kernel \ --extern build_error --extern macros \ --extern bindings --extern uapi \ From d55f90e9b243faa5bcd5c8a323a8f43040500106 Mon Sep 17 00:00:00 2001 From: Christian Brauner Date: Tue, 16 Apr 2024 12:08:51 +0200 Subject: [PATCH 187/313] ntfs3: enforce read-only when used as legacy ntfs driver Ensure that ntfs3 is mounted read-only when it is used to provide the legacy ntfs driver. 
Signed-off-by: Christian Brauner --- fs/ntfs3/ntfs_fs.h | 2 ++ fs/ntfs3/super.c | 36 ++++++++++++++++++++++++++++++++---- 2 files changed, 34 insertions(+), 4 deletions(-) diff --git a/fs/ntfs3/ntfs_fs.h b/fs/ntfs3/ntfs_fs.h index 79356fd29a14..184c8bc76b92 100644 --- a/fs/ntfs3/ntfs_fs.h +++ b/fs/ntfs3/ntfs_fs.h @@ -1154,4 +1154,6 @@ static inline void le64_sub_cpu(__le64 *var, u64 val) *var = cpu_to_le64(le64_to_cpu(*var) - val); } +bool is_legacy_ntfs(struct super_block *sb); + #endif /* _LINUX_NTFS3_NTFS_FS_H */ diff --git a/fs/ntfs3/super.c b/fs/ntfs3/super.c index 8d2e51bae2cb..b26d95a8d327 100644 --- a/fs/ntfs3/super.c +++ b/fs/ntfs3/super.c @@ -408,6 +408,12 @@ static int ntfs_fs_reconfigure(struct fs_context *fc) struct ntfs_mount_options *new_opts = fc->fs_private; int ro_rw; + /* If ntfs3 is used as legacy ntfs enforce read-only mode. */ + if (is_legacy_ntfs(sb)) { + fc->sb_flags |= SB_RDONLY; + goto out; + } + ro_rw = sb_rdonly(sb) && !(fc->sb_flags & SB_RDONLY); if (ro_rw && (sbi->flags & NTFS_FLAGS_NEED_REPLAY)) { errorf(fc, @@ -427,8 +433,6 @@ static int ntfs_fs_reconfigure(struct fs_context *fc) fc, "ntfs3: Cannot use different iocharset when remounting!"); - sync_filesystem(sb); - if (ro_rw && (sbi->volume.flags & VOLUME_FLAG_DIRTY) && !new_opts->force) { errorf(fc, @@ -436,6 +440,8 @@ static int ntfs_fs_reconfigure(struct fs_context *fc) return -EINVAL; } +out: + sync_filesystem(sb); swap(sbi->options, fc->fs_private); return 0; @@ -1613,6 +1619,8 @@ static int ntfs_fill_super(struct super_block *sb, struct fs_context *fc) } #endif + if (is_legacy_ntfs(sb)) + sb->s_flags |= SB_RDONLY; return 0; put_inode_out: @@ -1730,7 +1738,7 @@ static const struct fs_context_operations ntfs_context_ops = { * This will called when mount/remount. We will first initialize * options so that if remount we can use just that. 
*/ -static int ntfs_init_fs_context(struct fs_context *fc) +static int __ntfs_init_fs_context(struct fs_context *fc) { struct ntfs_mount_options *opts; struct ntfs_sb_info *sbi; @@ -1778,6 +1786,11 @@ static int ntfs_init_fs_context(struct fs_context *fc) return -ENOMEM; } +static int ntfs_init_fs_context(struct fs_context *fc) +{ + return __ntfs_init_fs_context(fc); +} + static void ntfs3_kill_sb(struct super_block *sb) { struct ntfs_sb_info *sbi = sb->s_fs_info; @@ -1800,10 +1813,20 @@ static struct file_system_type ntfs_fs_type = { }; #if IS_ENABLED(CONFIG_NTFS_FS) +static int ntfs_legacy_init_fs_context(struct fs_context *fc) +{ + int ret; + + ret = __ntfs_init_fs_context(fc); + /* If ntfs3 is used as legacy ntfs enforce read-only mode. */ + fc->sb_flags |= SB_RDONLY; + return ret; +} + static struct file_system_type ntfs_legacy_fs_type = { .owner = THIS_MODULE, .name = "ntfs", - .init_fs_context = ntfs_init_fs_context, + .init_fs_context = ntfs_legacy_init_fs_context, .parameters = ntfs_fs_parameters, .kill_sb = ntfs3_kill_sb, .fs_flags = FS_REQUIRES_DEV | FS_ALLOW_IDMAP, @@ -1821,9 +1844,14 @@ static inline void unregister_as_ntfs_legacy(void) { unregister_filesystem(&ntfs_legacy_fs_type); } +bool is_legacy_ntfs(struct super_block *sb) +{ + return sb->s_type == &ntfs_legacy_fs_type; +} #else static inline void register_as_ntfs_legacy(void) {} static inline void unregister_as_ntfs_legacy(void) {} +bool is_legacy_ntfs(struct super_block *sb) { return false; } #endif From 9b872cc50daa7d1cb07d5bfd27ee9fa3f4e7eda9 Mon Sep 17 00:00:00 2001 From: Christian Brauner Date: Tue, 16 Apr 2024 12:20:50 +0200 Subject: [PATCH 188/313] ntfs3: add legacy ntfs file operations To ensure that ioctl()s can't be used to circumvent write restrictions. 
Signed-off-by: Christian Brauner --- fs/ntfs3/dir.c | 7 +++++++ fs/ntfs3/file.c | 8 ++++++++ fs/ntfs3/inode.c | 20 ++++++++++++++++---- fs/ntfs3/ntfs_fs.h | 2 ++ 4 files changed, 33 insertions(+), 4 deletions(-) diff --git a/fs/ntfs3/dir.c b/fs/ntfs3/dir.c index 5cf3d9decf64..263635199b60 100644 --- a/fs/ntfs3/dir.c +++ b/fs/ntfs3/dir.c @@ -616,4 +616,11 @@ const struct file_operations ntfs_dir_operations = { .compat_ioctl = ntfs_compat_ioctl, #endif }; + +const struct file_operations ntfs_legacy_dir_operations = { + .llseek = generic_file_llseek, + .read = generic_read_dir, + .iterate_shared = ntfs_readdir, + .open = ntfs_file_open, +}; // clang-format on diff --git a/fs/ntfs3/file.c b/fs/ntfs3/file.c index 5418662c80d8..b73969e05052 100644 --- a/fs/ntfs3/file.c +++ b/fs/ntfs3/file.c @@ -1236,4 +1236,12 @@ const struct file_operations ntfs_file_operations = { .fallocate = ntfs_fallocate, .release = ntfs_file_release, }; + +const struct file_operations ntfs_legacy_file_operations = { + .llseek = generic_file_llseek, + .read_iter = ntfs_file_read_iter, + .splice_read = ntfs_file_splice_read, + .open = ntfs_file_open, + .release = ntfs_file_release, +}; // clang-format on diff --git a/fs/ntfs3/inode.c b/fs/ntfs3/inode.c index eb7a8c9fba01..d273eda1cf45 100644 --- a/fs/ntfs3/inode.c +++ b/fs/ntfs3/inode.c @@ -440,7 +440,10 @@ static struct inode *ntfs_read_mft(struct inode *inode, * Usually a hard links to directories are disabled. 
*/ inode->i_op = &ntfs_dir_inode_operations; - inode->i_fop = &ntfs_dir_operations; + if (is_legacy_ntfs(inode->i_sb)) + inode->i_fop = &ntfs_legacy_dir_operations; + else + inode->i_fop = &ntfs_dir_operations; ni->i_valid = 0; } else if (S_ISLNK(mode)) { ni->std_fa &= ~FILE_ATTRIBUTE_DIRECTORY; @@ -450,7 +453,10 @@ static struct inode *ntfs_read_mft(struct inode *inode, } else if (S_ISREG(mode)) { ni->std_fa &= ~FILE_ATTRIBUTE_DIRECTORY; inode->i_op = &ntfs_file_inode_operations; - inode->i_fop = &ntfs_file_operations; + if (is_legacy_ntfs(inode->i_sb)) + inode->i_fop = &ntfs_legacy_file_operations; + else + inode->i_fop = &ntfs_file_operations; inode->i_mapping->a_ops = is_compressed(ni) ? &ntfs_aops_cmpr : &ntfs_aops; if (ino != MFT_REC_MFT) @@ -1614,7 +1620,10 @@ struct inode *ntfs_create_inode(struct mnt_idmap *idmap, struct inode *dir, if (S_ISDIR(mode)) { inode->i_op = &ntfs_dir_inode_operations; - inode->i_fop = &ntfs_dir_operations; + if (is_legacy_ntfs(inode->i_sb)) + inode->i_fop = &ntfs_legacy_dir_operations; + else + inode->i_fop = &ntfs_dir_operations; } else if (S_ISLNK(mode)) { inode->i_op = &ntfs_link_inode_operations; inode->i_fop = NULL; @@ -1623,7 +1632,10 @@ struct inode *ntfs_create_inode(struct mnt_idmap *idmap, struct inode *dir, inode_nohighmem(inode); } else if (S_ISREG(mode)) { inode->i_op = &ntfs_file_inode_operations; - inode->i_fop = &ntfs_file_operations; + if (is_legacy_ntfs(inode->i_sb)) + inode->i_fop = &ntfs_legacy_file_operations; + else + inode->i_fop = &ntfs_file_operations; inode->i_mapping->a_ops = is_compressed(ni) ? 
&ntfs_aops_cmpr : &ntfs_aops; init_rwsem(&ni->file.run_lock); diff --git a/fs/ntfs3/ntfs_fs.h b/fs/ntfs3/ntfs_fs.h index 184c8bc76b92..5f4d288c6adf 100644 --- a/fs/ntfs3/ntfs_fs.h +++ b/fs/ntfs3/ntfs_fs.h @@ -493,6 +493,7 @@ struct inode *dir_search_u(struct inode *dir, const struct cpu_str *uni, struct ntfs_fnd *fnd); bool dir_is_empty(struct inode *dir); extern const struct file_operations ntfs_dir_operations; +extern const struct file_operations ntfs_legacy_dir_operations; /* Globals from file.c */ int ntfs_getattr(struct mnt_idmap *idmap, const struct path *path, @@ -507,6 +508,7 @@ long ntfs_compat_ioctl(struct file *filp, u32 cmd, unsigned long arg); extern const struct inode_operations ntfs_special_inode_operations; extern const struct inode_operations ntfs_file_inode_operations; extern const struct file_operations ntfs_file_operations; +extern const struct file_operations ntfs_legacy_file_operations; /* Globals from frecord.c */ void ni_remove_mi(struct ntfs_inode *ni, struct mft_inode *mi); From 619606a7b8d5e54b71578ecc988d3f8e1896bbc6 Mon Sep 17 00:00:00 2001 From: David Howells Date: Wed, 17 Apr 2024 09:47:19 +0100 Subject: [PATCH 189/313] netfs: Fix writethrough-mode error handling Fix the error return in netfs_perform_write() acting in writethrough-mode to return any cached error in the case that netfs_end_writethrough() returns 0. This can affect the use of O_SYNC/O_DSYNC/RWF_SYNC/RWF_DSYNC in 9p and afs. 
Fixes: 41d8e7673a77 ("netfs: Implement a write-through caching option") Signed-off-by: David Howells Link: https://lore.kernel.org/r/6736.1713343639@warthog.procyon.org.uk Reviewed-by: Jeff Layton cc: Eric Van Hensbergen cc: Latchesar Ionkov cc: Dominique Martinet cc: Christian Schoenebeck cc: Marc Dionne cc: netfs@lists.linux.dev cc: linux-fsdevel@vger.kernel.org cc: v9fs@lists.linux.dev cc: linux-afs@lists.infradead.org Signed-off-by: Christian Brauner --- fs/netfs/buffered_write.c | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) diff --git a/fs/netfs/buffered_write.c b/fs/netfs/buffered_write.c index 9a0d32e4b422..8f13ca8fbc74 100644 --- a/fs/netfs/buffered_write.c +++ b/fs/netfs/buffered_write.c @@ -164,7 +164,7 @@ ssize_t netfs_perform_write(struct kiocb *iocb, struct iov_iter *iter, enum netfs_how_to_modify howto; enum netfs_folio_trace trace; unsigned int bdp_flags = (iocb->ki_flags & IOCB_SYNC) ? 0: BDP_ASYNC; - ssize_t written = 0, ret; + ssize_t written = 0, ret, ret2; loff_t i_size, pos = iocb->ki_pos, from, to; size_t max_chunk = PAGE_SIZE << MAX_PAGECACHE_ORDER; bool maybe_trouble = false; @@ -395,10 +395,12 @@ ssize_t netfs_perform_write(struct kiocb *iocb, struct iov_iter *iter, out: if (unlikely(wreq)) { - ret = netfs_end_writethrough(wreq, iocb); + ret2 = netfs_end_writethrough(wreq, iocb); wbc_detach_inode(&wbc); - if (ret == -EIOCBQUEUED) - return ret; + if (ret2 == -EIOCBQUEUED) + return ret2; + if (ret == 0) + ret = ret2; } iocb->ki_pos += written; From f42c97027fb75776e2e9358d16bf4a99aeb04cf2 Mon Sep 17 00:00:00 2001 From: Daniel Okazaki Date: Mon, 22 Apr 2024 17:43:36 +0000 Subject: [PATCH 190/313] eeprom: at24: fix memory corruption race condition If the eeprom is not accessible, an nvmem device will be registered, the read will fail, and the device will be torn down. If another driver accesses the nvmem device after the teardown, it will reference invalid memory. Move the failure point before registering the nvmem device. 
Signed-off-by: Daniel Okazaki Fixes: b20eb4c1f026 ("eeprom: at24: drop unnecessary label") Cc: stable@vger.kernel.org Link: https://lore.kernel.org/r/20240422174337.2487142-1-dtokazaki@google.com Signed-off-by: Bartosz Golaszewski --- drivers/misc/eeprom/at24.c | 18 +++++++++--------- 1 file changed, 9 insertions(+), 9 deletions(-) diff --git a/drivers/misc/eeprom/at24.c b/drivers/misc/eeprom/at24.c index 572333ead5fb..4bd4f32bcdab 100644 --- a/drivers/misc/eeprom/at24.c +++ b/drivers/misc/eeprom/at24.c @@ -758,15 +758,6 @@ static int at24_probe(struct i2c_client *client) } pm_runtime_enable(dev); - at24->nvmem = devm_nvmem_register(dev, &nvmem_config); - if (IS_ERR(at24->nvmem)) { - pm_runtime_disable(dev); - if (!pm_runtime_status_suspended(dev)) - regulator_disable(at24->vcc_reg); - return dev_err_probe(dev, PTR_ERR(at24->nvmem), - "failed to register nvmem\n"); - } - /* * Perform a one-byte test read to verify that the chip is functional, * unless powering on the device is to be avoided during probe (i.e. @@ -782,6 +773,15 @@ static int at24_probe(struct i2c_client *client) } } + at24->nvmem = devm_nvmem_register(dev, &nvmem_config); + if (IS_ERR(at24->nvmem)) { + pm_runtime_disable(dev); + if (!pm_runtime_status_suspended(dev)) + regulator_disable(at24->vcc_reg); + return dev_err_probe(dev, PTR_ERR(at24->nvmem), + "failed to register nvmem\n"); + } + /* If this a SPD EEPROM, probe for DDR3 thermal sensor */ if (cdata == &at24_data_spd) at24_probe_temp_sensor(client); From 467324bcfe1a31ec65d0cf4aa59421d6b7a7d52b Mon Sep 17 00:00:00 2001 From: Duoming Zhou Date: Fri, 19 Apr 2024 10:04:56 +0800 Subject: [PATCH 191/313] ax25: Fix netdev refcount issue The dev_tracker is added to ax25_cb in ax25_bind(). When the ax25 device is detaching, the dev_tracker of ax25_cb should be deallocated in ax25_kill_by_device() instead of the dev_tracker of ax25_dev. The log reported by ref_tracker is shown below: [ 80.884935] ref_tracker: reference already released. 
[ 80.885150] ref_tracker: allocated in: [ 80.885349] ax25_dev_device_up+0x105/0x540 [ 80.885730] ax25_device_event+0xa4/0x420 [ 80.885730] notifier_call_chain+0xc9/0x1e0 [ 80.885730] __dev_notify_flags+0x138/0x280 [ 80.885730] dev_change_flags+0xd7/0x180 [ 80.885730] dev_ifsioc+0x6a9/0xa30 [ 80.885730] dev_ioctl+0x4d8/0xd90 [ 80.885730] sock_do_ioctl+0x1c2/0x2d0 [ 80.885730] sock_ioctl+0x38b/0x4f0 [ 80.885730] __se_sys_ioctl+0xad/0xf0 [ 80.885730] do_syscall_64+0xc4/0x1b0 [ 80.885730] entry_SYSCALL_64_after_hwframe+0x67/0x6f [ 80.885730] ref_tracker: freed in: [ 80.885730] ax25_device_event+0x272/0x420 [ 80.885730] notifier_call_chain+0xc9/0x1e0 [ 80.885730] dev_close_many+0x272/0x370 [ 80.885730] unregister_netdevice_many_notify+0x3b5/0x1180 [ 80.885730] unregister_netdev+0xcf/0x120 [ 80.885730] sixpack_close+0x11f/0x1b0 [ 80.885730] tty_ldisc_kill+0xcb/0x190 [ 80.885730] tty_ldisc_hangup+0x338/0x3d0 [ 80.885730] __tty_hangup+0x504/0x740 [ 80.885730] tty_release+0x46e/0xd80 [ 80.885730] __fput+0x37f/0x770 [ 80.885730] __x64_sys_close+0x7b/0xb0 [ 80.885730] do_syscall_64+0xc4/0x1b0 [ 80.885730] entry_SYSCALL_64_after_hwframe+0x67/0x6f [ 80.893739] ------------[ cut here ]------------ [ 80.894030] WARNING: CPU: 2 PID: 140 at lib/ref_tracker.c:255 ref_tracker_free+0x47b/0x6b0 [ 80.894297] Modules linked in: [ 80.894929] CPU: 2 PID: 140 Comm: ax25_conn_rel_6 Not tainted 6.9.0-rc4-g8cd26fd90c1a #11 [ 80.895190] Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS rel-1.14.0-0-g155821a1990b-prebuilt.qem4 [ 80.895514] RIP: 0010:ref_tracker_free+0x47b/0x6b0 [ 80.895808] Code: 83 c5 18 4c 89 eb 48 c1 eb 03 8a 04 13 84 c0 0f 85 df 01 00 00 41 83 7d 00 00 75 4b 4c 89 ff 9 [ 80.896171] RSP: 0018:ffff888009edf8c0 EFLAGS: 00000286 [ 80.896339] RAX: 1ffff1100141ac00 RBX: 1ffff1100149463b RCX: dffffc0000000000 [ 80.896502] RDX: 0000000000000001 RSI: 0000000000000246 RDI: ffff88800a0d6518 [ 80.896925] RBP: ffff888009edf9b0 R08: ffff88806d3288d3 R09: 1ffff1100da6511a [ 
80.897212] R10: dffffc0000000000 R11: ffffed100da6511b R12: ffff88800a4a31d4 [ 80.897859] R13: ffff88800a4a31d8 R14: dffffc0000000000 R15: ffff88800a0d6518 [ 80.898279] FS: 00007fd88b7fe700(0000) GS:ffff88806d300000(0000) knlGS:0000000000000000 [ 80.899436] CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 [ 80.900181] CR2: 00007fd88c001d48 CR3: 000000000993e000 CR4: 00000000000006f0 ... [ 80.935774] ref_tracker: sp%d@000000000bb9df3d has 1/1 users at [ 80.935774] ax25_bind+0x424/0x4e0 [ 80.935774] __sys_bind+0x1d9/0x270 [ 80.935774] __x64_sys_bind+0x75/0x80 [ 80.935774] do_syscall_64+0xc4/0x1b0 [ 80.935774] entry_SYSCALL_64_after_hwframe+0x67/0x6f Change ax25_dev->dev_tracker to the dev_tracker of ax25_cb in order to mitigate the bug. Fixes: feef318c855a ("ax25: fix UAF bugs of net_device caused by rebinding operation") Signed-off-by: Duoming Zhou Link: https://lore.kernel.org/r/20240419020456.29826-1-duoming@zju.edu.cn Signed-off-by: Paolo Abeni --- net/ax25/af_ax25.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/ax25/af_ax25.c b/net/ax25/af_ax25.c index 558e158c98d0..9169efb2f43a 100644 --- a/net/ax25/af_ax25.c +++ b/net/ax25/af_ax25.c @@ -103,7 +103,7 @@ static void ax25_kill_by_device(struct net_device *dev) s->ax25_dev = NULL; if (sk->sk_socket) { netdev_put(ax25_dev->dev, - &ax25_dev->dev_tracker); + &s->dev_tracker); ax25_dev_put(ax25_dev); } ax25_cb_del(s); From 7ca803b489455b9242c81b4befe546ea3a692e5c Mon Sep 17 00:00:00 2001 From: AngeloGioacchino Del Regno Date: Mon, 18 Mar 2024 12:32:37 +0100 Subject: [PATCH 192/313] soc: mediatek: mtk-svs: Append "-thermal" to thermal zone names The thermal framework registers thermal zones as specified in DT and including the "-thermal" suffix: append that to the driver specified tzone_name to actually match the thermal zone name as registered by the thermal API. 
Fixes: 2bfbf82956e2 ("soc: mediatek: mtk-svs: Constify runtime-immutable members of svs_bank") Link: https://lore.kernel.org/r/20240318113237.125802-1-angelogioacchino.delregno@collabora.com Signed-off-by: AngeloGioacchino Del Regno --- drivers/soc/mediatek/mtk-svs.c | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/drivers/soc/mediatek/mtk-svs.c b/drivers/soc/mediatek/mtk-svs.c index c832f5c670bc..9a91298c1253 100644 --- a/drivers/soc/mediatek/mtk-svs.c +++ b/drivers/soc/mediatek/mtk-svs.c @@ -1768,6 +1768,7 @@ static int svs_bank_resource_setup(struct svs_platform *svsp) const struct svs_bank_pdata *bdata; struct svs_bank *svsb; struct dev_pm_opp *opp; + char tz_name_buf[20]; unsigned long freq; int count, ret; u32 idx, i; @@ -1819,10 +1820,12 @@ static int svs_bank_resource_setup(struct svs_platform *svsp) } if (!IS_ERR_OR_NULL(bdata->tzone_name)) { - svsb->tzd = thermal_zone_get_zone_by_name(bdata->tzone_name); + snprintf(tz_name_buf, ARRAY_SIZE(tz_name_buf), + "%s-thermal", bdata->tzone_name); + svsb->tzd = thermal_zone_get_zone_by_name(tz_name_buf); if (IS_ERR(svsb->tzd)) { dev_err(svsb->dev, "cannot get \"%s\" thermal zone\n", - bdata->tzone_name); + tz_name_buf); return PTR_ERR(svsb->tzd); } } From ab6cd6bb33cc0bbb8dbf8cc264a1013b2019561e Mon Sep 17 00:00:00 2001 From: Daniel Golle Date: Wed, 7 Feb 2024 17:42:41 +0000 Subject: [PATCH 193/313] soc: mediatek: mtk-socinfo: depends on CONFIG_SOC_BUS The mtk-socinfo driver uses symbols 'soc_device_register' and 'soc_device_unregister' which are part of the bus driver for System-on-Chip devices. Select SOC_BUS to make sure that driver is built and the symbols are available. 
Fixes: 423a54da3c7e ("soc: mediatek: mtk-socinfo: Add driver for getting chip information") Signed-off-by: Daniel Golle Reviewed-by: Chen-Yu Tsai Reviewed-by: Matthias Brugger Reviewed-by: AngeloGioacchino Del Regno Link: https://lore.kernel.org/r/cc8f7f7da5bdccce514a320e0ae7468659cf7346.1707327680.git.daniel@makrotopia.org Signed-off-by: AngeloGioacchino Del Regno --- drivers/soc/mediatek/Kconfig | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/soc/mediatek/Kconfig b/drivers/soc/mediatek/Kconfig index 50c664b65f4d..1b7afb19ccd6 100644 --- a/drivers/soc/mediatek/Kconfig +++ b/drivers/soc/mediatek/Kconfig @@ -72,6 +72,7 @@ config MTK_SOCINFO tristate "MediaTek SoC Information" default y depends on NVMEM_MTK_EFUSE + select SOC_BUS help The MediaTek SoC Information (mtk-socinfo) driver provides information about the SoC to the userspace including the From a44f2eb106a46f2275a79de54ce0ea63e4f3d8c8 Mon Sep 17 00:00:00 2001 From: Jakub Kicinski Date: Fri, 19 Apr 2024 19:08:26 -0700 Subject: [PATCH 194/313] tools: ynl: don't ignore errors in NLMSG_DONE messages NLMSG_DONE contains an error code, it has to be extracted. Prior to this change all dumps will end in success, and in case of failure the result is silently truncated. 
Fixes: e4b48ed460d3 ("tools: ynl: add a completely generic client") Signed-off-by: Jakub Kicinski Reviewed-by: Donald Hunter Link: https://lore.kernel.org/r/20240420020827.3288615-1-kuba@kernel.org Signed-off-by: Paolo Abeni --- tools/net/ynl/lib/ynl.py | 1 + 1 file changed, 1 insertion(+) diff --git a/tools/net/ynl/lib/ynl.py b/tools/net/ynl/lib/ynl.py index 5fa7957f6e0f..25810e18b0a7 100644 --- a/tools/net/ynl/lib/ynl.py +++ b/tools/net/ynl/lib/ynl.py @@ -182,6 +182,7 @@ class NlMsg: self.done = 1 extack_off = 20 elif self.nl_type == Netlink.NLMSG_DONE: + self.error = struct.unpack("i", self.raw[0:4])[0] self.done = 1 extack_off = 4 From 5ea6764d9095e234b024054f75ebbccc4f0eb146 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Cl=C3=A9ment=20L=C3=A9ger?= Date: Tue, 9 Apr 2024 16:38:37 +0200 Subject: [PATCH 195/313] riscv: hwprobe: fix invalid sign extension for RISCV_HWPROBE_EXT_ZVFHMIN MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The current definition yields a negative 32-bit signed value, which results in a mask that is obviously incorrect. Replace it by using a 1ULL bit shift value to obtain a single set bit mask.
Fixes: 5dadda5e6a59 ("riscv: hwprobe: export Zvfh[min] ISA extensions") Signed-off-by: Clément Léger Reviewed-by: Alexandre Ghiti Link: https://lore.kernel.org/r/20240409143839.558784-1-cleger@rivosinc.com Signed-off-by: Palmer Dabbelt --- arch/riscv/include/uapi/asm/hwprobe.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/riscv/include/uapi/asm/hwprobe.h b/arch/riscv/include/uapi/asm/hwprobe.h index 9f2a8e3ff204..2902f68dc913 100644 --- a/arch/riscv/include/uapi/asm/hwprobe.h +++ b/arch/riscv/include/uapi/asm/hwprobe.h @@ -54,7 +54,7 @@ struct riscv_hwprobe { #define RISCV_HWPROBE_EXT_ZFHMIN (1 << 28) #define RISCV_HWPROBE_EXT_ZIHINTNTL (1 << 29) #define RISCV_HWPROBE_EXT_ZVFH (1 << 30) -#define RISCV_HWPROBE_EXT_ZVFHMIN (1 << 31) +#define RISCV_HWPROBE_EXT_ZVFHMIN (1ULL << 31) #define RISCV_HWPROBE_EXT_ZFA (1ULL << 32) #define RISCV_HWPROBE_EXT_ZTSO (1ULL << 33) #define RISCV_HWPROBE_EXT_ZACAS (1ULL << 34) From 68301ef471b63f25d6e6144a0820fea52257a34a Mon Sep 17 00:00:00 2001 From: "Rafael J. Wysocki" Date: Tue, 23 Apr 2024 21:19:36 +0200 Subject: [PATCH 196/313] ACPI: PM: s2idle: Evaluate all Low-Power S0 Idle _DSM functions Commit 073237281a50 ("ACPI: PM: s2idle: Enable Low-Power S0 Idle MSFT UUID for non-AMD systems") attempted to avoid evaluating the same Low- Power S0 Idle _DSM functions for different UUIDs, but that turns out to be a mistake, because some systems in the field are adversely affected by it. Address this by allowing all Low-Power S0 Idle _DSM functions to be evaluated, but still print the message regarding duplication of Low- Power S0 Idle _DSM function sets for different UUIDs. Fixes: 073237281a50 ("ACPI: PM: s2idle: Enable Low-Power S0 Idle MSFT UUID for non-AMD systems") Closes: https://bugzilla.kernel.org/show_bug.cgi?id=218750 Reported-and-tested-by: Mark Pearson Suggested-by: Mario Limonciello Signed-off-by: Rafael J. 
Wysocki Reviewed-by: Mario Limonciello --- drivers/acpi/x86/s2idle.c | 8 +++----- 1 file changed, 3 insertions(+), 5 deletions(-) diff --git a/drivers/acpi/x86/s2idle.c b/drivers/acpi/x86/s2idle.c index cd84af23f7ea..dd0b40b9bbe8 100644 --- a/drivers/acpi/x86/s2idle.c +++ b/drivers/acpi/x86/s2idle.c @@ -492,16 +492,14 @@ static int lps0_device_attach(struct acpi_device *adev, unsigned int func_mask; /* - * Avoid evaluating the same _DSM function for two - * different UUIDs and prioritize the MSFT one. + * Log a message if the _DSM function sets for two + * different UUIDs overlap. */ func_mask = lps0_dsm_func_mask & lps0_dsm_func_mask_microsoft; - if (func_mask) { + if (func_mask) acpi_handle_info(adev->handle, "Duplicate LPS0 _DSM functions (mask: 0x%x)\n", func_mask); - lps0_dsm_func_mask &= ~func_mask; - } } } From 9c8ecb9308d8013ff9ac9d36fdd8ae746033b93c Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Tue, 23 Apr 2024 17:48:43 -0400 Subject: [PATCH 197/313] Revert "NFSD: Reschedule CB operations when backchannel rpc_clnt is shut down" The reverted commit attempted to enable NFSD to retransmit pending callback operations if an NFS client disconnects, but unintentionally introduces a hazardous behavior regression if the client becomes permanently unreachable while callback operations are still pending. A disconnect can occur due to network partition or if the NFS server needs to force the NFS client to retransmit (for example, if a GSS window under-run occurs). Reverting the commit will make NFSD behave the same as it did in v6.8 and before. Pending callback operations are permanently lost if the client connection is terminated before the client receives them. For some callback operations, this loss is not harmful. However, for CB_RECALL, the loss means a delegation might be revoked unnecessarily. 
For CB_OFFLOAD, pending COPY operations will never complete unless the NFS client subsequently sends an OFFLOAD_STATUS operation, which the Linux NFS client does not currently implement. These issues still need to be addressed somehow. Reported-by: Dai Ngo Link: https://bugzilla.kernel.org/show_bug.cgi?id=218735 Signed-off-by: Chuck Lever --- fs/nfsd/nfs4callback.c | 20 ++------------------ 1 file changed, 2 insertions(+), 18 deletions(-) diff --git a/fs/nfsd/nfs4callback.c b/fs/nfsd/nfs4callback.c index e440f72b9d4e..d153af81f406 100644 --- a/fs/nfsd/nfs4callback.c +++ b/fs/nfsd/nfs4callback.c @@ -986,14 +986,6 @@ static bool nfsd4_queue_cb(struct nfsd4_callback *cb) return queue_delayed_work(callback_wq, &cb->cb_work, 0); } -static void nfsd4_queue_cb_delayed(struct nfsd4_callback *cb, - unsigned long msecs) -{ - trace_nfsd_cb_queue(cb->cb_clp, cb); - queue_delayed_work(callback_wq, &cb->cb_work, - msecs_to_jiffies(msecs)); -} - static void nfsd41_cb_inflight_begin(struct nfs4_client *clp) { atomic_inc(&clp->cl_cb_inflight); @@ -1502,16 +1494,8 @@ nfsd4_run_cb_work(struct work_struct *work) clnt = clp->cl_cb_client; if (!clnt) { - if (test_bit(NFSD4_CLIENT_CB_KILL, &clp->cl_flags)) - nfsd41_destroy_cb(cb); - else { - /* - * XXX: Ideally, we could wait for the client to - * reconnect, but I haven't figured out how - * to do that yet. - */ - nfsd4_queue_cb_delayed(cb, 25); - } + /* Callback channel broken, or client killed; give up: */ + nfsd41_destroy_cb(cb); return; } From 8ddb7142c8ab37371c6fd167a8aded97922c6268 Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Tue, 23 Apr 2024 17:52:24 -0400 Subject: [PATCH 198/313] Revert "NFSD: Convert the callback workqueue to use delayed_work" This commit was a pre-requisite for commit c1ccfcf1a9bf ("NFSD: Reschedule CB operations when backchannel rpc_clnt is shut down"), which has already been reverted. 
Signed-off-by: Chuck Lever --- fs/nfsd/nfs4callback.c | 6 +++--- fs/nfsd/state.h | 2 +- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/fs/nfsd/nfs4callback.c b/fs/nfsd/nfs4callback.c index d153af81f406..6aff46701aa1 100644 --- a/fs/nfsd/nfs4callback.c +++ b/fs/nfsd/nfs4callback.c @@ -983,7 +983,7 @@ static struct workqueue_struct *callback_wq; static bool nfsd4_queue_cb(struct nfsd4_callback *cb) { trace_nfsd_cb_queue(cb->cb_clp, cb); - return queue_delayed_work(callback_wq, &cb->cb_work, 0); + return queue_work(callback_wq, &cb->cb_work); } static void nfsd41_cb_inflight_begin(struct nfs4_client *clp) @@ -1482,7 +1482,7 @@ static void nfsd4_run_cb_work(struct work_struct *work) { struct nfsd4_callback *cb = - container_of(work, struct nfsd4_callback, cb_work.work); + container_of(work, struct nfsd4_callback, cb_work); struct nfs4_client *clp = cb->cb_clp; struct rpc_clnt *clnt; int flags; @@ -1528,7 +1528,7 @@ void nfsd4_init_cb(struct nfsd4_callback *cb, struct nfs4_client *clp, cb->cb_msg.rpc_argp = cb; cb->cb_msg.rpc_resp = cb; cb->cb_ops = ops; - INIT_DELAYED_WORK(&cb->cb_work, nfsd4_run_cb_work); + INIT_WORK(&cb->cb_work, nfsd4_run_cb_work); cb->cb_status = 0; cb->cb_need_restart = false; cb->cb_holds_slot = false; diff --git a/fs/nfsd/state.h b/fs/nfsd/state.h index 01c6f3445646..2ed0fcf879fd 100644 --- a/fs/nfsd/state.h +++ b/fs/nfsd/state.h @@ -68,7 +68,7 @@ struct nfsd4_callback { struct nfs4_client *cb_clp; struct rpc_message cb_msg; const struct nfsd4_callback_ops *cb_ops; - struct delayed_work cb_work; + struct work_struct cb_work; int cb_seq_status; int cb_status; bool cb_need_restart; From 3584718cf2ec7e79b6814f2596dcf398c5fb2eca Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Sun, 21 Apr 2024 17:52:48 +0000 Subject: [PATCH 199/313] net: fix sk_memory_allocated_{add|sub} vs softirqs Jonathan Heathcote reported a regression caused by blamed commit on aarch64 architecture. 
x86 happens to have irq-safe __this_cpu_add_return() and __this_cpu_sub(), but this is not generic. I think my confusion came from "struct sock" argument, because these helpers are called with a locked socket. But the memory accounting is per-proto (and per-cpu after the blamed commit). We might cleanup these helpers later to directly accept a "struct proto *proto" argument. Switch to this_cpu_add_return() and this_cpu_xchg() operations, and get rid of preempt_disable()/preempt_enable() pairs. Fast path becomes a bit faster as a result :) Many thanks to Jonathan Heathcote for his awesome report and investigations. Fixes: 3cd3399dd7a8 ("net: implement per-cpu reserves for memory_allocated") Reported-by: Jonathan Heathcote Closes: https://lore.kernel.org/netdev/VI1PR01MB42407D7947B2EA448F1E04EFD10D2@VI1PR01MB4240.eurprd01.prod.exchangelabs.com/ Signed-off-by: Eric Dumazet Acked-by: Soheil Hassas Yeganeh Reviewed-by: Shakeel Butt Link: https://lore.kernel.org/r/20240421175248.1692552-1-edumazet@google.com Signed-off-by: Jakub Kicinski --- include/net/sock.h | 40 +++++++++++++++++++++------------------- 1 file changed, 21 insertions(+), 19 deletions(-) diff --git a/include/net/sock.h b/include/net/sock.h index f57bfd8a2ad2..b4b553df7870 100644 --- a/include/net/sock.h +++ b/include/net/sock.h @@ -1410,32 +1410,34 @@ sk_memory_allocated(const struct sock *sk) #define SK_MEMORY_PCPU_RESERVE (1 << (20 - PAGE_SHIFT)) extern int sysctl_mem_pcpu_rsv; -static inline void -sk_memory_allocated_add(struct sock *sk, int amt) +static inline void proto_memory_pcpu_drain(struct proto *proto) { - int local_reserve; + int val = this_cpu_xchg(*proto->per_cpu_fw_alloc, 0); - preempt_disable(); - local_reserve = __this_cpu_add_return(*sk->sk_prot->per_cpu_fw_alloc, amt); - if (local_reserve >= READ_ONCE(sysctl_mem_pcpu_rsv)) { - __this_cpu_sub(*sk->sk_prot->per_cpu_fw_alloc, local_reserve); - atomic_long_add(local_reserve, sk->sk_prot->memory_allocated); - } - preempt_enable(); + if 
(val) + atomic_long_add(val, proto->memory_allocated); } static inline void -sk_memory_allocated_sub(struct sock *sk, int amt) +sk_memory_allocated_add(const struct sock *sk, int val) { - int local_reserve; + struct proto *proto = sk->sk_prot; - preempt_disable(); - local_reserve = __this_cpu_sub_return(*sk->sk_prot->per_cpu_fw_alloc, amt); - if (local_reserve <= -READ_ONCE(sysctl_mem_pcpu_rsv)) { - __this_cpu_sub(*sk->sk_prot->per_cpu_fw_alloc, local_reserve); - atomic_long_add(local_reserve, sk->sk_prot->memory_allocated); - } - preempt_enable(); + val = this_cpu_add_return(*proto->per_cpu_fw_alloc, val); + + if (unlikely(val >= READ_ONCE(sysctl_mem_pcpu_rsv))) + proto_memory_pcpu_drain(proto); +} + +static inline void +sk_memory_allocated_sub(const struct sock *sk, int val) +{ + struct proto *proto = sk->sk_prot; + + val = this_cpu_sub_return(*proto->per_cpu_fw_alloc, val); + + if (unlikely(val <= -READ_ONCE(sysctl_mem_pcpu_rsv))) + proto_memory_pcpu_drain(proto); } #define SK_ALLOC_PERCPU_COUNTER_BATCH 16 From 58a4c9b1e5a3e53c9148e80b90e1e43897ce77d1 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Sun, 21 Apr 2024 18:43:26 +0000 Subject: [PATCH 200/313] ipv4: check for NULL idev in ip_route_use_hint() syzbot was able to trigger a NULL deref in fib_validate_source() in an old tree [1]. It appears the bug exists in latest trees. All calls to __in_dev_get_rcu() must be checked for a NULL result. 
[1] general protection fault, probably for non-canonical address 0xdffffc0000000000: 0000 [#1] SMP KASAN KASAN: null-ptr-deref in range [0x0000000000000000-0x0000000000000007] CPU: 2 PID: 3257 Comm: syz-executor.3 Not tainted 5.10.0-syzkaller #0 Hardware name: QEMU Standard PC (Q35 + ICH9, 2009), BIOS 1.16.3-debian-1.16.3-2~bpo12+1 04/01/2014 RIP: 0010:fib_validate_source+0xbf/0x15a0 net/ipv4/fib_frontend.c:425 Code: 18 f2 f2 f2 f2 42 c7 44 20 23 f3 f3 f3 f3 48 89 44 24 78 42 c6 44 20 27 f3 e8 5d 88 48 fc 4c 89 e8 48 c1 e8 03 48 89 44 24 18 <42> 80 3c 20 00 74 08 4c 89 ef e8 d2 15 98 fc 48 89 5c 24 10 41 bf RSP: 0018:ffffc900015fee40 EFLAGS: 00010246 RAX: 0000000000000000 RBX: ffff88800f7a4000 RCX: ffff88800f4f90c0 RDX: 0000000000000000 RSI: 0000000004001eac RDI: ffff8880160c64c0 RBP: ffffc900015ff060 R08: 0000000000000000 R09: ffff88800f7a4000 R10: 0000000000000002 R11: ffff88800f4f90c0 R12: dffffc0000000000 R13: 0000000000000000 R14: 0000000000000000 R15: ffff88800f7a4000 FS: 00007f938acfe6c0(0000) GS:ffff888058c00000(0000) knlGS:0000000000000000 CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 CR2: 00007f938acddd58 CR3: 000000001248e000 CR4: 0000000000352ef0 DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000 DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400 Call Trace: ip_route_use_hint+0x410/0x9b0 net/ipv4/route.c:2231 ip_rcv_finish_core+0x2c4/0x1a30 net/ipv4/ip_input.c:327 ip_list_rcv_finish net/ipv4/ip_input.c:612 [inline] ip_sublist_rcv+0x3ed/0xe50 net/ipv4/ip_input.c:638 ip_list_rcv+0x422/0x470 net/ipv4/ip_input.c:673 __netif_receive_skb_list_ptype net/core/dev.c:5572 [inline] __netif_receive_skb_list_core+0x6b1/0x890 net/core/dev.c:5620 __netif_receive_skb_list net/core/dev.c:5672 [inline] netif_receive_skb_list_internal+0x9f9/0xdc0 net/core/dev.c:5764 netif_receive_skb_list+0x55/0x3e0 net/core/dev.c:5816 xdp_recv_frames net/bpf/test_run.c:257 [inline] xdp_test_run_batch net/bpf/test_run.c:335 [inline] 
bpf_test_run_xdp_live+0x1818/0x1d00 net/bpf/test_run.c:363 bpf_prog_test_run_xdp+0x81f/0x1170 net/bpf/test_run.c:1376 bpf_prog_test_run+0x349/0x3c0 kernel/bpf/syscall.c:3736 __sys_bpf+0x45c/0x710 kernel/bpf/syscall.c:5115 __do_sys_bpf kernel/bpf/syscall.c:5201 [inline] __se_sys_bpf kernel/bpf/syscall.c:5199 [inline] __x64_sys_bpf+0x7c/0x90 kernel/bpf/syscall.c:5199 Fixes: 02b24941619f ("ipv4: use dst hint for ipv4 list receive") Reported-by: syzbot Signed-off-by: Eric Dumazet Acked-by: Paolo Abeni Link: https://lore.kernel.org/r/20240421184326.1704930-1-edumazet@google.com Signed-off-by: Jakub Kicinski --- net/ipv4/route.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/net/ipv4/route.c b/net/ipv4/route.c index d36ace160d42..b814fdab19f7 100644 --- a/net/ipv4/route.c +++ b/net/ipv4/route.c @@ -2166,6 +2166,9 @@ int ip_route_use_hint(struct sk_buff *skb, __be32 daddr, __be32 saddr, int err = -EINVAL; u32 tag = 0; + if (!in_dev) + return -EINVAL; + if (ipv4_is_multicast(saddr) || ipv4_is_lbcast(saddr)) goto martian_source; From 4ce62d5b2f7aecd4900e7d6115588ad7f9acccca Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Sun, 21 Apr 2024 19:38:28 +0000 Subject: [PATCH 201/313] net: usb: ax88179_178a: stop lying about skb->truesize Some usb drivers try to set small skb->truesize and break core networking stacks. In this patch, I removed one of the skb->truesize overrides.
I also replaced one skb_clone() by an allocation of a fresh and small skb, to get minimally sized skbs, like we did in commit 1e2c61172342 ("net: cdc_ncm: reduce skb truesize in rx path") Fixes: f8ebb3ac881b ("net: usb: ax88179_178a: Fix packet receiving") Reported-by: shironeko Closes: https://lore.kernel.org/netdev/c110f41a0d2776b525930f213ca9715c@tesaguri.club/ Signed-off-by: Eric Dumazet Cc: Jose Alonso Reviewed-by: Simon Horman Link: https://lore.kernel.org/r/20240421193828.1966195-1-edumazet@google.com Signed-off-by: Jakub Kicinski --- drivers/net/usb/ax88179_178a.c | 11 +++-------- 1 file changed, 3 insertions(+), 8 deletions(-) diff --git a/drivers/net/usb/ax88179_178a.c b/drivers/net/usb/ax88179_178a.c index 752f821a1990..df9d767cb524 100644 --- a/drivers/net/usb/ax88179_178a.c +++ b/drivers/net/usb/ax88179_178a.c @@ -1456,21 +1456,16 @@ static int ax88179_rx_fixup(struct usbnet *dev, struct sk_buff *skb) /* Skip IP alignment pseudo header */ skb_pull(skb, 2); - skb->truesize = SKB_TRUESIZE(pkt_len_plus_padd); ax88179_rx_checksum(skb, pkt_hdr); return 1; } - ax_skb = skb_clone(skb, GFP_ATOMIC); + ax_skb = netdev_alloc_skb_ip_align(dev->net, pkt_len); if (!ax_skb) return 0; - skb_trim(ax_skb, pkt_len); + skb_put(ax_skb, pkt_len); + memcpy(ax_skb->data, skb->data + 2, pkt_len); - /* Skip IP alignment pseudo header */ - skb_pull(ax_skb, 2); - - skb->truesize = pkt_len_plus_padd + - SKB_DATA_ALIGN(sizeof(struct sk_buff)); ax88179_rx_checksum(ax_skb, pkt_hdr); usbnet_skb_return(dev, ax_skb); From 80e679b352c3ce5158f3f778cfb77eb767e586fb Mon Sep 17 00:00:00 2001 From: Hyunwoo Kim Date: Mon, 22 Apr 2024 05:33:40 -0400 Subject: [PATCH 202/313] tcp: Fix Use-After-Free in tcp_ao_connect_init Since call_rcu, which is called in the hlist_for_each_entry_rcu traversal of tcp_ao_connect_init, is not part of the RCU read critical section, it is possible that the RCU grace period will pass during the traversal and the key will be freed.
To prevent this, it should be changed to hlist_for_each_entry_safe. Fixes: 7c2ffaf21bd6 ("net/tcp: Calculate TCP-AO traffic keys") Signed-off-by: Hyunwoo Kim Reviewed-by: Eric Dumazet Acked-by: Dmitry Safonov <0x7f454c46@gmail.com> Link: https://lore.kernel.org/r/ZiYu9NJ/ClR8uSkH@v4bel-B760M-AORUS-ELITE-AX Signed-off-by: Jakub Kicinski --- net/ipv4/tcp_ao.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/net/ipv4/tcp_ao.c b/net/ipv4/tcp_ao.c index 3afeeb68e8a7..781b67a52571 100644 --- a/net/ipv4/tcp_ao.c +++ b/net/ipv4/tcp_ao.c @@ -1068,6 +1068,7 @@ void tcp_ao_connect_init(struct sock *sk) { struct tcp_sock *tp = tcp_sk(sk); struct tcp_ao_info *ao_info; + struct hlist_node *next; union tcp_ao_addr *addr; struct tcp_ao_key *key; int family, l3index; @@ -1090,7 +1091,7 @@ void tcp_ao_connect_init(struct sock *sk) l3index = l3mdev_master_ifindex_by_index(sock_net(sk), sk->sk_bound_dev_if); - hlist_for_each_entry_rcu(key, &ao_info->head, node) { + hlist_for_each_entry_safe(key, next, &ao_info->head, node) { if (!tcp_ao_key_cmp(key, l3index, addr, key->prefixlen, family, -1, -1)) continue; From 2eb9dd497a698dc384c0dd3e0311d541eb2e13dd Mon Sep 17 00:00:00 2001 From: Joshua Ashton Date: Thu, 2 Nov 2023 04:21:55 +0000 Subject: [PATCH 203/313] drm/amd/display: Set color_mgmt_changed to true on unsuspend Otherwise we can end up with a frame on unsuspend where color management is not applied when userspace has not committed themselves. Fixes re-applying color management on Steam Deck/Gamescope on S3 resume. 
Signed-off-by: Joshua Ashton Reviewed-by: Harry Wentland Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c index 6d2f60c61dec..f3f94d109726 100644 --- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c +++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c @@ -3029,6 +3029,7 @@ static int dm_resume(void *handle) dc_stream_release(dm_new_crtc_state->stream); dm_new_crtc_state->stream = NULL; } + dm_new_crtc_state->base.color_mgmt_changed = true; } for_each_new_plane_in_state(dm->cached_state, plane, new_plane_state, i) { From 37865e02e6ccecdda240f33b4332105a5c734984 Mon Sep 17 00:00:00 2001 From: Felix Kuehling Date: Wed, 17 Apr 2024 21:13:59 -0400 Subject: [PATCH 204/313] drm/amdkfd: Fix eviction fence handling Handle case that dma_fence_get_rcu_safe returns NULL. If restore work is already scheduled, only update its timer. The same work item cannot be queued twice, so undo the extra queue eviction. 
Fixes: 9a1c1339abf9 ("drm/amdkfd: Run restore_workers on freezable WQs") Signed-off-by: Felix Kuehling Reviewed-by: Philip Yang Tested-by: Gang BA Reviewed-by: Gang BA Signed-off-by: Alex Deucher Cc: stable@vger.kernel.org --- drivers/gpu/drm/amd/amdkfd/kfd_process.c | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_process.c b/drivers/gpu/drm/amd/amdkfd/kfd_process.c index b79986412cd8..aafdf064651f 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_process.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_process.c @@ -1922,6 +1922,8 @@ static int signal_eviction_fence(struct kfd_process *p) rcu_read_lock(); ef = dma_fence_get_rcu_safe(&p->ef); rcu_read_unlock(); + if (!ef) + return -EINVAL; ret = dma_fence_signal(ef); dma_fence_put(ef); @@ -1949,10 +1951,9 @@ static void evict_process_worker(struct work_struct *work) * they are responsible stopping the queues and scheduling * the restore work. */ - if (!signal_eviction_fence(p)) - queue_delayed_work(kfd_restore_wq, &p->restore_work, - msecs_to_jiffies(PROCESS_RESTORE_TIME_MS)); - else + if (signal_eviction_fence(p) || + mod_delayed_work(kfd_restore_wq, &p->restore_work, + msecs_to_jiffies(PROCESS_RESTORE_TIME_MS))) kfd_process_restore_queues(p); pr_debug("Finished evicting pasid 0x%x\n", p->pasid); From 25e9227c6afd200bed6774c866980b8e36d033af Mon Sep 17 00:00:00 2001 From: Mukul Joshi Date: Thu, 18 Apr 2024 11:32:34 -0400 Subject: [PATCH 205/313] drm/amdgpu: Fix leak when GPU memory allocation fails Free the sync object if the memory allocation fails for any reason. 
Signed-off-by: Mukul Joshi Reviewed-by: Alex Deucher Signed-off-by: Alex Deucher Cc: stable@vger.kernel.org --- drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c index df58a6a1a67e..7c23ba19af33 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c @@ -1854,6 +1854,7 @@ int amdgpu_amdkfd_gpuvm_alloc_memory_of_gpu( err_bo_create: amdgpu_amdkfd_unreserve_mem_limit(adev, aligned_size, flags, xcp_id); err_reserve_limit: + amdgpu_sync_free(&(*mem)->sync); mutex_destroy(&(*mem)->lock); if (gobj) drm_gem_object_put(gobj); From 9c783a11214553a54f0915a7260a3ce624d36bf2 Mon Sep 17 00:00:00 2001 From: Lang Yu Date: Sun, 7 Apr 2024 12:36:00 +0800 Subject: [PATCH 206/313] drm/amdkfd: make sure VM is ready for updating operations MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit When page table BOs were evicted but not validated before updating page tables, VM is still in evicting state, amdgpu_vm_update_range returns -EBUSY and restore_process_worker runs into a dead loop. v2: Split the BO validation and page table update into two separate loops in amdgpu_amdkfd_restore_process_bos. 
(Felix) 1.Validate BOs 2.Validate VM (and DMABuf attachments) 3.Update page tables for the BOs validated above Fixes: 50661eb1a2c8 ("drm/amdgpu: Auto-validate DMABuf imports in compute VMs") Signed-off-by: Lang Yu Acked-by: Christian König Reviewed-by: Felix Kuehling Signed-off-by: Alex Deucher Cc: stable@vger.kernel.org --- .../gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c | 34 +++++++++++-------- 1 file changed, 20 insertions(+), 14 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c index 7c23ba19af33..2131de36e3da 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c @@ -2901,13 +2901,12 @@ int amdgpu_amdkfd_gpuvm_restore_process_bos(void *info, struct dma_fence __rcu * amdgpu_sync_create(&sync_obj); - /* Validate BOs and map them to GPUVM (update VM page tables). */ + /* Validate BOs managed by KFD */ list_for_each_entry(mem, &process_info->kfd_bo_list, validate_list) { struct amdgpu_bo *bo = mem->bo; uint32_t domain = mem->domain; - struct kfd_mem_attachment *attachment; struct dma_resv_iter cursor; struct dma_fence *fence; @@ -2932,6 +2931,25 @@ int amdgpu_amdkfd_gpuvm_restore_process_bos(void *info, struct dma_fence __rcu * goto validate_map_fail; } } + } + + if (failed_size) + pr_debug("0x%lx/0x%lx in system\n", failed_size, total_size); + + /* Validate PDs, PTs and evicted DMABuf imports last. Otherwise BO + * validations above would invalidate DMABuf imports again. + */ + ret = process_validate_vms(process_info, &exec.ticket); + if (ret) { + pr_debug("Validating VMs failed, ret: %d\n", ret); + goto validate_map_fail; + } + + /* Update mappings managed by KFD. 
*/ + list_for_each_entry(mem, &process_info->kfd_bo_list, + validate_list) { + struct kfd_mem_attachment *attachment; + list_for_each_entry(attachment, &mem->attachments, list) { if (!attachment->is_mapped) continue; @@ -2948,18 +2966,6 @@ int amdgpu_amdkfd_gpuvm_restore_process_bos(void *info, struct dma_fence __rcu * } } - if (failed_size) - pr_debug("0x%lx/0x%lx in system\n", failed_size, total_size); - - /* Validate PDs, PTs and evicted DMABuf imports last. Otherwise BO - * validations above would invalidate DMABuf imports again. - */ - ret = process_validate_vms(process_info, &exec.ticket); - if (ret) { - pr_debug("Validating VMs failed, ret: %d\n", ret); - goto validate_map_fail; - } - /* Update mappings not managed by KFD */ list_for_each_entry(peer_vm, &process_info->vm_list_head, vm_list_node) { From 0e95ed6452cb079cf9587c774a475a7d83c7e040 Mon Sep 17 00:00:00 2001 From: Ma Jun Date: Tue, 16 Apr 2024 17:30:12 +0800 Subject: [PATCH 207/313] drm/amdgpu/pm: Remove gpu_od if it's an empty directory gpu_od should be removed if it's an empty directory Signed-off-by: Ma Jun Reported-by: Yang Wang Reviewed-by: Yang Wang Suggested-by: Lijo Lazar Signed-off-by: Alex Deucher Cc: stable@vger.kernel.org --- drivers/gpu/drm/amd/pm/amdgpu_pm.c | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/drivers/gpu/drm/amd/pm/amdgpu_pm.c b/drivers/gpu/drm/amd/pm/amdgpu_pm.c index f09b9d49297e..bbd0169010c2 100644 --- a/drivers/gpu/drm/amd/pm/amdgpu_pm.c +++ b/drivers/gpu/drm/amd/pm/amdgpu_pm.c @@ -4261,6 +4261,13 @@ static int amdgpu_od_set_init(struct amdgpu_device *adev) } } + /* + * If gpu_od is the only member in the list, that means gpu_od is an + * empty directory, so remove it. 
+ */ + if (list_is_singular(&adev->pm.od_kobj_list)) + goto err_out; + return 0; err_out: From aebd3eb9d3ae017e6260043f6bcace2f5ef60694 Mon Sep 17 00:00:00 2001 From: Lijo Lazar Date: Wed, 10 Apr 2024 19:30:46 +0530 Subject: [PATCH 208/313] drm/amdgpu: Assign correct bits for SDMA HDP flush HDP Flush request bit can be kept unique per AID, and doesn't need to be unique SOC-wide. Assign only bits 10-13 for SDMA v4.4.2. Signed-off-by: Lijo Lazar Acked-by: Alex Deucher Signed-off-by: Alex Deucher Cc: stable@vger.kernel.org --- drivers/gpu/drm/amd/amdgpu/sdma_v4_4_2.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/amd/amdgpu/sdma_v4_4_2.c b/drivers/gpu/drm/amd/amdgpu/sdma_v4_4_2.c index 82eab49be82b..e708468ac54d 100644 --- a/drivers/gpu/drm/amd/amdgpu/sdma_v4_4_2.c +++ b/drivers/gpu/drm/amd/amdgpu/sdma_v4_4_2.c @@ -368,7 +368,8 @@ static void sdma_v4_4_2_ring_emit_hdp_flush(struct amdgpu_ring *ring) u32 ref_and_mask = 0; const struct nbio_hdp_flush_reg *nbio_hf_reg = adev->nbio.hdp_flush_reg; - ref_and_mask = nbio_hf_reg->ref_and_mask_sdma0 << ring->me; + ref_and_mask = nbio_hf_reg->ref_and_mask_sdma0 + << (ring->me % adev->sdma.num_inst_per_aid); sdma_v4_4_2_wait_reg_mem(ring, 0, 1, adev->nbio.funcs->get_hdp_flush_done_offset(adev), From d59198d2d0c5cb2a360819b000b0f173c472c9ef Mon Sep 17 00:00:00 2001 From: Peyton Lee Date: Fri, 19 Apr 2024 14:07:39 +0800 Subject: [PATCH 209/313] drm/amdgpu/vpe: fix vpe dpm setup failed The vpe dpm settings should be done before firmware is loaded. Otherwise, the frequency cannot be successfully raised. 
Signed-off-by: Peyton Lee Reviewed-by: Lang Yu Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu_vpe.c | 2 +- drivers/gpu/drm/amd/amdgpu/vpe_v6_1.c | 14 +++++++------- 2 files changed, 8 insertions(+), 8 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vpe.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vpe.c index 6695481f870f..c23d97d34b7e 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vpe.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vpe.c @@ -205,7 +205,7 @@ int amdgpu_vpe_configure_dpm(struct amdgpu_vpe *vpe) dpm_ctl &= 0xfffffffe; /* Disable DPM */ WREG32(vpe_get_reg_offset(vpe, 0, vpe->regs.dpm_enable), dpm_ctl); dev_dbg(adev->dev, "%s: disable vpe dpm\n", __func__); - return 0; + return -EINVAL; } int amdgpu_vpe_psp_update_sram(struct amdgpu_device *adev) diff --git a/drivers/gpu/drm/amd/amdgpu/vpe_v6_1.c b/drivers/gpu/drm/amd/amdgpu/vpe_v6_1.c index 769eb8f7bb3c..09315dd5a1ec 100644 --- a/drivers/gpu/drm/amd/amdgpu/vpe_v6_1.c +++ b/drivers/gpu/drm/amd/amdgpu/vpe_v6_1.c @@ -144,6 +144,12 @@ static int vpe_v6_1_load_microcode(struct amdgpu_vpe *vpe) WREG32(vpe_get_reg_offset(vpe, j, regVPEC_CNTL), ret); } + /* setup collaborate mode */ + vpe_v6_1_set_collaborate_mode(vpe, true); + /* setup DPM */ + if (amdgpu_vpe_configure_dpm(vpe)) + dev_warn(adev->dev, "VPE failed to enable DPM\n"); + /* * For VPE 6.1.1, still only need to add master's offset, and psp will apply it to slave as well. * Here use instance 0 as master. 
@@ -159,11 +165,7 @@ static int vpe_v6_1_load_microcode(struct amdgpu_vpe *vpe) adev->vpe.cmdbuf_cpu_addr[0] = f32_offset; adev->vpe.cmdbuf_cpu_addr[1] = f32_cntl; - amdgpu_vpe_psp_update_sram(adev); - vpe_v6_1_set_collaborate_mode(vpe, true); - amdgpu_vpe_configure_dpm(vpe); - - return 0; + return amdgpu_vpe_psp_update_sram(adev); } vpe_hdr = (const struct vpe_firmware_header_v1_0 *)adev->vpe.fw->data; @@ -196,8 +198,6 @@ static int vpe_v6_1_load_microcode(struct amdgpu_vpe *vpe) } vpe_v6_1_halt(vpe, false); - vpe_v6_1_set_collaborate_mode(vpe, true); - amdgpu_vpe_configure_dpm(vpe); return 0; } From b0b13d532105e0e682d95214933bb8483a063184 Mon Sep 17 00:00:00 2001 From: Felix Kuehling Date: Thu, 18 Apr 2024 13:56:42 -0400 Subject: [PATCH 210/313] drm/amdgpu: Update BO eviction priorities MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Make SVM BOs more likely to get evicted than other BOs. These BOs opportunistically use available VRAM, but can fall back relatively seamlessly to system memory. It also avoids SVM migrations evicting other, more important BOs as they will evict other SVM allocations first. 
Signed-off-by: Felix Kuehling Acked-by: Mukul Joshi Tested-by: Mukul Joshi Reviewed-by: Christian König Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu_object.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c index 2099159a693f..ce733e3cb35d 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c @@ -605,6 +605,8 @@ int amdgpu_bo_create(struct amdgpu_device *adev, else amdgpu_bo_placement_from_domain(bo, bp->domain); if (bp->type == ttm_bo_type_kernel) + bo->tbo.priority = 2; + else if (!(bp->flags & AMDGPU_GEM_CREATE_DISCARDABLE)) bo->tbo.priority = 1; if (!bp->destroy) From e26305f369ed0e087a043c2cdc76f3d9a6efb3bd Mon Sep 17 00:00:00 2001 From: Felix Kuehling Date: Fri, 19 Apr 2024 13:25:58 -0400 Subject: [PATCH 211/313] drm/amdkfd: Fix rescheduling of restore worker Handle the case that the restore worker was already scheduled by another eviction while the restore was in progress. 
Fixes: 9a1c1339abf9 ("drm/amdkfd: Run restore_workers on freezable WQs") Signed-off-by: Felix Kuehling Reviewed-by: Philip Yang Tested-by: Yunxiang Li Signed-off-by: Alex Deucher Cc: stable@vger.kernel.org --- drivers/gpu/drm/amd/amdkfd/kfd_process.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_process.c b/drivers/gpu/drm/amd/amdkfd/kfd_process.c index aafdf064651f..58c1fe542193 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_process.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_process.c @@ -2012,9 +2012,9 @@ static void restore_process_worker(struct work_struct *work) if (ret) { pr_debug("Failed to restore BOs of pasid 0x%x, retry after %d ms\n", p->pasid, PROCESS_BACK_OFF_TIME_MS); - ret = queue_delayed_work(kfd_restore_wq, &p->restore_work, - msecs_to_jiffies(PROCESS_BACK_OFF_TIME_MS)); - WARN(!ret, "reschedule restore work failed\n"); + if (mod_delayed_work(kfd_restore_wq, &p->restore_work, + msecs_to_jiffies(PROCESS_RESTORE_TIME_MS))) + kfd_process_restore_queues(p); } } From 661d71ee5a010bdc0663e0db701931aff920e8e1 Mon Sep 17 00:00:00 2001 From: Lang Yu Date: Fri, 19 Apr 2024 15:40:08 +0800 Subject: [PATCH 212/313] drm/amdgpu/umsch: don't execute umsch test when GPU is in reset/suspend MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit umsch test needs full GPU functionality(e.g., VM update, TLB flush, possibly buffer moving under memory pressure) which may be not ready under these states. Just skip it to avoid potential issues. 
Signed-off-by: Lang Yu Reviewed-by: Christian König Reviewed-by: Veerabadhran Gopalakrishnan Signed-off-by: Alex Deucher Cc: stable@vger.kernel.org --- drivers/gpu/drm/amd/amdgpu/amdgpu_umsch_mm.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_umsch_mm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_umsch_mm.c index 0df97c3e3a70..f7c73533e336 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_umsch_mm.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_umsch_mm.c @@ -774,6 +774,9 @@ static int umsch_mm_late_init(void *handle) { struct amdgpu_device *adev = (struct amdgpu_device *)handle; + if (amdgpu_in_reset(adev) || adev->in_s0ix || adev->in_suspend) + return 0; + return umsch_mm_test(adev); } From 30d1cda8ce31ab49051ff7159280c542a738b23d Mon Sep 17 00:00:00 2001 From: Lijo Lazar Date: Fri, 12 Apr 2024 13:11:14 +0530 Subject: [PATCH 213/313] drm/amd/pm: Restore config space after reset During mode-2 reset, pci config space registers are affected at device side. However, certain platforms have switches which assign virtual BAR addresses and returns the same even after device is reset. This affects pci_restore_state() as it doesn't issue another config write, if the value read is same as the saved value. Add a workaround to write saved config space values from driver side. Presently, these switches are in platforms with SMU v13.0.6 SOCs, hence restrict the workaround only to those. 
Signed-off-by: Lijo Lazar Reviewed-by: Asad Kamal Signed-off-by: Alex Deucher --- .../drm/amd/pm/swsmu/smu13/smu_v13_0_6_ppt.c | 25 +++++++++++++++++++ 1 file changed, 25 insertions(+) diff --git a/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_6_ppt.c b/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_6_ppt.c index 3957af057d54..c977ebe88001 100644 --- a/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_6_ppt.c +++ b/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_6_ppt.c @@ -2294,6 +2294,17 @@ static ssize_t smu_v13_0_6_get_gpu_metrics(struct smu_context *smu, void **table return sizeof(*gpu_metrics); } +static void smu_v13_0_6_restore_pci_config(struct smu_context *smu) +{ + struct amdgpu_device *adev = smu->adev; + int i; + + for (i = 0; i < 16; i++) + pci_write_config_dword(adev->pdev, i * 4, + adev->pdev->saved_config_space[i]); + pci_restore_msi_state(adev->pdev); +} + static int smu_v13_0_6_mode2_reset(struct smu_context *smu) { int ret = 0, index; @@ -2315,6 +2326,20 @@ static int smu_v13_0_6_mode2_reset(struct smu_context *smu) /* Restore the config space saved during init */ amdgpu_device_load_pci_state(adev->pdev); + /* Certain platforms have switches which assign virtual BAR values to + * devices. OS uses the virtual BAR values and device behind the switch + * is assgined another BAR value. When device's config space registers + * are queried, switch returns the virtual BAR values. When mode-2 reset + * is performed, switch is unaware of it, and will continue to return + * the same virtual values to the OS.This affects + * pci_restore_config_space() API as it doesn't write the value saved if + * the current value read from config space is the same as what is + * saved. As a workaround, make sure the config space is restored + * always. 
+ */ + if (!(adev->flags & AMD_IS_APU)) + smu_v13_0_6_restore_pci_config(smu); + dev_dbg(smu->adev->dev, "wait for reset ack\n"); do { ret = smu_cmn_wait_for_response(smu); From 1e214f7faaf5d842754cd5cfcd76308bfedab3b5 Mon Sep 17 00:00:00 2001 From: Mukul Joshi Date: Thu, 18 Apr 2024 15:13:58 -0400 Subject: [PATCH 214/313] drm/amdkfd: Add VRAM accounting for SVM migration Do VRAM accounting when doing migrations to vram to make sure there is enough available VRAM and migrating to VRAM doesn't evict other possible non-unified memory BOs. If migrating to VRAM fails, driver can fall back to using system memory seamlessly. Signed-off-by: Mukul Joshi Reviewed-by: Felix Kuehling Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdkfd/kfd_migrate.c | 16 +++++++++++++++- drivers/gpu/drm/amd/amdkfd/kfd_svm.c | 2 +- 2 files changed, 16 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_migrate.c b/drivers/gpu/drm/amd/amdkfd/kfd_migrate.c index bdc01ca9609a..5c8d81bfce7a 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_migrate.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_migrate.c @@ -509,10 +509,19 @@ svm_migrate_ram_to_vram(struct svm_range *prange, uint32_t best_loc, start = start_mgr << PAGE_SHIFT; end = (last_mgr + 1) << PAGE_SHIFT; + r = amdgpu_amdkfd_reserve_mem_limit(node->adev, + prange->npages * PAGE_SIZE, + KFD_IOC_ALLOC_MEM_FLAGS_VRAM, + node->xcp ? node->xcp->id : 0); + if (r) { + dev_dbg(node->adev->dev, "failed to reserve VRAM, r: %ld\n", r); + return -ENOSPC; + } + r = svm_range_vram_node_new(node, prange, true); if (r) { dev_dbg(node->adev->dev, "fail %ld to alloc vram\n", r); - return r; + goto out; } ttm_res_offset = (start_mgr - prange->start + prange->offset) << PAGE_SHIFT; @@ -545,6 +554,11 @@ svm_migrate_ram_to_vram(struct svm_range *prange, uint32_t best_loc, svm_range_vram_node_free(prange); } +out: + amdgpu_amdkfd_unreserve_mem_limit(node->adev, + prange->npages * PAGE_SIZE, + KFD_IOC_ALLOC_MEM_FLAGS_VRAM, + node->xcp ? 
node->xcp->id : 0); return r < 0 ? r : 0; } diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_svm.c b/drivers/gpu/drm/amd/amdkfd/kfd_svm.c index f0f7f48af413..386875e6eb96 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_svm.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_svm.c @@ -3426,7 +3426,7 @@ svm_range_trigger_migration(struct mm_struct *mm, struct svm_range *prange, mm, KFD_MIGRATE_TRIGGER_PREFETCH); *migrated = !r; - return r; + return 0; } int svm_range_schedule_evict_svm_bo(struct amdgpu_amdkfd_fence *fence) From fe93b0927bc58cb1d64230f45744e527d9d8482c Mon Sep 17 00:00:00 2001 From: Prike Liang Date: Mon, 25 Mar 2024 15:33:34 +0800 Subject: [PATCH 215/313] drm/amdgpu: Fix the ring buffer size for queue VM flush MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Here are the corrections needed for the queue ring buffer size calculation for the following cases: - Remove the KIQ VM flush ring usage. - Add the invalidate TLBs packet for gfx10 and gfx11 queue. - There's no VM flush and PFP sync, so remove the gfx9 real ring and compute ring buffer usage. 
Signed-off-by: Prike Liang Reviewed-by: Christian König Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c | 3 +-- drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c | 3 +-- drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c | 2 -- 3 files changed, 2 insertions(+), 6 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c index f90905ef32c7..701146d649c3 100644 --- a/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c @@ -9186,7 +9186,7 @@ static const struct amdgpu_ring_funcs gfx_v10_0_ring_funcs_gfx = { 7 + /* PIPELINE_SYNC */ SOC15_FLUSH_GPU_TLB_NUM_WREG * 5 + SOC15_FLUSH_GPU_TLB_NUM_REG_WAIT * 7 + - 2 + /* VM_FLUSH */ + 4 + /* VM_FLUSH */ 8 + /* FENCE for VM_FLUSH */ 20 + /* GDS switch */ 4 + /* double SWITCH_BUFFER, @@ -9276,7 +9276,6 @@ static const struct amdgpu_ring_funcs gfx_v10_0_ring_funcs_kiq = { 7 + /* gfx_v10_0_ring_emit_pipeline_sync */ SOC15_FLUSH_GPU_TLB_NUM_WREG * 5 + SOC15_FLUSH_GPU_TLB_NUM_REG_WAIT * 7 + - 2 + /* gfx_v10_0_ring_emit_vm_flush */ 8 + 8 + 8, /* gfx_v10_0_ring_emit_fence_kiq x3 for user fence, vm fence */ .emit_ib_size = 7, /* gfx_v10_0_ring_emit_ib_compute */ .emit_ib = gfx_v10_0_ring_emit_ib_compute, diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c index f7325b02a191..f00e05aba46a 100644 --- a/drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c @@ -6192,7 +6192,7 @@ static const struct amdgpu_ring_funcs gfx_v11_0_ring_funcs_gfx = { 7 + /* PIPELINE_SYNC */ SOC15_FLUSH_GPU_TLB_NUM_WREG * 5 + SOC15_FLUSH_GPU_TLB_NUM_REG_WAIT * 7 + - 2 + /* VM_FLUSH */ + 4 + /* VM_FLUSH */ 8 + /* FENCE for VM_FLUSH */ 20 + /* GDS switch */ 5 + /* COND_EXEC */ @@ -6278,7 +6278,6 @@ static const struct amdgpu_ring_funcs gfx_v11_0_ring_funcs_kiq = { 7 + /* gfx_v11_0_ring_emit_pipeline_sync */ SOC15_FLUSH_GPU_TLB_NUM_WREG * 5 + SOC15_FLUSH_GPU_TLB_NUM_REG_WAIT * 7 + - 2 + /* gfx_v11_0_ring_emit_vm_flush 
*/ 8 + 8 + 8, /* gfx_v11_0_ring_emit_fence_kiq x3 for user fence, vm fence */ .emit_ib_size = 7, /* gfx_v11_0_ring_emit_ib_compute */ .emit_ib = gfx_v11_0_ring_emit_ib_compute, diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c index 6f97a6d0e6d0..99dbd2341120 100644 --- a/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c @@ -6981,7 +6981,6 @@ static const struct amdgpu_ring_funcs gfx_v9_0_ring_funcs_compute = { 7 + /* gfx_v9_0_ring_emit_pipeline_sync */ SOC15_FLUSH_GPU_TLB_NUM_WREG * 5 + SOC15_FLUSH_GPU_TLB_NUM_REG_WAIT * 7 + - 2 + /* gfx_v9_0_ring_emit_vm_flush */ 8 + 8 + 8 + /* gfx_v9_0_ring_emit_fence x3 for user fence, vm fence */ 7 + /* gfx_v9_0_emit_mem_sync */ 5 + /* gfx_v9_0_emit_wave_limit for updating mmSPI_WCL_PIPE_PERCENT_GFX register */ @@ -7019,7 +7018,6 @@ static const struct amdgpu_ring_funcs gfx_v9_0_ring_funcs_kiq = { 7 + /* gfx_v9_0_ring_emit_pipeline_sync */ SOC15_FLUSH_GPU_TLB_NUM_WREG * 5 + SOC15_FLUSH_GPU_TLB_NUM_REG_WAIT * 7 + - 2 + /* gfx_v9_0_ring_emit_vm_flush */ 8 + 8 + 8, /* gfx_v9_0_ring_emit_fence_kiq x3 for user fence, vm fence */ .emit_ib_size = 7, /* gfx_v9_0_ring_emit_ib_compute */ .emit_fence = gfx_v9_0_ring_emit_fence_kiq, From 9792b7cc18aaa0c2acae6af5d0acf249bcb1ab0d Mon Sep 17 00:00:00 2001 From: Alex Deucher Date: Sun, 14 Apr 2024 21:20:56 -0400 Subject: [PATCH 216/313] drm/amdgpu/sdma5.2: use legacy HDP flush for SDMA2/3 MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This avoids a potential conflict with firmwares with the newer HDP flush mechanism. 
Reviewed-by: Christian König Signed-off-by: Alex Deucher Cc: stable@vger.kernel.org --- drivers/gpu/drm/amd/amdgpu/sdma_v5_2.c | 24 ++++++++++++++---------- 1 file changed, 14 insertions(+), 10 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/sdma_v5_2.c b/drivers/gpu/drm/amd/amdgpu/sdma_v5_2.c index 42f4bd250def..da01b524b9f2 100644 --- a/drivers/gpu/drm/amd/amdgpu/sdma_v5_2.c +++ b/drivers/gpu/drm/amd/amdgpu/sdma_v5_2.c @@ -280,17 +280,21 @@ static void sdma_v5_2_ring_emit_hdp_flush(struct amdgpu_ring *ring) u32 ref_and_mask = 0; const struct nbio_hdp_flush_reg *nbio_hf_reg = adev->nbio.hdp_flush_reg; - ref_and_mask = nbio_hf_reg->ref_and_mask_sdma0 << ring->me; + if (ring->me > 1) { + amdgpu_asic_flush_hdp(adev, ring); + } else { + ref_and_mask = nbio_hf_reg->ref_and_mask_sdma0 << ring->me; - amdgpu_ring_write(ring, SDMA_PKT_HEADER_OP(SDMA_OP_POLL_REGMEM) | - SDMA_PKT_POLL_REGMEM_HEADER_HDP_FLUSH(1) | - SDMA_PKT_POLL_REGMEM_HEADER_FUNC(3)); /* == */ - amdgpu_ring_write(ring, (adev->nbio.funcs->get_hdp_flush_done_offset(adev)) << 2); - amdgpu_ring_write(ring, (adev->nbio.funcs->get_hdp_flush_req_offset(adev)) << 2); - amdgpu_ring_write(ring, ref_and_mask); /* reference */ - amdgpu_ring_write(ring, ref_and_mask); /* mask */ - amdgpu_ring_write(ring, SDMA_PKT_POLL_REGMEM_DW5_RETRY_COUNT(0xfff) | - SDMA_PKT_POLL_REGMEM_DW5_INTERVAL(10)); /* retry count, poll interval */ + amdgpu_ring_write(ring, SDMA_PKT_HEADER_OP(SDMA_OP_POLL_REGMEM) | + SDMA_PKT_POLL_REGMEM_HEADER_HDP_FLUSH(1) | + SDMA_PKT_POLL_REGMEM_HEADER_FUNC(3)); /* == */ + amdgpu_ring_write(ring, (adev->nbio.funcs->get_hdp_flush_done_offset(adev)) << 2); + amdgpu_ring_write(ring, (adev->nbio.funcs->get_hdp_flush_req_offset(adev)) << 2); + amdgpu_ring_write(ring, ref_and_mask); /* reference */ + amdgpu_ring_write(ring, ref_and_mask); /* mask */ + amdgpu_ring_write(ring, SDMA_PKT_POLL_REGMEM_DW5_RETRY_COUNT(0xfff) | + SDMA_PKT_POLL_REGMEM_DW5_INTERVAL(10)); /* retry count, poll interval */ + } } /** From 
948255282074d9367e01908b3f5dcf8c10fc9c3d Mon Sep 17 00:00:00 2001 From: Jack Xiao Date: Mon, 22 Apr 2024 16:22:54 +0800 Subject: [PATCH 217/313] drm/amdgpu/mes: fix use-after-free issue MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Delete fence fallback timer to fix the random use-after-free issue. v2: move to amdgpu_mes.c Signed-off-by: Jack Xiao Acked-by: Lijo Lazar Acked-by: Christian König Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu_mes.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_mes.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_mes.c index a00cf4756ad0..1569bef030ea 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_mes.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_mes.c @@ -1132,6 +1132,7 @@ void amdgpu_mes_remove_ring(struct amdgpu_device *adev, return; amdgpu_mes_remove_hw_queue(adev, ring->hw_queue_id); + del_timer_sync(&ring->fence_drv.fallback_timer); amdgpu_ring_fini(ring); kfree(ring); } From 697f3342477170bdf8759157bdc19c0b7b3e9d14 Mon Sep 17 00:00:00 2001 From: Baoquan He Date: Wed, 24 Apr 2024 12:36:07 +0800 Subject: [PATCH 218/313] LoongArch: Fix Kconfig item and left code related to CRASH_CORE In commit 85fcde402db191b5 ("kexec: split crashkernel reservation code out from crash_core.c"), crashkernel reservation code is split out from crash_core.c, and add CRASH_RESERVE to control it. And also rename each ARCH's <asm/crash_core.h> to <asm/crash_reserve.h> accordingly. But the relevant part in LoongArch is missed. Do it now.
Fixes: 85fcde402db1 ("kexec: split crashkernel reservation code out from crash_core.c") Signed-off-by: Baoquan He Signed-off-by: Huacai Chen --- arch/loongarch/Kconfig | 2 +- arch/loongarch/include/asm/{crash_core.h => crash_reserve.h} | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) rename arch/loongarch/include/asm/{crash_core.h => crash_reserve.h} (75%) diff --git a/arch/loongarch/Kconfig b/arch/loongarch/Kconfig index a5f300ec6f28..54ad04dacdee 100644 --- a/arch/loongarch/Kconfig +++ b/arch/loongarch/Kconfig @@ -595,7 +595,7 @@ config ARCH_SELECTS_CRASH_DUMP select RELOCATABLE config ARCH_HAS_GENERIC_CRASHKERNEL_RESERVATION - def_bool CRASH_CORE + def_bool CRASH_RESERVE config RELOCATABLE bool "Relocatable kernel" diff --git a/arch/loongarch/include/asm/crash_core.h b/arch/loongarch/include/asm/crash_reserve.h similarity index 75% rename from arch/loongarch/include/asm/crash_core.h rename to arch/loongarch/include/asm/crash_reserve.h index 218bdbfa527b..a1d9b84b1c7d 100644 --- a/arch/loongarch/include/asm/crash_core.h +++ b/arch/loongarch/include/asm/crash_reserve.h @@ -1,6 +1,6 @@ /* SPDX-License-Identifier: GPL-2.0-only */ -#ifndef _LOONGARCH_CRASH_CORE_H -#define _LOONGARCH_CRASH_CORE_H +#ifndef _LOONGARCH_CRASH_RESERVE_H +#define _LOONGARCH_CRASH_RESERVE_H #define CRASH_ALIGN SZ_2M From 7ab22b5c2af54e233f3d05d7d601025947e4ff05 Mon Sep 17 00:00:00 2001 From: David Hildenbrand Date: Wed, 24 Apr 2024 12:36:07 +0800 Subject: [PATCH 219/313] LoongArch: Fix a build error due to __tlb_remove_tlb_entry() With LLVM=1 and W=1 we get: ./include/asm-generic/tlb.h:629:10: error: parameter 'ptep' set but not used [-Werror,-Wunused-but-set-parameter] We fixed a similar issue via Arnd in the introducing commit, missed the LoongArch variant. Turns out, there is no need for LoongArch to have a custom variant, so let's just drop it and rely on the asm-generic one. 
Fixes: 4d5bf0b6183f ("mm/mmu_gather: add tlb_remove_tlb_entries()") Closes: https://lkml.kernel.org/r/CANiq72mQh3O9S4umbvrKBgMMorty48UMwS01U22FR0mRyd3cyQ@mail.gmail.com Reported-by: Miguel Ojeda Reviewed-by: Miguel Ojeda Tested-by: Miguel Ojeda Tested-by: Arnd Bergmann Signed-off-by: David Hildenbrand Signed-off-by: Huacai Chen --- arch/loongarch/include/asm/tlb.h | 2 -- 1 file changed, 2 deletions(-) diff --git a/arch/loongarch/include/asm/tlb.h b/arch/loongarch/include/asm/tlb.h index da7a3b5b9374..e071f5e9e858 100644 --- a/arch/loongarch/include/asm/tlb.h +++ b/arch/loongarch/include/asm/tlb.h @@ -132,8 +132,6 @@ static __always_inline void invtlb_all(u32 op, u32 info, u64 addr) ); } -#define __tlb_remove_tlb_entry(tlb, ptep, address) do { } while (0) - static void tlb_flush(struct mmu_gather *tlb); #define tlb_flush tlb_flush From efb44ff64c95340b06331fc48634b99efc9dd77c Mon Sep 17 00:00:00 2001 From: Jiantao Shan Date: Wed, 24 Apr 2024 12:36:07 +0800 Subject: [PATCH 220/313] LoongArch: Fix access error when read fault on a write-only VMA As with most architectures, allow handling of read faults in VMAs that have VM_WRITE but without VM_READ (WRITE implies READ). Otherwise, reading before writing a write-only memory will error while reading after writing everything is fine. BTW, move the VM_EXEC judgement before VM_READ/VM_WRITE to make logic a little clearer. 
Cc: stable@vger.kernel.org Fixes: 09cfefb7fa70c3af01 ("LoongArch: Add memory management") Signed-off-by: Jiantao Shan Signed-off-by: Huacai Chen --- arch/loongarch/mm/fault.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/arch/loongarch/mm/fault.c b/arch/loongarch/mm/fault.c index 1fc2f6813ea0..97b40defde06 100644 --- a/arch/loongarch/mm/fault.c +++ b/arch/loongarch/mm/fault.c @@ -202,10 +202,10 @@ static void __kprobes __do_page_fault(struct pt_regs *regs, if (!(vma->vm_flags & VM_WRITE)) goto bad_area; } else { - if (!(vma->vm_flags & VM_READ) && address != exception_era(regs)) - goto bad_area; if (!(vma->vm_flags & VM_EXEC) && address == exception_era(regs)) goto bad_area; + if (!(vma->vm_flags & (VM_READ | VM_WRITE)) && address != exception_era(regs)) + goto bad_area; } /* From d3119bc985fb645ad3b2a9cf9952c1d56d9daaa3 Mon Sep 17 00:00:00 2001 From: Huacai Chen Date: Wed, 24 Apr 2024 12:36:07 +0800 Subject: [PATCH 221/313] LoongArch: Fix callchain parse error with kernel tracepoint events In order to fix perf's callchain parse error for LoongArch, we implement perf_arch_fetch_caller_regs() which fills several necessary registers used for callchain unwinding, including sp, fp, and era. This is similar to the following commits. commit b3eac0265bf6: ("arm: perf: Fix callchain parse error with kernel tracepoint events") commit 5b09a094f2fb: ("arm64: perf: Fix callchain parse error with kernel tracepoint events") commit 9a7e8ec0d4cc: ("riscv: perf: Fix callchain parse error with kernel tracepoint events") Test with commands: perf record -e sched:sched_switch -g --call-graph dwarf perf report Without this patch: Children Self Command Shared Object Symbol ........ ........ ............. ................. .................... 
43.41% 43.41% swapper [unknown] [k] 0000000000000000 10.94% 10.94% loong-container [unknown] [k] 0000000000000000 | |--5.98%--0x12006ba38 | |--2.56%--0x12006bb84 | --2.40%--0x12006b6b8 With this patch, callchain can be parsed correctly: Children Self Command Shared Object Symbol ........ ........ ............. ................. .................... 47.57% 47.57% swapper [kernel.vmlinux] [k] __schedule | ---__schedule 26.76% 26.76% loong-container [kernel.vmlinux] [k] __schedule | |--13.78%--0x12006ba38 | | | |--9.19%--__schedule | | | --4.59%--handle_syscall | do_syscall | sys_futex | do_futex | futex_wait | futex_wait_queue_me | hrtimer_start_range_ns | __schedule | |--8.38%--0x12006bb84 | handle_syscall | do_syscall | sys_epoll_pwait | do_epoll_wait | schedule_hrtimeout_range_clock | hrtimer_start_range_ns | __schedule | --4.59%--0x12006b6b8 handle_syscall do_syscall sys_nanosleep hrtimer_nanosleep do_nanosleep hrtimer_start_range_ns __schedule Cc: stable@vger.kernel.org Fixes: b37042b2bb7cd751f0 ("LoongArch: Add perf events support") Reported-by: Youling Tang Suggested-by: Youling Tang Signed-off-by: Huacai Chen --- arch/loongarch/include/asm/perf_event.h | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/arch/loongarch/include/asm/perf_event.h b/arch/loongarch/include/asm/perf_event.h index 2a35a0bc2aaa..52b638059e40 100644 --- a/arch/loongarch/include/asm/perf_event.h +++ b/arch/loongarch/include/asm/perf_event.h @@ -7,6 +7,14 @@ #ifndef __LOONGARCH_PERF_EVENT_H__ #define __LOONGARCH_PERF_EVENT_H__ +#include + #define perf_arch_bpf_user_pt_regs(regs) (struct user_pt_regs *)regs +#define perf_arch_fetch_caller_regs(regs, __ip) { \ + (regs)->csr_era = (__ip); \ + (regs)->regs[3] = current_stack_pointer; \ + (regs)->regs[22] = (unsigned long) __builtin_frame_address(0); \ +} + #endif /* __LOONGARCH_PERF_EVENT_H__ */ From 9a1f1d04f63c59550a5364858b46eeffdf03e8d6 Mon Sep 17 00:00:00 2001 From: "Gustavo A. R. 
Silva" Date: Tue, 23 Apr 2024 20:41:22 -0600 Subject: [PATCH 222/313] smb: client: Fix struct_group() usage in __packed structs Use struct_group_attr() in __packed structs, instead of struct_group(). Below you can see the pahole output before/after changes: pahole -C smb2_file_network_open_info fs/smb/client/smb2ops.o struct smb2_file_network_open_info { union { struct { __le64 CreationTime; /* 0 8 */ __le64 LastAccessTime; /* 8 8 */ __le64 LastWriteTime; /* 16 8 */ __le64 ChangeTime; /* 24 8 */ __le64 AllocationSize; /* 32 8 */ __le64 EndOfFile; /* 40 8 */ __le32 Attributes; /* 48 4 */ }; /* 0 56 */ struct { __le64 CreationTime; /* 0 8 */ __le64 LastAccessTime; /* 8 8 */ __le64 LastWriteTime; /* 16 8 */ __le64 ChangeTime; /* 24 8 */ __le64 AllocationSize; /* 32 8 */ __le64 EndOfFile; /* 40 8 */ __le32 Attributes; /* 48 4 */ } network_open_info; /* 0 56 */ }; /* 0 56 */ __le32 Reserved; /* 56 4 */ /* size: 60, cachelines: 1, members: 2 */ /* last cacheline: 60 bytes */ } __attribute__((__packed__)); pahole -C smb2_file_network_open_info fs/smb/client/smb2ops.o struct smb2_file_network_open_info { union { struct { __le64 CreationTime; /* 0 8 */ __le64 LastAccessTime; /* 8 8 */ __le64 LastWriteTime; /* 16 8 */ __le64 ChangeTime; /* 24 8 */ __le64 AllocationSize; /* 32 8 */ __le64 EndOfFile; /* 40 8 */ __le32 Attributes; /* 48 4 */ } __attribute__((__packed__)); /* 0 52 */ struct { __le64 CreationTime; /* 0 8 */ __le64 LastAccessTime; /* 8 8 */ __le64 LastWriteTime; /* 16 8 */ __le64 ChangeTime; /* 24 8 */ __le64 AllocationSize; /* 32 8 */ __le64 EndOfFile; /* 40 8 */ __le32 Attributes; /* 48 4 */ } __attribute__((__packed__)) network_open_info; /* 0 52 */ }; /* 0 52 */ __le32 Reserved; /* 52 4 */ /* size: 56, cachelines: 1, members: 2 */ /* last cacheline: 56 bytes */ }; pahole -C smb_com_open_rsp fs/smb/client/cifssmb.o struct smb_com_open_rsp { ... 
union { struct { __le64 CreationTime; /* 48 8 */ __le64 LastAccessTime; /* 56 8 */ /* --- cacheline 1 boundary (64 bytes) --- */ __le64 LastWriteTime; /* 64 8 */ __le64 ChangeTime; /* 72 8 */ __le32 FileAttributes; /* 80 4 */ }; /* 48 40 */ struct { __le64 CreationTime; /* 48 8 */ __le64 LastAccessTime; /* 56 8 */ /* --- cacheline 1 boundary (64 bytes) --- */ __le64 LastWriteTime; /* 64 8 */ __le64 ChangeTime; /* 72 8 */ __le32 FileAttributes; /* 80 4 */ } common_attributes; /* 48 40 */ }; /* 48 40 */ ... /* size: 111, cachelines: 2, members: 14 */ /* last cacheline: 47 bytes */ } __attribute__((__packed__)); pahole -C smb_com_open_rsp fs/smb/client/cifssmb.o struct smb_com_open_rsp { ... union { struct { __le64 CreationTime; /* 48 8 */ __le64 LastAccessTime; /* 56 8 */ /* --- cacheline 1 boundary (64 bytes) --- */ __le64 LastWriteTime; /* 64 8 */ __le64 ChangeTime; /* 72 8 */ __le32 FileAttributes; /* 80 4 */ } __attribute__((__packed__)); /* 48 36 */ struct { __le64 CreationTime; /* 48 8 */ __le64 LastAccessTime; /* 56 8 */ /* --- cacheline 1 boundary (64 bytes) --- */ __le64 LastWriteTime; /* 64 8 */ __le64 ChangeTime; /* 72 8 */ __le32 FileAttributes; /* 80 4 */ } __attribute__((__packed__)) common_attributes; /* 48 36 */ }; /* 48 36 */ ... /* size: 107, cachelines: 2, members: 14 */ /* last cacheline: 43 bytes */ } __attribute__((__packed__)); pahole -C FILE_ALL_INFO fs/smb/client/cifssmb.o typedef struct { union { struct { __le64 CreationTime; /* 0 8 */ __le64 LastAccessTime; /* 8 8 */ __le64 LastWriteTime; /* 16 8 */ __le64 ChangeTime; /* 24 8 */ __le32 Attributes; /* 32 4 */ }; /* 0 40 */ struct { __le64 CreationTime; /* 0 8 */ __le64 LastAccessTime; /* 8 8 */ __le64 LastWriteTime; /* 16 8 */ __le64 ChangeTime; /* 24 8 */ __le32 Attributes; /* 32 4 */ } common_attributes; /* 0 40 */ }; /* 0 40 */ ... 
/* size: 113, cachelines: 2, members: 17 */ /* last cacheline: 49 bytes */ } __attribute__((__packed__)) FILE_ALL_INFO; pahole -C FILE_ALL_INFO fs/smb/client/cifssmb.o typedef struct { union { struct { __le64 CreationTime; /* 0 8 */ __le64 LastAccessTime; /* 8 8 */ __le64 LastWriteTime; /* 16 8 */ __le64 ChangeTime; /* 24 8 */ __le32 Attributes; /* 32 4 */ } __attribute__((__packed__)); /* 0 36 */ struct { __le64 CreationTime; /* 0 8 */ __le64 LastAccessTime; /* 8 8 */ __le64 LastWriteTime; /* 16 8 */ __le64 ChangeTime; /* 24 8 */ __le32 Attributes; /* 32 4 */ } __attribute__((__packed__)) common_attributes; /* 0 36 */ }; /* 0 36 */ ... /* size: 109, cachelines: 2, members: 17 */ /* last cacheline: 45 bytes */ } __attribute__((__packed__)) FILE_ALL_INFO; Fixes: 0015eb6e1238 ("smb: client, common: fix fortify warnings") Cc: stable@vger.kernel.org Reviewed-by: Namjae Jeon Signed-off-by: Gustavo A. R. Silva Signed-off-by: Steve French --- fs/smb/client/cifspdu.h | 4 ++-- fs/smb/client/smb2pdu.h | 2 +- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/fs/smb/client/cifspdu.h b/fs/smb/client/cifspdu.h index c0513fbb8a59..c46d418c1c0c 100644 --- a/fs/smb/client/cifspdu.h +++ b/fs/smb/client/cifspdu.h @@ -882,7 +882,7 @@ typedef struct smb_com_open_rsp { __u8 OplockLevel; __u16 Fid; __le32 CreateAction; - struct_group(common_attributes, + struct_group_attr(common_attributes, __packed, __le64 CreationTime; __le64 LastAccessTime; __le64 LastWriteTime; @@ -2266,7 +2266,7 @@ typedef struct { /* QueryFileInfo/QueryPathinfo (also for SetPath/SetFile) data buffer formats */ /******************************************************************************/ typedef struct { /* data block encoding of response to level 263 QPathInfo */ - struct_group(common_attributes, + struct_group_attr(common_attributes, __packed, __le64 CreationTime; __le64 LastAccessTime; __le64 LastWriteTime; diff --git a/fs/smb/client/smb2pdu.h b/fs/smb/client/smb2pdu.h index 
c72a3b2886b7..2fccf0d4f53d 100644 --- a/fs/smb/client/smb2pdu.h +++ b/fs/smb/client/smb2pdu.h @@ -320,7 +320,7 @@ struct smb2_file_reparse_point_info { } __packed; struct smb2_file_network_open_info { - struct_group(network_open_info, + struct_group_attr(network_open_info, __packed, __le64 CreationTime; __le64 LastAccessTime; __le64 LastWriteTime; From f2a904107ee2b647bb7794a1a82b67740d7c8a64 Mon Sep 17 00:00:00 2001 From: Hyunwoo Kim Date: Mon, 22 Apr 2024 05:39:30 -0400 Subject: [PATCH 223/313] net: gtp: Fix Use-After-Free in gtp_dellink Since call_rcu, which is called in the hlist_for_each_entry_rcu traversal of gtp_dellink, is not part of the RCU read critical section, it is possible that the RCU grace period will pass during the traversal and the key will be free. To prevent this, it should be changed to hlist_for_each_entry_safe. Fixes: 94dc550a5062 ("gtp: fix an use-after-free in ipv4_pdp_find()") Signed-off-by: Hyunwoo Kim Reviewed-by: Eric Dumazet Signed-off-by: David S. Miller --- drivers/net/gtp.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/net/gtp.c b/drivers/net/gtp.c index ba4704c2c640..e62d6cbdf9bc 100644 --- a/drivers/net/gtp.c +++ b/drivers/net/gtp.c @@ -1098,11 +1098,12 @@ static int gtp_newlink(struct net *src_net, struct net_device *dev, static void gtp_dellink(struct net_device *dev, struct list_head *head) { struct gtp_dev *gtp = netdev_priv(dev); + struct hlist_node *next; struct pdp_ctx *pctx; int i; for (i = 0; i < gtp->hash_size; i++) - hlist_for_each_entry_rcu(pctx, &gtp->tid_hash[i], hlist_tid) + hlist_for_each_entry_safe(pctx, next, &gtp->tid_hash[i], hlist_tid) pdp_context_delete(pctx); list_del_rcu(&gtp->list); From 5b5f724b05c550e10693a53a81cadca901aefd16 Mon Sep 17 00:00:00 2001 From: Daniel Golle Date: Sun, 21 Apr 2024 01:08:31 +0100 Subject: [PATCH 224/313] net: phy: mediatek-ge-soc: follow netdev LED trigger semantics Only blink if the link is up on a LED which is programmed to also indicate link-status.
Otherwise, if both LEDs are in use to indicate different speeds, the resulting blinking being inverted on LEDs which aren't switched on at a specific speed is quite counter-intuitive. Also make sure that state left behind by reset or the bootloader is recognized correctly including the half-duplex and full-duplex bits as well as the (unsupported by Linux netdev trigger semantics) link-down bit. Fixes: c66937b0f8db ("net: phy: mediatek-ge-soc: support PHY LEDs") Signed-off-by: Daniel Golle Signed-off-by: David S. Miller --- drivers/net/phy/mediatek-ge-soc.c | 43 +++++++++++++++++++------------ 1 file changed, 26 insertions(+), 17 deletions(-) diff --git a/drivers/net/phy/mediatek-ge-soc.c b/drivers/net/phy/mediatek-ge-soc.c index 0f3a1538a8b8..f4f9412d0cd7 100644 --- a/drivers/net/phy/mediatek-ge-soc.c +++ b/drivers/net/phy/mediatek-ge-soc.c @@ -216,6 +216,9 @@ #define MTK_PHY_LED_ON_LINK1000 BIT(0) #define MTK_PHY_LED_ON_LINK100 BIT(1) #define MTK_PHY_LED_ON_LINK10 BIT(2) +#define MTK_PHY_LED_ON_LINK (MTK_PHY_LED_ON_LINK10 |\ + MTK_PHY_LED_ON_LINK100 |\ + MTK_PHY_LED_ON_LINK1000) #define MTK_PHY_LED_ON_LINKDOWN BIT(3) #define MTK_PHY_LED_ON_FDX BIT(4) /* Full duplex */ #define MTK_PHY_LED_ON_HDX BIT(5) /* Half duplex */ @@ -231,6 +234,12 @@ #define MTK_PHY_LED_BLINK_100RX BIT(3) #define MTK_PHY_LED_BLINK_10TX BIT(4) #define MTK_PHY_LED_BLINK_10RX BIT(5) +#define MTK_PHY_LED_BLINK_RX (MTK_PHY_LED_BLINK_10RX |\ + MTK_PHY_LED_BLINK_100RX |\ + MTK_PHY_LED_BLINK_1000RX) +#define MTK_PHY_LED_BLINK_TX (MTK_PHY_LED_BLINK_10TX |\ + MTK_PHY_LED_BLINK_100TX |\ + MTK_PHY_LED_BLINK_1000TX) #define MTK_PHY_LED_BLINK_COLLISION BIT(6) #define MTK_PHY_LED_BLINK_RX_CRC_ERR BIT(7) #define MTK_PHY_LED_BLINK_RX_IDLE_ERR BIT(8) @@ -1247,11 +1256,9 @@ static int mt798x_phy_led_hw_control_get(struct phy_device *phydev, u8 index, if (blink < 0) return -EIO; - if ((on & (MTK_PHY_LED_ON_LINK1000 | MTK_PHY_LED_ON_LINK100 | - MTK_PHY_LED_ON_LINK10)) || - (blink & (MTK_PHY_LED_BLINK_1000RX | 
MTK_PHY_LED_BLINK_100RX | - MTK_PHY_LED_BLINK_10RX | MTK_PHY_LED_BLINK_1000TX | - MTK_PHY_LED_BLINK_100TX | MTK_PHY_LED_BLINK_10TX))) + if ((on & (MTK_PHY_LED_ON_LINK | MTK_PHY_LED_ON_FDX | MTK_PHY_LED_ON_HDX | + MTK_PHY_LED_ON_LINKDOWN)) || + (blink & (MTK_PHY_LED_BLINK_RX | MTK_PHY_LED_BLINK_TX))) set_bit(bit_netdev, &priv->led_state); else clear_bit(bit_netdev, &priv->led_state); @@ -1269,7 +1276,7 @@ static int mt798x_phy_led_hw_control_get(struct phy_device *phydev, u8 index, if (!rules) return 0; - if (on & (MTK_PHY_LED_ON_LINK1000 | MTK_PHY_LED_ON_LINK100 | MTK_PHY_LED_ON_LINK10)) + if (on & MTK_PHY_LED_ON_LINK) *rules |= BIT(TRIGGER_NETDEV_LINK); if (on & MTK_PHY_LED_ON_LINK10) @@ -1287,10 +1294,10 @@ static int mt798x_phy_led_hw_control_get(struct phy_device *phydev, u8 index, if (on & MTK_PHY_LED_ON_HDX) *rules |= BIT(TRIGGER_NETDEV_HALF_DUPLEX); - if (blink & (MTK_PHY_LED_BLINK_1000RX | MTK_PHY_LED_BLINK_100RX | MTK_PHY_LED_BLINK_10RX)) + if (blink & MTK_PHY_LED_BLINK_RX) *rules |= BIT(TRIGGER_NETDEV_RX); - if (blink & (MTK_PHY_LED_BLINK_1000TX | MTK_PHY_LED_BLINK_100TX | MTK_PHY_LED_BLINK_10TX)) + if (blink & MTK_PHY_LED_BLINK_TX) *rules |= BIT(TRIGGER_NETDEV_TX); return 0; @@ -1323,15 +1330,19 @@ static int mt798x_phy_led_hw_control_set(struct phy_device *phydev, u8 index, on |= MTK_PHY_LED_ON_LINK1000; if (rules & BIT(TRIGGER_NETDEV_RX)) { - blink |= MTK_PHY_LED_BLINK_10RX | - MTK_PHY_LED_BLINK_100RX | - MTK_PHY_LED_BLINK_1000RX; + blink |= (on & MTK_PHY_LED_ON_LINK) ? + (((on & MTK_PHY_LED_ON_LINK10) ? MTK_PHY_LED_BLINK_10RX : 0) | + ((on & MTK_PHY_LED_ON_LINK100) ? MTK_PHY_LED_BLINK_100RX : 0) | + ((on & MTK_PHY_LED_ON_LINK1000) ? MTK_PHY_LED_BLINK_1000RX : 0)) : + MTK_PHY_LED_BLINK_RX; } if (rules & BIT(TRIGGER_NETDEV_TX)) { - blink |= MTK_PHY_LED_BLINK_10TX | - MTK_PHY_LED_BLINK_100TX | - MTK_PHY_LED_BLINK_1000TX; + blink |= (on & MTK_PHY_LED_ON_LINK) ? + (((on & MTK_PHY_LED_ON_LINK10) ? 
MTK_PHY_LED_BLINK_10TX : 0) | + ((on & MTK_PHY_LED_ON_LINK100) ? MTK_PHY_LED_BLINK_100TX : 0) | + ((on & MTK_PHY_LED_ON_LINK1000) ? MTK_PHY_LED_BLINK_1000TX : 0)) : + MTK_PHY_LED_BLINK_TX; } if (blink || on) @@ -1344,9 +1355,7 @@ static int mt798x_phy_led_hw_control_set(struct phy_device *phydev, u8 index, MTK_PHY_LED0_ON_CTRL, MTK_PHY_LED_ON_FDX | MTK_PHY_LED_ON_HDX | - MTK_PHY_LED_ON_LINK10 | - MTK_PHY_LED_ON_LINK100 | - MTK_PHY_LED_ON_LINK1000, + MTK_PHY_LED_ON_LINK, on); if (ret) From 2718a7fdf292b2dcb49c856fa8a6a955ebbbc45f Mon Sep 17 00:00:00 2001 From: Wenkuan Wang Date: Wed, 10 Apr 2024 11:53:08 +0800 Subject: [PATCH 225/313] x86/CPU/AMD: Add models 0x10-0x1f to the Zen5 range Add some more Zen5 models. Fixes: 3e4147f33f8b ("x86/CPU/AMD: Add X86_FEATURE_ZEN5") Signed-off-by: Wenkuan Wang Signed-off-by: Borislav Petkov (AMD) Link: https://lore.kernel.org/r/20240423144111.1362-1-bp@kernel.org --- arch/x86/kernel/cpu/amd.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/arch/x86/kernel/cpu/amd.c b/arch/x86/kernel/cpu/amd.c index cb9eece55904..307302af0aee 100644 --- a/arch/x86/kernel/cpu/amd.c +++ b/arch/x86/kernel/cpu/amd.c @@ -459,8 +459,7 @@ static void bsp_init_amd(struct cpuinfo_x86 *c) case 0x1a: switch (c->x86_model) { - case 0x00 ... 0x0f: - case 0x20 ... 0x2f: + case 0x00 ... 0x2f: case 0x40 ... 0x4f: case 0x70 ... 0x7f: setup_force_cpu_cap(X86_FEATURE_ZEN5); From b53c6bd5d271d023857174b8fd3e32f98ae51372 Mon Sep 17 00:00:00 2001 From: David Kaplan Date: Sun, 21 Apr 2024 21:17:28 +0200 Subject: [PATCH 226/313] x86/cpu: Fix check for RDPKRU in __show_regs() cpu_feature_enabled(X86_FEATURE_OSPKE) does not necessarily reflect whether CR4.PKE is set on the CPU. In particular, they may differ on non-BSP CPUs before setup_pku() is executed. In this scenario, RDPKRU will #UD causing the system to hang. Fix by checking CR4 for PKE enablement which is always correct for the current CPU. 
The scenario happens by inserting a WARN* before setup_pku() in identify_cpu() or some other diagnostic which would lead to calling __show_regs(). [ bp: Massage commit message. ] Signed-off-by: David Kaplan Signed-off-by: Borislav Petkov (AMD) Link: https://lore.kernel.org/r/20240421191728.32239-1-bp@kernel.org --- arch/x86/kernel/process_64.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/x86/kernel/process_64.c b/arch/x86/kernel/process_64.c index 7062b84dd467..6d3d20e3e43a 100644 --- a/arch/x86/kernel/process_64.c +++ b/arch/x86/kernel/process_64.c @@ -139,7 +139,7 @@ void __show_regs(struct pt_regs *regs, enum show_regs_mode mode, log_lvl, d3, d6, d7); } - if (cpu_feature_enabled(X86_FEATURE_OSPKE)) + if (cr4 & X86_CR4_PKE) printk("%sPKRU: %08x\n", log_lvl, read_pkru()); } From 78d9161d2bcd442d93d917339297ffa057dbee8c Mon Sep 17 00:00:00 2001 From: Nam Cao Date: Tue, 23 Apr 2024 13:50:53 +0200 Subject: [PATCH 227/313] fbdev: fix incorrect address computation in deferred IO With deferred IO enabled, a page fault happens when data is written to the framebuffer device. Then the driver determines which page is being updated by calculating the offset of the written virtual address within the virtual memory area, and uses this offset to get the updated page within the internal buffer. This page is later copied to hardware (thus the name "deferred IO"). This offset calculation is only correct if the virtual memory area is mapped to the beginning of the internal buffer. Otherwise this is wrong. For example, if users do: mmap(ptr, 4096, PROT_WRITE, MAP_FIXED | MAP_SHARED, fd, 0xff000); Then the virtual memory area will be mapped at offset 0xff000 within the internal buffer. This offset 0xff000 is not accounted for, and the wrong page is updated. Correct the calculation by using vmf->pgoff instead. With this change, the variable "offset" will no longer hold the exact offset value, but it is rounded down to multiples of PAGE_SIZE.
But this is still correct, because this variable is only used to calculate the page offset. Reported-by: Harshit Mogalapalli Closes: https://lore.kernel.org/linux-fbdev/271372d6-e665-4e7f-b088-dee5f4ab341a@oracle.com Fixes: 56c134f7f1b5 ("fbdev: Track deferred-I/O pages in pageref struct") Cc: Signed-off-by: Nam Cao Reviewed-by: Thomas Zimmermann Tested-by: Harshit Mogalapalli Signed-off-by: Thomas Zimmermann Link: https://patchwork.freedesktop.org/patch/msgid/20240423115053.4490-1-namcao@linutronix.de --- drivers/video/fbdev/core/fb_defio.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/video/fbdev/core/fb_defio.c b/drivers/video/fbdev/core/fb_defio.c index dae96c9f61cf..806ecd32219b 100644 --- a/drivers/video/fbdev/core/fb_defio.c +++ b/drivers/video/fbdev/core/fb_defio.c @@ -196,7 +196,7 @@ static vm_fault_t fb_deferred_io_track_page(struct fb_info *info, unsigned long */ static vm_fault_t fb_deferred_io_page_mkwrite(struct fb_info *info, struct vm_fault *vmf) { - unsigned long offset = vmf->address - vmf->vma->vm_start; + unsigned long offset = vmf->pgoff << PAGE_SHIFT; struct page *page = vmf->page; file_update_time(vmf->vma->vm_file); From d806f474a9a7993648a2c70642ee129316d8deff Mon Sep 17 00:00:00 2001 From: Prathamesh Shete Date: Wed, 24 Apr 2024 15:25:14 +0530 Subject: [PATCH 228/313] gpio: tegra186: Fix tegra186_gpio_is_accessible() check The controller has several register bits describing access control information for a given GPIO pin. When SCR_SEC_[R|W]EN is unset, it means we have full read/write access to all the registers for given GPIO pin. When SCR_SEC[R|W]EN is set, it means we need to further check the accompanying SCR_SEC_G1[R|W] bit to determine read/write access to all the registers for given GPIO pin. 
This check was previously declaring that a GPIO pin was accessible only if either of the following conditions were met: - SCR_SEC_REN + SCR_SEC_WEN both unset or - SCR_SEC_REN + SCR_SEC_WEN both set and SCR_SEC_G1R + SCR_SEC_G1W both set Update the check to properly handle cases where only one of SCR_SEC_REN or SCR_SEC_WEN is set. Fixes: b2b56a163230 ("gpio: tegra186: Check GPIO pin permission before access.") Signed-off-by: Prathamesh Shete Acked-by: Thierry Reding Link: https://lore.kernel.org/r/20240424095514.24397-1-pshete@nvidia.com Signed-off-by: Bartosz Golaszewski --- drivers/gpio/gpio-tegra186.c | 20 +++++++++++--------- 1 file changed, 11 insertions(+), 9 deletions(-) diff --git a/drivers/gpio/gpio-tegra186.c b/drivers/gpio/gpio-tegra186.c index d87dd06db40d..9130c691a2dd 100644 --- a/drivers/gpio/gpio-tegra186.c +++ b/drivers/gpio/gpio-tegra186.c @@ -36,12 +36,6 @@ #define TEGRA186_GPIO_SCR_SEC_REN BIT(27) #define TEGRA186_GPIO_SCR_SEC_G1W BIT(9) #define TEGRA186_GPIO_SCR_SEC_G1R BIT(1) -#define TEGRA186_GPIO_FULL_ACCESS (TEGRA186_GPIO_SCR_SEC_WEN | \ - TEGRA186_GPIO_SCR_SEC_REN | \ - TEGRA186_GPIO_SCR_SEC_G1R | \ - TEGRA186_GPIO_SCR_SEC_G1W) -#define TEGRA186_GPIO_SCR_SEC_ENABLE (TEGRA186_GPIO_SCR_SEC_WEN | \ - TEGRA186_GPIO_SCR_SEC_REN) /* control registers */ #define TEGRA186_GPIO_ENABLE_CONFIG 0x00 @@ -177,10 +171,18 @@ static inline bool tegra186_gpio_is_accessible(struct tegra_gpio *gpio, unsigned value = __raw_readl(secure + TEGRA186_GPIO_SCR); - if ((value & TEGRA186_GPIO_SCR_SEC_ENABLE) == 0) - return true; + /* + * When SCR_SEC_[R|W]EN is unset, then we have full read/write access to all the + * registers for given GPIO pin. + * When SCR_SEC[R|W]EN is set, then there is need to further check the accompanying + * SCR_SEC_G1[R|W] bit to determine read/write access to all the registers for given + * GPIO pin.
+ */ - if ((value & TEGRA186_GPIO_FULL_ACCESS) == TEGRA186_GPIO_FULL_ACCESS) + if (((value & TEGRA186_GPIO_SCR_SEC_REN) == 0 || + ((value & TEGRA186_GPIO_SCR_SEC_REN) && (value & TEGRA186_GPIO_SCR_SEC_G1R))) && + ((value & TEGRA186_GPIO_SCR_SEC_WEN) == 0 || + ((value & TEGRA186_GPIO_SCR_SEC_WEN) && (value & TEGRA186_GPIO_SCR_SEC_G1W)))) return true; return false; From a0a8d15a798be4b8f20aca2ba91bf6b688c6a640 Mon Sep 17 00:00:00 2001 From: "Kirill A. Shutemov" Date: Wed, 24 Apr 2024 11:20:35 +0300 Subject: [PATCH 229/313] x86/tdx: Preserve shared bit on mprotect() The TDX guest platform takes one bit from the physical address to indicate if the page is shared (accessible by VMM). This bit is not part of the physical_mask and is not preserved during mprotect(). As a result, the 'shared' bit is lost during mprotect() on shared mappings. _COMMON_PAGE_CHG_MASK specifies which PTE bits need to be preserved during modification. AMD includes 'sme_me_mask' in the define to preserve the 'encrypt' bit. To cover both Intel and AMD cases, include 'cc_mask' in _COMMON_PAGE_CHG_MASK instead of 'sme_me_mask'. Reported-and-tested-by: Chris Oo Fixes: 41394e33f3a0 ("x86/tdx: Extend the confidential computing API to support TDX guests") Signed-off-by: Kirill A. 
Shutemov Signed-off-by: Dave Hansen Reviewed-by: Rick Edgecombe Reviewed-by: Kuppuswamy Sathyanarayanan Reviewed-by: Tom Lendacky Cc: stable@vger.kernel.org Link: https://lore.kernel.org/all/20240424082035.4092071-1-kirill.shutemov%40linux.intel.com --- arch/x86/include/asm/coco.h | 1 + arch/x86/include/asm/pgtable_types.h | 3 ++- 2 files changed, 3 insertions(+), 1 deletion(-) diff --git a/arch/x86/include/asm/coco.h b/arch/x86/include/asm/coco.h index c086699b0d0c..aa6c8f8ca958 100644 --- a/arch/x86/include/asm/coco.h +++ b/arch/x86/include/asm/coco.h @@ -25,6 +25,7 @@ u64 cc_mkdec(u64 val); void cc_random_init(void); #else #define cc_vendor (CC_VENDOR_NONE) +static const u64 cc_mask = 0; static inline u64 cc_mkenc(u64 val) { diff --git a/arch/x86/include/asm/pgtable_types.h b/arch/x86/include/asm/pgtable_types.h index 0b748ee16b3d..9abb8cc4cd47 100644 --- a/arch/x86/include/asm/pgtable_types.h +++ b/arch/x86/include/asm/pgtable_types.h @@ -148,7 +148,7 @@ #define _COMMON_PAGE_CHG_MASK (PTE_PFN_MASK | _PAGE_PCD | _PAGE_PWT | \ _PAGE_SPECIAL | _PAGE_ACCESSED | \ _PAGE_DIRTY_BITS | _PAGE_SOFT_DIRTY | \ - _PAGE_DEVMAP | _PAGE_ENC | _PAGE_UFFD_WP) + _PAGE_DEVMAP | _PAGE_CC | _PAGE_UFFD_WP) #define _PAGE_CHG_MASK (_COMMON_PAGE_CHG_MASK | _PAGE_PAT) #define _HPAGE_CHG_MASK (_COMMON_PAGE_CHG_MASK | _PAGE_PSE | _PAGE_PAT_LARGE) @@ -173,6 +173,7 @@ enum page_cache_mode { }; #endif +#define _PAGE_CC (_AT(pteval_t, cc_mask)) #define _PAGE_ENC (_AT(pteval_t, sme_me_mask)) #define _PAGE_CACHE_MASK (_PAGE_PWT | _PAGE_PCD | _PAGE_PAT) From d6dab9017b7cf155e73ba5c7f498de1beb5f8e24 Mon Sep 17 00:00:00 2001 From: Himal Prasad Ghimiray Date: Fri, 12 Apr 2024 23:42:06 +0530 Subject: [PATCH 230/313] drm/xe: Remove sysfs only once on action add failure The drmm_add_action_or_reset function automatically invokes the action (sysfs removal) in the event of a failure; therefore, there's no necessity to call it within the return check. 
Modify the return type of xe_gt_ccs_mode_sysfs_init to int, allowing the caller to pass errors up the call chain. Should sysfs creation or drmm_add_action_or_reset fail, error propagation will prompt a driver load abort. -v2 Edit commit message (Nikula/Lucas) use err_force_wake label instead of new. (Lucas) Avoid unnecessary warn/error messages. (Lucas) Fixes: f3bc5bb4d53d ("drm/xe: Allow userspace to configure CCS mode") Cc: Lucas De Marchi Cc: Jani Nikula Cc: Rodrigo Vivi Cc: Niranjana Vishwanathapura Reviewed-by: Lucas De Marchi Signed-off-by: Himal Prasad Ghimiray Link: https://patchwork.freedesktop.org/patch/msgid/20240412181211.1155732-3-himal.prasad.ghimiray@intel.com Signed-off-by: Lucas De Marchi (cherry picked from commit a99641e38704202ae2a97202b3d249208c9cda7f) Signed-off-by: Lucas De Marchi --- drivers/gpu/drm/xe/xe_gt.c | 4 +++- drivers/gpu/drm/xe/xe_gt_ccs_mode.c | 19 +++++++------------ drivers/gpu/drm/xe/xe_gt_ccs_mode.h | 2 +- 3 files changed, 11 insertions(+), 14 deletions(-) diff --git a/drivers/gpu/drm/xe/xe_gt.c b/drivers/gpu/drm/xe/xe_gt.c index a0afe1ba6dd5..f9705430ada9 100644 --- a/drivers/gpu/drm/xe/xe_gt.c +++ b/drivers/gpu/drm/xe/xe_gt.c @@ -378,7 +378,9 @@ static int gt_fw_domain_init(struct xe_gt *gt) err); /* Initialize CCS mode sysfs after early initialization of HW engines */ - xe_gt_ccs_mode_sysfs_init(gt); + err = xe_gt_ccs_mode_sysfs_init(gt); + if (err) + goto err_force_wake; /* * Stash hardware-reported version. Since this register does not exist diff --git a/drivers/gpu/drm/xe/xe_gt_ccs_mode.c b/drivers/gpu/drm/xe/xe_gt_ccs_mode.c index 529fc286cd06..396aeb5b9924 100644 --- a/drivers/gpu/drm/xe/xe_gt_ccs_mode.c +++ b/drivers/gpu/drm/xe/xe_gt_ccs_mode.c @@ -167,25 +167,20 @@ static void xe_gt_ccs_mode_sysfs_fini(struct drm_device *drm, void *arg) * and it is expected that there are no open drm clients while doing so. * The number of available compute slices is exposed to user through a per-gt * 'num_cslices' sysfs interface. 
+ * + * Returns: Returns error value for failure and 0 for success. */ -void xe_gt_ccs_mode_sysfs_init(struct xe_gt *gt) +int xe_gt_ccs_mode_sysfs_init(struct xe_gt *gt) { struct xe_device *xe = gt_to_xe(gt); int err; if (!xe_gt_ccs_mode_enabled(gt)) - return; + return 0; err = sysfs_create_files(gt->sysfs, gt_ccs_mode_attrs); - if (err) { - drm_warn(&xe->drm, "Sysfs creation for ccs_mode failed err: %d\n", err); - return; - } + if (err) + return err; - err = drmm_add_action_or_reset(&xe->drm, xe_gt_ccs_mode_sysfs_fini, gt); - if (err) { - sysfs_remove_files(gt->sysfs, gt_ccs_mode_attrs); - drm_warn(&xe->drm, "%s: drmm_add_action_or_reset failed, err: %d\n", - __func__, err); - } + return drmm_add_action_or_reset(&xe->drm, xe_gt_ccs_mode_sysfs_fini, gt); } diff --git a/drivers/gpu/drm/xe/xe_gt_ccs_mode.h b/drivers/gpu/drm/xe/xe_gt_ccs_mode.h index f39975aaaab0..f8779852cf0d 100644 --- a/drivers/gpu/drm/xe/xe_gt_ccs_mode.h +++ b/drivers/gpu/drm/xe/xe_gt_ccs_mode.h @@ -12,7 +12,7 @@ #include "xe_platform_types.h" void xe_gt_apply_ccs_mode(struct xe_gt *gt); -void xe_gt_ccs_mode_sysfs_init(struct xe_gt *gt); +int xe_gt_ccs_mode_sysfs_init(struct xe_gt *gt); static inline bool xe_gt_ccs_mode_enabled(const struct xe_gt *gt) { From f38c4d224aa37fce1e3fe05db4377ef888f0737f Mon Sep 17 00:00:00 2001 From: Himal Prasad Ghimiray Date: Fri, 12 Apr 2024 23:42:07 +0530 Subject: [PATCH 231/313] drm/xe: call free_gsc_pkt only once on action add failure The drmm_add_action_or_reset function automatically invokes the action (free_gsc_pkt) in the event of a failure; therefore, there's no necessity to call it within the return check. -v2 Fix commit message. 
(Lucas) Fixes: d8b1571312b7 ("drm/xe/huc: HuC authentication via GSC") Cc: Rodrigo Vivi Cc: Daniele Ceraolo Spurio Reviewed-by: Lucas De Marchi Reviewed-by: Daniele Ceraolo Spurio Signed-off-by: Himal Prasad Ghimiray Link: https://patchwork.freedesktop.org/patch/msgid/20240412181211.1155732-4-himal.prasad.ghimiray@intel.com Signed-off-by: Lucas De Marchi (cherry picked from commit 22bf0bc04d273ca002a47de55693797b13076602) Signed-off-by: Lucas De Marchi --- drivers/gpu/drm/xe/xe_huc.c | 9 +-------- 1 file changed, 1 insertion(+), 8 deletions(-) diff --git a/drivers/gpu/drm/xe/xe_huc.c b/drivers/gpu/drm/xe/xe_huc.c index b545f850087c..6b9b1cbedd37 100644 --- a/drivers/gpu/drm/xe/xe_huc.c +++ b/drivers/gpu/drm/xe/xe_huc.c @@ -53,7 +53,6 @@ static int huc_alloc_gsc_pkt(struct xe_huc *huc) struct xe_gt *gt = huc_to_gt(huc); struct xe_device *xe = gt_to_xe(gt); struct xe_bo *bo; - int err; /* we use a single object for both input and output */ bo = xe_bo_create_pin_map(xe, gt_to_tile(gt), NULL, @@ -66,13 +65,7 @@ static int huc_alloc_gsc_pkt(struct xe_huc *huc) huc->gsc_pkt = bo; - err = drmm_add_action_or_reset(&xe->drm, free_gsc_pkt, huc); - if (err) { - free_gsc_pkt(&xe->drm, huc); - return err; - } - - return 0; + return drmm_add_action_or_reset(&xe->drm, free_gsc_pkt, huc); } int xe_huc_init(struct xe_huc *huc) From e3e989522ac9a6b7960c75b762e1e9568717b31e Mon Sep 17 00:00:00 2001 From: Michal Wajdeczko Date: Fri, 19 Apr 2024 17:03:51 +0200 Subject: [PATCH 232/313] drm/xe/guc: Fix arguments passed to relay G2H handlers MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit By default CT code was passing just payload of the G2H event message, while Relay code expects full G2H message including HXG header which contains DATA0 field. Fix that. 
Fixes: 26d4481ac23f ("drm/xe/guc: Start handling GuC Relay event messages") Signed-off-by: Michal Wajdeczko Reviewed-by: Piotr Piórkowski Link: https://patchwork.freedesktop.org/patch/msgid/20240419150351.358-1-michal.wajdeczko@intel.com (cherry picked from commit 48c64d495fbef343c59598a793d583dfd199d389) Signed-off-by: Lucas De Marchi --- drivers/gpu/drm/xe/xe_guc_ct.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/xe/xe_guc_ct.c b/drivers/gpu/drm/xe/xe_guc_ct.c index 355edd4d758a..7f32547f94b2 100644 --- a/drivers/gpu/drm/xe/xe_guc_ct.c +++ b/drivers/gpu/drm/xe/xe_guc_ct.c @@ -1054,10 +1054,10 @@ static int process_g2h_msg(struct xe_guc_ct *ct, u32 *msg, u32 len) adj_len); break; case XE_GUC_ACTION_GUC2PF_RELAY_FROM_VF: - ret = xe_guc_relay_process_guc2pf(&guc->relay, payload, adj_len); + ret = xe_guc_relay_process_guc2pf(&guc->relay, hxg, hxg_len); break; case XE_GUC_ACTION_GUC2VF_RELAY_FROM_PF: - ret = xe_guc_relay_process_guc2vf(&guc->relay, payload, adj_len); + ret = xe_guc_relay_process_guc2vf(&guc->relay, hxg, hxg_len); break; default: drm_err(&xe->drm, "unexpected action 0x%04x\n", action); From 9bf4e919ccad613b3596eebf1ff37b05b6405307 Mon Sep 17 00:00:00 2001 From: Nathan Chancellor Date: Mon, 1 Apr 2024 11:24:17 -0700 Subject: [PATCH 233/313] Bluetooth: Fix type of len in {l2cap,sco}_sock_getsockopt_old() After an innocuous optimization change in LLVM main (19.0.0), x86_64 allmodconfig (which enables CONFIG_KCSAN / -fsanitize=thread) fails to build due to the checks in check_copy_size(): In file included from net/bluetooth/sco.c:27: In file included from include/linux/module.h:13: In file included from include/linux/stat.h:19: In file included from include/linux/time.h:60: In file included from include/linux/time32.h:13: In file included from include/linux/timex.h:67: In file included from arch/x86/include/asm/timex.h:6: In file included from arch/x86/include/asm/tsc.h:10: In file included from 
arch/x86/include/asm/msr.h:15: In file included from include/linux/percpu.h:7: In file included from include/linux/smp.h:118: include/linux/thread_info.h:244:4: error: call to '__bad_copy_from' declared with 'error' attribute: copy source size is too small 244 | __bad_copy_from(); | ^ The same exact error occurs in l2cap_sock.c. The copy_to_user() statements that are failing come from l2cap_sock_getsockopt_old() and sco_sock_getsockopt_old(). This does not occur with GCC with or without KCSAN or Clang without KCSAN enabled. len is defined as an 'int' because it is assigned from '__user int *optlen'. However, it is clamped against the result of sizeof(), which has a type of 'size_t' ('unsigned long' for 64-bit platforms). This is done with min_t() because min() requires compatible types, which results in both len and the result of sizeof() being casted to 'unsigned int', meaning len changes signs and the result of sizeof() is truncated. From there, len is passed to copy_to_user(), which has a third parameter type of 'unsigned long', so it is widened and changes signs again. This excessive casting in combination with the KCSAN instrumentation causes LLVM to fail to eliminate the __bad_copy_from() call, failing the build. The official recommendation from LLVM developers is to consistently use long types for all size variables to avoid the unnecessary casting in the first place. Change the type of len to size_t in both l2cap_sock_getsockopt_old() and sco_sock_getsockopt_old(). This clears up the error while allowing min_t() to be replaced with min(), resulting in simpler code with no casts and fewer implicit conversions. While len is a different type than optlen now, it should result in no functional change because the result of sizeof() will clamp all values of optlen in the same manner as before. 
Cc: stable@vger.kernel.org Closes: https://github.com/ClangBuiltLinux/linux/issues/2007 Link: https://github.com/llvm/llvm-project/issues/85647 Signed-off-by: Nathan Chancellor Reviewed-by: Justin Stitt Signed-off-by: Luiz Augusto von Dentz --- net/bluetooth/l2cap_sock.c | 7 ++++--- net/bluetooth/sco.c | 7 ++++--- 2 files changed, 8 insertions(+), 6 deletions(-) diff --git a/net/bluetooth/l2cap_sock.c b/net/bluetooth/l2cap_sock.c index e7d810b23082..5cc83f906c12 100644 --- a/net/bluetooth/l2cap_sock.c +++ b/net/bluetooth/l2cap_sock.c @@ -439,7 +439,8 @@ static int l2cap_sock_getsockopt_old(struct socket *sock, int optname, struct l2cap_chan *chan = l2cap_pi(sk)->chan; struct l2cap_options opts; struct l2cap_conninfo cinfo; - int len, err = 0; + int err = 0; + size_t len; u32 opt; BT_DBG("sk %p", sk); @@ -486,7 +487,7 @@ static int l2cap_sock_getsockopt_old(struct socket *sock, int optname, BT_DBG("mode 0x%2.2x", chan->mode); - len = min_t(unsigned int, len, sizeof(opts)); + len = min(len, sizeof(opts)); if (copy_to_user(optval, (char *) &opts, len)) err = -EFAULT; @@ -536,7 +537,7 @@ static int l2cap_sock_getsockopt_old(struct socket *sock, int optname, cinfo.hci_handle = chan->conn->hcon->handle; memcpy(cinfo.dev_class, chan->conn->hcon->dev_class, 3); - len = min_t(unsigned int, len, sizeof(cinfo)); + len = min(len, sizeof(cinfo)); if (copy_to_user(optval, (char *) &cinfo, len)) err = -EFAULT; diff --git a/net/bluetooth/sco.c b/net/bluetooth/sco.c index 368e026f4d15..5d03c5440b06 100644 --- a/net/bluetooth/sco.c +++ b/net/bluetooth/sco.c @@ -964,7 +964,8 @@ static int sco_sock_getsockopt_old(struct socket *sock, int optname, struct sock *sk = sock->sk; struct sco_options opts; struct sco_conninfo cinfo; - int len, err = 0; + int err = 0; + size_t len; BT_DBG("sk %p", sk); @@ -986,7 +987,7 @@ static int sco_sock_getsockopt_old(struct socket *sock, int optname, BT_DBG("mtu %u", opts.mtu); - len = min_t(unsigned int, len, sizeof(opts)); + len = min(len, 
sizeof(opts)); if (copy_to_user(optval, (char *)&opts, len)) err = -EFAULT; @@ -1004,7 +1005,7 @@ static int sco_sock_getsockopt_old(struct socket *sock, int optname, cinfo.hci_handle = sco_pi(sk)->conn->hcon->handle; memcpy(cinfo.dev_class, sco_pi(sk)->conn->hcon->dev_class, 3); - len = min_t(unsigned int, len, sizeof(cinfo)); + len = min(len, sizeof(cinfo)); if (copy_to_user(optval, (char *)&cinfo, len)) err = -EFAULT; From 2e7ed5f5e69b6fe93dd3c6b651d041e0a7a456d1 Mon Sep 17 00:00:00 2001 From: Luiz Augusto von Dentz Date: Fri, 5 Apr 2024 16:40:33 -0400 Subject: [PATCH 234/313] Bluetooth: hci_sync: Use advertised PHYs on hci_le_ext_create_conn_sync The extended advertising reports do report the PHYs, so store them in hci_conn so they can later be used in hci_le_ext_create_conn_sync to narrow the PHYs to be scanned. Since the controller will also perform a scan, having a smaller set of PHYs shall reduce the time it takes to find and connect peers. Fixes: 288c90224eec ("Bluetooth: Enable all supported LE PHY by default") Signed-off-by: Luiz Augusto von Dentz --- include/net/bluetooth/hci_core.h | 4 +++- net/bluetooth/hci_conn.c | 6 ++++-- net/bluetooth/hci_event.c | 20 ++++++++++++-------- net/bluetooth/hci_sync.c | 9 ++++++--- net/bluetooth/l2cap_core.c | 2 +- 5 files changed, 26 insertions(+), 15 deletions(-) diff --git a/include/net/bluetooth/hci_core.h b/include/net/bluetooth/hci_core.h index 56fb42df44a3..02af7d7013da 100644 --- a/include/net/bluetooth/hci_core.h +++ b/include/net/bluetooth/hci_core.h @@ -738,6 +738,8 @@ struct hci_conn { __u8 le_per_adv_data[HCI_MAX_PER_AD_TOT_LEN]; __u16 le_per_adv_data_len; __u16 le_per_adv_data_offset; + __u8 le_adv_phy; + __u8 le_adv_sec_phy; __u8 le_tx_phy; __u8 le_rx_phy; __s8 rssi; @@ -1512,7 +1514,7 @@ struct hci_conn *hci_connect_le_scan(struct hci_dev *hdev, bdaddr_t *dst, enum conn_reasons conn_reason); struct hci_conn *hci_connect_le(struct hci_dev *hdev, bdaddr_t *dst, u8 dst_type, bool dst_resolved, u8
sec_level, - u16 conn_timeout, u8 role); + u16 conn_timeout, u8 role, u8 phy, u8 sec_phy); void hci_connect_le_scan_cleanup(struct hci_conn *conn, u8 status); struct hci_conn *hci_connect_acl(struct hci_dev *hdev, bdaddr_t *dst, u8 sec_level, u8 auth_type, diff --git a/net/bluetooth/hci_conn.c b/net/bluetooth/hci_conn.c index 3ad74f76983b..05346250f719 100644 --- a/net/bluetooth/hci_conn.c +++ b/net/bluetooth/hci_conn.c @@ -1263,7 +1263,7 @@ u8 hci_conn_set_handle(struct hci_conn *conn, u16 handle) struct hci_conn *hci_connect_le(struct hci_dev *hdev, bdaddr_t *dst, u8 dst_type, bool dst_resolved, u8 sec_level, - u16 conn_timeout, u8 role) + u16 conn_timeout, u8 role, u8 phy, u8 sec_phy) { struct hci_conn *conn; struct smp_irk *irk; @@ -1326,6 +1326,8 @@ struct hci_conn *hci_connect_le(struct hci_dev *hdev, bdaddr_t *dst, conn->dst_type = dst_type; conn->sec_level = BT_SECURITY_LOW; conn->conn_timeout = conn_timeout; + conn->le_adv_phy = phy; + conn->le_adv_sec_phy = sec_phy; err = hci_connect_le_sync(hdev, conn); if (err) { @@ -2273,7 +2275,7 @@ struct hci_conn *hci_connect_cis(struct hci_dev *hdev, bdaddr_t *dst, le = hci_connect_le(hdev, dst, dst_type, false, BT_SECURITY_LOW, HCI_LE_CONN_TIMEOUT, - HCI_ROLE_SLAVE); + HCI_ROLE_SLAVE, 0, 0); else le = hci_connect_le_scan(hdev, dst, dst_type, BT_SECURITY_LOW, diff --git a/net/bluetooth/hci_event.c b/net/bluetooth/hci_event.c index a8b8cfebe018..4d70402e295f 100644 --- a/net/bluetooth/hci_event.c +++ b/net/bluetooth/hci_event.c @@ -6038,7 +6038,7 @@ static void hci_le_conn_update_complete_evt(struct hci_dev *hdev, void *data, static struct hci_conn *check_pending_le_conn(struct hci_dev *hdev, bdaddr_t *addr, u8 addr_type, bool addr_resolved, - u8 adv_type) + u8 adv_type, u8 phy, u8 sec_phy) { struct hci_conn *conn; struct hci_conn_params *params; @@ -6093,7 +6093,7 @@ static struct hci_conn *check_pending_le_conn(struct hci_dev *hdev, conn = hci_connect_le(hdev, addr, addr_type, addr_resolved, BT_SECURITY_LOW, 
hdev->def_le_autoconnect_timeout, - HCI_ROLE_MASTER); + HCI_ROLE_MASTER, phy, sec_phy); if (!IS_ERR(conn)) { /* If HCI_AUTO_CONN_EXPLICIT is set, conn is already owned * by higher layer that tried to connect, if no then @@ -6128,8 +6128,9 @@ static struct hci_conn *check_pending_le_conn(struct hci_dev *hdev, static void process_adv_report(struct hci_dev *hdev, u8 type, bdaddr_t *bdaddr, u8 bdaddr_type, bdaddr_t *direct_addr, - u8 direct_addr_type, s8 rssi, u8 *data, u8 len, - bool ext_adv, bool ctl_time, u64 instant) + u8 direct_addr_type, u8 phy, u8 sec_phy, s8 rssi, + u8 *data, u8 len, bool ext_adv, bool ctl_time, + u64 instant) { struct discovery_state *d = &hdev->discovery; struct smp_irk *irk; @@ -6217,7 +6218,7 @@ static void process_adv_report(struct hci_dev *hdev, u8 type, bdaddr_t *bdaddr, * for advertising reports) and is already verified to be RPA above. */ conn = check_pending_le_conn(hdev, bdaddr, bdaddr_type, bdaddr_resolved, - type); + type, phy, sec_phy); if (!ext_adv && conn && type == LE_ADV_IND && len <= max_adv_len(hdev)) { /* Store report for later inclusion by @@ -6363,7 +6364,8 @@ static void hci_le_adv_report_evt(struct hci_dev *hdev, void *data, if (info->length <= max_adv_len(hdev)) { rssi = info->data[info->length]; process_adv_report(hdev, info->type, &info->bdaddr, - info->bdaddr_type, NULL, 0, rssi, + info->bdaddr_type, NULL, 0, + HCI_ADV_PHY_1M, 0, rssi, info->data, info->length, false, false, instant); } else { @@ -6448,6 +6450,8 @@ static void hci_le_ext_adv_report_evt(struct hci_dev *hdev, void *data, if (legacy_evt_type != LE_ADV_INVALID) { process_adv_report(hdev, legacy_evt_type, &info->bdaddr, info->bdaddr_type, NULL, 0, + info->primary_phy, + info->secondary_phy, info->rssi, info->data, info->length, !(evt_type & LE_EXT_ADV_LEGACY_PDU), false, instant); @@ -6730,8 +6734,8 @@ static void hci_le_direct_adv_report_evt(struct hci_dev *hdev, void *data, process_adv_report(hdev, info->type, &info->bdaddr, info->bdaddr_type, 
&info->direct_addr, - info->direct_addr_type, info->rssi, NULL, 0, - false, false, instant); + info->direct_addr_type, HCI_ADV_PHY_1M, 0, + info->rssi, NULL, 0, false, false, instant); } hci_dev_unlock(hdev); diff --git a/net/bluetooth/hci_sync.c b/net/bluetooth/hci_sync.c index c5d8799046cc..4c707eb64e6f 100644 --- a/net/bluetooth/hci_sync.c +++ b/net/bluetooth/hci_sync.c @@ -6346,7 +6346,8 @@ static int hci_le_ext_create_conn_sync(struct hci_dev *hdev, plen = sizeof(*cp); - if (scan_1m(hdev)) { + if (scan_1m(hdev) && (conn->le_adv_phy == HCI_ADV_PHY_1M || + conn->le_adv_sec_phy == HCI_ADV_PHY_1M)) { cp->phys |= LE_SCAN_PHY_1M; set_ext_conn_params(conn, p); @@ -6354,7 +6355,8 @@ static int hci_le_ext_create_conn_sync(struct hci_dev *hdev, plen += sizeof(*p); } - if (scan_2m(hdev)) { + if (scan_2m(hdev) && (conn->le_adv_phy == HCI_ADV_PHY_2M || + conn->le_adv_sec_phy == HCI_ADV_PHY_2M)) { cp->phys |= LE_SCAN_PHY_2M; set_ext_conn_params(conn, p); @@ -6362,7 +6364,8 @@ static int hci_le_ext_create_conn_sync(struct hci_dev *hdev, plen += sizeof(*p); } - if (scan_coded(hdev)) { + if (scan_coded(hdev) && (conn->le_adv_phy == HCI_ADV_PHY_CODED || + conn->le_adv_sec_phy == HCI_ADV_PHY_CODED)) { cp->phys |= LE_SCAN_PHY_CODED; set_ext_conn_params(conn, p); diff --git a/net/bluetooth/l2cap_core.c b/net/bluetooth/l2cap_core.c index dc0897408793..84fc70862d78 100644 --- a/net/bluetooth/l2cap_core.c +++ b/net/bluetooth/l2cap_core.c @@ -7018,7 +7018,7 @@ int l2cap_chan_connect(struct l2cap_chan *chan, __le16 psm, u16 cid, if (hci_dev_test_flag(hdev, HCI_ADVERTISING)) hcon = hci_connect_le(hdev, dst, dst_type, false, chan->sec_level, timeout, - HCI_ROLE_SLAVE); + HCI_ROLE_SLAVE, 0, 0); else hcon = hci_connect_le_scan(hdev, dst, dst_type, chan->sec_level, timeout, From d1a5a7eede2977da3d2002d5ea3b519019cc1a98 Mon Sep 17 00:00:00 2001 From: WangYuli Date: Fri, 29 Mar 2024 10:34:39 +0800 Subject: [PATCH 235/313] Bluetooth: btusb: Add Realtek RTL8852BE support ID 0x0bda:0x4853 Add 
the support ID(0x0bda, 0x4853) to usb_device_id table for Realtek RTL8852BE. Without this change the device utilizes an obsolete version of the firmware that is encoded in it rather than the updated Realtek firmware and config files from the firmware directory. The latter files implement many new features. The device table is as follows: T: Bus=03 Lev=01 Prnt=01 Port=09 Cnt=03 Dev#= 4 Spd=12 MxCh= 0 D: Ver= 1.00 Cls=e0(wlcon) Sub=01 Prot=01 MxPS=64 #Cfgs= 1 P: Vendor=0bda ProdID=4853 Rev= 0.00 S: Manufacturer=Realtek S: Product=Bluetooth Radio S: SerialNumber=00e04c000001 C:* #Ifs= 2 Cfg#= 1 Atr=e0 MxPwr=500mA I:* If#= 0 Alt= 0 #EPs= 3 Cls=e0(wlcon) Sub=01 Prot=01 Driver=btusb E: Ad=81(I) Atr=03(Int.) MxPS= 16 Ivl=1ms E: Ad=02(O) Atr=02(Bulk) MxPS= 64 Ivl=0ms E: Ad=82(I) Atr=02(Bulk) MxPS= 64 Ivl=0ms I:* If#= 1 Alt= 0 #EPs= 2 Cls=e0(wlcon) Sub=01 Prot=01 Driver=btusb E: Ad=03(O) Atr=01(Isoc) MxPS= 0 Ivl=1ms E: Ad=83(I) Atr=01(Isoc) MxPS= 0 Ivl=1ms I: If#= 1 Alt= 1 #EPs= 2 Cls=e0(wlcon) Sub=01 Prot=01 Driver=btusb E: Ad=03(O) Atr=01(Isoc) MxPS= 9 Ivl=1ms E: Ad=83(I) Atr=01(Isoc) MxPS= 9 Ivl=1ms I: If#= 1 Alt= 2 #EPs= 2 Cls=e0(wlcon) Sub=01 Prot=01 Driver=btusb E: Ad=03(O) Atr=01(Isoc) MxPS= 17 Ivl=1ms E: Ad=83(I) Atr=01(Isoc) MxPS= 17 Ivl=1ms I: If#= 1 Alt= 3 #EPs= 2 Cls=e0(wlcon) Sub=01 Prot=01 Driver=btusb E: Ad=03(O) Atr=01(Isoc) MxPS= 25 Ivl=1ms E: Ad=83(I) Atr=01(Isoc) MxPS= 25 Ivl=1ms I: If#= 1 Alt= 4 #EPs= 2 Cls=e0(wlcon) Sub=01 Prot=01 Driver=btusb E: Ad=03(O) Atr=01(Isoc) MxPS= 33 Ivl=1ms E: Ad=83(I) Atr=01(Isoc) MxPS= 33 Ivl=1ms I: If#= 1 Alt= 5 #EPs= 2 Cls=e0(wlcon) Sub=01 Prot=01 Driver=btusb E: Ad=03(O) Atr=01(Isoc) MxPS= 49 Ivl=1ms E: Ad=83(I) Atr=01(Isoc) MxPS= 49 Ivl=1ms Cc: stable@vger.kernel.org Signed-off-by: Larry Finger Signed-off-by: WangYuli Signed-off-by: Luiz Augusto von Dentz --- drivers/bluetooth/btusb.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/bluetooth/btusb.c b/drivers/bluetooth/btusb.c index 
06e915b57283..d9c621d15fee 100644 --- a/drivers/bluetooth/btusb.c +++ b/drivers/bluetooth/btusb.c @@ -542,6 +542,8 @@ static const struct usb_device_id quirks_table[] = { /* Realtek 8852BE Bluetooth devices */ { USB_DEVICE(0x0cb8, 0xc559), .driver_info = BTUSB_REALTEK | BTUSB_WIDEBAND_SPEECH }, + { USB_DEVICE(0x0bda, 0x4853), .driver_info = BTUSB_REALTEK | + BTUSB_WIDEBAND_SPEECH }, { USB_DEVICE(0x0bda, 0x887b), .driver_info = BTUSB_REALTEK | BTUSB_WIDEBAND_SPEECH }, { USB_DEVICE(0x0bda, 0xb85b), .driver_info = BTUSB_REALTEK | From b23d98d46d2858dcc0fd016caff165cbdc24e70a Mon Sep 17 00:00:00 2001 From: Zijun Hu Date: Mon, 25 Mar 2024 16:11:49 +0800 Subject: [PATCH 236/313] Bluetooth: btusb: Fix triggering coredump implementation for QCA btusb_coredump_qca() uses __hci_cmd_sync() to send a vendor-specific command to trigger firmware coredump, but the command does not have any event as its sync response, so it is not suitable to use __hci_cmd_sync(), fixed by using __hci_cmd_send(). Fixes: 20981ce2d5a5 ("Bluetooth: btusb: Add WCN6855 devcoredump support") Signed-off-by: Zijun Hu Signed-off-by: Luiz Augusto von Dentz --- drivers/bluetooth/btusb.c | 9 ++++----- 1 file changed, 4 insertions(+), 5 deletions(-) diff --git a/drivers/bluetooth/btusb.c b/drivers/bluetooth/btusb.c index d9c621d15fee..e3946f7b736e 100644 --- a/drivers/bluetooth/btusb.c +++ b/drivers/bluetooth/btusb.c @@ -3482,13 +3482,12 @@ static void btusb_dump_hdr_qca(struct hci_dev *hdev, struct sk_buff *skb) static void btusb_coredump_qca(struct hci_dev *hdev) { + int err; static const u8 param[] = { 0x26 }; - struct sk_buff *skb; - skb = __hci_cmd_sync(hdev, 0xfc0c, 1, param, HCI_CMD_TIMEOUT); - if (IS_ERR(skb)) - bt_dev_err(hdev, "%s: triggle crash failed (%ld)", __func__, PTR_ERR(skb)); - kfree_skb(skb); + err = __hci_cmd_send(hdev, 0xfc0c, 1, param); + if (err < 0) + bt_dev_err(hdev, "%s: triggle crash failed (%d)", __func__, err); } /* From a9a830a676a9a93c5020f5c61236166931fa4266 Mon Sep 17 00:00:00 
2001 From: Luiz Augusto von Dentz Date: Mon, 15 Apr 2024 13:41:01 -0400 Subject: [PATCH 237/313] Bluetooth: hci_event: Fix sending HCI_OP_READ_ENC_KEY_SIZE The code shall always check if HCI_QUIRK_BROKEN_READ_ENC_KEY_SIZE has been set before attempting to use HCI_OP_READ_ENC_KEY_SIZE. Fixes: c569242cd492 ("Bluetooth: hci_event: set the conn encrypted before conn establishes") Signed-off-by: Luiz Augusto von Dentz --- include/net/bluetooth/hci_core.h | 4 ++++ net/bluetooth/hci_event.c | 5 ++--- 2 files changed, 6 insertions(+), 3 deletions(-) diff --git a/include/net/bluetooth/hci_core.h b/include/net/bluetooth/hci_core.h index 02af7d7013da..e8f581f3f3ce 100644 --- a/include/net/bluetooth/hci_core.h +++ b/include/net/bluetooth/hci_core.h @@ -1907,6 +1907,10 @@ void hci_conn_del_sysfs(struct hci_conn *conn); #define privacy_mode_capable(dev) (use_ll_privacy(dev) && \ (hdev->commands[39] & 0x04)) +#define read_key_size_capable(dev) \ + ((dev)->commands[20] & 0x10 && \ + !test_bit(HCI_QUIRK_BROKEN_READ_ENC_KEY_SIZE, &hdev->quirks)) + /* Use enhanced synchronous connection if command is supported and its quirk * has not been set. */ diff --git a/net/bluetooth/hci_event.c b/net/bluetooth/hci_event.c index 4d70402e295f..4a27e4a17a67 100644 --- a/net/bluetooth/hci_event.c +++ b/net/bluetooth/hci_event.c @@ -3218,7 +3218,7 @@ static void hci_conn_complete_evt(struct hci_dev *hdev, void *data, if (key) { set_bit(HCI_CONN_ENCRYPT, &conn->flags); - if (!(hdev->commands[20] & 0x10)) { + if (!read_key_size_capable(hdev)) { conn->enc_key_size = HCI_LINK_KEY_SIZE; } else { cp.handle = cpu_to_le16(conn->handle); @@ -3666,8 +3666,7 @@ static void hci_encrypt_change_evt(struct hci_dev *hdev, void *data, * controller really supports it. If it doesn't, assume * the default size (16). 
*/ - if (!(hdev->commands[20] & 0x10) || - test_bit(HCI_QUIRK_BROKEN_READ_ENC_KEY_SIZE, &hdev->quirks)) { + if (!read_key_size_capable(hdev)) { conn->enc_key_size = HCI_LINK_KEY_SIZE; goto notify; } From 32868e126c78876a8a5ddfcb6ac8cb2fffcf4d27 Mon Sep 17 00:00:00 2001 From: Johan Hovold Date: Tue, 16 Apr 2024 11:15:09 +0200 Subject: [PATCH 238/313] Bluetooth: qca: fix invalid device address check Qualcomm Bluetooth controllers may not have been provisioned with a valid device address and instead end up using the default address 00:00:00:00:5a:ad. This was previously believed to be due to lack of persistent storage for the address but it may also be due to integrators opting to not use the on-chip OTP memory and instead store the address elsewhere (e.g. in storage managed by secure world firmware). According to Qualcomm, at least WCN6750, WCN6855 and WCN7850 have on-chip OTP storage for the address. As the device type alone cannot be used to determine when the address is valid, instead read back the address during setup() and only set the HCI_QUIRK_USE_BDADDR_PROPERTY flag when needed. This specifically makes sure that controllers that have been provisioned with an address do not start as unconfigured. 
Reported-by: Janaki Ramaiah Thota Link: https://lore.kernel.org/r/124a7d54-5a18-4be7-9a76-a12017f6cce5@quicinc.com/ Fixes: 5971752de44c ("Bluetooth: hci_qca: Set HCI_QUIRK_USE_BDADDR_PROPERTY for wcn3990") Fixes: e668eb1e1578 ("Bluetooth: hci_core: Don't stop BT if the BD address missing in dts") Fixes: 6945795bc81a ("Bluetooth: fix use-bdaddr-property quirk") Cc: stable@vger.kernel.org # 6.5 Cc: Matthias Kaehlcke Signed-off-by: Johan Hovold Reported-by: Janaki Ramaiah Thota Signed-off-by: Luiz Augusto von Dentz --- drivers/bluetooth/btqca.c | 38 +++++++++++++++++++++++++++++++++++++ drivers/bluetooth/hci_qca.c | 2 -- 2 files changed, 38 insertions(+), 2 deletions(-) diff --git a/drivers/bluetooth/btqca.c b/drivers/bluetooth/btqca.c index 19cfc342fc7b..216826c31ee3 100644 --- a/drivers/bluetooth/btqca.c +++ b/drivers/bluetooth/btqca.c @@ -15,6 +15,8 @@ #define VERSION "0.1" +#define QCA_BDADDR_DEFAULT (&(bdaddr_t) {{ 0xad, 0x5a, 0x00, 0x00, 0x00, 0x00 }}) + int qca_read_soc_version(struct hci_dev *hdev, struct qca_btsoc_version *ver, enum qca_btsoc_type soc_type) { @@ -612,6 +614,38 @@ int qca_set_bdaddr_rome(struct hci_dev *hdev, const bdaddr_t *bdaddr) } EXPORT_SYMBOL_GPL(qca_set_bdaddr_rome); +static int qca_check_bdaddr(struct hci_dev *hdev) +{ + struct hci_rp_read_bd_addr *bda; + struct sk_buff *skb; + int err; + + if (bacmp(&hdev->public_addr, BDADDR_ANY)) + return 0; + + skb = __hci_cmd_sync(hdev, HCI_OP_READ_BD_ADDR, 0, NULL, + HCI_INIT_TIMEOUT); + if (IS_ERR(skb)) { + err = PTR_ERR(skb); + bt_dev_err(hdev, "Failed to read device address (%d)", err); + return err; + } + + if (skb->len != sizeof(*bda)) { + bt_dev_err(hdev, "Device address length mismatch"); + kfree_skb(skb); + return -EIO; + } + + bda = (struct hci_rp_read_bd_addr *)skb->data; + if (!bacmp(&bda->bdaddr, QCA_BDADDR_DEFAULT)) + set_bit(HCI_QUIRK_USE_BDADDR_PROPERTY, &hdev->quirks); + + kfree_skb(skb); + + return 0; +} + static void qca_generate_hsp_nvm_name(char *fwname, size_t max_size, 
struct qca_btsoc_version ver, u8 rom_ver, u16 bid) { @@ -818,6 +852,10 @@ int qca_uart_setup(struct hci_dev *hdev, uint8_t baudrate, break; } + err = qca_check_bdaddr(hdev); + if (err) + return err; + bt_dev_info(hdev, "QCA setup on UART is completed"); return 0; diff --git a/drivers/bluetooth/hci_qca.c b/drivers/bluetooth/hci_qca.c index ecbc52eaf101..92fa20f5ac7d 100644 --- a/drivers/bluetooth/hci_qca.c +++ b/drivers/bluetooth/hci_qca.c @@ -1905,8 +1905,6 @@ static int qca_setup(struct hci_uart *hu) case QCA_WCN6750: case QCA_WCN6855: case QCA_WCN7850: - set_bit(HCI_QUIRK_USE_BDADDR_PROPERTY, &hdev->quirks); - qcadev = serdev_device_get_drvdata(hu->serdev); if (qcadev->bdaddr_property_broken) set_bit(HCI_QUIRK_BDADDR_PROPERTY_BROKEN, &hdev->quirks); From 6eb5fcc416f127f220b9177a5c9ae751cac1cda8 Mon Sep 17 00:00:00 2001 From: Luiz Augusto von Dentz Date: Tue, 16 Apr 2024 15:34:45 -0400 Subject: [PATCH 239/313] Bluetooth: MGMT: Fix failing to MGMT_OP_ADD_UUID/MGMT_OP_REMOVE_UUID These commands don't require the adapter to be up and running so don't use hci_cmd_sync_queue which would check that flag, instead use hci_cmd_sync_submit which would ensure mgmt_class_complete is set properly regardless if any command was actually run or not. Link: https://github.com/bluez/bluez/issues/809 Fixes: d883a4669a1d ("Bluetooth: hci_sync: Only allow hci_cmd_sync_queue if running") Signed-off-by: Luiz Augusto von Dentz --- net/bluetooth/mgmt.c | 20 +++++++++++++++----- 1 file changed, 15 insertions(+), 5 deletions(-) diff --git a/net/bluetooth/mgmt.c b/net/bluetooth/mgmt.c index 32ed6e9245a3..657abd9bcc87 100644 --- a/net/bluetooth/mgmt.c +++ b/net/bluetooth/mgmt.c @@ -2623,7 +2623,11 @@ static int add_uuid(struct sock *sk, struct hci_dev *hdev, void *data, u16 len) goto failed; } - err = hci_cmd_sync_queue(hdev, add_uuid_sync, cmd, mgmt_class_complete); + /* MGMT_OP_ADD_UUID don't require adapter the UP/Running so use + * hci_cmd_sync_submit instead of hci_cmd_sync_queue. 
+ */ + err = hci_cmd_sync_submit(hdev, add_uuid_sync, cmd, + mgmt_class_complete); if (err < 0) { mgmt_pending_free(cmd); goto failed; @@ -2717,8 +2721,11 @@ static int remove_uuid(struct sock *sk, struct hci_dev *hdev, void *data, goto unlock; } - err = hci_cmd_sync_queue(hdev, remove_uuid_sync, cmd, - mgmt_class_complete); + /* MGMT_OP_REMOVE_UUID don't require adapter the UP/Running so use + * hci_cmd_sync_submit instead of hci_cmd_sync_queue. + */ + err = hci_cmd_sync_submit(hdev, remove_uuid_sync, cmd, + mgmt_class_complete); if (err < 0) mgmt_pending_free(cmd); @@ -2784,8 +2791,11 @@ static int set_dev_class(struct sock *sk, struct hci_dev *hdev, void *data, goto unlock; } - err = hci_cmd_sync_queue(hdev, set_class_sync, cmd, - mgmt_class_complete); + /* MGMT_OP_SET_DEV_CLASS don't require adapter the UP/Running so use + * hci_cmd_sync_submit instead of hci_cmd_sync_queue. + */ + err = hci_cmd_sync_submit(hdev, set_class_sync, cmd, + mgmt_class_complete); if (err < 0) mgmt_pending_free(cmd); From 18bdb386a1a30e7a3d7732a98e45e69cf6b5710d Mon Sep 17 00:00:00 2001 From: Sean Wang Date: Wed, 17 Apr 2024 16:27:38 -0700 Subject: [PATCH 240/313] Bluetooth: btusb: mediatek: Fix double free of skb in coredump hci_devcd_append() would free the skb on error so the caller don't have to free it again otherwise it would cause the double free of skb. 
Fixes: 0b7015132878 ("Bluetooth: btusb: mediatek: add MediaTek devcoredump support") Reported-by : Dan Carpenter Signed-off-by: Sean Wang Signed-off-by: Luiz Augusto von Dentz --- drivers/bluetooth/btmtk.c | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/drivers/bluetooth/btmtk.c b/drivers/bluetooth/btmtk.c index ac8ebccd3507..812fd2a8f853 100644 --- a/drivers/bluetooth/btmtk.c +++ b/drivers/bluetooth/btmtk.c @@ -380,8 +380,10 @@ int btmtk_process_coredump(struct hci_dev *hdev, struct sk_buff *skb) switch (data->cd_info.state) { case HCI_DEVCOREDUMP_IDLE: err = hci_devcd_init(hdev, MTK_COREDUMP_SIZE); - if (err < 0) + if (err < 0) { + kfree_skb(skb); break; + } data->cd_info.cnt = 0; /* It is supposed coredump can be done within 5 seconds */ @@ -407,9 +409,6 @@ int btmtk_process_coredump(struct hci_dev *hdev, struct sk_buff *skb) break; } - if (err < 0) - kfree_skb(skb); - return err; } EXPORT_SYMBOL_GPL(btmtk_process_coredump); From 73e87c0a49fda31d7b589edccf4c72e924411371 Mon Sep 17 00:00:00 2001 From: Johan Hovold Date: Mon, 22 Apr 2024 15:57:47 +0200 Subject: [PATCH 241/313] Bluetooth: qca: fix NULL-deref on non-serdev suspend Qualcomm ROME controllers can be registered from the Bluetooth line discipline and in this case the HCI UART serdev pointer is NULL. Add the missing sanity check to prevent a NULL-pointer dereference when wakeup() is called for a non-serdev controller during suspend. Just return true for now to restore the original behaviour and address the crash with pre-6.2 kernels, which do not have commit e9b3e5b8c657 ("Bluetooth: hci_qca: only assign wakeup with serial port support") that causes the crash to happen already at setup() time. 
Fixes: c1a74160eaf1 ("Bluetooth: hci_qca: Add device_may_wakeup support") Cc: stable@vger.kernel.org # 5.13 Signed-off-by: Johan Hovold Signed-off-by: Luiz Augusto von Dentz --- drivers/bluetooth/hci_qca.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/bluetooth/hci_qca.c b/drivers/bluetooth/hci_qca.c index 92fa20f5ac7d..94c85f4fbf3b 100644 --- a/drivers/bluetooth/hci_qca.c +++ b/drivers/bluetooth/hci_qca.c @@ -1672,6 +1672,9 @@ static bool qca_wakeup(struct hci_dev *hdev) struct hci_uart *hu = hci_get_drvdata(hdev); bool wakeup; + if (!hu->serdev) + return true; + /* BT SoC attached through the serial bus is handled by the serdev driver. * So we need to use the device handle of the serdev driver to get the * status of device may wakeup. From 7ddb9de6af0f1c71147785b12fd7c8ec3f06cc86 Mon Sep 17 00:00:00 2001 From: Johan Hovold Date: Mon, 22 Apr 2024 15:57:48 +0200 Subject: [PATCH 242/313] Bluetooth: qca: fix NULL-deref on non-serdev setup Qualcomm ROME controllers can be registered from the Bluetooth line discipline and in this case the HCI UART serdev pointer is NULL. Add the missing sanity check to prevent a NULL-pointer dereference when setup() is called for a non-serdev controller. 
Fixes: e9b3e5b8c657 ("Bluetooth: hci_qca: only assign wakeup with serial port support") Cc: stable@vger.kernel.org # 6.2 Cc: Zhengping Jiang Signed-off-by: Johan Hovold Signed-off-by: Luiz Augusto von Dentz --- drivers/bluetooth/hci_qca.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/drivers/bluetooth/hci_qca.c b/drivers/bluetooth/hci_qca.c index 94c85f4fbf3b..b621a0a40ea4 100644 --- a/drivers/bluetooth/hci_qca.c +++ b/drivers/bluetooth/hci_qca.c @@ -1958,8 +1958,10 @@ static int qca_setup(struct hci_uart *hu) qca_debugfs_init(hdev); hu->hdev->hw_error = qca_hw_error; hu->hdev->cmd_timeout = qca_cmd_timeout; - if (device_can_wakeup(hu->serdev->ctrl->dev.parent)) - hu->hdev->wakeup = qca_wakeup; + if (hu->serdev) { + if (device_can_wakeup(hu->serdev->ctrl->dev.parent)) + hu->hdev->wakeup = qca_wakeup; + } } else if (ret == -ENOENT) { /* No patch/nvm-config found, run with original fw/config */ set_bit(QCA_ROM_FW, &qca->flags); From 88cd6e6b2d327faa13e4505b07f1e380e51b21ff Mon Sep 17 00:00:00 2001 From: Chun-Yi Lee Date: Wed, 24 Apr 2024 21:59:03 +0800 Subject: [PATCH 243/313] Bluetooth: hci_sync: Using hci_cmd_sync_submit when removing Adv Monitor Since commit d883a4669a1de was introduced in v6.4, the bluetooth daemon gets the following failure message for the MGMT_OP_REMOVE_ADV_MONITOR command when the controller is powered off: bluetoothd[20976]: src/adapter.c:reset_adv_monitors_complete() Failed to reset Adv Monitors: Failed> Normally this situation happens when the bluetoothd daemon is started manually after system boot, which means that bluetoothd received the MGMT_EV_INDEX_ADDED event after the kernel ran hci_power_off(). Based on doc/mgmt-api.txt, the MGMT_OP_REMOVE_ADV_MONITOR command can be used when the controller is not powered. This patch changes the code in remove_adv_monitor() to use hci_cmd_sync_submit() instead of hci_cmd_sync_queue().
Fixes: d883a4669a1de ("Bluetooth: hci_sync: Only allow hci_cmd_sync_queue if running") Cc: Luiz Augusto von Dentz Cc: Manish Mandlik Cc: Archie Pusaka Cc: Miao-chen Chou Signed-off-by: Chun-Yi Lee Signed-off-by: Luiz Augusto von Dentz --- net/bluetooth/mgmt.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/net/bluetooth/mgmt.c b/net/bluetooth/mgmt.c index 657abd9bcc87..965f621ef865 100644 --- a/net/bluetooth/mgmt.c +++ b/net/bluetooth/mgmt.c @@ -5485,8 +5485,8 @@ static int remove_adv_monitor(struct sock *sk, struct hci_dev *hdev, goto unlock; } - err = hci_cmd_sync_queue(hdev, mgmt_remove_adv_monitor_sync, cmd, - mgmt_remove_adv_monitor_complete); + err = hci_cmd_sync_submit(hdev, mgmt_remove_adv_monitor_sync, cmd, + mgmt_remove_adv_monitor_complete); if (err) { mgmt_pending_remove(cmd); From 3d05fc82237aa97162d0d7dc300b55bb34e91d02 Mon Sep 17 00:00:00 2001 From: Bartosz Golaszewski Date: Wed, 24 Apr 2024 14:29:32 +0200 Subject: [PATCH 244/313] Bluetooth: qca: set power_ctrl_enabled on NULL returned by gpiod_get_optional() Any return value from gpiod_get_optional() other than a pointer to a GPIO descriptor or a NULL-pointer is an error and the driver should abort probing. That being said: commit 56d074d26c58 ("Bluetooth: hci_qca: don't use IS_ERR_OR_NULL() with gpiod_get_optional()") no longer sets power_ctrl_enabled on NULL-pointer returned by devm_gpiod_get_optional(). Restore this behavior but bail-out on errors. While at it: also bail-out on error returned when trying to get the "swctrl" GPIO. 
Reported-by: Wren Turkal Reported-by: Zijun Hu Closes: https://lore.kernel.org/linux-bluetooth/1713449192-25926-2-git-send-email-quic_zijuhu@quicinc.com/ Fixes: 56d074d26c58 ("Bluetooth: hci_qca: don't use IS_ERR_OR_NULL() with gpiod_get_optional()") Reviewed-by: Krzysztof Kozlowski Signed-off-by: Bartosz Golaszewski Tested-by: Wren Turkal" Reported-by: Wren Turkal Reported-by: Zijun Hu Reviewed-by: Krzysztof Kozlowski Reviewed-by: Krzysztof Kozlowski Signed-off-by: Luiz Augusto von Dentz --- drivers/bluetooth/hci_qca.c | 18 +++++++++++++----- 1 file changed, 13 insertions(+), 5 deletions(-) diff --git a/drivers/bluetooth/hci_qca.c b/drivers/bluetooth/hci_qca.c index b621a0a40ea4..0c9c9ee56592 100644 --- a/drivers/bluetooth/hci_qca.c +++ b/drivers/bluetooth/hci_qca.c @@ -2332,16 +2332,21 @@ static int qca_serdev_probe(struct serdev_device *serdev) (data->soc_type == QCA_WCN6750 || data->soc_type == QCA_WCN6855)) { dev_err(&serdev->dev, "failed to acquire BT_EN gpio\n"); - power_ctrl_enabled = false; + return PTR_ERR(qcadev->bt_en); } + if (!qcadev->bt_en) + power_ctrl_enabled = false; + qcadev->sw_ctrl = devm_gpiod_get_optional(&serdev->dev, "swctrl", GPIOD_IN); if (IS_ERR(qcadev->sw_ctrl) && (data->soc_type == QCA_WCN6750 || data->soc_type == QCA_WCN6855 || - data->soc_type == QCA_WCN7850)) - dev_warn(&serdev->dev, "failed to acquire SW_CTRL gpio\n"); + data->soc_type == QCA_WCN7850)) { + dev_err(&serdev->dev, "failed to acquire SW_CTRL gpio\n"); + return PTR_ERR(qcadev->sw_ctrl); + } qcadev->susclk = devm_clk_get_optional(&serdev->dev, NULL); if (IS_ERR(qcadev->susclk)) { @@ -2360,10 +2365,13 @@ static int qca_serdev_probe(struct serdev_device *serdev) qcadev->bt_en = devm_gpiod_get_optional(&serdev->dev, "enable", GPIOD_OUT_LOW); if (IS_ERR(qcadev->bt_en)) { - dev_warn(&serdev->dev, "failed to acquire enable gpio\n"); - power_ctrl_enabled = false; + dev_err(&serdev->dev, "failed to acquire enable gpio\n"); + return PTR_ERR(qcadev->bt_en); } + if (!qcadev->bt_en) 
+ power_ctrl_enabled = false; + qcadev->susclk = devm_clk_get_optional(&serdev->dev, NULL); if (IS_ERR(qcadev->susclk)) { dev_warn(&serdev->dev, "failed to acquire clk\n"); From e10d3ba4d434ed172914617ed8d74bd411421193 Mon Sep 17 00:00:00 2001 From: Ismael Luceno Date: Sun, 21 Apr 2024 16:22:32 +0200 Subject: [PATCH 245/313] ipvs: Fix checksumming on GSO of SCTP packets It was observed in the wild that pairs of consecutive packets would leave the IPVS with the same wrong checksum, and the issue only went away when disabling GSO. IPVS needs to avoid computing the SCTP checksum when using GSO. Fixes: 90017accff61 ("sctp: Add GSO support") Co-developed-by: Firo Yang Signed-off-by: Ismael Luceno Tested-by: Andreas Taschner Acked-by: Julian Anastasov Signed-off-by: Pablo Neira Ayuso --- net/netfilter/ipvs/ip_vs_proto_sctp.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/net/netfilter/ipvs/ip_vs_proto_sctp.c b/net/netfilter/ipvs/ip_vs_proto_sctp.c index a0921adc31a9..1e689c714127 100644 --- a/net/netfilter/ipvs/ip_vs_proto_sctp.c +++ b/net/netfilter/ipvs/ip_vs_proto_sctp.c @@ -126,7 +126,8 @@ sctp_snat_handler(struct sk_buff *skb, struct ip_vs_protocol *pp, if (sctph->source != cp->vport || payload_csum || skb->ip_summed == CHECKSUM_PARTIAL) { sctph->source = cp->vport; - sctp_nat_csum(skb, sctph, sctphoff); + if (!skb_is_gso(skb) || !skb_is_gso_sctp(skb)) + sctp_nat_csum(skb, sctph, sctphoff); } else { skb->ip_summed = CHECKSUM_UNNECESSARY; } @@ -174,7 +175,8 @@ sctp_dnat_handler(struct sk_buff *skb, struct ip_vs_protocol *pp, (skb->ip_summed == CHECKSUM_PARTIAL && !(skb_dst(skb)->dev->features & NETIF_F_SCTP_CRC))) { sctph->dest = cp->dport; - sctp_nat_csum(skb, sctph, sctphoff); + if (!skb_is_gso(skb) || !skb_is_gso_sctp(skb)) + sctp_nat_csum(skb, sctph, sctphoff); } else if (skb->ip_summed != CHECKSUM_PARTIAL) { skb->ip_summed = CHECKSUM_UNNECESSARY; } From 5ea7b72d4fac2fdbc0425cd8f2ea33abe95235b2 Mon Sep 17 00:00:00 2001 From: Hyunwoo Kim 
Date: Mon, 22 Apr 2024 05:37:17 -0400 Subject: [PATCH 246/313] net: openvswitch: Fix Use-After-Free in ovs_ct_exit Since kfree_rcu, which is called in the hlist_for_each_entry_rcu traversal of ovs_ct_limit_exit, is not part of the RCU read critical section, it is possible that the RCU grace period will pass during the traversal and the key will be freed. To prevent this, it should be changed to hlist_for_each_entry_safe. Fixes: 11efd5cb04a1 ("openvswitch: Support conntrack zone limit") Signed-off-by: Hyunwoo Kim Reviewed-by: Eric Dumazet Reviewed-by: Aaron Conole Link: https://lore.kernel.org/r/ZiYvzQN/Ry5oeFQW@v4bel-B760M-AORUS-ELITE-AX Signed-off-by: Jakub Kicinski --- net/openvswitch/conntrack.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/net/openvswitch/conntrack.c b/net/openvswitch/conntrack.c index 74b63cdb5992..2928c142a2dd 100644 --- a/net/openvswitch/conntrack.c +++ b/net/openvswitch/conntrack.c @@ -1593,9 +1593,9 @@ static void ovs_ct_limit_exit(struct net *net, struct ovs_net *ovs_net) for (i = 0; i < CT_LIMIT_HASH_BUCKETS; ++i) { struct hlist_head *head = &info->limits[i]; struct ovs_ct_limit *ct_limit; + struct hlist_node *next; - hlist_for_each_entry_rcu(ct_limit, head, hlist_node, - lockdep_ovsl_is_held()) + hlist_for_each_entry_safe(ct_limit, next, head, hlist_node) kfree_rcu(ct_limit, rcu); } kfree(info->limits); From 627f9c1bb882765a84aa78015abbacd783d429be Mon Sep 17 00:00:00 2001 From: Ido Schimmel Date: Mon, 22 Apr 2024 17:25:54 +0200 Subject: [PATCH 247/313] mlxsw: spectrum_acl_tcam: Fix race in region ID allocation Region identifiers can be allocated both when user space tries to insert a new tc filter and when filters are migrated from one region to another as part of the rehash delayed work. There is no lock protecting the bitmap from which these identifiers are allocated, which is racy and leads to bad parameter errors from the device's firmware.
Fix by converting the bitmap to IDA which handles its own locking. For consistency, do the same for the group identifiers that are part of the same structure. Fixes: 2bffc5322fd8 ("mlxsw: spectrum_acl: Don't take mutex in mlxsw_sp_acl_tcam_vregion_rehash_work()") Reported-by: Amit Cohen Signed-off-by: Ido Schimmel Tested-by: Alexander Zubkov Reviewed-by: Petr Machata Signed-off-by: Petr Machata Reviewed-by: Simon Horman Link: https://lore.kernel.org/r/ce494b7940cadfe84f3e18da7785b51ef5f776e3.1713797103.git.petrm@nvidia.com Signed-off-by: Jakub Kicinski --- .../mellanox/mlxsw/spectrum_acl_tcam.c | 61 ++++++++----------- .../mellanox/mlxsw/spectrum_acl_tcam.h | 5 +- 2 files changed, 30 insertions(+), 36 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_acl_tcam.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum_acl_tcam.c index f20052776b3f..b6a4652a6475 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/spectrum_acl_tcam.c +++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_acl_tcam.c @@ -10,6 +10,7 @@ #include #include #include +#include #include #include @@ -58,41 +59,43 @@ int mlxsw_sp_acl_tcam_priority_get(struct mlxsw_sp *mlxsw_sp, static int mlxsw_sp_acl_tcam_region_id_get(struct mlxsw_sp_acl_tcam *tcam, u16 *p_id) { - u16 id; + int id; - id = find_first_zero_bit(tcam->used_regions, tcam->max_regions); - if (id < tcam->max_regions) { - __set_bit(id, tcam->used_regions); - *p_id = id; - return 0; - } - return -ENOBUFS; + id = ida_alloc_max(&tcam->used_regions, tcam->max_regions - 1, + GFP_KERNEL); + if (id < 0) + return id; + + *p_id = id; + + return 0; } static void mlxsw_sp_acl_tcam_region_id_put(struct mlxsw_sp_acl_tcam *tcam, u16 id) { - __clear_bit(id, tcam->used_regions); + ida_free(&tcam->used_regions, id); } static int mlxsw_sp_acl_tcam_group_id_get(struct mlxsw_sp_acl_tcam *tcam, u16 *p_id) { - u16 id; + int id; - id = find_first_zero_bit(tcam->used_groups, tcam->max_groups); - if (id < tcam->max_groups) { - __set_bit(id, 
tcam->used_groups); - *p_id = id; - return 0; - } - return -ENOBUFS; + id = ida_alloc_max(&tcam->used_groups, tcam->max_groups - 1, + GFP_KERNEL); + if (id < 0) + return id; + + *p_id = id; + + return 0; } static void mlxsw_sp_acl_tcam_group_id_put(struct mlxsw_sp_acl_tcam *tcam, u16 id) { - __clear_bit(id, tcam->used_groups); + ida_free(&tcam->used_groups, id); } struct mlxsw_sp_acl_tcam_pattern { @@ -1549,19 +1552,11 @@ int mlxsw_sp_acl_tcam_init(struct mlxsw_sp *mlxsw_sp, if (max_tcam_regions < max_regions) max_regions = max_tcam_regions; - tcam->used_regions = bitmap_zalloc(max_regions, GFP_KERNEL); - if (!tcam->used_regions) { - err = -ENOMEM; - goto err_alloc_used_regions; - } + ida_init(&tcam->used_regions); tcam->max_regions = max_regions; max_groups = MLXSW_CORE_RES_GET(mlxsw_sp->core, ACL_MAX_GROUPS); - tcam->used_groups = bitmap_zalloc(max_groups, GFP_KERNEL); - if (!tcam->used_groups) { - err = -ENOMEM; - goto err_alloc_used_groups; - } + ida_init(&tcam->used_groups); tcam->max_groups = max_groups; tcam->max_group_size = MLXSW_CORE_RES_GET(mlxsw_sp->core, ACL_MAX_GROUP_SIZE); @@ -1575,10 +1570,8 @@ int mlxsw_sp_acl_tcam_init(struct mlxsw_sp *mlxsw_sp, return 0; err_tcam_init: - bitmap_free(tcam->used_groups); -err_alloc_used_groups: - bitmap_free(tcam->used_regions); -err_alloc_used_regions: + ida_destroy(&tcam->used_groups); + ida_destroy(&tcam->used_regions); mlxsw_sp_acl_tcam_rehash_params_unregister(mlxsw_sp); err_rehash_params_register: mutex_destroy(&tcam->lock); @@ -1591,8 +1584,8 @@ void mlxsw_sp_acl_tcam_fini(struct mlxsw_sp *mlxsw_sp, const struct mlxsw_sp_acl_tcam_ops *ops = mlxsw_sp->acl_tcam_ops; ops->fini(mlxsw_sp, tcam->priv); - bitmap_free(tcam->used_groups); - bitmap_free(tcam->used_regions); + ida_destroy(&tcam->used_groups); + ida_destroy(&tcam->used_regions); mlxsw_sp_acl_tcam_rehash_params_unregister(mlxsw_sp); mutex_destroy(&tcam->lock); } diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_acl_tcam.h 
b/drivers/net/ethernet/mellanox/mlxsw/spectrum_acl_tcam.h index 462bf448497d..79a1d8606512 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/spectrum_acl_tcam.h +++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_acl_tcam.h @@ -6,15 +6,16 @@ #include #include +#include #include "reg.h" #include "spectrum.h" #include "core_acl_flex_keys.h" struct mlxsw_sp_acl_tcam { - unsigned long *used_regions; /* bit array */ + struct ida used_regions; unsigned int max_regions; - unsigned long *used_groups; /* bit array */ + struct ida used_groups; unsigned int max_groups; unsigned int max_group_size; struct mutex lock; /* guards vregion list */ From d90cfe20562407d9f080d24123078d666d730707 Mon Sep 17 00:00:00 2001 From: Ido Schimmel Date: Mon, 22 Apr 2024 17:25:55 +0200 Subject: [PATCH 248/313] mlxsw: spectrum_acl_tcam: Fix race during rehash delayed work The purpose of the rehash delayed work is to reduce the number of masks (eRPs) used by an ACL region as the eRP bank is a global and limited resource. This is done in three steps: 1. Creating a new set of masks and a new ACL region which will use the new masks and to which the existing filters will be migrated. The new region is assigned to 'vregion->region' and the region from which the filters are migrated is assigned to 'vregion->region2'. 2. Migrating all the filters from the old region to the new region. 3. Destroying the old region and setting 'vregion->region2' to NULL. Only the second step is performed under the 'vregion->lock' mutex although its comment says that among other things it "Protects consistency of region, region2 pointers". This is problematic as the first step can race with filter insertion from user space that uses 'vregion->region', but under the mutex. Fix by holding the mutex across the entirety of the delayed work and not only during the second step.
Fixes: 2bffc5322fd8 ("mlxsw: spectrum_acl: Don't take mutex in mlxsw_sp_acl_tcam_vregion_rehash_work()") Signed-off-by: Ido Schimmel Tested-by: Alexander Zubkov Reviewed-by: Petr Machata Signed-off-by: Petr Machata Reviewed-by: Simon Horman Link: https://lore.kernel.org/r/1ec1d54edf2bad0a369e6b4fa030aba64e1f124b.1713797103.git.petrm@nvidia.com Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/mellanox/mlxsw/spectrum_acl_tcam.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_acl_tcam.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum_acl_tcam.c index b6a4652a6475..9c0c728bb42d 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/spectrum_acl_tcam.c +++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_acl_tcam.c @@ -718,7 +718,9 @@ static void mlxsw_sp_acl_tcam_vregion_rehash_work(struct work_struct *work) rehash.dw.work); int credits = MLXSW_SP_ACL_TCAM_VREGION_REHASH_CREDITS; + mutex_lock(&vregion->lock); mlxsw_sp_acl_tcam_vregion_rehash(vregion->mlxsw_sp, vregion, &credits); + mutex_unlock(&vregion->lock); if (credits < 0) /* Rehash gone out of credits so it was interrupted. * Schedule the work as soon as possible to continue. @@ -1323,7 +1325,6 @@ mlxsw_sp_acl_tcam_vregion_migrate(struct mlxsw_sp *mlxsw_sp, int err, err2; trace_mlxsw_sp_acl_tcam_vregion_migrate(mlxsw_sp, vregion); - mutex_lock(&vregion->lock); err = mlxsw_sp_acl_tcam_vchunk_migrate_all(mlxsw_sp, vregion, ctx, credits); if (err) { @@ -1343,7 +1344,6 @@ mlxsw_sp_acl_tcam_vregion_migrate(struct mlxsw_sp *mlxsw_sp, /* Let the rollback to be continued later on. 
*/ } } - mutex_unlock(&vregion->lock); trace_mlxsw_sp_acl_tcam_vregion_migrate_end(mlxsw_sp, vregion); return err; } From 79b5b4b18bc85b19d3a518483f9abbbe6d7b3ba4 Mon Sep 17 00:00:00 2001 From: Ido Schimmel Date: Mon, 22 Apr 2024 17:25:56 +0200 Subject: [PATCH 249/313] mlxsw: spectrum_acl_tcam: Fix possible use-after-free during activity update The rule activity update delayed work periodically traverses the list of configured rules and queries their activity from the device. As part of this task it accesses the entry pointed by 'ventry->entry', but this entry can be changed concurrently by the rehash delayed work, leading to a use-after-free [1]. Fix by closing the race and perform the activity query under the 'vregion->lock' mutex. [1] BUG: KASAN: slab-use-after-free in mlxsw_sp_acl_tcam_flower_rule_activity_get+0x121/0x140 Read of size 8 at addr ffff8881054ed808 by task kworker/0:18/181 CPU: 0 PID: 181 Comm: kworker/0:18 Not tainted 6.9.0-rc2-custom-00781-gd5ab772d32f7 #2 Hardware name: Mellanox Technologies Ltd. 
MSN3700/VMOD0005, BIOS 5.11 01/06/2019 Workqueue: mlxsw_core mlxsw_sp_acl_rule_activity_update_work Call Trace: dump_stack_lvl+0xc6/0x120 print_report+0xce/0x670 kasan_report+0xd7/0x110 mlxsw_sp_acl_tcam_flower_rule_activity_get+0x121/0x140 mlxsw_sp_acl_rule_activity_update_work+0x219/0x400 process_one_work+0x8eb/0x19b0 worker_thread+0x6c9/0xf70 kthread+0x2c9/0x3b0 ret_from_fork+0x4d/0x80 ret_from_fork_asm+0x1a/0x30 Allocated by task 1039: kasan_save_stack+0x33/0x60 kasan_save_track+0x14/0x30 __kasan_kmalloc+0x8f/0xa0 __kmalloc+0x19c/0x360 mlxsw_sp_acl_tcam_entry_create+0x7b/0x1f0 mlxsw_sp_acl_tcam_vchunk_migrate_all+0x30d/0xb50 mlxsw_sp_acl_tcam_vregion_rehash_work+0x157/0x1300 process_one_work+0x8eb/0x19b0 worker_thread+0x6c9/0xf70 kthread+0x2c9/0x3b0 ret_from_fork+0x4d/0x80 ret_from_fork_asm+0x1a/0x30 Freed by task 1039: kasan_save_stack+0x33/0x60 kasan_save_track+0x14/0x30 kasan_save_free_info+0x3b/0x60 poison_slab_object+0x102/0x170 __kasan_slab_free+0x14/0x30 kfree+0xc1/0x290 mlxsw_sp_acl_tcam_vchunk_migrate_all+0x3d7/0xb50 mlxsw_sp_acl_tcam_vregion_rehash_work+0x157/0x1300 process_one_work+0x8eb/0x19b0 worker_thread+0x6c9/0xf70 kthread+0x2c9/0x3b0 ret_from_fork+0x4d/0x80 ret_from_fork_asm+0x1a/0x30 Fixes: 2bffc5322fd8 ("mlxsw: spectrum_acl: Don't take mutex in mlxsw_sp_acl_tcam_vregion_rehash_work()") Signed-off-by: Ido Schimmel Tested-by: Alexander Zubkov Reviewed-by: Petr Machata Signed-off-by: Petr Machata Reviewed-by: Simon Horman Link: https://lore.kernel.org/r/1fcce0a60b231ebeb2515d91022284ba7b4ffe7a.1713797103.git.petrm@nvidia.com Signed-off-by: Jakub Kicinski --- .../net/ethernet/mellanox/mlxsw/spectrum_acl_tcam.c | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_acl_tcam.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum_acl_tcam.c index 9c0c728bb42d..7e69225c057d 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/spectrum_acl_tcam.c +++ 
b/drivers/net/ethernet/mellanox/mlxsw/spectrum_acl_tcam.c @@ -1159,8 +1159,14 @@ mlxsw_sp_acl_tcam_ventry_activity_get(struct mlxsw_sp *mlxsw_sp, struct mlxsw_sp_acl_tcam_ventry *ventry, bool *activity) { - return mlxsw_sp_acl_tcam_entry_activity_get(mlxsw_sp, - ventry->entry, activity); + struct mlxsw_sp_acl_tcam_vregion *vregion = ventry->vchunk->vregion; + int err; + + mutex_lock(&vregion->lock); + err = mlxsw_sp_acl_tcam_entry_activity_get(mlxsw_sp, ventry->entry, + activity); + mutex_unlock(&vregion->lock); + return err; } static int From 54225988889931467a9b55fdbef534079b665519 Mon Sep 17 00:00:00 2001 From: Ido Schimmel Date: Mon, 22 Apr 2024 17:25:57 +0200 Subject: [PATCH 250/313] mlxsw: spectrum_acl_tcam: Fix possible use-after-free during rehash The rehash delayed work migrates filters from one region to another according to the number of available credits. The migrated from region is destroyed at the end of the work if the number of credits is non-negative as the assumption is that this is indicative of migration being complete. This assumption is incorrect as a non-negative number of credits can also be the result of a failed migration. The destruction of a region that still has filters referencing it can result in a use-after-free [1]. Fix by not destroying the region if migration failed. [1] BUG: KASAN: slab-use-after-free in mlxsw_sp_acl_ctcam_region_entry_remove+0x21d/0x230 Read of size 8 at addr ffff8881735319e8 by task kworker/0:31/3858 CPU: 0 PID: 3858 Comm: kworker/0:31 Tainted: G W 6.9.0-rc2-custom-00782-gf2275c2157d8 #5 Hardware name: Mellanox Technologies Ltd. 
MSN3700/VMOD0005, BIOS 5.11 01/06/2019 Workqueue: mlxsw_core mlxsw_sp_acl_tcam_vregion_rehash_work Call Trace: dump_stack_lvl+0xc6/0x120 print_report+0xce/0x670 kasan_report+0xd7/0x110 mlxsw_sp_acl_ctcam_region_entry_remove+0x21d/0x230 mlxsw_sp_acl_ctcam_entry_del+0x2e/0x70 mlxsw_sp_acl_atcam_entry_del+0x81/0x210 mlxsw_sp_acl_tcam_vchunk_migrate_all+0x3cd/0xb50 mlxsw_sp_acl_tcam_vregion_rehash_work+0x157/0x1300 process_one_work+0x8eb/0x19b0 worker_thread+0x6c9/0xf70 kthread+0x2c9/0x3b0 ret_from_fork+0x4d/0x80 ret_from_fork_asm+0x1a/0x30 Allocated by task 174: kasan_save_stack+0x33/0x60 kasan_save_track+0x14/0x30 __kasan_kmalloc+0x8f/0xa0 __kmalloc+0x19c/0x360 mlxsw_sp_acl_tcam_region_create+0xdf/0x9c0 mlxsw_sp_acl_tcam_vregion_rehash_work+0x954/0x1300 process_one_work+0x8eb/0x19b0 worker_thread+0x6c9/0xf70 kthread+0x2c9/0x3b0 ret_from_fork+0x4d/0x80 ret_from_fork_asm+0x1a/0x30 Freed by task 7: kasan_save_stack+0x33/0x60 kasan_save_track+0x14/0x30 kasan_save_free_info+0x3b/0x60 poison_slab_object+0x102/0x170 __kasan_slab_free+0x14/0x30 kfree+0xc1/0x290 mlxsw_sp_acl_tcam_region_destroy+0x272/0x310 mlxsw_sp_acl_tcam_vregion_rehash_work+0x731/0x1300 process_one_work+0x8eb/0x19b0 worker_thread+0x6c9/0xf70 kthread+0x2c9/0x3b0 ret_from_fork+0x4d/0x80 ret_from_fork_asm+0x1a/0x30 Fixes: c9c9af91f1d9 ("mlxsw: spectrum_acl: Allow to interrupt/continue rehash work") Signed-off-by: Ido Schimmel Tested-by: Alexander Zubkov Reviewed-by: Petr Machata Signed-off-by: Petr Machata Reviewed-by: Simon Horman Link: https://lore.kernel.org/r/3e412b5659ec2310c5c615760dfe5eac18dd7ebd.1713797103.git.petrm@nvidia.com Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/mellanox/mlxsw/spectrum_acl_tcam.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_acl_tcam.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum_acl_tcam.c index 7e69225c057d..1ff0b2c7c11d 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/spectrum_acl_tcam.c +++ 
b/drivers/net/ethernet/mellanox/mlxsw/spectrum_acl_tcam.c @@ -1451,6 +1451,7 @@ mlxsw_sp_acl_tcam_vregion_rehash(struct mlxsw_sp *mlxsw_sp, ctx, credits); if (err) { dev_err(mlxsw_sp->bus_info->dev, "Failed to migrate vregion\n"); + return; } if (*credits >= 0) From 5bcf925587e9b5d36420d572a0b4d131c90fb306 Mon Sep 17 00:00:00 2001 From: Ido Schimmel Date: Mon, 22 Apr 2024 17:25:58 +0200 Subject: [PATCH 251/313] mlxsw: spectrum_acl_tcam: Rate limit error message In the rare cases when the device resources are exhausted it is likely that the rehash delayed work will fail. An error message will be printed whenever this happens which can be overwhelming considering the fact that the work is per-region and that there can be hundreds of regions. Fix by rate limiting the error message. Fixes: e5e7962ee5c2 ("mlxsw: spectrum_acl: Implement region migration according to hints") Signed-off-by: Ido Schimmel Tested-by: Alexander Zubkov Reviewed-by: Petr Machata Signed-off-by: Petr Machata Reviewed-by: Simon Horman Link: https://lore.kernel.org/r/c510763b2ebd25e7990d80183feff91cde593145.1713797103.git.petrm@nvidia.com Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/mellanox/mlxsw/spectrum_acl_tcam.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_acl_tcam.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum_acl_tcam.c index 1ff0b2c7c11d..568ae7092fe0 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/spectrum_acl_tcam.c +++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_acl_tcam.c @@ -1450,7 +1450,7 @@ mlxsw_sp_acl_tcam_vregion_rehash(struct mlxsw_sp *mlxsw_sp, err = mlxsw_sp_acl_tcam_vregion_migrate(mlxsw_sp, vregion, ctx, credits); if (err) { - dev_err(mlxsw_sp->bus_info->dev, "Failed to migrate vregion\n"); + dev_err_ratelimited(mlxsw_sp->bus_info->dev, "Failed to migrate vregion\n"); return; } From 8ca3f7a7b61393804c46f170743c3b839df13977 Mon Sep 17 00:00:00 2001 From: Ido Schimmel Date: Mon, 22 Apr 2024 
17:25:59 +0200 Subject: [PATCH 252/313] mlxsw: spectrum_acl_tcam: Fix memory leak during rehash The rehash delayed work migrates filters from one region to another. This is done by iterating over all chunks (all the filters with the same priority) in the region and in each chunk iterating over all the filters. If the migration fails, the code tries to migrate the filters back to the old region. However, the rollback itself can also fail in which case another migration will be erroneously performed. Besides the fact that this ping pong is not a very good idea, it also creates a problem. Each virtual chunk references two chunks: The currently used one ('vchunk->chunk') and a backup ('vchunk->chunk2'). During migration the first holds the chunk we want to migrate filters to and the second holds the chunk we are migrating filters from. The code currently assumes - but does not verify - that the backup chunk does not exist (NULL) if the currently used chunk does not reference the target region. This assumption breaks when we are trying to rollback a rollback, resulting in the backup chunk being overwritten and leaked [1]. Fix by not rolling back a failed rollback and add a warning to avoid future cases. [1] WARNING: CPU: 5 PID: 1063 at lib/parman.c:291 parman_destroy+0x17/0x20 Modules linked in: CPU: 5 PID: 1063 Comm: kworker/5:11 Tainted: G W 6.9.0-rc2-custom-00784-gc6a05c468a0b #14 Hardware name: Mellanox Technologies Ltd. MSN3700/VMOD0005, BIOS 5.11 01/06/2019 Workqueue: mlxsw_core mlxsw_sp_acl_tcam_vregion_rehash_work RIP: 0010:parman_destroy+0x17/0x20 [...] 
Call Trace: mlxsw_sp_acl_atcam_region_fini+0x19/0x60 mlxsw_sp_acl_tcam_region_destroy+0x49/0xf0 mlxsw_sp_acl_tcam_vregion_rehash_work+0x1f1/0x470 process_one_work+0x151/0x370 worker_thread+0x2cb/0x3e0 kthread+0xd0/0x100 ret_from_fork+0x34/0x50 ret_from_fork_asm+0x1a/0x30 Fixes: 843500518509 ("mlxsw: spectrum_acl: Do rollback as another call to mlxsw_sp_acl_tcam_vchunk_migrate_all()") Signed-off-by: Ido Schimmel Tested-by: Alexander Zubkov Reviewed-by: Petr Machata Signed-off-by: Petr Machata Reviewed-by: Simon Horman Link: https://lore.kernel.org/r/d5edd4f4503934186ae5cfe268503b16345b4e0f.1713797103.git.petrm@nvidia.com Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/mellanox/mlxsw/spectrum_acl_tcam.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_acl_tcam.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum_acl_tcam.c index 568ae7092fe0..0902eb7651e1 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/spectrum_acl_tcam.c +++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_acl_tcam.c @@ -1200,6 +1200,8 @@ mlxsw_sp_acl_tcam_vchunk_migrate_start(struct mlxsw_sp *mlxsw_sp, { struct mlxsw_sp_acl_tcam_chunk *new_chunk; + WARN_ON(vchunk->chunk2); + new_chunk = mlxsw_sp_acl_tcam_chunk_create(mlxsw_sp, vchunk, region); if (IS_ERR(new_chunk)) return PTR_ERR(new_chunk); @@ -1334,6 +1336,8 @@ mlxsw_sp_acl_tcam_vregion_migrate(struct mlxsw_sp *mlxsw_sp, err = mlxsw_sp_acl_tcam_vchunk_migrate_all(mlxsw_sp, vregion, ctx, credits); if (err) { + if (ctx->this_is_rollback) + return err; /* In case migration was not successful, we need to swap * so the original region pointer is assigned again * to vregion->region. From 743edc8547a92b6192aa1f1b6bb78233fa21dc9b Mon Sep 17 00:00:00 2001 From: Ido Schimmel Date: Mon, 22 Apr 2024 17:26:00 +0200 Subject: [PATCH 253/313] mlxsw: spectrum_acl_tcam: Fix warning during rehash As previously explained, the rehash delayed work migrates filters from one region to another. 
This is done by iterating over all chunks (all the filters with the same priority) in the region and in each chunk iterating over all the filters. When the work runs out of credits it stores the current chunk and entry as markers in the per-work context so that it would know where to resume the migration from the next time the work is scheduled. Upon error, the chunk marker is reset to NULL, but without resetting the entry markers despite being relative to it. This can result in migration being resumed from an entry that does not belong to the chunk being migrated. In turn, this will eventually lead to a chunk being iterated over as if it is an entry. Because of how the two structures happen to be defined, this does not lead to KASAN splats, but to warnings such as [1]. Fix by creating a helper that resets all the markers and call it from all the places that currently only reset the chunk marker. For good measure, also call it when starting a completely new rehash. Add a warning to avoid future cases. [1] WARNING: CPU: 7 PID: 1076 at drivers/net/ethernet/mellanox/mlxsw/core_acl_flex_keys.c:407 mlxsw_afk_encode+0x242/0x2f0 Modules linked in: CPU: 7 PID: 1076 Comm: kworker/7:24 Tainted: G W 6.9.0-rc3-custom-00880-g29e61d91b77b #29 Hardware name: Mellanox Technologies Ltd. MSN3700/VMOD0005, BIOS 5.11 01/06/2019 Workqueue: mlxsw_core mlxsw_sp_acl_tcam_vregion_rehash_work RIP: 0010:mlxsw_afk_encode+0x242/0x2f0 [...]
Call Trace: mlxsw_sp_acl_atcam_entry_add+0xd9/0x3c0 mlxsw_sp_acl_tcam_entry_create+0x5e/0xa0 mlxsw_sp_acl_tcam_vchunk_migrate_all+0x109/0x290 mlxsw_sp_acl_tcam_vregion_rehash_work+0x6c/0x470 process_one_work+0x151/0x370 worker_thread+0x2cb/0x3e0 kthread+0xd0/0x100 ret_from_fork+0x34/0x50 Fixes: 6f9579d4e302 ("mlxsw: spectrum_acl: Remember where to continue rehash migration") Signed-off-by: Ido Schimmel Tested-by: Alexander Zubkov Reviewed-by: Petr Machata Signed-off-by: Petr Machata Reviewed-by: Simon Horman Link: https://lore.kernel.org/r/cc17eed86b41dd829d39b07906fec074a9ce580e.1713797103.git.petrm@nvidia.com Signed-off-by: Jakub Kicinski --- .../mellanox/mlxsw/spectrum_acl_tcam.c | 20 ++++++++++++++++--- 1 file changed, 17 insertions(+), 3 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_acl_tcam.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum_acl_tcam.c index 0902eb7651e1..e8c607886621 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/spectrum_acl_tcam.c +++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_acl_tcam.c @@ -730,6 +730,17 @@ static void mlxsw_sp_acl_tcam_vregion_rehash_work(struct work_struct *work) mlxsw_sp_acl_tcam_vregion_rehash_work_schedule(vregion); } +static void +mlxsw_sp_acl_tcam_rehash_ctx_vchunk_reset(struct mlxsw_sp_acl_tcam_rehash_ctx *ctx) +{ + /* The entry markers are relative to the current chunk and therefore + * needs to be reset together with the chunk marker. + */ + ctx->current_vchunk = NULL; + ctx->start_ventry = NULL; + ctx->stop_ventry = NULL; +} + static void mlxsw_sp_acl_tcam_rehash_ctx_vchunk_changed(struct mlxsw_sp_acl_tcam_vchunk *vchunk) { @@ -752,7 +763,7 @@ mlxsw_sp_acl_tcam_rehash_ctx_vregion_changed(struct mlxsw_sp_acl_tcam_vregion *v * the current chunk pointer to make sure all chunks * are properly migrated. 
*/ - vregion->rehash.ctx.current_vchunk = NULL; + mlxsw_sp_acl_tcam_rehash_ctx_vchunk_reset(&vregion->rehash.ctx); } static struct mlxsw_sp_acl_tcam_vregion * @@ -1220,7 +1231,7 @@ mlxsw_sp_acl_tcam_vchunk_migrate_end(struct mlxsw_sp *mlxsw_sp, { mlxsw_sp_acl_tcam_chunk_destroy(mlxsw_sp, vchunk->chunk2); vchunk->chunk2 = NULL; - ctx->current_vchunk = NULL; + mlxsw_sp_acl_tcam_rehash_ctx_vchunk_reset(ctx); } static int @@ -1252,6 +1263,8 @@ mlxsw_sp_acl_tcam_vchunk_migrate_one(struct mlxsw_sp *mlxsw_sp, ventry = list_first_entry(&vchunk->ventry_list, typeof(*ventry), list); + WARN_ON(ventry->vchunk != vchunk); + list_for_each_entry_from(ventry, &vchunk->ventry_list, list) { /* During rollback, once we reach the ventry that failed * to migrate, we are done. @@ -1343,7 +1356,7 @@ mlxsw_sp_acl_tcam_vregion_migrate(struct mlxsw_sp *mlxsw_sp, * to vregion->region. */ swap(vregion->region, vregion->region2); - ctx->current_vchunk = NULL; + mlxsw_sp_acl_tcam_rehash_ctx_vchunk_reset(ctx); ctx->this_is_rollback = true; err2 = mlxsw_sp_acl_tcam_vchunk_migrate_all(mlxsw_sp, vregion, ctx, credits); @@ -1402,6 +1415,7 @@ mlxsw_sp_acl_tcam_vregion_rehash_start(struct mlxsw_sp *mlxsw_sp, ctx->hints_priv = hints_priv; ctx->this_is_rollback = false; + mlxsw_sp_acl_tcam_rehash_ctx_vchunk_reset(ctx); return 0; From b377add0f0117409c418ddd6504bd682ebe0bf79 Mon Sep 17 00:00:00 2001 From: Ido Schimmel Date: Mon, 22 Apr 2024 17:26:01 +0200 Subject: [PATCH 254/313] mlxsw: spectrum_acl_tcam: Fix incorrect list API usage Both the function that migrates all the chunks within a region and the function that migrates all the entries within a chunk call list_first_entry() on the respective lists without checking that the lists are not empty. This is incorrect usage of the API, which leads to the following warning [1]. Fix by returning if the lists are empty as there is nothing to migrate in this case. 
[1] WARNING: CPU: 0 PID: 6437 at drivers/net/ethernet/mellanox/mlxsw/spectrum_acl_tcam.c:1266 mlxsw_sp_acl_tcam_vchunk_migrate_all+0x1f1/0> Modules linked in: CPU: 0 PID: 6437 Comm: kworker/0:37 Not tainted 6.9.0-rc3-custom-00883-g94a65f079ef6 #39 Hardware name: Mellanox Technologies Ltd. MSN3700/VMOD0005, BIOS 5.11 01/06/2019 Workqueue: mlxsw_core mlxsw_sp_acl_tcam_vregion_rehash_work RIP: 0010:mlxsw_sp_acl_tcam_vchunk_migrate_all+0x1f1/0x2c0 [...] Call Trace: mlxsw_sp_acl_tcam_vregion_rehash_work+0x6c/0x4a0 process_one_work+0x151/0x370 worker_thread+0x2cb/0x3e0 kthread+0xd0/0x100 ret_from_fork+0x34/0x50 ret_from_fork_asm+0x1a/0x30 Fixes: 6f9579d4e302 ("mlxsw: spectrum_acl: Remember where to continue rehash migration") Signed-off-by: Ido Schimmel Tested-by: Alexander Zubkov Reviewed-by: Petr Machata Signed-off-by: Petr Machata Reviewed-by: Simon Horman Link: https://lore.kernel.org/r/4628e9a22d1d84818e28310abbbc498e7bc31bc9.1713797103.git.petrm@nvidia.com Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/mellanox/mlxsw/spectrum_acl_tcam.c | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_acl_tcam.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum_acl_tcam.c index e8c607886621..89a5ebc3463f 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/spectrum_acl_tcam.c +++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_acl_tcam.c @@ -1254,6 +1254,9 @@ mlxsw_sp_acl_tcam_vchunk_migrate_one(struct mlxsw_sp *mlxsw_sp, return 0; } + if (list_empty(&vchunk->ventry_list)) + goto out; + /* If the migration got interrupted, we have the ventry to start from * stored in context. 
*/ @@ -1305,6 +1308,7 @@ mlxsw_sp_acl_tcam_vchunk_migrate_one(struct mlxsw_sp *mlxsw_sp, } } +out: mlxsw_sp_acl_tcam_vchunk_migrate_end(mlxsw_sp, vchunk, ctx); return 0; } @@ -1318,6 +1322,9 @@ mlxsw_sp_acl_tcam_vchunk_migrate_all(struct mlxsw_sp *mlxsw_sp, struct mlxsw_sp_acl_tcam_vchunk *vchunk; int err; + if (list_empty(&vregion->vchunk_list)) + return 0; + /* If the migration got interrupted, we have the vchunk * we are working on stored in context. */ From fb4e2b70a7194b209fc7320bbf33b375f7114bd5 Mon Sep 17 00:00:00 2001 From: Ido Schimmel Date: Mon, 22 Apr 2024 17:26:02 +0200 Subject: [PATCH 255/313] mlxsw: spectrum_acl_tcam: Fix memory leak when canceling rehash work The rehash delayed work is rescheduled with a delay if the number of credits at end of the work is not negative as supposedly it means that the migration ended. Otherwise, it is rescheduled immediately. After "mlxsw: spectrum_acl_tcam: Fix possible use-after-free during rehash" the above is no longer accurate as a non-negative number of credits is no longer indicative of the migration being done. It can also happen if the work encountered an error in which case the migration will resume the next time the work is scheduled. The significance of the above is that it is possible for the work to be pending and associated with hints that were allocated when the migration started. This leads to the hints being leaked [1] when the work is canceled while pending as part of ACL region dismantle. Fix by freeing the hints if hints are associated with a work that was canceled while pending. Blame the original commit since the reliance on not having a pending work associated with hints is fragile. [1] unreferenced object 0xffff88810e7c3000 (size 256): comm "kworker/0:16", pid 176, jiffies 4295460353 hex dump (first 32 bytes): 00 30 95 11 81 88 ff ff 61 00 00 00 00 00 00 80 .0......a....... 00 00 61 00 40 00 00 00 00 00 00 00 04 00 00 00 ..a.@........... 
backtrace (crc 2544ddb9): [<00000000cf8cfab3>] kmalloc_trace+0x23f/0x2a0 [<000000004d9a1ad9>] objagg_hints_get+0x42/0x390 [<000000000b143cf3>] mlxsw_sp_acl_erp_rehash_hints_get+0xca/0x400 [<0000000059bdb60a>] mlxsw_sp_acl_tcam_vregion_rehash_work+0x868/0x1160 [<00000000e81fd734>] process_one_work+0x59c/0xf20 [<00000000ceee9e81>] worker_thread+0x799/0x12c0 [<00000000bda6fe39>] kthread+0x246/0x300 [<0000000070056d23>] ret_from_fork+0x34/0x70 [<00000000dea2b93e>] ret_from_fork_asm+0x1a/0x30 Fixes: c9c9af91f1d9 ("mlxsw: spectrum_acl: Allow to interrupt/continue rehash work") Signed-off-by: Ido Schimmel Tested-by: Alexander Zubkov Signed-off-by: Petr Machata Reviewed-by: Simon Horman Link: https://lore.kernel.org/r/0cc12ebb07c4d4c41a1265ee2c28b392ff997a86.1713797103.git.petrm@nvidia.com Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/mellanox/mlxsw/spectrum_acl_tcam.c | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_acl_tcam.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum_acl_tcam.c index 89a5ebc3463f..92a406f02eae 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/spectrum_acl_tcam.c +++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_acl_tcam.c @@ -836,10 +836,14 @@ mlxsw_sp_acl_tcam_vregion_destroy(struct mlxsw_sp *mlxsw_sp, struct mlxsw_sp_acl_tcam *tcam = vregion->tcam; if (vgroup->vregion_rehash_enabled && ops->region_rehash_hints_get) { + struct mlxsw_sp_acl_tcam_rehash_ctx *ctx = &vregion->rehash.ctx; + mutex_lock(&tcam->lock); list_del(&vregion->tlist); mutex_unlock(&tcam->lock); - cancel_delayed_work_sync(&vregion->rehash.dw); + if (cancel_delayed_work_sync(&vregion->rehash.dw) && + ctx->hints_priv) + ops->region_rehash_hints_put(ctx->hints_priv); } mlxsw_sp_acl_tcam_vgroup_vregion_detach(mlxsw_sp, vregion); if (vregion->region2) From 8092162335554c8ef5e7f50eff68aa9cfbdbf865 Mon Sep 17 00:00:00 2001 From: Edward Liaw Date: Thu, 11 Apr 2024 23:19:49 +0000 Subject: [PATCH 256/313] 
selftests/harness: remove use of LINE_MAX Android was seeing a compilation error because its C library does not define LINE_MAX. This replaces the use of LINE_MAX / snprintf with asprintf, which will change the behavior to not truncate the test name if it is over 2048 chars long. See also: https://github.com/llvm/llvm-project/issues/88119 [akpm@linux-foundation.org: remove limits.h include, per Edward] [akpm@linux-foundation.org: check asprintf() return] [usama.anjum@collabora.com: fix undeclared function error] Link: https://lkml.kernel.org/r/20240417075530.3807625-1-usama.anjum@collabora.com Link: https://lkml.kernel.org/r/20240411231954.62156-1-edliaw@google.com Fixes: 38c957f07038 ("selftests: kselftest_harness: generate test name once") Signed-off-by: Edward Liaw Signed-off-by: Muhammad Usama Anjum Cc: Andy Lutomirski Cc: Axel Rasmussen Cc: Bill Wendling Cc: David Hildenbrand Cc: Edward Liaw Cc: Justin Stitt Cc: Kees Cook Cc: "Mike Rapoport (IBM)" Cc: Nathan Chancellor Cc: Nick Desaulniers Cc: Peter Xu Cc: Shuah Khan Cc: Will Drewry Signed-off-by: Andrew Morton --- tools/testing/selftests/kselftest_harness.h | 12 ++++++++---- tools/testing/selftests/mm/mdwe_test.c | 1 + 2 files changed, 9 insertions(+), 4 deletions(-) diff --git a/tools/testing/selftests/kselftest_harness.h b/tools/testing/selftests/kselftest_harness.h index 4fd735e48ee7..f0ae1f6466db 100644 --- a/tools/testing/selftests/kselftest_harness.h +++ b/tools/testing/selftests/kselftest_harness.h @@ -56,7 +56,6 @@ #include #include #include -#include #include #include #include @@ -1156,7 +1155,7 @@ void __run_test(struct __fixture_metadata *f, struct __test_metadata *t) { struct __test_xfail *xfail; - char test_name[LINE_MAX]; + char *test_name; const char *diagnostic; /* reset test struct */ @@ -1164,8 +1163,12 @@ void __run_test(struct __fixture_metadata *f, t->trigger = 0; memset(t->results->reason, 0, sizeof(t->results->reason)); - snprintf(test_name, sizeof(test_name), "%s%s%s.%s", - f->name,
variant->name[0] ? "." : "", variant->name, t->name); + if (asprintf(&test_name, "%s%s%s.%s", f->name, + variant->name[0] ? "." : "", variant->name, t->name) == -1) { + ksft_print_msg("ERROR ALLOCATING MEMORY\n"); + t->exit_code = KSFT_FAIL; + _exit(t->exit_code); + } ksft_print_msg(" RUN %s ...\n", test_name); @@ -1203,6 +1206,7 @@ void __run_test(struct __fixture_metadata *f, ksft_test_result_code(t->exit_code, test_name, diagnostic ? "%s" : "", diagnostic); + free(test_name); } static int test_harness_run(int argc, char **argv) diff --git a/tools/testing/selftests/mm/mdwe_test.c b/tools/testing/selftests/mm/mdwe_test.c index 200bedcdc32e..1e01d3ddc11c 100644 --- a/tools/testing/selftests/mm/mdwe_test.c +++ b/tools/testing/selftests/mm/mdwe_test.c @@ -7,6 +7,7 @@ #include #include +#define _GNU_SOURCE #include #include #include From 6db7412c142006985a15765785cf6c0c0dd75374 Mon Sep 17 00:00:00 2001 From: Muhammad Usama Anjum Date: Tue, 16 Apr 2024 21:26:58 +0500 Subject: [PATCH 257/313] selftests: mm: fix unused and uninitialized variable warning Fix the warnings by initializing and marking the variable as unused. I've caught the warnings by using clang. split_huge_page_test.c:303:6: warning: variable 'dummy' set but not used [-Wunused-but-set-variable] 303 | int dummy; | ^ split_huge_page_test.c:343:3: warning: variable 'dummy' is uninitialized when used here [-Wuninitialized] 343 | dummy += *(*addr + i); | ^~~~~ split_huge_page_test.c:303:11: note: initialize the variable 'dummy' to silence this warning 303 | int dummy; | ^ | = 0 2 warnings generated. 
Link: https://lkml.kernel.org/r/20240416162658.3353622-1-usama.anjum@collabora.com Fixes: fc4d182316bd ("mm: huge_memory: enable debugfs to split huge pages to any order") Signed-off-by: Muhammad Usama Anjum Reviewed-by: Zi Yan Cc: Bill Wendling Cc: Justin Stitt Cc: Muhammad Usama Anjum Cc: Nathan Chancellor Cc: Nick Desaulniers Cc: Shuah Khan Signed-off-by: Andrew Morton --- tools/testing/selftests/mm/split_huge_page_test.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tools/testing/selftests/mm/split_huge_page_test.c b/tools/testing/selftests/mm/split_huge_page_test.c index 6c988bd2f335..d3c7f5fb3e7b 100644 --- a/tools/testing/selftests/mm/split_huge_page_test.c +++ b/tools/testing/selftests/mm/split_huge_page_test.c @@ -300,7 +300,7 @@ int create_pagecache_thp_and_fd(const char *testfile, size_t fd_size, int *fd, char **addr) { size_t i; - int dummy; + int __attribute__((unused)) dummy = 0; srand(time(NULL)); From b76b46902c2d0395488c8412e1116c2486cdfcb2 Mon Sep 17 00:00:00 2001 From: Peter Xu Date: Wed, 17 Apr 2024 17:18:35 -0400 Subject: [PATCH 258/313] mm/hugetlb: fix missing hugetlb_lock for resv uncharge There is a recent report on UFFDIO_COPY over hugetlb: https://lore.kernel.org/all/000000000000ee06de0616177560@google.com/ 350: lockdep_assert_held(&hugetlb_lock); Should be an issue in hugetlb but triggered in a userfault context, where it goes into the unlikely path where two threads are modifying the resv map together. Mike has a fix in that path for resv uncharge but it looks like the locking criteria was overlooked: hugetlb_cgroup_uncharge_folio_rsvd() will update the cgroup pointer, so it must be called with the lock held.
Link: https://lkml.kernel.org/r/20240417211836.2742593-3-peterx@redhat.com Fixes: 79aa925bf239 ("hugetlb_cgroup: fix reservation accounting") Signed-off-by: Peter Xu Reported-by: syzbot+4b8077a5fccc61c385a1@syzkaller.appspotmail.com Reviewed-by: Mina Almasry Cc: David Hildenbrand Cc: Signed-off-by: Andrew Morton --- mm/hugetlb.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/mm/hugetlb.c b/mm/hugetlb.c index 31d00eee028f..53e0ab5c0845 100644 --- a/mm/hugetlb.c +++ b/mm/hugetlb.c @@ -3268,9 +3268,12 @@ struct folio *alloc_hugetlb_folio(struct vm_area_struct *vma, rsv_adjust = hugepage_subpool_put_pages(spool, 1); hugetlb_acct_memory(h, -rsv_adjust); - if (deferred_reserve) + if (deferred_reserve) { + spin_lock_irq(&hugetlb_lock); hugetlb_cgroup_uncharge_folio_rsvd(hstate_index(h), pages_per_huge_page(h), folio); + spin_unlock_irq(&hugetlb_lock); + } } if (!memcg_charge_ret) From 12bbaae7635a56049779db3bef6e7140d9aa5f67 Mon Sep 17 00:00:00 2001 From: "Matthew Wilcox (Oracle)" Date: Thu, 21 Mar 2024 14:24:40 +0000 Subject: [PATCH 259/313] mm: create FOLIO_FLAG_FALSE and FOLIO_TYPE_OPS macros Following the separation of FOLIO_FLAGS from PAGEFLAGS, separate FOLIO_FLAG_FALSE from PAGEFLAG_FALSE and FOLIO_TYPE_OPS from PAGE_TYPE_OPS. 
Link: https://lkml.kernel.org/r/20240321142448.1645400-3-willy@infradead.org Fixes: 9c5ccf2db04b ("mm: remove HUGETLB_PAGE_DTOR") Signed-off-by: Matthew Wilcox (Oracle) Reviewed-by: David Hildenbrand Acked-by: Vlastimil Babka Cc: Miaohe Lin Cc: Muchun Song Cc: Oscar Salvador Cc: Signed-off-by: Andrew Morton --- include/linux/page-flags.h | 72 +++++++++++++++++++++++++------------- 1 file changed, 48 insertions(+), 24 deletions(-) diff --git a/include/linux/page-flags.h b/include/linux/page-flags.h index 652d77805e99..dc1607f1415e 100644 --- a/include/linux/page-flags.h +++ b/include/linux/page-flags.h @@ -458,30 +458,51 @@ static __always_inline int TestClearPage##uname(struct page *page) \ TESTSETFLAG(uname, lname, policy) \ TESTCLEARFLAG(uname, lname, policy) +#define FOLIO_TEST_FLAG_FALSE(name) \ +static inline bool folio_test_##name(const struct folio *folio) \ +{ return false; } +#define FOLIO_SET_FLAG_NOOP(name) \ +static inline void folio_set_##name(struct folio *folio) { } +#define FOLIO_CLEAR_FLAG_NOOP(name) \ +static inline void folio_clear_##name(struct folio *folio) { } +#define __FOLIO_SET_FLAG_NOOP(name) \ +static inline void __folio_set_##name(struct folio *folio) { } +#define __FOLIO_CLEAR_FLAG_NOOP(name) \ +static inline void __folio_clear_##name(struct folio *folio) { } +#define FOLIO_TEST_SET_FLAG_FALSE(name) \ +static inline bool folio_test_set_##name(struct folio *folio) \ +{ return false; } +#define FOLIO_TEST_CLEAR_FLAG_FALSE(name) \ +static inline bool folio_test_clear_##name(struct folio *folio) \ +{ return false; } + +#define FOLIO_FLAG_FALSE(name) \ +FOLIO_TEST_FLAG_FALSE(name) \ +FOLIO_SET_FLAG_NOOP(name) \ +FOLIO_CLEAR_FLAG_NOOP(name) + #define TESTPAGEFLAG_FALSE(uname, lname) \ -static inline bool folio_test_##lname(const struct folio *folio) { return false; } \ +FOLIO_TEST_FLAG_FALSE(lname) \ static inline int Page##uname(const struct page *page) { return 0; } #define SETPAGEFLAG_NOOP(uname, lname) \ -static inline void 
folio_set_##lname(struct folio *folio) { } \ +FOLIO_SET_FLAG_NOOP(lname) \ static inline void SetPage##uname(struct page *page) { } #define CLEARPAGEFLAG_NOOP(uname, lname) \ -static inline void folio_clear_##lname(struct folio *folio) { } \ +FOLIO_CLEAR_FLAG_NOOP(lname) \ static inline void ClearPage##uname(struct page *page) { } #define __CLEARPAGEFLAG_NOOP(uname, lname) \ -static inline void __folio_clear_##lname(struct folio *folio) { } \ +__FOLIO_CLEAR_FLAG_NOOP(lname) \ static inline void __ClearPage##uname(struct page *page) { } #define TESTSETFLAG_FALSE(uname, lname) \ -static inline bool folio_test_set_##lname(struct folio *folio) \ -{ return 0; } \ +FOLIO_TEST_SET_FLAG_FALSE(lname) \ static inline int TestSetPage##uname(struct page *page) { return 0; } #define TESTCLEARFLAG_FALSE(uname, lname) \ -static inline bool folio_test_clear_##lname(struct folio *folio) \ -{ return 0; } \ +FOLIO_TEST_CLEAR_FLAG_FALSE(lname) \ static inline int TestClearPage##uname(struct page *page) { return 0; } #define PAGEFLAG_FALSE(uname, lname) TESTPAGEFLAG_FALSE(uname, lname) \ @@ -977,35 +998,38 @@ static inline int page_has_type(const struct page *page) return page_type_has_type(page->page_type); } -#define PAGE_TYPE_OPS(uname, lname, fname) \ -static __always_inline int Page##uname(const struct page *page) \ -{ \ - return PageType(page, PG_##lname); \ -} \ -static __always_inline int folio_test_##fname(const struct folio *folio)\ +#define FOLIO_TYPE_OPS(lname, fname) \ +static __always_inline bool folio_test_##fname(const struct folio *folio)\ { \ return folio_test_type(folio, PG_##lname); \ } \ -static __always_inline void __SetPage##uname(struct page *page) \ -{ \ - VM_BUG_ON_PAGE(!PageType(page, 0), page); \ - page->page_type &= ~PG_##lname; \ -} \ static __always_inline void __folio_set_##fname(struct folio *folio) \ { \ VM_BUG_ON_FOLIO(!folio_test_type(folio, 0), folio); \ folio->page.page_type &= ~PG_##lname; \ } \ -static __always_inline void 
__ClearPage##uname(struct page *page) \ -{ \ - VM_BUG_ON_PAGE(!Page##uname(page), page); \ - page->page_type |= PG_##lname; \ -} \ static __always_inline void __folio_clear_##fname(struct folio *folio) \ { \ VM_BUG_ON_FOLIO(!folio_test_##fname(folio), folio); \ folio->page.page_type |= PG_##lname; \ +} + +#define PAGE_TYPE_OPS(uname, lname, fname) \ +FOLIO_TYPE_OPS(lname, fname) \ +static __always_inline int Page##uname(const struct page *page) \ +{ \ + return PageType(page, PG_##lname); \ } \ +static __always_inline void __SetPage##uname(struct page *page) \ +{ \ + VM_BUG_ON_PAGE(!PageType(page, 0), page); \ + page->page_type &= ~PG_##lname; \ +} \ +static __always_inline void __ClearPage##uname(struct page *page) \ +{ \ + VM_BUG_ON_PAGE(!Page##uname(page), page); \ + page->page_type |= PG_##lname; \ +} /* * PageBuddy() indicates that the page is free and in the buddy system From fd1a745ce03e37945674c14833870a9af0882e2d Mon Sep 17 00:00:00 2001 From: "Matthew Wilcox (Oracle)" Date: Thu, 21 Mar 2024 14:24:42 +0000 Subject: [PATCH 260/313] mm: support page_mapcount() on page_has_type() pages Return 0 for pages which can't be mapped. This matches how page_mapped() works. It is more convenient for users to not have to filter out these pages. 
Link: https://lkml.kernel.org/r/20240321142448.1645400-5-willy@infradead.org Fixes: 9c5ccf2db04b ("mm: remove HUGETLB_PAGE_DTOR") Signed-off-by: Matthew Wilcox (Oracle) Reviewed-by: David Hildenbrand Acked-by: Vlastimil Babka Cc: Miaohe Lin Cc: Muchun Song Cc: Oscar Salvador Cc: Signed-off-by: Andrew Morton --- fs/proc/page.c | 7 ++----- include/linux/mm.h | 8 +++++--- include/linux/page-flags.h | 4 ++-- 3 files changed, 9 insertions(+), 10 deletions(-) diff --git a/fs/proc/page.c b/fs/proc/page.c index 195b077c0fac..9223856c934b 100644 --- a/fs/proc/page.c +++ b/fs/proc/page.c @@ -67,7 +67,7 @@ static ssize_t kpagecount_read(struct file *file, char __user *buf, */ ppage = pfn_to_online_page(pfn); - if (!ppage || PageSlab(ppage) || page_has_type(ppage)) + if (!ppage) pcount = 0; else pcount = page_mapcount(ppage); @@ -124,11 +124,8 @@ u64 stable_page_flags(struct page *page) /* * pseudo flags for the well known (anonymous) memory mapped pages - * - * Note that page->_mapcount is overloaded in SLAB, so the - * simple test in page_mapped() is not enough. */ - if (!PageSlab(page) && page_mapped(page)) + if (page_mapped(page)) u |= 1 << KPF_MMAP; if (PageAnon(page)) u |= 1 << KPF_ANON; diff --git a/include/linux/mm.h b/include/linux/mm.h index 7b0ee64225de..b6bdaa18b9e9 100644 --- a/include/linux/mm.h +++ b/include/linux/mm.h @@ -1223,14 +1223,16 @@ static inline void page_mapcount_reset(struct page *page) * a large folio, it includes the number of times this page is mapped * as part of that folio. * - * The result is undefined for pages which cannot be mapped into userspace. - * For example SLAB or special types of pages. See function page_has_type(). - * They use this field in struct page differently. + * Will report 0 for pages which cannot be mapped into userspace, eg + * slab, page tables and similar. 
*/ static inline int page_mapcount(struct page *page) { int mapcount = atomic_read(&page->_mapcount) + 1; + /* Handle page_has_type() pages */ + if (mapcount < 0) + mapcount = 0; if (unlikely(PageCompound(page))) mapcount += folio_entire_mapcount(page_folio(page)); diff --git a/include/linux/page-flags.h b/include/linux/page-flags.h index dc1607f1415e..35a0087d0910 100644 --- a/include/linux/page-flags.h +++ b/include/linux/page-flags.h @@ -971,12 +971,12 @@ static inline bool is_page_hwpoison(struct page *page) * page_type may be used. Because it is initialised to -1, we invert the * sense of the bit, so __SetPageFoo *clears* the bit used for PageFoo, and * __ClearPageFoo *sets* the bit used for PageFoo. We reserve a few high and - * low bits so that an underflow or overflow of page_mapcount() won't be + * low bits so that an underflow or overflow of _mapcount won't be * mistaken for a page type value. */ #define PAGE_TYPE_BASE 0xf0000000 -/* Reserve 0x0000007f to catch underflows of page_mapcount */ +/* Reserve 0x0000007f to catch underflows of _mapcount */ #define PAGE_MAPCOUNT_RESERVE -128 #define PG_buddy 0x00000080 #define PG_offline 0x00000100 From d99e3140a4d33e26066183ff727d8f02f56bec64 Mon Sep 17 00:00:00 2001 From: "Matthew Wilcox (Oracle)" Date: Thu, 21 Mar 2024 14:24:43 +0000 Subject: [PATCH 261/313] mm: turn folio_test_hugetlb into a PageType The current folio_test_hugetlb() can be fooled by a concurrent folio split into returning true for a folio which has never belonged to hugetlbfs. This can't happen if the caller holds a refcount on it, but we have a few places (memory-failure, compaction, procfs) which do not and should not take a speculative reference. Since hugetlb pages do not use individual page mapcounts (they are always fully mapped and use the entire_mapcount field to record the number of mappings), the PageType field is available now that page_mapcount() ignores the value in this field. 
In compaction and with CONFIG_DEBUG_VM enabled, the current implementation can result in an oops, as reported by Luis. This happens since 9c5ccf2db04b ("mm: remove HUGETLB_PAGE_DTOR") effectively added some VM_BUG_ON() checks in the PageHuge() testing path. [willy@infradead.org: update vmcoreinfo] Link: https://lkml.kernel.org/r/ZgGZUvsdhaT1Va-T@casper.infradead.org Link: https://lkml.kernel.org/r/20240321142448.1645400-6-willy@infradead.org Fixes: 9c5ccf2db04b ("mm: remove HUGETLB_PAGE_DTOR") Signed-off-by: Matthew Wilcox (Oracle) Reviewed-by: David Hildenbrand Acked-by: Vlastimil Babka Reported-by: Luis Chamberlain Closes: https://bugzilla.kernel.org/show_bug.cgi?id=218227 Cc: Miaohe Lin Cc: Muchun Song Cc: Oscar Salvador Cc: Signed-off-by: Andrew Morton --- include/linux/page-flags.h | 70 ++++++++++++++++------------------ include/trace/events/mmflags.h | 1 + kernel/vmcore_info.c | 5 +-- mm/hugetlb.c | 22 ++--------- 4 files changed, 39 insertions(+), 59 deletions(-) diff --git a/include/linux/page-flags.h b/include/linux/page-flags.h index 35a0087d0910..4bf1c25fd1dc 100644 --- a/include/linux/page-flags.h +++ b/include/linux/page-flags.h @@ -190,7 +190,6 @@ enum pageflags { /* At least one page in this folio has the hwpoison flag set */ PG_has_hwpoisoned = PG_error, - PG_hugetlb = PG_active, PG_large_rmappable = PG_workingset, /* anon or file-backed */ }; @@ -876,29 +875,6 @@ TESTPAGEFLAG_FALSE(LargeRmappable, large_rmappable) #define PG_head_mask ((1UL << PG_head)) -#ifdef CONFIG_HUGETLB_PAGE -int PageHuge(const struct page *page); -SETPAGEFLAG(HugeTLB, hugetlb, PF_SECOND) -CLEARPAGEFLAG(HugeTLB, hugetlb, PF_SECOND) - -/** - * folio_test_hugetlb - Determine if the folio belongs to hugetlbfs - * @folio: The folio to test. - * - * Context: Any context. Caller should have a reference on the folio to - * prevent it from being turned into a tail page. - * Return: True for hugetlbfs folios, false for anon folios or folios - * belonging to other filesystems. 
- */ -static inline bool folio_test_hugetlb(const struct folio *folio) -{ - return folio_test_large(folio) && - test_bit(PG_hugetlb, const_folio_flags(folio, 1)); -} -#else -TESTPAGEFLAG_FALSE(Huge, hugetlb) -#endif - #ifdef CONFIG_TRANSPARENT_HUGEPAGE /* * PageHuge() only returns true for hugetlbfs pages, but not for @@ -954,18 +930,6 @@ PAGEFLAG_FALSE(HasHWPoisoned, has_hwpoisoned) TESTSCFLAG_FALSE(HasHWPoisoned, has_hwpoisoned) #endif -/* - * Check if a page is currently marked HWPoisoned. Note that this check is - * best effort only and inherently racy: there is no way to synchronize with - * failing hardware. - */ -static inline bool is_page_hwpoison(struct page *page) -{ - if (PageHWPoison(page)) - return true; - return PageHuge(page) && PageHWPoison(compound_head(page)); -} - /* * For pages that are never mapped to userspace (and aren't PageSlab), * page_type may be used. Because it is initialised to -1, we invert the @@ -982,6 +946,7 @@ static inline bool is_page_hwpoison(struct page *page) #define PG_offline 0x00000100 #define PG_table 0x00000200 #define PG_guard 0x00000400 +#define PG_hugetlb 0x00000800 #define PageType(page, flag) \ ((page->page_type & (PAGE_TYPE_BASE | flag)) == PAGE_TYPE_BASE) @@ -1076,6 +1041,37 @@ PAGE_TYPE_OPS(Table, table, pgtable) */ PAGE_TYPE_OPS(Guard, guard, guard) +#ifdef CONFIG_HUGETLB_PAGE +FOLIO_TYPE_OPS(hugetlb, hugetlb) +#else +FOLIO_TEST_FLAG_FALSE(hugetlb) +#endif + +/** + * PageHuge - Determine if the page belongs to hugetlbfs + * @page: The page to test. + * + * Context: Any context. + * Return: True for hugetlbfs pages, false for anon pages or pages + * belonging to other filesystems. + */ +static inline bool PageHuge(const struct page *page) +{ + return folio_test_hugetlb(page_folio(page)); +} + +/* + * Check if a page is currently marked HWPoisoned. Note that this check is + * best effort only and inherently racy: there is no way to synchronize with + * failing hardware. 
+ */ +static inline bool is_page_hwpoison(struct page *page) +{ + if (PageHWPoison(page)) + return true; + return PageHuge(page) && PageHWPoison(compound_head(page)); +} + extern bool is_free_buddy_page(struct page *page); PAGEFLAG(Isolated, isolated, PF_ANY); @@ -1142,7 +1138,7 @@ static __always_inline void __ClearPageAnonExclusive(struct page *page) */ #define PAGE_FLAGS_SECOND \ (0xffUL /* order */ | 1UL << PG_has_hwpoisoned | \ - 1UL << PG_hugetlb | 1UL << PG_large_rmappable) + 1UL << PG_large_rmappable) #define PAGE_FLAGS_PRIVATE \ (1UL << PG_private | 1UL << PG_private_2) diff --git a/include/trace/events/mmflags.h b/include/trace/events/mmflags.h index d801409b33cf..d55e53ac91bd 100644 --- a/include/trace/events/mmflags.h +++ b/include/trace/events/mmflags.h @@ -135,6 +135,7 @@ IF_HAVE_PG_ARCH_X(arch_3) #define DEF_PAGETYPE_NAME(_name) { PG_##_name, __stringify(_name) } #define __def_pagetype_names \ + DEF_PAGETYPE_NAME(hugetlb), \ DEF_PAGETYPE_NAME(offline), \ DEF_PAGETYPE_NAME(guard), \ DEF_PAGETYPE_NAME(table), \ diff --git a/kernel/vmcore_info.c b/kernel/vmcore_info.c index f95516cd45bb..23c125c2e243 100644 --- a/kernel/vmcore_info.c +++ b/kernel/vmcore_info.c @@ -205,11 +205,10 @@ static int __init crash_save_vmcoreinfo_init(void) VMCOREINFO_NUMBER(PG_head_mask); #define PAGE_BUDDY_MAPCOUNT_VALUE (~PG_buddy) VMCOREINFO_NUMBER(PAGE_BUDDY_MAPCOUNT_VALUE); -#ifdef CONFIG_HUGETLB_PAGE - VMCOREINFO_NUMBER(PG_hugetlb); +#define PAGE_HUGETLB_MAPCOUNT_VALUE (~PG_hugetlb) + VMCOREINFO_NUMBER(PAGE_HUGETLB_MAPCOUNT_VALUE); #define PAGE_OFFLINE_MAPCOUNT_VALUE (~PG_offline) VMCOREINFO_NUMBER(PAGE_OFFLINE_MAPCOUNT_VALUE); -#endif #ifdef CONFIG_KALLSYMS VMCOREINFO_SYMBOL(kallsyms_names); diff --git a/mm/hugetlb.c b/mm/hugetlb.c index 53e0ab5c0845..4553241f0fb2 100644 --- a/mm/hugetlb.c +++ b/mm/hugetlb.c @@ -1624,7 +1624,7 @@ static inline void __clear_hugetlb_destructor(struct hstate *h, { lockdep_assert_held(&hugetlb_lock); - folio_clear_hugetlb(folio); + 
__folio_clear_hugetlb(folio); } /* @@ -1711,7 +1711,7 @@ static void add_hugetlb_folio(struct hstate *h, struct folio *folio, h->surplus_huge_pages_node[nid]++; } - folio_set_hugetlb(folio); + __folio_set_hugetlb(folio); folio_change_private(folio, NULL); /* * We have to set hugetlb_vmemmap_optimized again as above @@ -2049,7 +2049,7 @@ static void __prep_account_new_huge_page(struct hstate *h, int nid) static void init_new_hugetlb_folio(struct hstate *h, struct folio *folio) { - folio_set_hugetlb(folio); + __folio_set_hugetlb(folio); INIT_LIST_HEAD(&folio->lru); hugetlb_set_folio_subpool(folio, NULL); set_hugetlb_cgroup(folio, NULL); @@ -2159,22 +2159,6 @@ static bool prep_compound_gigantic_folio_for_demote(struct folio *folio, return __prep_compound_gigantic_folio(folio, order, true); } -/* - * PageHuge() only returns true for hugetlbfs pages, but not for normal or - * transparent huge pages. See the PageTransHuge() documentation for more - * details. - */ -int PageHuge(const struct page *page) -{ - const struct folio *folio; - - if (!PageCompound(page)) - return 0; - folio = page_folio(page); - return folio_test_hugetlb(folio); -} -EXPORT_SYMBOL_GPL(PageHuge); - /* * Find and lock address space (mapping) in write mode. * From 682886ec69d22363819a83ddddd5d66cb5c791e1 Mon Sep 17 00:00:00 2001 From: Johannes Weiner Date: Thu, 18 Apr 2024 08:26:28 -0400 Subject: [PATCH 262/313] mm: zswap: fix shrinker NULL crash with cgroup_disable=memory Christian reports a NULL deref in zswap that he bisected down to the zswap shrinker. The issue also cropped up in the bug trackers of libguestfs [1] and the Red Hat bugzilla [2]. The problem is that when memcg is disabled with the boot time flag, the zswap shrinker might get called with sc->memcg == NULL. This is okay in many places, like the lruvec operations. But it crashes in memcg_page_state() - which is only used due to the non-node accounting of the cgroup's zswap memory to begin with.
Nhat spotted that the memcg can be NULL in the memcg-disabled case, and I was then able to reproduce the crash locally as well. [1] https://github.com/libguestfs/libguestfs/issues/139 [2] https://bugzilla.redhat.com/show_bug.cgi?id=2275252 Link: https://lkml.kernel.org/r/20240418124043.GC1055428@cmpxchg.org Link: https://lkml.kernel.org/r/20240417143324.GA1055428@cmpxchg.org Fixes: b5ba474f3f51 ("zswap: shrink zswap pool based on memory pressure") Signed-off-by: Johannes Weiner Reported-by: Christian Heusel Debugged-by: Nhat Pham Suggested-by: Nhat Pham Tested-by: Christian Heusel Acked-by: Yosry Ahmed Cc: Chengming Zhou Cc: Dan Streetman Cc: Richard W.M. Jones Cc: Seth Jennings Cc: Vitaly Wool Cc: [v6.8] Signed-off-by: Andrew Morton --- mm/zswap.c | 25 ++++++++++++++++--------- 1 file changed, 16 insertions(+), 9 deletions(-) diff --git a/mm/zswap.c b/mm/zswap.c index caed028945b0..6f8850c44b61 100644 --- a/mm/zswap.c +++ b/mm/zswap.c @@ -1331,15 +1331,22 @@ static unsigned long zswap_shrinker_count(struct shrinker *shrinker, if (!gfp_has_io_fs(sc->gfp_mask)) return 0; -#ifdef CONFIG_MEMCG_KMEM - mem_cgroup_flush_stats(memcg); - nr_backing = memcg_page_state(memcg, MEMCG_ZSWAP_B) >> PAGE_SHIFT; - nr_stored = memcg_page_state(memcg, MEMCG_ZSWAPPED); -#else - /* use pool stats instead of memcg stats */ - nr_backing = zswap_pool_total_size >> PAGE_SHIFT; - nr_stored = atomic_read(&zswap_nr_stored); -#endif + /* + * For memcg, use the cgroup-wide ZSWAP stats since we don't + * have them per-node and thus per-lruvec. Careful if memcg is + * runtime-disabled: we can get sc->memcg == NULL, which is ok + * for the lruvec, but not for memcg_page_state(). + * + * Without memcg, use the zswap pool-wide metrics. 
+ */ + if (!mem_cgroup_disabled()) { + mem_cgroup_flush_stats(memcg); + nr_backing = memcg_page_state(memcg, MEMCG_ZSWAP_B) >> PAGE_SHIFT; + nr_stored = memcg_page_state(memcg, MEMCG_ZSWAPPED); + } else { + nr_backing = zswap_pool_total_size >> PAGE_SHIFT; + nr_stored = atomic_read(&zswap_nr_stored); + } if (!nr_stored) return 0; From 37641efaa3faa4b8292aba4bbd7d71c0b703a239 Mon Sep 17 00:00:00 2001 From: "Vishal Moola (Oracle)" Date: Mon, 15 Apr 2024 14:17:47 -0700 Subject: [PATCH 263/313] hugetlb: check for anon_vma prior to folio allocation Commit 9acad7ba3e25 ("hugetlb: use vmf_anon_prepare() instead of anon_vma_prepare()") may bailout after allocating a folio if we do not hold the mmap lock. When this occurs, vmf_anon_prepare() will release the vma lock. Hugetlb then attempts to call restore_reserve_on_error(), which depends on the vma lock being held. We can move vmf_anon_prepare() prior to the folio allocation in order to avoid calling restore_reserve_on_error() without the vma lock. 
Link: https://lkml.kernel.org/r/ZiFqSrSRLhIV91og@fedora Fixes: 9acad7ba3e25 ("hugetlb: use vmf_anon_prepare() instead of anon_vma_prepare()") Reported-by: syzbot+ad1b592fc4483655438b@syzkaller.appspotmail.com Signed-off-by: Vishal Moola (Oracle) Cc: Muchun Song Cc: Signed-off-by: Andrew Morton --- mm/hugetlb.c | 11 +++++++---- 1 file changed, 7 insertions(+), 4 deletions(-) diff --git a/mm/hugetlb.c b/mm/hugetlb.c index 4553241f0fb2..05371bf54f96 100644 --- a/mm/hugetlb.c +++ b/mm/hugetlb.c @@ -6261,6 +6261,12 @@ static vm_fault_t hugetlb_no_page(struct mm_struct *mm, VM_UFFD_MISSING); } + if (!(vma->vm_flags & VM_MAYSHARE)) { + ret = vmf_anon_prepare(vmf); + if (unlikely(ret)) + goto out; + } + folio = alloc_hugetlb_folio(vma, haddr, 0); if (IS_ERR(folio)) { /* @@ -6297,15 +6303,12 @@ static vm_fault_t hugetlb_no_page(struct mm_struct *mm, */ restore_reserve_on_error(h, vma, haddr, folio); folio_put(folio); + ret = VM_FAULT_SIGBUS; goto out; } new_pagecache_folio = true; } else { folio_lock(folio); - - ret = vmf_anon_prepare(vmf); - if (unlikely(ret)) - goto backout_unlocked; anon_rmap = 1; } } else { From 6fe60465e1d53ea321ee909be26d97529e8f746c Mon Sep 17 00:00:00 2001 From: Andrey Ryabinin Date: Thu, 18 Apr 2024 16:11:33 +0200 Subject: [PATCH 264/313] stackdepot: respect __GFP_NOLOCKDEP allocation flag If stack_depot_save_flags() allocates memory it always drops the __GFP_NOLOCKDEP flag. So when KASAN tries to track a __GFP_NOLOCKDEP allocation we may end up with a lockdep splat like the one below: ====================================================== WARNING: possible circular locking dependency detected 6.9.0-rc3+ #49 Not tainted ------------------------------------------------------ kswapd0/149 is trying to acquire lock: ffff88811346a920 (&xfs_nondir_ilock_class){++++}-{4:4}, at: xfs_reclaim_inode+0x3ac/0x590 [xfs] but task is already holding lock: ffffffff8bb33100 (fs_reclaim){+.+.}-{0:0}, at: balance_pgdat+0x5d9/0xad0 which lock already depends on the new lock.
the existing dependency chain (in reverse order) is: -> #1 (fs_reclaim){+.+.}-{0:0}: __lock_acquire+0x7da/0x1030 lock_acquire+0x15d/0x400 fs_reclaim_acquire+0xb5/0x100 prepare_alloc_pages.constprop.0+0xc5/0x230 __alloc_pages+0x12a/0x3f0 alloc_pages_mpol+0x175/0x340 stack_depot_save_flags+0x4c5/0x510 kasan_save_stack+0x30/0x40 kasan_save_track+0x10/0x30 __kasan_slab_alloc+0x83/0x90 kmem_cache_alloc+0x15e/0x4a0 __alloc_object+0x35/0x370 __create_object+0x22/0x90 __kmalloc_node_track_caller+0x477/0x5b0 krealloc+0x5f/0x110 xfs_iext_insert_raw+0x4b2/0x6e0 [xfs] xfs_iext_insert+0x2e/0x130 [xfs] xfs_iread_bmbt_block+0x1a9/0x4d0 [xfs] xfs_btree_visit_block+0xfb/0x290 [xfs] xfs_btree_visit_blocks+0x215/0x2c0 [xfs] xfs_iread_extents+0x1a2/0x2e0 [xfs] xfs_buffered_write_iomap_begin+0x376/0x10a0 [xfs] iomap_iter+0x1d1/0x2d0 iomap_file_buffered_write+0x120/0x1a0 xfs_file_buffered_write+0x128/0x4b0 [xfs] vfs_write+0x675/0x890 ksys_write+0xc3/0x160 do_syscall_64+0x94/0x170 entry_SYSCALL_64_after_hwframe+0x71/0x79 Always preserve __GFP_NOLOCKDEP to fix this. Link: https://lkml.kernel.org/r/20240418141133.22950-1-ryabinin.a.a@gmail.com Fixes: cd11016e5f52 ("mm, kasan: stackdepot implementation. Enable stackdepot for SLAB") Signed-off-by: Andrey Ryabinin Reported-by: Xiubo Li Closes: https://lore.kernel.org/all/a0caa289-ca02-48eb-9bf2-d86fd47b71f4@redhat.com/ Reported-by: Damien Le Moal Closes: https://lore.kernel.org/all/f9ff999a-e170-b66b-7caf-293f2b147ac2@opensource.wdc.com/ Suggested-by: Dave Chinner Tested-by: Xiubo Li Cc: Christoph Hellwig Cc: Alexander Potapenko Cc: Signed-off-by: Andrew Morton --- lib/stackdepot.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/lib/stackdepot.c b/lib/stackdepot.c index 68c97387aa54..cd8f23455285 100644 --- a/lib/stackdepot.c +++ b/lib/stackdepot.c @@ -627,10 +627,10 @@ depot_stack_handle_t stack_depot_save_flags(unsigned long *entries, /* * Zero out zone modifiers, as we don't have specific zone * requirements. 
Keep the flags related to allocation in atomic - * contexts and I/O. + * contexts, I/O, nolockdep. */ alloc_flags &= ~GFP_ZONEMASK; - alloc_flags &= (GFP_ATOMIC | GFP_KERNEL); + alloc_flags &= (GFP_ATOMIC | GFP_KERNEL | __GFP_NOLOCKDEP); alloc_flags |= __GFP_NOWARN; page = alloc_pages(alloc_flags, DEPOT_POOL_ORDER); if (page) From 8d6bf83f6740ba52a59e25dad360e1e87ef47666 Mon Sep 17 00:00:00 2001 From: Duanqiang Wen Date: Mon, 22 Apr 2024 16:41:08 +0800 Subject: [PATCH 265/313] Revert "net: txgbe: fix i2c dev name cannot match clkdev" This reverts commit c644920ce9220d83e070f575a4df711741c07f07. When registering the i2c dev, txgbe shortens "i2c_designware" to "i2c_dw", which causes this i2c dev to not match the platform driver i2c_designware_platform. Signed-off-by: Duanqiang Wen Reviewed-by: Jacob Keller Link: https://lore.kernel.org/r/20240422084109.3201-1-duanqiangwen@net-swift.com Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/wangxun/txgbe/txgbe_phy.c | 8 +++----- 1 file changed, 3 insertions(+), 5 deletions(-) diff --git a/drivers/net/ethernet/wangxun/txgbe/txgbe_phy.c b/drivers/net/ethernet/wangxun/txgbe/txgbe_phy.c index 2fa511227eac..5b5d5e4310d1 100644 --- a/drivers/net/ethernet/wangxun/txgbe/txgbe_phy.c +++ b/drivers/net/ethernet/wangxun/txgbe/txgbe_phy.c @@ -20,8 +20,6 @@ #include "txgbe_phy.h" #include "txgbe_hw.h" -#define TXGBE_I2C_CLK_DEV_NAME "i2c_dw" - static int txgbe_swnodes_register(struct txgbe *txgbe) { struct txgbe_nodes *nodes = &txgbe->nodes; @@ -573,8 +571,8 @@ static int txgbe_clock_register(struct txgbe *txgbe) char clk_name[32]; struct clk *clk; - snprintf(clk_name, sizeof(clk_name), "%s.%d", - TXGBE_I2C_CLK_DEV_NAME, pci_dev_id(pdev)); + snprintf(clk_name, sizeof(clk_name), "i2c_dw.%d", + pci_dev_id(pdev)); clk = clk_register_fixed_rate(NULL, clk_name, NULL, 0, 156250000); if (IS_ERR(clk)) @@ -636,7 +634,7 @@ static int txgbe_i2c_register(struct txgbe *txgbe) info.parent = &pdev->dev; info.fwnode = 
software_node_fwnode(txgbe->nodes.group[SWNODE_I2C]); - info.name = TXGBE_I2C_CLK_DEV_NAME; + info.name = "i2c_designware"; info.id = pci_dev_id(pdev); info.res = &DEFINE_RES_IRQ(pdev->irq); From edd2d250fb3bb5d70419ae82c1f9dbb9684dffd3 Mon Sep 17 00:00:00 2001 From: Duanqiang Wen Date: Mon, 22 Apr 2024 16:41:09 +0800 Subject: [PATCH 266/313] Revert "net: txgbe: fix clk_name exceed MAX_DEV_ID limits" This reverts commit e30cef001da259e8df354b813015d0e5acc08740. commit 99f4570cfba1 ("clkdev: Update clkdev id usage to allow for longer names") can fix clk_name exceed MAX_DEV_ID limits, so this commit is meaningless. Signed-off-by: Duanqiang Wen Reviewed-by: Jacob Keller Link: https://lore.kernel.org/r/20240422084109.3201-2-duanqiangwen@net-swift.com Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/wangxun/txgbe/txgbe_phy.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/ethernet/wangxun/txgbe/txgbe_phy.c b/drivers/net/ethernet/wangxun/txgbe/txgbe_phy.c index 5b5d5e4310d1..93295916b1d2 100644 --- a/drivers/net/ethernet/wangxun/txgbe/txgbe_phy.c +++ b/drivers/net/ethernet/wangxun/txgbe/txgbe_phy.c @@ -571,7 +571,7 @@ static int txgbe_clock_register(struct txgbe *txgbe) char clk_name[32]; struct clk *clk; - snprintf(clk_name, sizeof(clk_name), "i2c_dw.%d", + snprintf(clk_name, sizeof(clk_name), "i2c_designware.%d", pci_dev_id(pdev)); clk = clk_register_fixed_rate(NULL, clk_name, NULL, 0, 156250000); From c04d1b9ecce565455652ac3c6b17043cd475cf47 Mon Sep 17 00:00:00 2001 From: Lukas Wunner Date: Mon, 22 Apr 2024 13:45:02 -0700 Subject: [PATCH 267/313] igc: Fix LED-related deadlock on driver unbind MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Roman reports a deadlock on unplug of a Thunderbolt docking station containing an Intel I225 Ethernet adapter. The root cause is that led_classdev's for LEDs on the adapter are registered such that they're device-managed by the netdev. 
That results in recursive acquisition of the rtnl_lock() mutex on unplug: When the driver calls unregister_netdev(), it acquires rtnl_lock(), then frees the device-managed resources. Upon unregistering the LEDs, netdev_trig_deactivate() invokes unregister_netdevice_notifier(), which tries to acquire rtnl_lock() again. Avoid by using non-device-managed LED registration. Stack trace for posterity: schedule+0x6e/0xf0 schedule_preempt_disabled+0x15/0x20 __mutex_lock+0x2a0/0x750 unregister_netdevice_notifier+0x40/0x150 netdev_trig_deactivate+0x1f/0x60 [ledtrig_netdev] led_trigger_set+0x102/0x330 led_classdev_unregister+0x4b/0x110 release_nodes+0x3d/0xb0 devres_release_all+0x8b/0xc0 device_del+0x34f/0x3c0 unregister_netdevice_many_notify+0x80b/0xaf0 unregister_netdev+0x7c/0xd0 igc_remove+0xd8/0x1e0 [igc] pci_device_remove+0x3f/0xb0 Fixes: ea578703b03d ("igc: Add support for LEDs on i225/i226") Reported-by: Roman Lozko Closes: https://lore.kernel.org/r/CAEhC_B=ksywxCG_+aQqXUrGEgKq+4mqnSV8EBHOKbC3-Obj9+Q@mail.gmail.com/ Reported-by: "Marek Marczykowski-Górecki" Closes: https://lore.kernel.org/r/ZhRD3cOtz5i-61PB@mail-itl/ Signed-off-by: Kurt Kanzenbach Signed-off-by: Lukas Wunner Cc: Heiner Kallweit Reviewed-by: Simon Horman Reviewed-by: Kurt Kanzenbach Tested-by: Kurt Kanzenbach # Intel i225 Tested-by: Naama Meir Signed-off-by: Tony Nguyen Link: https://lore.kernel.org/r/20240422204503.225448-1-anthony.l.nguyen@intel.com Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/intel/igc/igc.h | 2 ++ drivers/net/ethernet/intel/igc/igc_leds.c | 38 ++++++++++++++++++----- drivers/net/ethernet/intel/igc/igc_main.c | 3 ++ 3 files changed, 35 insertions(+), 8 deletions(-) diff --git a/drivers/net/ethernet/intel/igc/igc.h b/drivers/net/ethernet/intel/igc/igc.h index 90316dc58630..6bc56c7c181e 100644 --- a/drivers/net/ethernet/intel/igc/igc.h +++ b/drivers/net/ethernet/intel/igc/igc.h @@ -298,6 +298,7 @@ struct igc_adapter { /* LEDs */ struct mutex led_mutex; + struct 
igc_led_classdev *leds; }; void igc_up(struct igc_adapter *adapter); @@ -723,6 +724,7 @@ void igc_ptp_read(struct igc_adapter *adapter, struct timespec64 *ts); void igc_ptp_tx_tstamp_event(struct igc_adapter *adapter); int igc_led_setup(struct igc_adapter *adapter); +void igc_led_free(struct igc_adapter *adapter); #define igc_rx_pg_size(_ring) (PAGE_SIZE << igc_rx_pg_order(_ring)) diff --git a/drivers/net/ethernet/intel/igc/igc_leds.c b/drivers/net/ethernet/intel/igc/igc_leds.c index bf240c5daf86..3929b25b6ae6 100644 --- a/drivers/net/ethernet/intel/igc/igc_leds.c +++ b/drivers/net/ethernet/intel/igc/igc_leds.c @@ -236,8 +236,8 @@ static void igc_led_get_name(struct igc_adapter *adapter, int index, char *buf, pci_dev_id(adapter->pdev), index); } -static void igc_setup_ldev(struct igc_led_classdev *ldev, - struct net_device *netdev, int index) +static int igc_setup_ldev(struct igc_led_classdev *ldev, + struct net_device *netdev, int index) { struct igc_adapter *adapter = netdev_priv(netdev); struct led_classdev *led_cdev = &ldev->led; @@ -257,24 +257,46 @@ static void igc_setup_ldev(struct igc_led_classdev *ldev, led_cdev->hw_control_get = igc_led_hw_control_get; led_cdev->hw_control_get_device = igc_led_hw_control_get_device; - devm_led_classdev_register(&netdev->dev, led_cdev); + return led_classdev_register(&netdev->dev, led_cdev); } int igc_led_setup(struct igc_adapter *adapter) { struct net_device *netdev = adapter->netdev; - struct device *dev = &netdev->dev; struct igc_led_classdev *leds; - int i; + int i, err; mutex_init(&adapter->led_mutex); - leds = devm_kcalloc(dev, IGC_NUM_LEDS, sizeof(*leds), GFP_KERNEL); + leds = kcalloc(IGC_NUM_LEDS, sizeof(*leds), GFP_KERNEL); if (!leds) return -ENOMEM; - for (i = 0; i < IGC_NUM_LEDS; i++) - igc_setup_ldev(leds + i, netdev, i); + for (i = 0; i < IGC_NUM_LEDS; i++) { + err = igc_setup_ldev(leds + i, netdev, i); + if (err) + goto err; + } + + adapter->leds = leds; return 0; + +err: + for (i--; i >= 0; i--) + 
led_classdev_unregister(&((leds + i)->led)); + + kfree(leds); + return err; +} + +void igc_led_free(struct igc_adapter *adapter) +{ + struct igc_led_classdev *leds = adapter->leds; + int i; + + for (i = 0; i < IGC_NUM_LEDS; i++) + led_classdev_unregister(&((leds + i)->led)); + + kfree(leds); } diff --git a/drivers/net/ethernet/intel/igc/igc_main.c b/drivers/net/ethernet/intel/igc/igc_main.c index 35ad40a803cb..4d975d620a8e 100644 --- a/drivers/net/ethernet/intel/igc/igc_main.c +++ b/drivers/net/ethernet/intel/igc/igc_main.c @@ -7021,6 +7021,9 @@ static void igc_remove(struct pci_dev *pdev) cancel_work_sync(&adapter->watchdog_task); hrtimer_cancel(&adapter->hrtimer); + if (IS_ENABLED(CONFIG_IGC_LEDS)) + igc_led_free(adapter); + /* Release control of h/w to f/w. If f/w is AMT enabled, this * would have already happened in close and is redundant. */ From 730117730709992c9f6535dd7b47638ee561ec45 Mon Sep 17 00:00:00 2001 From: Jakub Kicinski Date: Tue, 23 Apr 2024 17:21:48 -0700 Subject: [PATCH 268/313] eth: bnxt: fix counting packets discarded due to OOM and netpoll I added OOM and netpoll discard counters, naively assuming that the cpr pointer is pointing to a common completion ring. Turns out that is usually *a* completion ring but not *the* completion ring which bnapi->cp_ring points to. bnapi->cp_ring is where the stats are read from, so we end up reporting 0 thru ethtool -S and qstat even though the drop events have happened. Make 100% sure we're recording statistics in the correct structure. 
Fixes: 907fd4a294db ("bnxt: count discards due to memory allocation errors") Reviewed-by: Michael Chan Link: https://lore.kernel.org/r/20240424002148.3937059-1-kuba@kernel.org Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/broadcom/bnxt/bnxt.c | 44 ++++++++++------------- 1 file changed, 18 insertions(+), 26 deletions(-) diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt.c b/drivers/net/ethernet/broadcom/bnxt/bnxt.c index ed04a90a4fdd..2c2ee79c4d77 100644 --- a/drivers/net/ethernet/broadcom/bnxt/bnxt.c +++ b/drivers/net/ethernet/broadcom/bnxt/bnxt.c @@ -1778,7 +1778,7 @@ static inline struct sk_buff *bnxt_tpa_end(struct bnxt *bp, skb = bnxt_copy_skb(bnapi, data_ptr, len, mapping); if (!skb) { bnxt_abort_tpa(cpr, idx, agg_bufs); - cpr->sw_stats.rx.rx_oom_discards += 1; + cpr->bnapi->cp_ring.sw_stats.rx.rx_oom_discards += 1; return NULL; } } else { @@ -1788,7 +1788,7 @@ static inline struct sk_buff *bnxt_tpa_end(struct bnxt *bp, new_data = __bnxt_alloc_rx_frag(bp, &new_mapping, GFP_ATOMIC); if (!new_data) { bnxt_abort_tpa(cpr, idx, agg_bufs); - cpr->sw_stats.rx.rx_oom_discards += 1; + cpr->bnapi->cp_ring.sw_stats.rx.rx_oom_discards += 1; return NULL; } @@ -1804,7 +1804,7 @@ static inline struct sk_buff *bnxt_tpa_end(struct bnxt *bp, if (!skb) { skb_free_frag(data); bnxt_abort_tpa(cpr, idx, agg_bufs); - cpr->sw_stats.rx.rx_oom_discards += 1; + cpr->bnapi->cp_ring.sw_stats.rx.rx_oom_discards += 1; return NULL; } skb_reserve(skb, bp->rx_offset); @@ -1815,7 +1815,7 @@ static inline struct sk_buff *bnxt_tpa_end(struct bnxt *bp, skb = bnxt_rx_agg_pages_skb(bp, cpr, skb, idx, agg_bufs, true); if (!skb) { /* Page reuse already handled by bnxt_rx_pages(). 
*/ - cpr->sw_stats.rx.rx_oom_discards += 1; + cpr->bnapi->cp_ring.sw_stats.rx.rx_oom_discards += 1; return NULL; } } @@ -2094,11 +2094,8 @@ static int bnxt_rx_pkt(struct bnxt *bp, struct bnxt_cp_ring_info *cpr, u32 frag_len = bnxt_rx_agg_pages_xdp(bp, cpr, &xdp, cp_cons, agg_bufs, false); - if (!frag_len) { - cpr->sw_stats.rx.rx_oom_discards += 1; - rc = -ENOMEM; - goto next_rx; - } + if (!frag_len) + goto oom_next_rx; } xdp_active = true; } @@ -2121,9 +2118,7 @@ static int bnxt_rx_pkt(struct bnxt *bp, struct bnxt_cp_ring_info *cpr, else bnxt_xdp_buff_frags_free(rxr, &xdp); } - cpr->sw_stats.rx.rx_oom_discards += 1; - rc = -ENOMEM; - goto next_rx; + goto oom_next_rx; } } else { u32 payload; @@ -2134,29 +2129,21 @@ static int bnxt_rx_pkt(struct bnxt *bp, struct bnxt_cp_ring_info *cpr, payload = 0; skb = bp->rx_skb_func(bp, rxr, cons, data, data_ptr, dma_addr, payload | len); - if (!skb) { - cpr->sw_stats.rx.rx_oom_discards += 1; - rc = -ENOMEM; - goto next_rx; - } + if (!skb) + goto oom_next_rx; } if (agg_bufs) { if (!xdp_active) { skb = bnxt_rx_agg_pages_skb(bp, cpr, skb, cp_cons, agg_bufs, false); - if (!skb) { - cpr->sw_stats.rx.rx_oom_discards += 1; - rc = -ENOMEM; - goto next_rx; - } + if (!skb) + goto oom_next_rx; } else { skb = bnxt_xdp_build_skb(bp, skb, agg_bufs, rxr->page_pool, &xdp, rxcmp1); if (!skb) { /* we should be able to free the old skb here */ bnxt_xdp_buff_frags_free(rxr, &xdp); - cpr->sw_stats.rx.rx_oom_discards += 1; - rc = -ENOMEM; - goto next_rx; + goto oom_next_rx; } } } @@ -2234,6 +2221,11 @@ static int bnxt_rx_pkt(struct bnxt *bp, struct bnxt_cp_ring_info *cpr, *raw_cons = tmp_raw_cons; return rc; + +oom_next_rx: + cpr->bnapi->cp_ring.sw_stats.rx.rx_oom_discards += 1; + rc = -ENOMEM; + goto next_rx; } /* In netpoll mode, if we are using a combined completion ring, we need to @@ -2280,7 +2272,7 @@ static int bnxt_force_rx_discard(struct bnxt *bp, } rc = bnxt_rx_pkt(bp, cpr, raw_cons, event); if (rc && rc != -EBUSY) - 
cpr->sw_stats.rx.rx_netpoll_discards += 1; + cpr->bnapi->cp_ring.sw_stats.rx.rx_netpoll_discards += 1; return rc; } From d7f3040a565214a30e2f07dc9b91566d316e2d36 Mon Sep 17 00:00:00 2001 From: Michael Heimpold Date: Tue, 16 Apr 2024 21:06:58 +0200 Subject: [PATCH 269/313] ARM: dts: imx6ull-tarragon: fix USB over-current polarity Our Tarragon platform uses an active-low signal to inform the i.MX6ULL about the over-current detection. Fixes: 5e4f393ccbf0 ("ARM: dts: imx6ull: Add chargebyte Tarragon support") Signed-off-by: Michael Heimpold Signed-off-by: Stefan Wahren Signed-off-by: Shawn Guo --- arch/arm/boot/dts/nxp/imx/imx6ull-tarragon-common.dtsi | 1 + 1 file changed, 1 insertion(+) diff --git a/arch/arm/boot/dts/nxp/imx/imx6ull-tarragon-common.dtsi b/arch/arm/boot/dts/nxp/imx/imx6ull-tarragon-common.dtsi index 3fdece5bd31f..5248a058230c 100644 --- a/arch/arm/boot/dts/nxp/imx/imx6ull-tarragon-common.dtsi +++ b/arch/arm/boot/dts/nxp/imx/imx6ull-tarragon-common.dtsi @@ -805,6 +805,7 @@ &usbotg1 { &pinctrl_usb_pwr>; dr_mode = "host"; power-active-high; + over-current-active-low; disable-over-current; status = "okay"; }; From 8e30abc9ace4f0add4cd761dfdbfaebae5632dd2 Mon Sep 17 00:00:00 2001 From: Pablo Neira Ayuso Date: Wed, 24 Apr 2024 20:45:01 +0200 Subject: [PATCH 270/313] netfilter: nf_tables: honor table dormant flag from netdev release event path Check for table dormant flag otherwise netdev release event path tries to unregister an already unregistered hook. [524854.857999] ------------[ cut here ]------------ [524854.858010] WARNING: CPU: 0 PID: 3386599 at net/netfilter/core.c:501 __nf_unregister_net_hook+0x21a/0x260 [...]
[524854.858848] CPU: 0 PID: 3386599 Comm: kworker/u32:2 Not tainted 6.9.0-rc3+ #365 [524854.858869] Workqueue: netns cleanup_net [524854.858886] RIP: 0010:__nf_unregister_net_hook+0x21a/0x260 [524854.858903] Code: 24 e8 aa 73 83 ff 48 63 43 1c 83 f8 01 0f 85 3d ff ff ff e8 98 d1 f0 ff 48 8b 3c 24 e8 8f 73 83 ff 48 63 43 1c e9 26 ff ff ff <0f> 0b 48 83 c4 18 48 c7 c7 00 68 e9 82 5b 5d 41 5c 41 5d 41 5e 41 [524854.858914] RSP: 0018:ffff8881e36d79e0 EFLAGS: 00010246 [524854.858926] RAX: 0000000000000000 RBX: ffff8881339ae790 RCX: ffffffff81ba524a [524854.858936] RDX: dffffc0000000000 RSI: 0000000000000008 RDI: ffff8881c8a16438 [524854.858945] RBP: ffff8881c8a16438 R08: 0000000000000001 R09: ffffed103c6daf34 [524854.858954] R10: ffff8881e36d79a7 R11: 0000000000000000 R12: 0000000000000005 [524854.858962] R13: ffff8881c8a16000 R14: 0000000000000000 R15: ffff8881351b5a00 [524854.858971] FS: 0000000000000000(0000) GS:ffff888390800000(0000) knlGS:0000000000000000 [524854.858982] CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 [524854.858991] CR2: 00007fc9be0f16f4 CR3: 00000001437cc004 CR4: 00000000001706f0 [524854.859000] Call Trace: [524854.859006] [524854.859013] ? __warn+0x9f/0x1a0 [524854.859027] ? __nf_unregister_net_hook+0x21a/0x260 [524854.859044] ? report_bug+0x1b1/0x1e0 [524854.859060] ? handle_bug+0x3c/0x70 [524854.859071] ? exc_invalid_op+0x17/0x40 [524854.859083] ? asm_exc_invalid_op+0x1a/0x20 [524854.859100] ? __nf_unregister_net_hook+0x6a/0x260 [524854.859116] ? __nf_unregister_net_hook+0x21a/0x260 [524854.859135] nf_tables_netdev_event+0x337/0x390 [nf_tables] [524854.859304] ? __pfx_nf_tables_netdev_event+0x10/0x10 [nf_tables] [524854.859461] ? packet_notifier+0xb3/0x360 [524854.859476] ? _raw_spin_unlock_irqrestore+0x11/0x40 [524854.859489] ? dcbnl_netdevice_event+0x35/0x140 [524854.859507] ? 
__pfx_nf_tables_netdev_event+0x10/0x10 [nf_tables] [524854.859661] notifier_call_chain+0x7d/0x140 [524854.859677] unregister_netdevice_many_notify+0x5e1/0xae0 Fixes: d54725cd11a5 ("netfilter: nf_tables: support for multiple devices per netdev hook") Signed-off-by: Pablo Neira Ayuso --- net/netfilter/nft_chain_filter.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/net/netfilter/nft_chain_filter.c b/net/netfilter/nft_chain_filter.c index 274b6f7e6bb5..d170758a1eb5 100644 --- a/net/netfilter/nft_chain_filter.c +++ b/net/netfilter/nft_chain_filter.c @@ -338,7 +338,9 @@ static void nft_netdev_event(unsigned long event, struct net_device *dev, return; if (n > 1) { - nf_unregister_net_hook(ctx->net, &found->ops); + if (!(ctx->chain->table->flags & NFT_TABLE_F_DORMANT)) + nf_unregister_net_hook(ctx->net, &found->ops); + list_del_rcu(&found->list); kfree_rcu(found, rcu); return; From 6c9cd59dbcb09a2122b5ce0dfc07c74e6fc00dc0 Mon Sep 17 00:00:00 2001 From: MD Danish Anwar Date: Tue, 23 Apr 2024 14:18:28 +0530 Subject: [PATCH 271/313] net: phy: dp83869: Fix MII mode failure The DP83869 driver sets the MII bit (needed for PHY to work in MII mode) only if the op-mode is either DP83869_100M_MEDIA_CONVERT or DP83869_RGMII_100_BASE. Some drivers i.e. ICSSG support MII mode with op-mode as DP83869_RGMII_COPPER_ETHERNET for which the MII bit is not set in dp83869 driver. As a result MII mode on ICSSG doesn't work and below log is seen. TI DP83869 300b2400.mdio:0f: selected op-mode is not valid with MII mode icssg-prueth icssg1-eth: couldn't connect to phy ethernet-phy@0 icssg-prueth icssg1-eth: can't phy connect port MII0 Fix this by setting MII bit for DP83869_RGMII_COPPER_ETHERNET op-mode as well. Fixes: 94e86ef1b801 ("net: phy: dp83869: support mii mode when rgmii strap cfg is used") Signed-off-by: MD Danish Anwar Reviewed-by: Ravi Gunasekaran Signed-off-by: David S. 
Miller --- drivers/net/phy/dp83869.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/net/phy/dp83869.c b/drivers/net/phy/dp83869.c index fa8c6fdcf301..d7aaefb5226b 100644 --- a/drivers/net/phy/dp83869.c +++ b/drivers/net/phy/dp83869.c @@ -695,7 +695,8 @@ static int dp83869_configure_mode(struct phy_device *phydev, phy_ctrl_val = dp83869->mode; if (phydev->interface == PHY_INTERFACE_MODE_MII) { if (dp83869->mode == DP83869_100M_MEDIA_CONVERT || - dp83869->mode == DP83869_RGMII_100_BASE) { + dp83869->mode == DP83869_RGMII_100_BASE || + dp83869->mode == DP83869_RGMII_COPPER_ETHERNET) { phy_ctrl_val |= DP83869_OP_MODE_MII; } else { phydev_err(phydev, "selected op-mode is not valid with MII mode\n"); From c26591afd33adce296c022e3480dea4282b7ef91 Mon Sep 17 00:00:00 2001 From: Guanrui Huang Date: Thu, 18 Apr 2024 14:10:52 +0800 Subject: [PATCH 272/313] irqchip/gic-v3-its: Prevent double free on error The error handling path in its_vpe_irq_domain_alloc() causes a double free when its_vpe_init() fails after successfully allocating at least one interrupt. This happens because its_vpe_irq_domain_free() frees the interrupts along with the area bitmap and the vprop_page and its_vpe_irq_domain_alloc() subsequently frees the area bitmap and the vprop_page again. Fix this by unconditionally invoking its_vpe_irq_domain_free() which handles all cases correctly and by removing the bitmap/vprop_page freeing from its_vpe_irq_domain_alloc(). 
[ tglx: Massaged change log ] Fixes: 7d75bbb4bc1a ("irqchip/gic-v3-its: Add VPE irq domain allocation/teardown") Signed-off-by: Guanrui Huang Signed-off-by: Thomas Gleixner Reviewed-by: Marc Zyngier Reviewed-by: Zenghui Yu Cc: stable@vger.kernel.org Link: https://lore.kernel.org/r/20240418061053.96803-2-guanrui.huang@linux.alibaba.com --- drivers/irqchip/irq-gic-v3-its.c | 9 ++------- 1 file changed, 2 insertions(+), 7 deletions(-) diff --git a/drivers/irqchip/irq-gic-v3-its.c b/drivers/irqchip/irq-gic-v3-its.c index 2a537cbfcb07..5f7d3db3afd8 100644 --- a/drivers/irqchip/irq-gic-v3-its.c +++ b/drivers/irqchip/irq-gic-v3-its.c @@ -4567,13 +4567,8 @@ static int its_vpe_irq_domain_alloc(struct irq_domain *domain, unsigned int virq irqd_set_resend_when_in_progress(irq_get_irq_data(virq + i)); } - if (err) { - if (i > 0) - its_vpe_irq_domain_free(domain, virq, i); - - its_lpi_free(bitmap, base, nr_ids); - its_free_prop_table(vprop_page); - } + if (err) + its_vpe_irq_domain_free(domain, virq, i); return err; } From fe42754b94a42d08cf9501790afc25c4f6a5f631 Mon Sep 17 00:00:00 2001 From: Sean Christopherson Date: Fri, 19 Apr 2024 17:05:54 -0700 Subject: [PATCH 273/313] cpu: Re-enable CPU mitigations by default for !X86 architectures Rename x86's SPECULATION_MITIGATIONS to CPU_MITIGATIONS, define it in generic code, and force it on for all architectures except x86. A recent commit to turn mitigations off by default if SPECULATION_MITIGATIONS=n kinda sorta missed that "cpu_mitigations" is completely generic, whereas SPECULATION_MITIGATIONS is x86-specific. Rename x86's SPECULATION_MITIGATIONS instead of keeping both and have it select CPU_MITIGATIONS, as having two configs for the same thing is unnecessary and confusing. This will also allow x86 to use the knob to manage mitigations that aren't strictly related to speculative execution. Use another Kconfig to communicate to common code that CPU_MITIGATIONS is already defined instead of having x86's menu depend on the common CPU_MITIGATIONS.
This allows keeping a single point of contact for all of x86's mitigations, and it's not clear that other architectures *want* to allow disabling mitigations at compile-time. Fixes: f337a6a21e2f ("x86/cpu: Actually turn off mitigations by default for SPECULATION_MITIGATIONS=n") Closes: https://lkml.kernel.org/r/20240413115324.53303a68%40canb.auug.org.au Reported-by: Stephen Rothwell Reported-by: Michael Ellerman Reported-by: Geert Uytterhoeven Signed-off-by: Sean Christopherson Signed-off-by: Borislav Petkov (AMD) Acked-by: Josh Poimboeuf Acked-by: Borislav Petkov (AMD) Cc: stable@vger.kernel.org Link: https://lore.kernel.org/r/20240420000556.2645001-2-seanjc@google.com --- arch/Kconfig | 8 ++++++++ arch/x86/Kconfig | 11 ++++++----- kernel/cpu.c | 4 ++-- 3 files changed, 16 insertions(+), 7 deletions(-) diff --git a/arch/Kconfig b/arch/Kconfig index 65afb1de48b3..30f7930275d8 100644 --- a/arch/Kconfig +++ b/arch/Kconfig @@ -9,6 +9,14 @@ # source "arch/$(SRCARCH)/Kconfig" +config ARCH_CONFIGURES_CPU_MITIGATIONS + bool + +if !ARCH_CONFIGURES_CPU_MITIGATIONS +config CPU_MITIGATIONS + def_bool y +endif + menu "General architecture-dependent options" config ARCH_HAS_SUBPAGE_FAULTS diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig index 4474bf32d0a4..619a04d5c131 100644 --- a/arch/x86/Kconfig +++ b/arch/x86/Kconfig @@ -62,6 +62,7 @@ config X86 select ACPI_HOTPLUG_CPU if ACPI_PROCESSOR && HOTPLUG_CPU select ARCH_32BIT_OFF_T if X86_32 select ARCH_CLOCKSOURCE_INIT + select ARCH_CONFIGURES_CPU_MITIGATIONS select ARCH_CORRECT_STACKTRACE_ON_KRETPROBE select ARCH_ENABLE_HUGEPAGE_MIGRATION if X86_64 && HUGETLB_PAGE && MIGRATION select ARCH_ENABLE_MEMORY_HOTPLUG if X86_64 @@ -2488,17 +2489,17 @@ config PREFIX_SYMBOLS def_bool y depends on CALL_PADDING && !CFI_CLANG -menuconfig SPECULATION_MITIGATIONS - bool "Mitigations for speculative execution vulnerabilities" +menuconfig CPU_MITIGATIONS + bool "Mitigations for CPU vulnerabilities" default y help - Say Y here to enable options 
which enable mitigations for - speculative execution hardware vulnerabilities. + Say Y here to enable options which enable mitigations for hardware + vulnerabilities (usually related to speculative execution). If you say N, all mitigations will be disabled. You really should know what you are doing to say so. -if SPECULATION_MITIGATIONS +if CPU_MITIGATIONS config MITIGATION_PAGE_TABLE_ISOLATION bool "Remove the kernel mapping in user mode" diff --git a/kernel/cpu.c b/kernel/cpu.c index 07ad53b7f119..bb0ff275fb46 100644 --- a/kernel/cpu.c +++ b/kernel/cpu.c @@ -3207,8 +3207,8 @@ enum cpu_mitigations { }; static enum cpu_mitigations cpu_mitigations __ro_after_init = - IS_ENABLED(CONFIG_SPECULATION_MITIGATIONS) ? CPU_MITIGATIONS_AUTO : - CPU_MITIGATIONS_OFF; + IS_ENABLED(CONFIG_CPU_MITIGATIONS) ? CPU_MITIGATIONS_AUTO : + CPU_MITIGATIONS_OFF; static int __init mitigations_parse_cmdline(char *arg) { From ce0abef6a1d540acef85068e0e82bdf1fbeeb0e9 Mon Sep 17 00:00:00 2001 From: Sean Christopherson Date: Fri, 19 Apr 2024 17:05:55 -0700 Subject: [PATCH 274/313] cpu: Ignore "mitigations" kernel parameter if CPU_MITIGATIONS=n Explicitly disallow enabling mitigations at runtime for kernels that were built with CONFIG_CPU_MITIGATIONS=n, as some architectures may omit code entirely if mitigations are disabled at compile time. E.g. on x86, a large pile of Kconfigs are buried behind CPU_MITIGATIONS, and trying to provide sane behavior for retroactively enabling mitigations is extremely difficult, bordering on impossible. E.g. page table isolation and call depth tracking require build-time support, BHI mitigations will still be off without additional kernel parameters, etc. [ bp: Touchups. 
] Signed-off-by: Sean Christopherson Signed-off-by: Borislav Petkov (AMD) Acked-by: Borislav Petkov (AMD) Link: https://lore.kernel.org/r/20240420000556.2645001-3-seanjc@google.com --- Documentation/admin-guide/kernel-parameters.txt | 3 +++ arch/x86/Kconfig | 8 ++++++-- include/linux/cpu.h | 11 +++++++++++ kernel/cpu.c | 14 ++++++++++---- 4 files changed, 30 insertions(+), 6 deletions(-) diff --git a/Documentation/admin-guide/kernel-parameters.txt b/Documentation/admin-guide/kernel-parameters.txt index 902ecd92a29f..213d0719e2b7 100644 --- a/Documentation/admin-guide/kernel-parameters.txt +++ b/Documentation/admin-guide/kernel-parameters.txt @@ -3423,6 +3423,9 @@ arch-independent options, each of which is an aggregation of existing arch-specific options. + Note, "mitigations" is supported if and only if the + kernel was built with CPU_MITIGATIONS=y. + off Disable all optional CPU mitigations. This improves system performance, but it may also diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig index 619a04d5c131..928820e61cb5 100644 --- a/arch/x86/Kconfig +++ b/arch/x86/Kconfig @@ -2495,9 +2495,13 @@ menuconfig CPU_MITIGATIONS help Say Y here to enable options which enable mitigations for hardware vulnerabilities (usually related to speculative execution). + Mitigations can be disabled or restricted to SMT systems at runtime + via the "mitigations" kernel parameter. - If you say N, all mitigations will be disabled. You really - should know what you are doing to say so. + If you say N, all mitigations will be disabled. This CANNOT be + overridden at runtime. + + Say 'Y', unless you really know what you are doing. 
if CPU_MITIGATIONS diff --git a/include/linux/cpu.h b/include/linux/cpu.h index 272e4e79e15c..861c3bfc5f17 100644 --- a/include/linux/cpu.h +++ b/include/linux/cpu.h @@ -221,7 +221,18 @@ void cpuhp_report_idle_dead(void); static inline void cpuhp_report_idle_dead(void) { } #endif /* #ifdef CONFIG_HOTPLUG_CPU */ +#ifdef CONFIG_CPU_MITIGATIONS extern bool cpu_mitigations_off(void); extern bool cpu_mitigations_auto_nosmt(void); +#else +static inline bool cpu_mitigations_off(void) +{ + return true; +} +static inline bool cpu_mitigations_auto_nosmt(void) +{ + return false; +} +#endif #endif /* _LINUX_CPU_H_ */ diff --git a/kernel/cpu.c b/kernel/cpu.c index bb0ff275fb46..63447eb85dab 100644 --- a/kernel/cpu.c +++ b/kernel/cpu.c @@ -3196,6 +3196,7 @@ void __init boot_cpu_hotplug_init(void) this_cpu_write(cpuhp_state.target, CPUHP_ONLINE); } +#ifdef CONFIG_CPU_MITIGATIONS /* * These are used for a global "mitigations=" cmdline option for toggling * optional CPU mitigations. @@ -3206,9 +3207,7 @@ enum cpu_mitigations { CPU_MITIGATIONS_AUTO_NOSMT, }; -static enum cpu_mitigations cpu_mitigations __ro_after_init = - IS_ENABLED(CONFIG_CPU_MITIGATIONS) ? 
CPU_MITIGATIONS_AUTO : - CPU_MITIGATIONS_OFF; +static enum cpu_mitigations cpu_mitigations __ro_after_init = CPU_MITIGATIONS_AUTO; static int __init mitigations_parse_cmdline(char *arg) { @@ -3224,7 +3223,6 @@ static int __init mitigations_parse_cmdline(char *arg) return 0; } -early_param("mitigations", mitigations_parse_cmdline); /* mitigations=off */ bool cpu_mitigations_off(void) @@ -3239,3 +3237,11 @@ bool cpu_mitigations_auto_nosmt(void) return cpu_mitigations == CPU_MITIGATIONS_AUTO_NOSMT; } EXPORT_SYMBOL_GPL(cpu_mitigations_auto_nosmt); +#else +static int __init mitigations_parse_cmdline(char *arg) +{ + pr_crit("Kernel compiled without mitigations, ignoring 'mitigations'; system may still be vulnerable\n"); + return 0; +} +#endif +early_param("mitigations", mitigations_parse_cmdline); From f3334ebb8a2a1841c2824594dd992e66de19deb2 Mon Sep 17 00:00:00 2001 From: Bibo Mao Date: Thu, 25 Apr 2024 22:17:52 +0800 Subject: [PATCH 275/313] LoongArch: Lately init pmu after smp is online There is an smp function call named reset_counters() to init PMU registers of every CPU in PMU initialization state. It requires that all CPUs are online. However there is an early_initcall() wrapper for the PMU init function init_hw_perf_events(), so that the PMU init function is called in do_pre_smp_initcalls(), which runs before smp_init(). Function reset_counters() cannot work on other CPUs since they have not booted yet. Here replace the wrapper early_initcall() with pure_initcall(), so that the PMU init function is called after every cpu is online.
Signed-off-by: Bibo Mao Signed-off-by: Huacai Chen --- arch/loongarch/kernel/perf_event.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/loongarch/kernel/perf_event.c b/arch/loongarch/kernel/perf_event.c index 0491bf453cd4..cac7cba81b65 100644 --- a/arch/loongarch/kernel/perf_event.c +++ b/arch/loongarch/kernel/perf_event.c @@ -884,4 +884,4 @@ static int __init init_hw_perf_events(void) return 0; } -early_initcall(init_hw_perf_events); +pure_initcall(init_hw_perf_events); From d7a5c9de99b3a9a43dce49f2084eb69b5f6a9752 Mon Sep 17 00:00:00 2001 From: Derek Foreman Date: Mon, 18 Mar 2024 07:32:07 -0500 Subject: [PATCH 276/313] drm/etnaviv: fix tx clock gating on some GC7000 variants commit 4bce244272513 ("drm/etnaviv: disable tx clock gating for GC7000 rev6203") accidentally applied the fix for i.MX8MN errata ERR050226 to GC2000 instead of GC7000, failing to disable tx clock gating for GC7000 rev 0x6203 as intended. Additional clean-up further propagated this issue, partially breaking the clock gating fixes added for GC7000 rev 6202 in commit 432f51e7deeda ("drm/etnaviv: add clock gating workaround for GC7000 r6202"). Signed-off-by: Derek Foreman Signed-off-by: Lucas Stach --- drivers/gpu/drm/etnaviv/etnaviv_gpu.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/etnaviv/etnaviv_gpu.c b/drivers/gpu/drm/etnaviv/etnaviv_gpu.c index 734412aae94d..6f763038c21a 100644 --- a/drivers/gpu/drm/etnaviv/etnaviv_gpu.c +++ b/drivers/gpu/drm/etnaviv/etnaviv_gpu.c @@ -663,8 +663,8 @@ static void etnaviv_gpu_enable_mlcg(struct etnaviv_gpu *gpu) /* Disable TX clock gating on affected core revisions.
*/ if (etnaviv_is_model_rev(gpu, GC4000, 0x5222) || etnaviv_is_model_rev(gpu, GC2000, 0x5108) || - etnaviv_is_model_rev(gpu, GC2000, 0x6202) || - etnaviv_is_model_rev(gpu, GC2000, 0x6203)) + etnaviv_is_model_rev(gpu, GC7000, 0x6202) || + etnaviv_is_model_rev(gpu, GC7000, 0x6203)) pmc |= VIVS_PM_MODULE_CONTROLS_DISABLE_MODULE_CLOCK_GATING_TX; /* Disable SE and RA clock gating on affected core revisions. */ From e877d705704d7c8fe17b6b5ebdfdb14b84c207a7 Mon Sep 17 00:00:00 2001 From: Christian Gmeiner Date: Sat, 20 Apr 2024 15:41:58 +0200 Subject: [PATCH 277/313] Revert "drm/etnaviv: Expose a few more chipspecs to userspace" This reverts commit 1dccdba084897443d116508a8ed71e0ac8a031a4. In userspace a different approach was chosen - hwdb. As a result, there is no need for these values. Signed-off-by: Christian Gmeiner Reviewed-by: Tomeu Vizoso Signed-off-by: Lucas Stach --- drivers/gpu/drm/etnaviv/etnaviv_gpu.c | 20 --------------- drivers/gpu/drm/etnaviv/etnaviv_gpu.h | 12 --------- drivers/gpu/drm/etnaviv/etnaviv_hwdb.c | 34 -------------------------- include/uapi/drm/etnaviv_drm.h | 5 ---- 4 files changed, 71 deletions(-) diff --git a/drivers/gpu/drm/etnaviv/etnaviv_gpu.c b/drivers/gpu/drm/etnaviv/etnaviv_gpu.c index 6f763038c21a..a9bf426f69b3 100644 --- a/drivers/gpu/drm/etnaviv/etnaviv_gpu.c +++ b/drivers/gpu/drm/etnaviv/etnaviv_gpu.c @@ -164,26 +164,6 @@ int etnaviv_gpu_get_param(struct etnaviv_gpu *gpu, u32 param, u64 *value) *value = gpu->identity.eco_id; break; - case ETNAVIV_PARAM_GPU_NN_CORE_COUNT: - *value = gpu->identity.nn_core_count; - break; - - case ETNAVIV_PARAM_GPU_NN_MAD_PER_CORE: - *value = gpu->identity.nn_mad_per_core; - break; - - case ETNAVIV_PARAM_GPU_TP_CORE_COUNT: - *value = gpu->identity.tp_core_count; - break; - - case ETNAVIV_PARAM_GPU_ON_CHIP_SRAM_SIZE: - *value = gpu->identity.on_chip_sram_size; - break; - - case ETNAVIV_PARAM_GPU_AXI_SRAM_SIZE: - *value = gpu->identity.axi_sram_size; - break; - default: DBG("%s: invalid param: %u",
dev_name(gpu->dev), param); return -EINVAL; diff --git a/drivers/gpu/drm/etnaviv/etnaviv_gpu.h b/drivers/gpu/drm/etnaviv/etnaviv_gpu.h index 7d5e9158e13c..197e0037732e 100644 --- a/drivers/gpu/drm/etnaviv/etnaviv_gpu.h +++ b/drivers/gpu/drm/etnaviv/etnaviv_gpu.h @@ -54,18 +54,6 @@ struct etnaviv_chip_identity { /* Number of Neural Network cores. */ u32 nn_core_count; - /* Number of MAD units per Neural Network core. */ - u32 nn_mad_per_core; - - /* Number of Tensor Processing cores. */ - u32 tp_core_count; - - /* Size in bytes of the SRAM inside the NPU. */ - u32 on_chip_sram_size; - - /* Size in bytes of the SRAM across the AXI bus. */ - u32 axi_sram_size; - /* Size of the vertex cache. */ u32 vertex_cache_size; diff --git a/drivers/gpu/drm/etnaviv/etnaviv_hwdb.c b/drivers/gpu/drm/etnaviv/etnaviv_hwdb.c index d8e7334de8ce..8665f2658d51 100644 --- a/drivers/gpu/drm/etnaviv/etnaviv_hwdb.c +++ b/drivers/gpu/drm/etnaviv/etnaviv_hwdb.c @@ -17,10 +17,6 @@ static const struct etnaviv_chip_identity etnaviv_chip_identities[] = { .thread_count = 128, .shader_core_count = 1, .nn_core_count = 0, - .nn_mad_per_core = 0, - .tp_core_count = 0, - .on_chip_sram_size = 0, - .axi_sram_size = 0, .vertex_cache_size = 8, .vertex_output_buffer_size = 1024, .pixel_pipes = 1, @@ -52,11 +48,6 @@ static const struct etnaviv_chip_identity etnaviv_chip_identities[] = { .register_max = 64, .thread_count = 256, .shader_core_count = 1, - .nn_core_count = 0, - .nn_mad_per_core = 0, - .tp_core_count = 0, - .on_chip_sram_size = 0, - .axi_sram_size = 0, .vertex_cache_size = 8, .vertex_output_buffer_size = 512, .pixel_pipes = 1, @@ -89,10 +80,6 @@ static const struct etnaviv_chip_identity etnaviv_chip_identities[] = { .thread_count = 512, .shader_core_count = 2, .nn_core_count = 0, - .nn_mad_per_core = 0, - .tp_core_count = 0, - .on_chip_sram_size = 0, - .axi_sram_size = 0, .vertex_cache_size = 16, .vertex_output_buffer_size = 1024, .pixel_pipes = 1, @@ -125,10 +112,6 @@ static const struct 
etnaviv_chip_identity etnaviv_chip_identities[] = { .thread_count = 512, .shader_core_count = 2, .nn_core_count = 0, - .nn_mad_per_core = 0, - .tp_core_count = 0, - .on_chip_sram_size = 0, - .axi_sram_size = 0, .vertex_cache_size = 16, .vertex_output_buffer_size = 1024, .pixel_pipes = 1, @@ -160,11 +143,6 @@ static const struct etnaviv_chip_identity etnaviv_chip_identities[] = { .register_max = 64, .thread_count = 512, .shader_core_count = 2, - .nn_core_count = 0, - .nn_mad_per_core = 0, - .tp_core_count = 0, - .on_chip_sram_size = 0, - .axi_sram_size = 0, .vertex_cache_size = 16, .vertex_output_buffer_size = 1024, .pixel_pipes = 1, @@ -197,10 +175,6 @@ static const struct etnaviv_chip_identity etnaviv_chip_identities[] = { .thread_count = 1024, .shader_core_count = 4, .nn_core_count = 0, - .nn_mad_per_core = 0, - .tp_core_count = 0, - .on_chip_sram_size = 0, - .axi_sram_size = 0, .vertex_cache_size = 16, .vertex_output_buffer_size = 1024, .pixel_pipes = 2, @@ -233,10 +207,6 @@ static const struct etnaviv_chip_identity etnaviv_chip_identities[] = { .thread_count = 256, .shader_core_count = 1, .nn_core_count = 8, - .nn_mad_per_core = 64, - .tp_core_count = 4, - .on_chip_sram_size = 524288, - .axi_sram_size = 1048576, .vertex_cache_size = 16, .vertex_output_buffer_size = 1024, .pixel_pipes = 1, @@ -269,10 +239,6 @@ static const struct etnaviv_chip_identity etnaviv_chip_identities[] = { .thread_count = 256, .shader_core_count = 1, .nn_core_count = 6, - .nn_mad_per_core = 64, - .tp_core_count = 3, - .on_chip_sram_size = 262144, - .axi_sram_size = 0, .vertex_cache_size = 16, .vertex_output_buffer_size = 1024, .pixel_pipes = 1, diff --git a/include/uapi/drm/etnaviv_drm.h b/include/uapi/drm/etnaviv_drm.h index d87410a8443a..af024d90453d 100644 --- a/include/uapi/drm/etnaviv_drm.h +++ b/include/uapi/drm/etnaviv_drm.h @@ -77,11 +77,6 @@ struct drm_etnaviv_timespec { #define ETNAVIV_PARAM_GPU_PRODUCT_ID 0x1c #define ETNAVIV_PARAM_GPU_CUSTOMER_ID 0x1d #define 
ETNAVIV_PARAM_GPU_ECO_ID 0x1e -#define ETNAVIV_PARAM_GPU_NN_CORE_COUNT 0x1f -#define ETNAVIV_PARAM_GPU_NN_MAD_PER_CORE 0x20 -#define ETNAVIV_PARAM_GPU_TP_CORE_COUNT 0x21 -#define ETNAVIV_PARAM_GPU_ON_CHIP_SRAM_SIZE 0x22 -#define ETNAVIV_PARAM_GPU_AXI_SRAM_SIZE 0x23 #define ETNA_MAX_PIPES 4 From 475747a19316b08e856c666a20503e73d7ed67ed Mon Sep 17 00:00:00 2001 From: Rahul Rameshbabu Date: Tue, 23 Apr 2024 11:13:02 -0700 Subject: [PATCH 278/313] macsec: Enable devices to advertise whether they update sk_buff md_dst during offloads Cannot know whether a Rx skb missing md_dst is intended for MACsec or not without knowing whether the device is able to update this field during an offload. Assume that an offload to a MACsec device cannot support updating md_dst by default. Capable devices can advertise that they do indicate that an skb is related to a MACsec offloaded packet using the md_dst. Cc: Sabrina Dubroca Cc: stable@vger.kernel.org Fixes: 860ead89b851 ("net/macsec: Add MACsec skb_metadata_dst Rx Data path support") Signed-off-by: Rahul Rameshbabu Reviewed-by: Benjamin Poirier Reviewed-by: Cosmin Ratiu Reviewed-by: Sabrina Dubroca Link: https://lore.kernel.org/r/20240423181319.115860-2-rrameshbabu@nvidia.com Signed-off-by: Jakub Kicinski --- include/net/macsec.h | 2 ++ 1 file changed, 2 insertions(+) diff --git a/include/net/macsec.h b/include/net/macsec.h index dbd22180cc5c..de216cbc6b05 100644 --- a/include/net/macsec.h +++ b/include/net/macsec.h @@ -321,6 +321,7 @@ struct macsec_context { * for the TX tag * @needed_tailroom: number of bytes reserved at the end of the sk_buff for the * TX tag + * @rx_uses_md_dst: whether MACsec device offload supports sk_buff md_dst */ struct macsec_ops { /* Device wide */ @@ -352,6 +353,7 @@ struct macsec_ops { struct sk_buff *skb); unsigned int needed_headroom; unsigned int needed_tailroom; + bool rx_uses_md_dst; }; void macsec_pn_wrapped(struct macsec_secy *secy, struct macsec_tx_sa *tx_sa); From 
6e159fd653d7ebf6290358e0330a0cb8a75cf73b Mon Sep 17 00:00:00 2001 From: Rahul Rameshbabu Date: Tue, 23 Apr 2024 11:13:03 -0700 Subject: [PATCH 279/313] ethernet: Add helper for assigning packet type when dest address does not match device address Enable reuse of logic in eth_type_trans for determining packet type. Suggested-by: Sabrina Dubroca Cc: stable@vger.kernel.org Signed-off-by: Rahul Rameshbabu Reviewed-by: Sabrina Dubroca Link: https://lore.kernel.org/r/20240423181319.115860-3-rrameshbabu@nvidia.com Signed-off-by: Jakub Kicinski --- include/linux/etherdevice.h | 25 +++++++++++++++++++++++++ net/ethernet/eth.c | 12 +----------- 2 files changed, 26 insertions(+), 11 deletions(-) diff --git a/include/linux/etherdevice.h b/include/linux/etherdevice.h index 224645f17c33..297231854ada 100644 --- a/include/linux/etherdevice.h +++ b/include/linux/etherdevice.h @@ -607,6 +607,31 @@ static inline void eth_hw_addr_gen(struct net_device *dev, const u8 *base_addr, eth_hw_addr_set(dev, addr); } +/** + * eth_skb_pkt_type - Assign packet type if destination address does not match + * @skb: Assigned a packet type if address does not match @dev address + * @dev: Network device used to compare packet address against + * + * If the destination MAC address of the packet does not match the network + * device address, assign an appropriate packet type. 
+ */ +static inline void eth_skb_pkt_type(struct sk_buff *skb, + const struct net_device *dev) +{ + const struct ethhdr *eth = eth_hdr(skb); + + if (unlikely(!ether_addr_equal_64bits(eth->h_dest, dev->dev_addr))) { + if (unlikely(is_multicast_ether_addr_64bits(eth->h_dest))) { + if (ether_addr_equal_64bits(eth->h_dest, dev->broadcast)) + skb->pkt_type = PACKET_BROADCAST; + else + skb->pkt_type = PACKET_MULTICAST; + } else { + skb->pkt_type = PACKET_OTHERHOST; + } + } +} + /** * eth_skb_pad - Pad buffer to mininum number of octets for Ethernet frame * @skb: Buffer to pad diff --git a/net/ethernet/eth.c b/net/ethernet/eth.c index 2edc8b796a4e..049c3adeb850 100644 --- a/net/ethernet/eth.c +++ b/net/ethernet/eth.c @@ -164,17 +164,7 @@ __be16 eth_type_trans(struct sk_buff *skb, struct net_device *dev) eth = (struct ethhdr *)skb->data; skb_pull_inline(skb, ETH_HLEN); - if (unlikely(!ether_addr_equal_64bits(eth->h_dest, - dev->dev_addr))) { - if (unlikely(is_multicast_ether_addr_64bits(eth->h_dest))) { - if (ether_addr_equal_64bits(eth->h_dest, dev->broadcast)) - skb->pkt_type = PACKET_BROADCAST; - else - skb->pkt_type = PACKET_MULTICAST; - } else { - skb->pkt_type = PACKET_OTHERHOST; - } - } + eth_skb_pkt_type(skb, dev); /* * Some variants of DSA tagging don't have an ethertype field From 642c984dd0e37dbaec9f87bd1211e5fac1f142bf Mon Sep 17 00:00:00 2001 From: Rahul Rameshbabu Date: Tue, 23 Apr 2024 11:13:04 -0700 Subject: [PATCH 280/313] macsec: Detect if Rx skb is macsec-related for offloading devices that update md_dst Can now correctly identify where the packets should be delivered by using md_dst or its absence on devices that provide it. This detection is not possible without device drivers that update md_dst. A fallback pattern should be used for supporting such device drivers. This fallback mode causes multicast messages to be cloned to both the non-macsec and macsec ports, independent of whether the multicast message received was encrypted over MACsec or not. 
Other non-macsec traffic may also fail to be handled correctly for devices in promiscuous mode. Link: https://lore.kernel.org/netdev/ZULRxX9eIbFiVi7v@hog/ Cc: Sabrina Dubroca Cc: stable@vger.kernel.org Fixes: 860ead89b851 ("net/macsec: Add MACsec skb_metadata_dst Rx Data path support") Signed-off-by: Rahul Rameshbabu Reviewed-by: Benjamin Poirier Reviewed-by: Cosmin Ratiu Reviewed-by: Sabrina Dubroca Link: https://lore.kernel.org/r/20240423181319.115860-4-rrameshbabu@nvidia.com Signed-off-by: Jakub Kicinski --- drivers/net/macsec.c | 46 ++++++++++++++++++++++++++++++++++---------- 1 file changed, 36 insertions(+), 10 deletions(-) diff --git a/drivers/net/macsec.c b/drivers/net/macsec.c index 0206b84284ab..ff016c11b4a0 100644 --- a/drivers/net/macsec.c +++ b/drivers/net/macsec.c @@ -999,10 +999,12 @@ static enum rx_handler_result handle_not_macsec(struct sk_buff *skb) struct metadata_dst *md_dst; struct macsec_rxh_data *rxd; struct macsec_dev *macsec; + bool is_macsec_md_dst; rcu_read_lock(); rxd = macsec_data_rcu(skb->dev); md_dst = skb_metadata_dst(skb); + is_macsec_md_dst = md_dst && md_dst->type == METADATA_MACSEC; list_for_each_entry_rcu(macsec, &rxd->secys, secys) { struct sk_buff *nskb; @@ -1013,14 +1015,42 @@ static enum rx_handler_result handle_not_macsec(struct sk_buff *skb) * the SecTAG, so we have to deduce which port to deliver to. */ if (macsec_is_offloaded(macsec) && netif_running(ndev)) { - struct macsec_rx_sc *rx_sc = NULL; + const struct macsec_ops *ops; - if (md_dst && md_dst->type == METADATA_MACSEC) - rx_sc = find_rx_sc(&macsec->secy, md_dst->u.macsec_info.sci); + ops = macsec_get_ops(macsec, NULL); - if (md_dst && md_dst->type == METADATA_MACSEC && !rx_sc) + if (ops->rx_uses_md_dst && !is_macsec_md_dst) continue; + if (is_macsec_md_dst) { + struct macsec_rx_sc *rx_sc; + + /* All drivers that implement MACsec offload + * support using skb metadata destinations must + * indicate that they do so. 
+ */ + DEBUG_NET_WARN_ON_ONCE(!ops->rx_uses_md_dst); + rx_sc = find_rx_sc(&macsec->secy, + md_dst->u.macsec_info.sci); + if (!rx_sc) + continue; + /* device indicated macsec offload occurred */ + skb->dev = ndev; + skb->pkt_type = PACKET_HOST; + eth_skb_pkt_type(skb, ndev); + ret = RX_HANDLER_ANOTHER; + goto out; + } + + /* This datapath is insecure because it is unable to + * enforce isolation of broadcast/multicast traffic and + * unicast traffic with promiscuous mode on the macsec + * netdev. Since the core stack has no mechanism to + * check that the hardware did indeed receive MACsec + * traffic, it is possible that the response handling + * done by the MACsec port was to a plaintext packet. + * This violates the MACsec protocol standard. + */ if (ether_addr_equal_64bits(hdr->h_dest, ndev->dev_addr)) { /* exact match, divert skb to this port */ @@ -1036,14 +1066,10 @@ static enum rx_handler_result handle_not_macsec(struct sk_buff *skb) break; nskb->dev = ndev; - if (ether_addr_equal_64bits(hdr->h_dest, - ndev->broadcast)) - nskb->pkt_type = PACKET_BROADCAST; - else - nskb->pkt_type = PACKET_MULTICAST; + eth_skb_pkt_type(nskb, ndev); __netif_rx(nskb); - } else if (rx_sc || ndev->flags & IFF_PROMISC) { + } else if (ndev->flags & IFF_PROMISC) { skb->dev = ndev; skb->pkt_type = PACKET_HOST; ret = RX_HANDLER_ANOTHER; From 39d26a8f2efcb8b5665fe7d54a7dba306a8f1dff Mon Sep 17 00:00:00 2001 From: Rahul Rameshbabu Date: Tue, 23 Apr 2024 11:13:05 -0700 Subject: [PATCH 281/313] net/mlx5e: Advertise mlx5 ethernet driver updates sk_buff md_dst for MACsec mlx5 Rx flow steering and CQE handling enable the driver to be able to update an skb's md_dst attribute as MACsec when MACsec traffic arrives when a device is configured for offloading. Advertise this to the core stack to take advantage of this capability. 
Cc: stable@vger.kernel.org Fixes: b7c9400cbc48 ("net/mlx5e: Implement MACsec Rx data path using MACsec skb_metadata_dst") Signed-off-by: Rahul Rameshbabu Reviewed-by: Benjamin Poirier Reviewed-by: Cosmin Ratiu Reviewed-by: Sabrina Dubroca Link: https://lore.kernel.org/r/20240423181319.115860-5-rrameshbabu@nvidia.com Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/mellanox/mlx5/core/en_accel/macsec.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/macsec.c b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/macsec.c index b2cabd6ab86c..cc9bcc420032 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/macsec.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/macsec.c @@ -1640,6 +1640,7 @@ static const struct macsec_ops macsec_offload_ops = { .mdo_add_secy = mlx5e_macsec_add_secy, .mdo_upd_secy = mlx5e_macsec_upd_secy, .mdo_del_secy = mlx5e_macsec_del_secy, + .rx_uses_md_dst = true, }; bool mlx5e_macsec_handle_tx_skb(struct mlx5e_macsec *macsec, struct sk_buff *skb) From 4dcd0e83ea1d1df9b2e0174a6d3e795b3477d64e Mon Sep 17 00:00:00 2001 From: Dan Carpenter Date: Tue, 23 Apr 2024 19:15:22 +0300 Subject: [PATCH 282/313] net: ti: icssg-prueth: Fix signedness bug in prueth_init_rx_chns() The rx_chn->irq[] array is unsigned int but it should be signed for the error handling to work. Also if k3_udma_glue_rx_get_irq() returns zero then we should return -ENXIO instead of success. 
Fixes: 128d5874c082 ("net: ti: icssg-prueth: Add ICSSG ethernet driver") Signed-off-by: Dan Carpenter Reviewed-by: Roger Quadros Reviewed-by: MD Danish Anwar Link: https://lore.kernel.org/r/05282415-e7f4-42f3-99f8-32fde8f30936@moroto.mountain Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/ti/icssg/icssg_prueth.c | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/drivers/net/ethernet/ti/icssg/icssg_prueth.c b/drivers/net/ethernet/ti/icssg/icssg_prueth.c index cf7b73f8f450..b69af69a1ccd 100644 --- a/drivers/net/ethernet/ti/icssg/icssg_prueth.c +++ b/drivers/net/ethernet/ti/icssg/icssg_prueth.c @@ -421,12 +421,14 @@ static int prueth_init_rx_chns(struct prueth_emac *emac, if (!i) fdqring_id = k3_udma_glue_rx_flow_get_fdq_id(rx_chn->rx_chn, i); - rx_chn->irq[i] = k3_udma_glue_rx_get_irq(rx_chn->rx_chn, i); - if (rx_chn->irq[i] <= 0) { - ret = rx_chn->irq[i]; + ret = k3_udma_glue_rx_get_irq(rx_chn->rx_chn, i); + if (ret <= 0) { + if (!ret) + ret = -ENXIO; netdev_err(ndev, "Failed to get rx dma irq"); goto fail; } + rx_chn->irq[i] = ret; } return 0; From 2cc7d150550cc981aceedf008f5459193282425c Mon Sep 17 00:00:00 2001 From: Sindhu Devale Date: Tue, 23 Apr 2024 11:27:17 -0700 Subject: [PATCH 283/313] i40e: Do not use WQ_MEM_RECLAIM flag for workqueue Issue reported by customer during SRIOV testing, call trace: When both i40e and the i40iw driver are loaded, a warning in check_flush_dependency is being triggered. This seems to be because of the i40e driver workqueue is allocated with the WQ_MEM_RECLAIM flag, and the i40iw one is not. Similar error was encountered on ice too and it was fixed by removing the flag. Do the same for i40e too. 
[Feb 9 09:08] ------------[ cut here ]------------ [ +0.000004] workqueue: WQ_MEM_RECLAIM i40e:i40e_service_task [i40e] is flushing !WQ_MEM_RECLAIM infiniband:0x0 [ +0.000060] WARNING: CPU: 0 PID: 937 at kernel/workqueue.c:2966 check_flush_dependency+0x10b/0x120 [ +0.000007] Modules linked in: snd_seq_dummy snd_hrtimer snd_seq snd_timer snd_seq_device snd soundcore nls_utf8 cifs cifs_arc4 nls_ucs2_utils rdma_cm iw_cm ib_cm cifs_md4 dns_resolver netfs qrtr rfkill sunrpc vfat fat intel_rapl_msr intel_rapl_common irdma intel_uncore_frequency intel_uncore_frequency_common ice ipmi_ssif isst_if_common skx_edac nfit libnvdimm x86_pkg_temp_thermal intel_powerclamp gnss coretemp ib_uverbs rapl intel_cstate ib_core iTCO_wdt iTCO_vendor_support acpi_ipmi mei_me ipmi_si intel_uncore ioatdma i2c_i801 joydev pcspkr mei ipmi_devintf lpc_ich intel_pch_thermal i2c_smbus ipmi_msghandler acpi_power_meter acpi_pad xfs libcrc32c ast sd_mod drm_shmem_helper t10_pi drm_kms_helper sg ixgbe drm i40e ahci crct10dif_pclmul libahci crc32_pclmul igb crc32c_intel libata ghash_clmulni_intel i2c_algo_bit mdio dca wmi dm_mirror dm_region_hash dm_log dm_mod fuse [ +0.000050] CPU: 0 PID: 937 Comm: kworker/0:3 Kdump: loaded Not tainted 6.8.0-rc2-Feb-net_dev-Qiueue-00279-gbd43c5687e05 #1 [ +0.000003] Hardware name: Intel Corporation S2600BPB/S2600BPB, BIOS SE5C620.86B.02.01.0013.121520200651 12/15/2020 [ +0.000001] Workqueue: i40e i40e_service_task [i40e] [ +0.000024] RIP: 0010:check_flush_dependency+0x10b/0x120 [ +0.000003] Code: ff 49 8b 54 24 18 48 8d 8b b0 00 00 00 49 89 e8 48 81 c6 b0 00 00 00 48 c7 c7 b0 97 fa 9f c6 05 8a cc 1f 02 01 e8 35 b3 fd ff <0f> 0b e9 10 ff ff ff 80 3d 78 cc 1f 02 00 75 94 e9 46 ff ff ff 90 [ +0.000002] RSP: 0018:ffffbd294976bcf8 EFLAGS: 00010282 [ +0.000002] RAX: 0000000000000000 RBX: ffff94d4c483c000 RCX: 0000000000000027 [ +0.000001] RDX: ffff94d47f620bc8 RSI: 0000000000000001 RDI: ffff94d47f620bc0 [ +0.000001] RBP: 0000000000000000 R08: 0000000000000000 R09: 
00000000ffff7fff [ +0.000001] R10: ffffbd294976bb98 R11: ffffffffa0be65e8 R12: ffff94c5451ea180 [ +0.000001] R13: ffff94c5ab5e8000 R14: ffff94c5c20b6e05 R15: ffff94c5f1330ab0 [ +0.000001] FS: 0000000000000000(0000) GS:ffff94d47f600000(0000) knlGS:0000000000000000 [ +0.000002] CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 [ +0.000001] CR2: 00007f9e6f1fca70 CR3: 0000000038e20004 CR4: 00000000007706f0 [ +0.000000] DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000 [ +0.000001] DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400 [ +0.000001] PKRU: 55555554 [ +0.000001] Call Trace: [ +0.000001] [ +0.000002] ? __warn+0x80/0x130 [ +0.000003] ? check_flush_dependency+0x10b/0x120 [ +0.000002] ? report_bug+0x195/0x1a0 [ +0.000005] ? handle_bug+0x3c/0x70 [ +0.000003] ? exc_invalid_op+0x14/0x70 [ +0.000002] ? asm_exc_invalid_op+0x16/0x20 [ +0.000006] ? check_flush_dependency+0x10b/0x120 [ +0.000002] ? check_flush_dependency+0x10b/0x120 [ +0.000002] __flush_workqueue+0x126/0x3f0 [ +0.000015] ib_cache_cleanup_one+0x1c/0xe0 [ib_core] [ +0.000056] __ib_unregister_device+0x6a/0xb0 [ib_core] [ +0.000023] ib_unregister_device_and_put+0x34/0x50 [ib_core] [ +0.000020] i40iw_close+0x4b/0x90 [irdma] [ +0.000022] i40e_notify_client_of_netdev_close+0x54/0xc0 [i40e] [ +0.000035] i40e_service_task+0x126/0x190 [i40e] [ +0.000024] process_one_work+0x174/0x340 [ +0.000003] worker_thread+0x27e/0x390 [ +0.000001] ? __pfx_worker_thread+0x10/0x10 [ +0.000002] kthread+0xdf/0x110 [ +0.000002] ? __pfx_kthread+0x10/0x10 [ +0.000002] ret_from_fork+0x2d/0x50 [ +0.000003] ? 
__pfx_kthread+0x10/0x10 [ +0.000001] ret_from_fork_asm+0x1b/0x30 [ +0.000004] [ +0.000001] ---[ end trace 0000000000000000 ]--- Fixes: 4d5957cbdecd ("i40e: remove WQ_UNBOUND and the task limit of our workqueue") Signed-off-by: Sindhu Devale Reviewed-by: Arkadiusz Kubalewski Reviewed-by: Mateusz Polchlopek Signed-off-by: Aleksandr Loktionov Tested-by: Robert Ganzynkowicz Signed-off-by: Tony Nguyen Link: https://lore.kernel.org/r/20240423182723.740401-2-anthony.l.nguyen@intel.com Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/intel/i40e/i40e_main.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/ethernet/intel/i40e/i40e_main.c b/drivers/net/ethernet/intel/i40e/i40e_main.c index 48b9ddb2b1b3..1792491d8d2d 100644 --- a/drivers/net/ethernet/intel/i40e/i40e_main.c +++ b/drivers/net/ethernet/intel/i40e/i40e_main.c @@ -16650,7 +16650,7 @@ static int __init i40e_init_module(void) * since we need to be able to guarantee forward progress even under * memory pressure. */ - i40e_wq = alloc_workqueue("%s", WQ_MEM_RECLAIM, 0, i40e_driver_name); + i40e_wq = alloc_workqueue("%s", 0, 0, i40e_driver_name); if (!i40e_wq) { pr_err("%s: Failed to create workqueue\n", i40e_driver_name); return -ENOMEM; From ef3c313119ea448c22da10366faa26b5b4b1a18e Mon Sep 17 00:00:00 2001 From: Erwan Velu Date: Tue, 23 Apr 2024 11:27:18 -0700 Subject: [PATCH 284/313] i40e: Report MFS in decimal base instead of hex If the MFS is set below the default (0x2600), a warning message is reported like the following: MFS for port 1 has been set below the default: 600 This message is a bit confusing as the number shown here (600) is in fact a hexadecimal number: 0x600 = 1536 Without any explicit "0x" prefix, this message reads as if the MFS is set to 600 bytes. MFS values, like MTUs, are usually expressed in decimal. This commit reports both current and default MFS values in decimal so it's less confusing for end-users.
A typical warning message looks like the following : MFS for port 1 (1536) has been set below the default (9728) Signed-off-by: Erwan Velu Reviewed-by: Simon Horman Tested-by: Tony Brelinski Signed-off-by: Tony Nguyen Fixes: 3a2c6ced90e1 ("i40e: Add a check to see if MFS is set") Link: https://lore.kernel.org/r/20240423182723.740401-3-anthony.l.nguyen@intel.com Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/intel/i40e/i40e_main.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/net/ethernet/intel/i40e/i40e_main.c b/drivers/net/ethernet/intel/i40e/i40e_main.c index 1792491d8d2d..ffb9f9f15c52 100644 --- a/drivers/net/ethernet/intel/i40e/i40e_main.c +++ b/drivers/net/ethernet/intel/i40e/i40e_main.c @@ -16107,8 +16107,8 @@ static int i40e_probe(struct pci_dev *pdev, const struct pci_device_id *ent) val = FIELD_GET(I40E_PRTGL_SAH_MFS_MASK, rd32(&pf->hw, I40E_PRTGL_SAH)); if (val < MAX_FRAME_SIZE_DEFAULT) - dev_warn(&pdev->dev, "MFS for port %x has been set below the default: %x\n", - pf->hw.port, val); + dev_warn(&pdev->dev, "MFS for port %x (%d) has been set below the default (%d)\n", + pf->hw.port, val, MAX_FRAME_SIZE_DEFAULT); /* Add a filter to drop all Flow control frames from any VSI from being * transmitted. By doing so we stop a malicious VF from sending out From 54976cf58d6168b8d15cebb395069f23b2f34b31 Mon Sep 17 00:00:00 2001 From: Sudheer Mogilappagari Date: Tue, 23 Apr 2024 11:27:19 -0700 Subject: [PATCH 285/313] iavf: Fix TC config comparison with existing adapter TC config Same number of TCs doesn't imply that underlying TC configs are same. The config could be different due to difference in number of queues in each TC. Add utility function to determine if TC configs are same. 
Fixes: d5b33d024496 ("i40evf: add ndo_setup_tc callback to i40evf") Signed-off-by: Sudheer Mogilappagari Tested-by: Mineri Bhange (A Contingent Worker at Intel) Signed-off-by: Tony Nguyen Link: https://lore.kernel.org/r/20240423182723.740401-4-anthony.l.nguyen@intel.com Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/intel/iavf/iavf_main.c | 30 ++++++++++++++++++++- 1 file changed, 29 insertions(+), 1 deletion(-) diff --git a/drivers/net/ethernet/intel/iavf/iavf_main.c b/drivers/net/ethernet/intel/iavf/iavf_main.c index ef2440f3abf8..166832a4213a 100644 --- a/drivers/net/ethernet/intel/iavf/iavf_main.c +++ b/drivers/net/ethernet/intel/iavf/iavf_main.c @@ -3502,6 +3502,34 @@ static void iavf_del_all_cloud_filters(struct iavf_adapter *adapter) spin_unlock_bh(&adapter->cloud_filter_list_lock); } +/** + * iavf_is_tc_config_same - Compare the mqprio TC config with the + * TC config already configured on this adapter. + * @adapter: board private structure + * @mqprio_qopt: TC config received from kernel. + * + * This function compares the TC config received from the kernel + * with the config already configured on the adapter. + * + * Return: True if configuration is same, false otherwise. 
+ **/ +static bool iavf_is_tc_config_same(struct iavf_adapter *adapter, + struct tc_mqprio_qopt *mqprio_qopt) +{ + struct virtchnl_channel_info *ch = &adapter->ch_config.ch_info[0]; + int i; + + if (adapter->num_tc != mqprio_qopt->num_tc) + return false; + + for (i = 0; i < adapter->num_tc; i++) { + if (ch[i].count != mqprio_qopt->count[i] || + ch[i].offset != mqprio_qopt->offset[i]) + return false; + } + return true; +} + /** * __iavf_setup_tc - configure multiple traffic classes * @netdev: network interface device structure @@ -3559,7 +3587,7 @@ static int __iavf_setup_tc(struct net_device *netdev, void *type_data) if (ret) return ret; /* Return if same TC config is requested */ - if (adapter->num_tc == num_tc) + if (iavf_is_tc_config_same(adapter, &mqprio_qopt->qopt)) return 0; adapter->num_tc = num_tc; From 96fdd1f6b4ed72a741fb0eb705c0e13049b8721f Mon Sep 17 00:00:00 2001 From: Jacob Keller Date: Tue, 23 Apr 2024 11:27:20 -0700 Subject: [PATCH 286/313] ice: fix LAG and VF lock dependency in ice_reset_vf() Since commit 9f74a3dfcf83 ("ice: Fix VF Reset paths when interface in a failed over aggregate"), the ice driver has acquired the LAG mutex in ice_reset_vf(). The commit placed this lock acquisition just prior to the acquisition of the VF configuration lock. If ice_reset_vf() acquires the configuration lock via the ICE_VF_RESET_LOCK flag, this could deadlock with ice_vc_cfg_qs_msg() because it always acquires the locks in the order of the VF configuration lock and then the LAG mutex.
Lockdep reports this violation almost immediately on creating and then removing 2 VF: ====================================================== WARNING: possible circular locking dependency detected 6.8.0-rc6 #54 Tainted: G W O ------------------------------------------------------ kworker/60:3/6771 is trying to acquire lock: ff40d43e099380a0 (&vf->cfg_lock){+.+.}-{3:3}, at: ice_reset_vf+0x22f/0x4d0 [ice] but task is already holding lock: ff40d43ea1961210 (&pf->lag_mutex){+.+.}-{3:3}, at: ice_reset_vf+0xb7/0x4d0 [ice] which lock already depends on the new lock. the existing dependency chain (in reverse order) is: -> #1 (&pf->lag_mutex){+.+.}-{3:3}: __lock_acquire+0x4f8/0xb40 lock_acquire+0xd4/0x2d0 __mutex_lock+0x9b/0xbf0 ice_vc_cfg_qs_msg+0x45/0x690 [ice] ice_vc_process_vf_msg+0x4f5/0x870 [ice] __ice_clean_ctrlq+0x2b5/0x600 [ice] ice_service_task+0x2c9/0x480 [ice] process_one_work+0x1e9/0x4d0 worker_thread+0x1e1/0x3d0 kthread+0x104/0x140 ret_from_fork+0x31/0x50 ret_from_fork_asm+0x1b/0x30 -> #0 (&vf->cfg_lock){+.+.}-{3:3}: check_prev_add+0xe2/0xc50 validate_chain+0x558/0x800 __lock_acquire+0x4f8/0xb40 lock_acquire+0xd4/0x2d0 __mutex_lock+0x9b/0xbf0 ice_reset_vf+0x22f/0x4d0 [ice] ice_process_vflr_event+0x98/0xd0 [ice] ice_service_task+0x1cc/0x480 [ice] process_one_work+0x1e9/0x4d0 worker_thread+0x1e1/0x3d0 kthread+0x104/0x140 ret_from_fork+0x31/0x50 ret_from_fork_asm+0x1b/0x30 other info that might help us debug this: Possible unsafe locking scenario: CPU0 CPU1 ---- ---- lock(&pf->lag_mutex); lock(&vf->cfg_lock); lock(&pf->lag_mutex); lock(&vf->cfg_lock); *** DEADLOCK *** 4 locks held by kworker/60:3/6771: #0: ff40d43e05428b38 ((wq_completion)ice){+.+.}-{0:0}, at: process_one_work+0x176/0x4d0 #1: ff50d06e05197e58 ((work_completion)(&pf->serv_task)){+.+.}-{0:0}, at: process_one_work+0x176/0x4d0 #2: ff40d43ea1960e50 (&pf->vfs.table_lock){+.+.}-{3:3}, at: ice_process_vflr_event+0x48/0xd0 [ice] #3: ff40d43ea1961210 (&pf->lag_mutex){+.+.}-{3:3}, at: ice_reset_vf+0xb7/0x4d0 
[ice] stack backtrace: CPU: 60 PID: 6771 Comm: kworker/60:3 Tainted: G W O 6.8.0-rc6 #54 Hardware name: Workqueue: ice ice_service_task [ice] Call Trace: dump_stack_lvl+0x4a/0x80 check_noncircular+0x12d/0x150 check_prev_add+0xe2/0xc50 ? save_trace+0x59/0x230 ? add_chain_cache+0x109/0x450 validate_chain+0x558/0x800 __lock_acquire+0x4f8/0xb40 ? lockdep_hardirqs_on+0x7d/0x100 lock_acquire+0xd4/0x2d0 ? ice_reset_vf+0x22f/0x4d0 [ice] ? lock_is_held_type+0xc7/0x120 __mutex_lock+0x9b/0xbf0 ? ice_reset_vf+0x22f/0x4d0 [ice] ? ice_reset_vf+0x22f/0x4d0 [ice] ? rcu_is_watching+0x11/0x50 ? ice_reset_vf+0x22f/0x4d0 [ice] ice_reset_vf+0x22f/0x4d0 [ice] ? process_one_work+0x176/0x4d0 ice_process_vflr_event+0x98/0xd0 [ice] ice_service_task+0x1cc/0x480 [ice] process_one_work+0x1e9/0x4d0 worker_thread+0x1e1/0x3d0 ? __pfx_worker_thread+0x10/0x10 kthread+0x104/0x140 ? __pfx_kthread+0x10/0x10 ret_from_fork+0x31/0x50 ? __pfx_kthread+0x10/0x10 ret_from_fork_asm+0x1b/0x30 To avoid deadlock, we must acquire the LAG mutex only after acquiring the VF configuration lock. Fix the ice_reset_vf() to acquire the LAG mutex only after we either acquire or check that the VF configuration lock is held. 
Fixes: 9f74a3dfcf83 ("ice: Fix VF Reset paths when interface in a failed over aggregate") Signed-off-by: Jacob Keller Reviewed-by: Dave Ertman Reviewed-by: Mateusz Polchlopek Tested-by: Przemek Kitszel Tested-by: Rafal Romanowski Signed-off-by: Tony Nguyen Link: https://lore.kernel.org/r/20240423182723.740401-5-anthony.l.nguyen@intel.com Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/intel/ice/ice_vf_lib.c | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/drivers/net/ethernet/intel/ice/ice_vf_lib.c b/drivers/net/ethernet/intel/ice/ice_vf_lib.c index 21d26e19338a..d10a4be965b5 100644 --- a/drivers/net/ethernet/intel/ice/ice_vf_lib.c +++ b/drivers/net/ethernet/intel/ice/ice_vf_lib.c @@ -856,6 +856,11 @@ int ice_reset_vf(struct ice_vf *vf, u32 flags) return 0; } + if (flags & ICE_VF_RESET_LOCK) + mutex_lock(&vf->cfg_lock); + else + lockdep_assert_held(&vf->cfg_lock); + lag = pf->lag; mutex_lock(&pf->lag_mutex); if (lag && lag->bonded && lag->primary) { @@ -867,11 +872,6 @@ int ice_reset_vf(struct ice_vf *vf, u32 flags) act_prt = ICE_LAG_INVALID_PORT; } - if (flags & ICE_VF_RESET_LOCK) - mutex_lock(&vf->cfg_lock); - else - lockdep_assert_held(&vf->cfg_lock); - if (ice_is_vf_disabled(vf)) { vsi = ice_get_vf_vsi(vf); if (!vsi) { @@ -956,14 +956,14 @@ int ice_reset_vf(struct ice_vf *vf, u32 flags) ice_mbx_clear_malvf(&vf->mbx_info); out_unlock: - if (flags & ICE_VF_RESET_LOCK) - mutex_unlock(&vf->cfg_lock); - if (lag && lag->bonded && lag->primary && act_prt != ICE_LAG_INVALID_PORT) ice_lag_move_vf_nodes_cfg(lag, pri_prt, act_prt); mutex_unlock(&pf->lag_mutex); + if (flags & ICE_VF_RESET_LOCK) + mutex_unlock(&vf->cfg_lock); + return err; } From 1b9e743e923b256e353a9a644195372285e5a6c0 Mon Sep 17 00:00:00 2001 From: Jason Reeder Date: Wed, 24 Apr 2024 12:46:26 +0530 Subject: [PATCH 287/313] net: ethernet: ti: am65-cpts: Fix PTPv1 message type on TX packets The CPTS, by design, captures the messageType (Sync, Delay_Req, etc.) 
field from the second nibble of the PTP header which is defined in the PTPv2 (1588-2008) specification. In the PTPv1 (1588-2002) specification the first two bytes of the PTP header are defined as the versionType which is always 0x0001. This means that any PTPv1 packets that are tagged for TX timestamping by the CPTS will have their messageType set to 0x0 which corresponds to a Sync message type. This causes issues when a PTPv1 stack is expecting a Delay_Req (messageType: 0x1) timestamp that never appears. Fix this by checking if the ptp_class of the timestamped TX packet is PTP_CLASS_V1 and then matching the PTP sequence ID to the stored sequence ID in the skb->cb data structure. If the sequence IDs match and the packet is of type PTPv1 then there is a chance that the messageType has been incorrectly stored by the CPTS so overwrite the messageType stored by the CPTS with the messageType from the skb->cb data structure. This allows the PTPv1 stack to receive TX timestamps for Delay_Req packets which are necessary to lock onto a PTP Leader. 
Signed-off-by: Jason Reeder Signed-off-by: Ravi Gunasekaran Tested-by: Ed Trexel Fixes: f6bd59526ca5 ("net: ethernet: ti: introduce am654 common platform time sync driver") Link: https://lore.kernel.org/r/20240424071626.32558-1-r-gunasekaran@ti.com Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/ti/am65-cpts.c | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/drivers/net/ethernet/ti/am65-cpts.c b/drivers/net/ethernet/ti/am65-cpts.c index c66618d91c28..f89716b1cfb6 100644 --- a/drivers/net/ethernet/ti/am65-cpts.c +++ b/drivers/net/ethernet/ti/am65-cpts.c @@ -784,6 +784,11 @@ static bool am65_cpts_match_tx_ts(struct am65_cpts *cpts, struct am65_cpts_skb_cb_data *skb_cb = (struct am65_cpts_skb_cb_data *)skb->cb; + if ((ptp_classify_raw(skb) & PTP_CLASS_V1) && + ((mtype_seqid & AM65_CPTS_EVENT_1_SEQUENCE_ID_MASK) == + (skb_cb->skb_mtype_seqid & AM65_CPTS_EVENT_1_SEQUENCE_ID_MASK))) + mtype_seqid = skb_cb->skb_mtype_seqid; + if (mtype_seqid == skb_cb->skb_mtype_seqid) { u64 ns = event->timestamp; From 6e965eba43e9724f3e603d7b7cc83e53b23d155e Mon Sep 17 00:00:00 2001 From: Su Hui Date: Wed, 24 Apr 2024 10:27:25 +0800 Subject: [PATCH 288/313] octeontx2-af: fix the double free in rvu_npc_freemem() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Clang static checker(scan-build) warning: drivers/net/ethernet/marvell/octeontx2/af/rvu_npc.c:line 2184, column 2 Attempt to free released memory. npc_mcam_rsrcs_deinit() has released 'mcam->counters.bmap'. Deleted this redundant kfree() to fix this double free problem. 
Fixes: dd7842878633 ("octeontx2-af: Add new devlink param to configure maximum usable NIX block LFs") Signed-off-by: Su Hui Reviewed-by: Geetha sowjanya Reviewed-by: Kalesh AP Reviewed-by: Hariprasad Kelam Link: https://lore.kernel.org/r/20240424022724.144587-1-suhui@nfschina.com Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/marvell/octeontx2/af/rvu_npc.c | 1 - 1 file changed, 1 deletion(-) diff --git a/drivers/net/ethernet/marvell/octeontx2/af/rvu_npc.c b/drivers/net/ethernet/marvell/octeontx2/af/rvu_npc.c index be709f83f331..e8b73b9d75e3 100644 --- a/drivers/net/ethernet/marvell/octeontx2/af/rvu_npc.c +++ b/drivers/net/ethernet/marvell/octeontx2/af/rvu_npc.c @@ -2181,7 +2181,6 @@ void rvu_npc_freemem(struct rvu *rvu) kfree(pkind->rsrc.bmap); npc_mcam_rsrcs_deinit(rvu); - kfree(mcam->counters.bmap); if (rvu->kpu_prfl_addr) iounmap(rvu->kpu_prfl_addr); else From 0c81ea5a8e231fa120e3f76aa9ea99fa3950cc59 Mon Sep 17 00:00:00 2001 From: Geert Uytterhoeven Date: Wed, 24 Apr 2024 09:45:21 +0200 Subject: [PATCH 289/313] net: ravb: Fix registered interrupt names MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit As interrupts are now requested from ravb_probe(), before calling register_netdev(), ndev->name still contains the template "eth%d", leading to funny names in /proc/interrupts. E.g. on R-Car E3: 89: 0 0 GICv2 93 Level eth%d:ch22:multi 90: 0 3 GICv2 95 Level eth%d:ch24:emac 91: 0 23484 GICv2 71 Level eth%d:ch0:rx_be 92: 0 0 GICv2 72 Level eth%d:ch1:rx_nc 93: 0 13735 GICv2 89 Level eth%d:ch18:tx_be 94: 0 0 GICv2 90 Level eth%d:ch19:tx_nc Worse, on platforms with multiple RAVB instances (e.g. R-Car V4H), all interrupts have similar names. 
Fix this by using the device name instead, like is done in several other drivers: 89: 0 0 GICv2 93 Level e6800000.ethernet:ch22:multi 90: 0 1 GICv2 95 Level e6800000.ethernet:ch24:emac 91: 0 28578 GICv2 71 Level e6800000.ethernet:ch0:rx_be 92: 0 0 GICv2 72 Level e6800000.ethernet:ch1:rx_nc 93: 0 14044 GICv2 89 Level e6800000.ethernet:ch18:tx_be 94: 0 0 GICv2 90 Level e6800000.ethernet:ch19:tx_nc Rename the local variable dev_name, as it shadows the dev_name() function, and pre-initialize it, to simplify the code. Fixes: 32f012b8c01ca9fd ("net: ravb: Move getting/requesting IRQs in the probe() method") Signed-off-by: Geert Uytterhoeven Reviewed-by: Niklas Söderlund Reviewed-by: Sergey Shtylyov Reviewed-by: Claudiu Beznea Tested-by: Claudiu Beznea # on RZ/G3S Link: https://lore.kernel.org/r/cde67b68adf115b3cf0b44c32334ae00b2fbb321.1713944647.git.geert+renesas@glider.be Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/renesas/ravb_main.c | 11 +++++------ 1 file changed, 5 insertions(+), 6 deletions(-) diff --git a/drivers/net/ethernet/renesas/ravb_main.c b/drivers/net/ethernet/renesas/ravb_main.c index fcb756d77681..9b1f639f64a1 100644 --- a/drivers/net/ethernet/renesas/ravb_main.c +++ b/drivers/net/ethernet/renesas/ravb_main.c @@ -2722,19 +2722,18 @@ static int ravb_setup_irq(struct ravb_private *priv, const char *irq_name, struct platform_device *pdev = priv->pdev; struct net_device *ndev = priv->ndev; struct device *dev = &pdev->dev; - const char *dev_name; + const char *devname = dev_name(dev); unsigned long flags; int error, irq_num; if (irq_name) { - dev_name = devm_kasprintf(dev, GFP_KERNEL, "%s:%s", ndev->name, ch); - if (!dev_name) + devname = devm_kasprintf(dev, GFP_KERNEL, "%s:%s", devname, ch); + if (!devname) return -ENOMEM; irq_num = platform_get_irq_byname(pdev, irq_name); flags = 0; } else { - dev_name = ndev->name; irq_num = platform_get_irq(pdev, 0); flags = IRQF_SHARED; } @@ -2744,9 +2743,9 @@ static int ravb_setup_irq(struct ravb_private 
*priv, const char *irq_name, if (irq) *irq = irq_num; - error = devm_request_irq(dev, irq_num, handler, flags, dev_name, ndev); + error = devm_request_irq(dev, irq_num, handler, flags, devname, ndev); if (error) - netdev_err(ndev, "cannot request IRQ %s\n", dev_name); + netdev_err(ndev, "cannot request IRQ %s\n", devname); return error; } From 38d7b94e81d068b8d8c8392f421cfd2c3bbfd1a6 Mon Sep 17 00:00:00 2001 From: Arkadiusz Kubalewski Date: Wed, 24 Apr 2024 12:16:36 +0200 Subject: [PATCH 290/313] dpll: fix dpll_pin_on_pin_register() for multiple parent pins In a scenario where a pin is registered with multiple parent pins via dpll_pin_on_pin_register(..), all belonging to the same dpll device, a second call to dpll_pin_on_pin_unregister(..) would cause a call trace, as it tries to use already released registration resources (due to the fix introduced in b446631f355e). In this scenario the pin was registered twice, so resources are not expected to be released until each registered pin/pin pair is unregistered. Currently, the following crash/call trace is produced when the ice driver is removed on a system with an installed E810T NIC which includes a dpll device: WARNING: CPU: 51 PID: 9155 at drivers/dpll/dpll_core.c:809 dpll_pin_ops+0x20/0x30 RIP: 0010:dpll_pin_ops+0x20/0x30 Call Trace: ? __warn+0x7f/0x130 ? dpll_pin_ops+0x20/0x30 dpll_msg_add_pin_freq+0x37/0x1d0 dpll_cmd_pin_get_one+0x1c0/0x400 ? __nlmsg_put+0x63/0x80 dpll_pin_event_send+0x93/0x140 dpll_pin_on_pin_unregister+0x3f/0x100 ice_dpll_deinit_pins+0xa1/0x230 [ice] ice_remove+0xf1/0x210 [ice] Fix by adding a parent pointer as a cookie when creating a registration, and also when searching for it. For regular pins pass NULL; this allows creating a separate registration for each parent the pin is registered with.
Fixes: b446631f355e ("dpll: fix dpll_xa_ref_*_del() for multiple registrations") Signed-off-by: Arkadiusz Kubalewski Reviewed-by: Jiri Pirko Link: https://lore.kernel.org/r/20240424101636.1491424-1-arkadiusz.kubalewski@intel.com Signed-off-by: Jakub Kicinski --- drivers/dpll/dpll_core.c | 58 +++++++++++++++++++++++----------------- 1 file changed, 33 insertions(+), 25 deletions(-) diff --git a/drivers/dpll/dpll_core.c b/drivers/dpll/dpll_core.c index 64eaca80d736..d0f6693ca142 100644 --- a/drivers/dpll/dpll_core.c +++ b/drivers/dpll/dpll_core.c @@ -42,6 +42,7 @@ struct dpll_pin_registration { struct list_head list; const struct dpll_pin_ops *ops; void *priv; + void *cookie; }; struct dpll_device *dpll_device_get_by_id(int id) @@ -54,12 +55,14 @@ struct dpll_device *dpll_device_get_by_id(int id) static struct dpll_pin_registration * dpll_pin_registration_find(struct dpll_pin_ref *ref, - const struct dpll_pin_ops *ops, void *priv) + const struct dpll_pin_ops *ops, void *priv, + void *cookie) { struct dpll_pin_registration *reg; list_for_each_entry(reg, &ref->registration_list, list) { - if (reg->ops == ops && reg->priv == priv) + if (reg->ops == ops && reg->priv == priv && + reg->cookie == cookie) return reg; } return NULL; @@ -67,7 +70,8 @@ dpll_pin_registration_find(struct dpll_pin_ref *ref, static int dpll_xa_ref_pin_add(struct xarray *xa_pins, struct dpll_pin *pin, - const struct dpll_pin_ops *ops, void *priv) + const struct dpll_pin_ops *ops, void *priv, + void *cookie) { struct dpll_pin_registration *reg; struct dpll_pin_ref *ref; @@ -78,7 +82,7 @@ dpll_xa_ref_pin_add(struct xarray *xa_pins, struct dpll_pin *pin, xa_for_each(xa_pins, i, ref) { if (ref->pin != pin) continue; - reg = dpll_pin_registration_find(ref, ops, priv); + reg = dpll_pin_registration_find(ref, ops, priv, cookie); if (reg) { refcount_inc(&ref->refcount); return 0; @@ -111,6 +115,7 @@ dpll_xa_ref_pin_add(struct xarray *xa_pins, struct dpll_pin *pin, } reg->ops = ops; reg->priv = priv; + 
reg->cookie = cookie; if (ref_exists) refcount_inc(&ref->refcount); list_add_tail(&reg->list, &ref->registration_list); @@ -119,7 +124,8 @@ dpll_xa_ref_pin_add(struct xarray *xa_pins, struct dpll_pin *pin, } static int dpll_xa_ref_pin_del(struct xarray *xa_pins, struct dpll_pin *pin, - const struct dpll_pin_ops *ops, void *priv) + const struct dpll_pin_ops *ops, void *priv, + void *cookie) { struct dpll_pin_registration *reg; struct dpll_pin_ref *ref; @@ -128,7 +134,7 @@ static int dpll_xa_ref_pin_del(struct xarray *xa_pins, struct dpll_pin *pin, xa_for_each(xa_pins, i, ref) { if (ref->pin != pin) continue; - reg = dpll_pin_registration_find(ref, ops, priv); + reg = dpll_pin_registration_find(ref, ops, priv, cookie); if (WARN_ON(!reg)) return -EINVAL; list_del(&reg->list); @@ -146,7 +152,7 @@ static int dpll_xa_ref_pin_del(struct xarray *xa_pins, struct dpll_pin *pin, static int dpll_xa_ref_dpll_add(struct xarray *xa_dplls, struct dpll_device *dpll, - const struct dpll_pin_ops *ops, void *priv) + const struct dpll_pin_ops *ops, void *priv, void *cookie) { struct dpll_pin_registration *reg; struct dpll_pin_ref *ref; @@ -157,7 +163,7 @@ dpll_xa_ref_dpll_add(struct xarray *xa_dplls, struct dpll_device *dpll, xa_for_each(xa_dplls, i, ref) { if (ref->dpll != dpll) continue; - reg = dpll_pin_registration_find(ref, ops, priv); + reg = dpll_pin_registration_find(ref, ops, priv, cookie); if (reg) { refcount_inc(&ref->refcount); return 0; @@ -190,6 +196,7 @@ dpll_xa_ref_dpll_add(struct xarray *xa_dplls, struct dpll_device *dpll, } reg->ops = ops; reg->priv = priv; + reg->cookie = cookie; if (ref_exists) refcount_inc(&ref->refcount); list_add_tail(&reg->list, &ref->registration_list); @@ -199,7 +206,7 @@ dpll_xa_ref_dpll_add(struct xarray *xa_dplls, struct dpll_device *dpll, static void dpll_xa_ref_dpll_del(struct xarray *xa_dplls, struct dpll_device *dpll, - const struct dpll_pin_ops *ops, void *priv) + const struct dpll_pin_ops *ops, void *priv, void *cookie) { struct
dpll_pin_registration *reg; struct dpll_pin_ref *ref; @@ -208,7 +215,7 @@ dpll_xa_ref_dpll_del(struct xarray *xa_dplls, struct dpll_device *dpll, xa_for_each(xa_dplls, i, ref) { if (ref->dpll != dpll) continue; - reg = dpll_pin_registration_find(ref, ops, priv); + reg = dpll_pin_registration_find(ref, ops, priv, cookie); if (WARN_ON(!reg)) return; list_del(&reg->list); @@ -594,14 +601,14 @@ EXPORT_SYMBOL_GPL(dpll_pin_put); static int __dpll_pin_register(struct dpll_device *dpll, struct dpll_pin *pin, - const struct dpll_pin_ops *ops, void *priv) + const struct dpll_pin_ops *ops, void *priv, void *cookie) { int ret; - ret = dpll_xa_ref_pin_add(&dpll->pin_refs, pin, ops, priv); + ret = dpll_xa_ref_pin_add(&dpll->pin_refs, pin, ops, priv, cookie); if (ret) return ret; - ret = dpll_xa_ref_dpll_add(&pin->dpll_refs, dpll, ops, priv); + ret = dpll_xa_ref_dpll_add(&pin->dpll_refs, dpll, ops, priv, cookie); if (ret) goto ref_pin_del; xa_set_mark(&dpll_pin_xa, pin->id, DPLL_REGISTERED); @@ -610,7 +617,7 @@ __dpll_pin_register(struct dpll_device *dpll, struct dpll_pin *pin, return ret; ref_pin_del: - dpll_xa_ref_pin_del(&dpll->pin_refs, pin, ops, priv); + dpll_xa_ref_pin_del(&dpll->pin_refs, pin, ops, priv, cookie); return ret; } @@ -642,7 +649,7 @@ dpll_pin_register(struct dpll_device *dpll, struct dpll_pin *pin, dpll->clock_id == pin->clock_id))) ret = -EINVAL; else - ret = __dpll_pin_register(dpll, pin, ops, priv); + ret = __dpll_pin_register(dpll, pin, ops, priv, NULL); mutex_unlock(&dpll_lock); return ret; @@ -651,11 +658,11 @@ EXPORT_SYMBOL_GPL(dpll_pin_register); static void __dpll_pin_unregister(struct dpll_device *dpll, struct dpll_pin *pin, - const struct dpll_pin_ops *ops, void *priv) + const struct dpll_pin_ops *ops, void *priv, void *cookie) { ASSERT_DPLL_PIN_REGISTERED(pin); - dpll_xa_ref_pin_del(&dpll->pin_refs, pin, ops, priv); - dpll_xa_ref_dpll_del(&pin->dpll_refs, dpll, ops, priv); + dpll_xa_ref_pin_del(&dpll->pin_refs, pin, ops, priv, cookie); +
dpll_xa_ref_dpll_del(&pin->dpll_refs, dpll, ops, priv, cookie); if (xa_empty(&pin->dpll_refs)) xa_clear_mark(&dpll_pin_xa, pin->id, DPLL_REGISTERED); } @@ -680,7 +687,7 @@ void dpll_pin_unregister(struct dpll_device *dpll, struct dpll_pin *pin, mutex_lock(&dpll_lock); dpll_pin_delete_ntf(pin); - __dpll_pin_unregister(dpll, pin, ops, priv); + __dpll_pin_unregister(dpll, pin, ops, priv, NULL); mutex_unlock(&dpll_lock); } EXPORT_SYMBOL_GPL(dpll_pin_unregister); @@ -716,12 +723,12 @@ int dpll_pin_on_pin_register(struct dpll_pin *parent, struct dpll_pin *pin, return -EINVAL; mutex_lock(&dpll_lock); - ret = dpll_xa_ref_pin_add(&pin->parent_refs, parent, ops, priv); + ret = dpll_xa_ref_pin_add(&pin->parent_refs, parent, ops, priv, pin); if (ret) goto unlock; refcount_inc(&pin->refcount); xa_for_each(&parent->dpll_refs, i, ref) { - ret = __dpll_pin_register(ref->dpll, pin, ops, priv); + ret = __dpll_pin_register(ref->dpll, pin, ops, priv, parent); if (ret) { stop = i; goto dpll_unregister; @@ -735,11 +742,12 @@ int dpll_pin_on_pin_register(struct dpll_pin *parent, struct dpll_pin *pin, dpll_unregister: xa_for_each(&parent->dpll_refs, i, ref) if (i < stop) { - __dpll_pin_unregister(ref->dpll, pin, ops, priv); + __dpll_pin_unregister(ref->dpll, pin, ops, priv, + parent); dpll_pin_delete_ntf(pin); } refcount_dec(&pin->refcount); - dpll_xa_ref_pin_del(&pin->parent_refs, parent, ops, priv); + dpll_xa_ref_pin_del(&pin->parent_refs, parent, ops, priv, pin); unlock: mutex_unlock(&dpll_lock); return ret; @@ -764,10 +772,10 @@ void dpll_pin_on_pin_unregister(struct dpll_pin *parent, struct dpll_pin *pin, mutex_lock(&dpll_lock); dpll_pin_delete_ntf(pin); - dpll_xa_ref_pin_del(&pin->parent_refs, parent, ops, priv); + dpll_xa_ref_pin_del(&pin->parent_refs, parent, ops, priv, pin); refcount_dec(&pin->refcount); xa_for_each(&pin->dpll_refs, i, ref) - __dpll_pin_unregister(ref->dpll, pin, ops, priv); + __dpll_pin_unregister(ref->dpll, pin, ops, priv, parent); mutex_unlock(&dpll_lock); } 
EXPORT_SYMBOL_GPL(dpll_pin_on_pin_unregister); From 0844370f8945086eb9335739d10205dcea8d707b Mon Sep 17 00:00:00 2001 From: Sabrina Dubroca Date: Wed, 24 Apr 2024 12:25:47 +0200 Subject: [PATCH 291/313] tls: fix lockless read of strp->msg_ready in ->poll tls_sk_poll is called without locking the socket, and needs to read strp->msg_ready (via tls_strp_msg_ready). Convert msg_ready to a bool and use READ_ONCE/WRITE_ONCE where needed. The remaining reads are only performed when the socket is locked. Fixes: 121dca784fc0 ("tls: suppress wakeups unless we have a full record") Signed-off-by: Sabrina Dubroca Link: https://lore.kernel.org/r/0b7ee062319037cf86af6b317b3d72f7bfcd2e97.1713797701.git.sd@queasysnail.net Signed-off-by: Jakub Kicinski --- include/net/tls.h | 3 ++- net/tls/tls.h | 2 +- net/tls/tls_strp.c | 6 +++--- 3 files changed, 6 insertions(+), 5 deletions(-) diff --git a/include/net/tls.h b/include/net/tls.h index 340ad43971e4..33f657d3c051 100644 --- a/include/net/tls.h +++ b/include/net/tls.h @@ -111,7 +111,8 @@ struct tls_strparser { u32 stopped : 1; u32 copy_mode : 1; u32 mixed_decrypted : 1; - u32 msg_ready : 1; + + bool msg_ready; struct strp_msg stm; diff --git a/net/tls/tls.h b/net/tls/tls.h index 762f424ff2d5..e5e47452308a 100644 --- a/net/tls/tls.h +++ b/net/tls/tls.h @@ -215,7 +215,7 @@ static inline struct sk_buff *tls_strp_msg(struct tls_sw_context_rx *ctx) static inline bool tls_strp_msg_ready(struct tls_sw_context_rx *ctx) { - return ctx->strp.msg_ready; + return READ_ONCE(ctx->strp.msg_ready); } static inline bool tls_strp_msg_mixed_decrypted(struct tls_sw_context_rx *ctx) diff --git a/net/tls/tls_strp.c b/net/tls/tls_strp.c index ca1e0e198ceb..5df08d848b5c 100644 --- a/net/tls/tls_strp.c +++ b/net/tls/tls_strp.c @@ -360,7 +360,7 @@ static int tls_strp_copyin(read_descriptor_t *desc, struct sk_buff *in_skb, if (strp->stm.full_len && strp->stm.full_len == skb->len) { desc->count = 0; - strp->msg_ready = 1; + WRITE_ONCE(strp->msg_ready, 1); 
tls_rx_msg_ready(strp); } @@ -528,7 +528,7 @@ static int tls_strp_read_sock(struct tls_strparser *strp) if (!tls_strp_check_queue_ok(strp)) return tls_strp_read_copy(strp, false); - strp->msg_ready = 1; + WRITE_ONCE(strp->msg_ready, 1); tls_rx_msg_ready(strp); return 0; @@ -580,7 +580,7 @@ void tls_strp_msg_done(struct tls_strparser *strp) else tls_strp_flush_anchor_copy(strp); - strp->msg_ready = 0; + WRITE_ONCE(strp->msg_ready, 0); memset(&strp->stm, 0, sizeof(strp->stm)); tls_strp_check_rcv(strp); From e3eb7dd47bd4806f00e104eb6da092c435f9fb21 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Peter=20M=C3=BCnster?= Date: Wed, 24 Apr 2024 15:51:52 +0200 Subject: [PATCH 292/313] net: b44: set pause params only when interface is up MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit b44_free_rings() accesses b44::rx_buffers (and ::tx_buffers) unconditionally, but b44::rx_buffers is only valid when the device is up (they get allocated in b44_open(), and deallocated again in b44_close()), any other time these are just NULL pointers. So if you try to change the pause params while the network interface is disabled/administratively down, everything explodes (which likely netifd tries to do). 
Link: https://github.com/openwrt/openwrt/issues/13789 Fixes: 1da177e4c3f4 (Linux-2.6.12-rc2) Cc: stable@vger.kernel.org Reported-by: Peter Münster Suggested-by: Jonas Gorski Signed-off-by: Vaclav Svoboda Tested-by: Peter Münster Reviewed-by: Andrew Lunn Signed-off-by: Peter Münster Reviewed-by: Michael Chan Link: https://lore.kernel.org/r/87y192oolj.fsf@a16n.net Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/broadcom/b44.c | 14 ++++++++------ 1 file changed, 8 insertions(+), 6 deletions(-) diff --git a/drivers/net/ethernet/broadcom/b44.c b/drivers/net/ethernet/broadcom/b44.c index 3e4fb3c3e834..1be6d14030bc 100644 --- a/drivers/net/ethernet/broadcom/b44.c +++ b/drivers/net/ethernet/broadcom/b44.c @@ -2009,12 +2009,14 @@ static int b44_set_pauseparam(struct net_device *dev, bp->flags |= B44_FLAG_TX_PAUSE; else bp->flags &= ~B44_FLAG_TX_PAUSE; - if (bp->flags & B44_FLAG_PAUSE_AUTO) { - b44_halt(bp); - b44_init_rings(bp); - b44_init_hw(bp, B44_FULL_RESET); - } else { - __b44_set_flow_ctrl(bp, bp->flags); + if (netif_running(dev)) { + if (bp->flags & B44_FLAG_PAUSE_AUTO) { + b44_halt(bp); + b44_init_rings(bp); + b44_init_hw(bp, B44_FULL_RESET); + } else { + __b44_set_flow_ctrl(bp, bp->flags); + } } spin_unlock_irq(&bp->lock); From ded103c7eb23753f22597afa500a7c1ad34116ba Mon Sep 17 00:00:00 2001 From: Miguel Ojeda Date: Mon, 22 Apr 2024 11:06:44 +0200 Subject: [PATCH 293/313] kbuild: rust: force `alloc` extern to allow "empty" Rust files If one attempts to build an essentially empty file somewhere in the kernel tree, it leads to a build error because the compiler does not recognize the `new_uninit` unstable feature: error[E0635]: unknown feature `new_uninit` --> :1:9 | 1 | feature(new_uninit) | ^^^^^^^^^^ The reason is that we pass `-Zcrate-attr='feature(new_uninit)'` (together with `-Zallow-features=new_uninit`) to let non-`rust/` code use that unstable feature. 
However, the compiler only recognizes the feature if the `alloc` crate is resolved (the feature is an `alloc` one). `--extern alloc`, which we pass, is not enough to resolve the crate. Introducing a reference like `use alloc;` or `extern crate alloc;` solves the issue, thus this is not seen in normal files. For instance, `use`ing the `kernel` prelude introduces such a reference, since `alloc` is used inside. While normal use of the build system is not impacted by this, it can still be fairly confusing for kernel developers [1], thus use the unstable `force` option of `--extern` [2] (added in Rust 1.71 [3]) to force the compiler to resolve `alloc`. This new unstable feature is only needed while we use the other unstable feature, since then we will not need `-Zcrate-attr`. Cc: stable@vger.kernel.org # v6.6+ Reported-by: Daniel Almeida Reported-by: Julian Stecklina Closes: https://rust-for-linux.zulipchat.com/#narrow/stream/288089-General/topic/x/near/424096982 [1] Fixes: 2f7ab1267dc9 ("Kbuild: add Rust support") Link: https://github.com/rust-lang/rust/issues/111302 [2] Link: https://github.com/rust-lang/rust/pull/109421 [3] Reviewed-by: Alice Ryhl Reviewed-by: Gary Guo Link: https://lore.kernel.org/r/20240422090644.525520-1-ojeda@kernel.org Signed-off-by: Miguel Ojeda --- scripts/Makefile.build | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/scripts/Makefile.build b/scripts/Makefile.build index baf86c0880b6..533a7799fdfe 100644 --- a/scripts/Makefile.build +++ b/scripts/Makefile.build @@ -273,7 +273,7 @@ rust_common_cmd = \ -Zallow-features=$(rust_allowed_features) \ -Zcrate-attr=no_std \ -Zcrate-attr='feature($(rust_allowed_features))' \ - --extern alloc --extern kernel \ + -Zunstable-options --extern force:alloc --extern kernel \ --crate-type rlib -L $(objtree)/rust/ \ --crate-name $(basename $(notdir $@)) \ --sysroot=/dev/null \ From 19843452dca40e28d6d3f4793d998b681d505c7f Mon Sep 17 00:00:00 2001 From: Aswin Unnikrishnan Date: Fri, 19 Apr 
2024 21:50:13 +0000 Subject: [PATCH 294/313] rust: remove `params` from `module` macro example Remove argument `params` from the `module` macro example, because the macro does not currently support module parameters since it was not sent with the initial merge. Signed-off-by: Aswin Unnikrishnan Reviewed-by: Alice Ryhl Cc: stable@vger.kernel.org Fixes: 1fbde52bde73 ("rust: add `macros` crate") Link: https://lore.kernel.org/r/20240419215015.157258-1-aswinunni01@gmail.com [ Reworded slightly. ] Signed-off-by: Miguel Ojeda --- rust/macros/lib.rs | 12 ------------ 1 file changed, 12 deletions(-) diff --git a/rust/macros/lib.rs b/rust/macros/lib.rs index f489f3157383..520eae5fd792 100644 --- a/rust/macros/lib.rs +++ b/rust/macros/lib.rs @@ -35,18 +35,6 @@ /// author: "Rust for Linux Contributors", /// description: "My very own kernel module!", /// license: "GPL", -/// params: { -/// my_i32: i32 { -/// default: 42, -/// permissions: 0o000, -/// description: "Example of i32", -/// }, -/// writeable_i32: i32 { -/// default: 42, -/// permissions: 0o644, -/// description: "Example of i32", -/// }, -/// }, /// } /// /// struct MyModule; From 1971d13ffa84a551d29a81fdf5b5ec5be166ac83 Mon Sep 17 00:00:00 2001 From: Kuniyuki Iwashima Date: Wed, 24 Apr 2024 10:04:43 -0700 Subject: [PATCH 295/313] af_unix: Suppress false-positive lockdep splat for spin_lock() in __unix_gc(). syzbot reported a lockdep splat regarding unix_gc_lock and unix_state_lock(). One is called from recvmsg() for a connected socket, and another is called from GC for TCP_LISTEN socket. So, the splat is false-positive. Let's add a dedicated lock class for the latter to suppress the splat. Note that this change is not necessary for net-next.git as the issue is only applied to the old GC impl. 
[0]: WARNING: possible circular locking dependency detected 6.9.0-rc5-syzkaller-00007-g4d2008430ce8 #0 Not tainted ----------------------------------------------------- kworker/u8:1/11 is trying to acquire lock: ffff88807cea4e70 (&u->lock){+.+.}-{2:2}, at: spin_lock include/linux/spinlock.h:351 [inline] ffff88807cea4e70 (&u->lock){+.+.}-{2:2}, at: __unix_gc+0x40e/0xf70 net/unix/garbage.c:302 but task is already holding lock: ffffffff8f6ab638 (unix_gc_lock){+.+.}-{2:2}, at: spin_lock include/linux/spinlock.h:351 [inline] ffffffff8f6ab638 (unix_gc_lock){+.+.}-{2:2}, at: __unix_gc+0x117/0xf70 net/unix/garbage.c:261 which lock already depends on the new lock. the existing dependency chain (in reverse order) is: -> #1 (unix_gc_lock){+.+.}-{2:2}: lock_acquire+0x1ed/0x550 kernel/locking/lockdep.c:5754 __raw_spin_lock include/linux/spinlock_api_smp.h:133 [inline] _raw_spin_lock+0x2e/0x40 kernel/locking/spinlock.c:154 spin_lock include/linux/spinlock.h:351 [inline] unix_notinflight+0x13d/0x390 net/unix/garbage.c:140 unix_detach_fds net/unix/af_unix.c:1819 [inline] unix_destruct_scm+0x221/0x350 net/unix/af_unix.c:1876 skb_release_head_state+0x100/0x250 net/core/skbuff.c:1188 skb_release_all net/core/skbuff.c:1200 [inline] __kfree_skb net/core/skbuff.c:1216 [inline] kfree_skb_reason+0x16d/0x3b0 net/core/skbuff.c:1252 kfree_skb include/linux/skbuff.h:1262 [inline] manage_oob net/unix/af_unix.c:2672 [inline] unix_stream_read_generic+0x1125/0x2700 net/unix/af_unix.c:2749 unix_stream_splice_read+0x239/0x320 net/unix/af_unix.c:2981 do_splice_read fs/splice.c:985 [inline] splice_file_to_pipe+0x299/0x500 fs/splice.c:1295 do_splice+0xf2d/0x1880 fs/splice.c:1379 __do_splice fs/splice.c:1436 [inline] __do_sys_splice fs/splice.c:1652 [inline] __se_sys_splice+0x331/0x4a0 fs/splice.c:1634 do_syscall_x64 arch/x86/entry/common.c:52 [inline] do_syscall_64+0xf5/0x240 arch/x86/entry/common.c:83 entry_SYSCALL_64_after_hwframe+0x77/0x7f -> #0 (&u->lock){+.+.}-{2:2}: check_prev_add 
kernel/locking/lockdep.c:3134 [inline] check_prevs_add kernel/locking/lockdep.c:3253 [inline] validate_chain+0x18cb/0x58e0 kernel/locking/lockdep.c:3869 __lock_acquire+0x1346/0x1fd0 kernel/locking/lockdep.c:5137 lock_acquire+0x1ed/0x550 kernel/locking/lockdep.c:5754 __raw_spin_lock include/linux/spinlock_api_smp.h:133 [inline] _raw_spin_lock+0x2e/0x40 kernel/locking/spinlock.c:154 spin_lock include/linux/spinlock.h:351 [inline] __unix_gc+0x40e/0xf70 net/unix/garbage.c:302 process_one_work kernel/workqueue.c:3254 [inline] process_scheduled_works+0xa10/0x17c0 kernel/workqueue.c:3335 worker_thread+0x86d/0xd70 kernel/workqueue.c:3416 kthread+0x2f0/0x390 kernel/kthread.c:388 ret_from_fork+0x4b/0x80 arch/x86/kernel/process.c:147 ret_from_fork_asm+0x1a/0x30 arch/x86/entry/entry_64.S:244 other info that might help us debug this: Possible unsafe locking scenario: CPU0 CPU1 ---- ---- lock(unix_gc_lock); lock(&u->lock); lock(unix_gc_lock); lock(&u->lock); *** DEADLOCK *** 3 locks held by kworker/u8:1/11: #0: ffff888015089148 ((wq_completion)events_unbound){+.+.}-{0:0}, at: process_one_work kernel/workqueue.c:3229 [inline] #0: ffff888015089148 ((wq_completion)events_unbound){+.+.}-{0:0}, at: process_scheduled_works+0x8e0/0x17c0 kernel/workqueue.c:3335 #1: ffffc90000107d00 (unix_gc_work){+.+.}-{0:0}, at: process_one_work kernel/workqueue.c:3230 [inline] #1: ffffc90000107d00 (unix_gc_work){+.+.}-{0:0}, at: process_scheduled_works+0x91b/0x17c0 kernel/workqueue.c:3335 #2: ffffffff8f6ab638 (unix_gc_lock){+.+.}-{2:2}, at: spin_lock include/linux/spinlock.h:351 [inline] #2: ffffffff8f6ab638 (unix_gc_lock){+.+.}-{2:2}, at: __unix_gc+0x117/0xf70 net/unix/garbage.c:261 stack backtrace: CPU: 0 PID: 11 Comm: kworker/u8:1 Not tainted 6.9.0-rc5-syzkaller-00007-g4d2008430ce8 #0 Hardware name: Google Google Compute Engine/Google Compute Engine, BIOS Google 03/27/2024 Workqueue: events_unbound __unix_gc Call Trace: __dump_stack lib/dump_stack.c:88 [inline] dump_stack_lvl+0x241/0x360 
lib/dump_stack.c:114 check_noncircular+0x36a/0x4a0 kernel/locking/lockdep.c:2187 check_prev_add kernel/locking/lockdep.c:3134 [inline] check_prevs_add kernel/locking/lockdep.c:3253 [inline] validate_chain+0x18cb/0x58e0 kernel/locking/lockdep.c:3869 __lock_acquire+0x1346/0x1fd0 kernel/locking/lockdep.c:5137 lock_acquire+0x1ed/0x550 kernel/locking/lockdep.c:5754 __raw_spin_lock include/linux/spinlock_api_smp.h:133 [inline] _raw_spin_lock+0x2e/0x40 kernel/locking/spinlock.c:154 spin_lock include/linux/spinlock.h:351 [inline] __unix_gc+0x40e/0xf70 net/unix/garbage.c:302 process_one_work kernel/workqueue.c:3254 [inline] process_scheduled_works+0xa10/0x17c0 kernel/workqueue.c:3335 worker_thread+0x86d/0xd70 kernel/workqueue.c:3416 kthread+0x2f0/0x390 kernel/kthread.c:388 ret_from_fork+0x4b/0x80 arch/x86/kernel/process.c:147 ret_from_fork_asm+0x1a/0x30 arch/x86/entry/entry_64.S:244 Fixes: 47d8ac011fe1 ("af_unix: Fix garbage collector racing against connect()") Reported-and-tested-by: syzbot+fa379358c28cc87cc307@syzkaller.appspotmail.com Closes: https://syzkaller.appspot.com/bug?extid=fa379358c28cc87cc307 Signed-off-by: Kuniyuki Iwashima Link: https://lore.kernel.org/r/20240424170443.9832-1-kuniyu@amazon.com Signed-off-by: Jakub Kicinski --- include/net/af_unix.h | 3 +++ net/unix/garbage.c | 2 +- 2 files changed, 4 insertions(+), 1 deletion(-) diff --git a/include/net/af_unix.h b/include/net/af_unix.h index 627ea8e2d915..3dee0b2721aa 100644 --- a/include/net/af_unix.h +++ b/include/net/af_unix.h @@ -85,6 +85,9 @@ enum unix_socket_lock_class { U_LOCK_NORMAL, U_LOCK_SECOND, /* for double locking, see unix_state_double_lock(). */ U_LOCK_DIAG, /* used while dumping icons, see sk_diag_dump_icons(). */ + U_LOCK_GC_LISTENER, /* used for listening socket while determining gc + * candidates to close a small race window. 
+ */ }; static inline void unix_state_lock_nested(struct sock *sk, diff --git a/net/unix/garbage.c b/net/unix/garbage.c index 6433a414acf8..0104be9d4704 100644 --- a/net/unix/garbage.c +++ b/net/unix/garbage.c @@ -299,7 +299,7 @@ static void __unix_gc(struct work_struct *work) __set_bit(UNIX_GC_MAYBE_CYCLE, &u->gc_flags); if (sk->sk_state == TCP_LISTEN) { - unix_state_lock(sk); + unix_state_lock_nested(sk, U_LOCK_GC_LISTENER); unix_state_unlock(sk); } } From e027e72ecc1683e04f33aedf0196ad6c3278d309 Mon Sep 17 00:00:00 2001 From: Sergei Antonov Date: Mon, 22 Apr 2024 18:36:07 +0300 Subject: [PATCH 296/313] mmc: moxart: fix handling of sgm->consumed, otherwise WARN_ON triggers When e.g. 8 bytes are to be read, sgm->consumed equals 8 immediately after sg_miter_next() call. The driver then increments it as bytes are read, so sgm->consumed becomes 16 and this warning triggers in sg_miter_stop(): WARN_ON(miter->consumed > miter->length); WARNING: CPU: 0 PID: 28 at lib/scatterlist.c:925 sg_miter_stop+0x2c/0x10c CPU: 0 PID: 28 Comm: kworker/0:2 Tainted: G W 6.9.0-rc5-dirty #249 Hardware name: Generic DT based system Workqueue: events_freezable mmc_rescan Call trace:. 
unwind_backtrace from show_stack+0x10/0x14 show_stack from dump_stack_lvl+0x44/0x5c dump_stack_lvl from __warn+0x78/0x16c __warn from warn_slowpath_fmt+0xb0/0x160 warn_slowpath_fmt from sg_miter_stop+0x2c/0x10c sg_miter_stop from moxart_request+0xb0/0x468 moxart_request from mmc_start_request+0x94/0xa8 mmc_start_request from mmc_wait_for_req+0x60/0xa8 mmc_wait_for_req from mmc_app_send_scr+0xf8/0x150 mmc_app_send_scr from mmc_sd_setup_card+0x1c/0x420 mmc_sd_setup_card from mmc_sd_init_card+0x12c/0x4dc mmc_sd_init_card from mmc_attach_sd+0xf0/0x16c mmc_attach_sd from mmc_rescan+0x1e0/0x298 mmc_rescan from process_scheduled_works+0x2e4/0x4ec process_scheduled_works from worker_thread+0x1ec/0x24c worker_thread from kthread+0xd4/0xe0 kthread from ret_from_fork+0x14/0x38 This patch adds initial zeroing of sgm->consumed. It is then incremented as bytes are read or written. Signed-off-by: Sergei Antonov Cc: Linus Walleij Fixes: 3ee0e7c3e67c ("mmc: moxart-mmc: Use sg_miter for PIO") Reviewed-by: Linus Walleij Link: https://lore.kernel.org/r/20240422153607.963672-1-saproj@gmail.com Signed-off-by: Ulf Hansson --- drivers/mmc/host/moxart-mmc.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/mmc/host/moxart-mmc.c b/drivers/mmc/host/moxart-mmc.c index b88d6dec209f..9a5f75163aca 100644 --- a/drivers/mmc/host/moxart-mmc.c +++ b/drivers/mmc/host/moxart-mmc.c @@ -300,6 +300,7 @@ static void moxart_transfer_pio(struct moxart_host *host) remain = sgm->length; if (remain > host->data_len) remain = host->data_len; + sgm->consumed = 0; if (data->flags & MMC_DATA_WRITE) { while (remain > 0) { From 17c67ed752d6a456602b3dbb25c5ae4d3de5deab Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Cl=C3=A9ment=20L=C3=A9ger?= Date: Wed, 6 Dec 2023 14:44:37 +0100 Subject: [PATCH 297/313] selftests: sud_test: return correct emulated syscall value on RISC-V MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Currently, the sud_test expects the emulated syscall to 
return the emulated syscall number. This assumption only works on architectures where the syscall calling convention uses the same register for syscall number/syscall return value. This is not the case for RISC-V and thus the return value must also be emulated using the provided ucontext. Signed-off-by: Clément Léger Reviewed-by: Palmer Dabbelt Acked-by: Palmer Dabbelt Link: https://lore.kernel.org/r/20231206134438.473166-1-cleger@rivosinc.com Signed-off-by: Palmer Dabbelt --- .../selftests/syscall_user_dispatch/sud_test.c | 14 ++++++++++++++ 1 file changed, 14 insertions(+) diff --git a/tools/testing/selftests/syscall_user_dispatch/sud_test.c b/tools/testing/selftests/syscall_user_dispatch/sud_test.c index b5d592d4099e..d975a6767329 100644 --- a/tools/testing/selftests/syscall_user_dispatch/sud_test.c +++ b/tools/testing/selftests/syscall_user_dispatch/sud_test.c @@ -158,6 +158,20 @@ static void handle_sigsys(int sig, siginfo_t *info, void *ucontext) /* In preparation for sigreturn. */ SYSCALL_DISPATCH_OFF(glob_sel); + + /* + * The tests for argument handling assume that `syscall(x) == x`. This + * is a NOP on x86 because the syscall number is passed in %rax, which + * happens to also be the function ABI return register. Other + * architectures may need to swizzle the arguments around. + */ +#if defined(__riscv) +/* REG_A7 is not defined in libc headers */ +# define REG_A7 (REG_A0 + 7) + + ((ucontext_t *)ucontext)->uc_mcontext.__gregs[REG_A0] = + ((ucontext_t *)ucontext)->uc_mcontext.__gregs[REG_A7]; +#endif } TEST(dispatch_and_return) From ed74abcd1da0244c3c3be865587dc2727148ee83 Mon Sep 17 00:00:00 2001 From: Muhammad Usama Anjum Date: Fri, 19 Apr 2024 16:50:27 +0500 Subject: [PATCH 298/313] selftests: mm: protection_keys: save/restore nr_hugepages value from launch script The save/restore of nr_hugepages was added to the test itself by using the atexit() functionality. But it is broken as parent exits after creating child. 
Hence calling the atexit() function early. That's not it. The child exits after creating its child and so on. The parent cannot wait to get the termination status for its children as it'll keep on holding the resources until the new pkey allocation fails. It is impossible to wait for exits of all the grand and great grand children. Hence the restoring of nr_hugepages value from parent is wrong. Let's save/restore the nr_hugepages settings in the launch script instead of doing it in the test. Link: https://lkml.kernel.org/r/20240419115027.3848958-1-usama.anjum@collabora.com Fixes: c52eb6db7b7d ("selftests: mm: restore settings from only parent process") Signed-off-by: Muhammad Usama Anjum Reported-by: Joey Gouly Closes: https://lore.kernel.org/all/20240418125250.GA2941398@e124191.cambridge.arm.com Cc: Joey Gouly Cc: Shuah Khan Signed-off-by: Andrew Morton --- tools/testing/selftests/mm/protection_keys.c | 38 -------------------- tools/testing/selftests/mm/run_vmtests.sh | 2 ++ 2 files changed, 2 insertions(+), 38 deletions(-) diff --git a/tools/testing/selftests/mm/protection_keys.c b/tools/testing/selftests/mm/protection_keys.c index 374a308174d2..48dc151f8fca 100644 --- a/tools/testing/selftests/mm/protection_keys.c +++ b/tools/testing/selftests/mm/protection_keys.c @@ -54,7 +54,6 @@ int test_nr; u64 shadow_pkey_reg; int dprint_in_signal; char dprint_in_signal_buffer[DPRINT_IN_SIGNAL_BUF_SIZE]; -char buf[256]; void cat_into_file(char *str, char *file) { @@ -1745,42 +1744,6 @@ void pkey_setup_shadow(void) shadow_pkey_reg = __read_pkey_reg(); } -pid_t parent_pid; - -void restore_settings_atexit(void) -{ - if (parent_pid == getpid()) - cat_into_file(buf, "/proc/sys/vm/nr_hugepages"); -} - -void save_settings(void) -{ - int fd; - int err; - - if (geteuid()) - return; - - fd = open("/proc/sys/vm/nr_hugepages", O_RDONLY); - if (fd < 0) { - fprintf(stderr, "error opening\n"); - perror("error: "); - exit(__LINE__); - } - - /* -1 to guarantee leaving the trailing \0 */ - 
err = read(fd, buf, sizeof(buf)-1); - if (err < 0) { - fprintf(stderr, "error reading\n"); - perror("error: "); - exit(__LINE__); - } - - parent_pid = getpid(); - atexit(restore_settings_atexit); - close(fd); -} - int main(void) { int nr_iterations = 22; @@ -1788,7 +1751,6 @@ int main(void) srand((unsigned int)time(NULL)); - save_settings(); setup_handlers(); printf("has pkeys: %d\n", pkeys_supported); diff --git a/tools/testing/selftests/mm/run_vmtests.sh b/tools/testing/selftests/mm/run_vmtests.sh index c2c542fe7b17..4bdb3a0c7a60 100755 --- a/tools/testing/selftests/mm/run_vmtests.sh +++ b/tools/testing/selftests/mm/run_vmtests.sh @@ -385,6 +385,7 @@ CATEGORY="ksm_numa" run_test ./ksm_tests -N -m 0 CATEGORY="ksm" run_test ./ksm_functional_tests # protection_keys tests +nr_hugepgs=$(cat /proc/sys/vm/nr_hugepages) if [ -x ./protection_keys_32 ] then CATEGORY="pkey" run_test ./protection_keys_32 @@ -394,6 +395,7 @@ if [ -x ./protection_keys_64 ] then CATEGORY="pkey" run_test ./protection_keys_64 fi +echo "$nr_hugepgs" > /proc/sys/vm/nr_hugepages if [ -x ./soft-dirty ] then From 52ccdde16b6540abe43b6f8d8e1e1ec90b0983af Mon Sep 17 00:00:00 2001 From: Miaohe Lin Date: Fri, 19 Apr 2024 16:58:19 +0800 Subject: [PATCH 299/313] mm/hugetlb: fix DEBUG_LOCKS_WARN_ON(1) when dissolve_free_hugetlb_folio() When I did memory failure tests recently, below warning occurs: DEBUG_LOCKS_WARN_ON(1) WARNING: CPU: 8 PID: 1011 at kernel/locking/lockdep.c:232 __lock_acquire+0xccb/0x1ca0 Modules linked in: mce_inject hwpoison_inject CPU: 8 PID: 1011 Comm: bash Kdump: loaded Not tainted 6.9.0-rc3-next-20240410-00012-gdb69f219f4be #3 Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS rel-1.14.0-0-g155821a1990b-prebuilt.qemu.org 04/01/2014 RIP: 0010:__lock_acquire+0xccb/0x1ca0 RSP: 0018:ffffa7a1c7fe3bd0 EFLAGS: 00000082 RAX: 0000000000000000 RBX: eb851eb853975fcf RCX: ffffa1ce5fc1c9c8 RDX: 00000000ffffffd8 RSI: 0000000000000027 RDI: ffffa1ce5fc1c9c0 RBP: ffffa1c6865d3280 R08: 
ffffffffb0f570a8 R09: 0000000000009ffb R10: 0000000000000286 R11: ffffffffb0f2ad50 R12: ffffa1c6865d3d10 R13: ffffa1c6865d3c70 R14: 0000000000000000 R15: 0000000000000004 FS: 00007ff9f32aa740(0000) GS:ffffa1ce5fc00000(0000) knlGS:0000000000000000 CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 CR2: 00007ff9f3134ba0 CR3: 00000008484e4000 CR4: 00000000000006f0 Call Trace: lock_acquire+0xbe/0x2d0 _raw_spin_lock_irqsave+0x3a/0x60 hugepage_subpool_put_pages.part.0+0xe/0xc0 free_huge_folio+0x253/0x3f0 dissolve_free_huge_page+0x147/0x210 __page_handle_poison+0x9/0x70 memory_failure+0x4e6/0x8c0 hard_offline_page_store+0x55/0xa0 kernfs_fop_write_iter+0x12c/0x1d0 vfs_write+0x380/0x540 ksys_write+0x64/0xe0 do_syscall_64+0xbc/0x1d0 entry_SYSCALL_64_after_hwframe+0x77/0x7f RIP: 0033:0x7ff9f3114887 RSP: 002b:00007ffecbacb458 EFLAGS: 00000246 ORIG_RAX: 0000000000000001 RAX: ffffffffffffffda RBX: 000000000000000c RCX: 00007ff9f3114887 RDX: 000000000000000c RSI: 0000564494164e10 RDI: 0000000000000001 RBP: 0000564494164e10 R08: 00007ff9f31d1460 R09: 000000007fffffff R10: 0000000000000000 R11: 0000000000000246 R12: 000000000000000c R13: 00007ff9f321b780 R14: 00007ff9f3217600 R15: 00007ff9f3216a00 Kernel panic - not syncing: kernel: panic_on_warn set ... 
CPU: 8 PID: 1011 Comm: bash Kdump: loaded Not tainted 6.9.0-rc3-next-20240410-00012-gdb69f219f4be #3 Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS rel-1.14.0-0-g155821a1990b-prebuilt.qemu.org 04/01/2014 Call Trace: panic+0x326/0x350 check_panic_on_warn+0x4f/0x50 __warn+0x98/0x190 report_bug+0x18e/0x1a0 handle_bug+0x3d/0x70 exc_invalid_op+0x18/0x70 asm_exc_invalid_op+0x1a/0x20 RIP: 0010:__lock_acquire+0xccb/0x1ca0 RSP: 0018:ffffa7a1c7fe3bd0 EFLAGS: 00000082 RAX: 0000000000000000 RBX: eb851eb853975fcf RCX: ffffa1ce5fc1c9c8 RDX: 00000000ffffffd8 RSI: 0000000000000027 RDI: ffffa1ce5fc1c9c0 RBP: ffffa1c6865d3280 R08: ffffffffb0f570a8 R09: 0000000000009ffb R10: 0000000000000286 R11: ffffffffb0f2ad50 R12: ffffa1c6865d3d10 R13: ffffa1c6865d3c70 R14: 0000000000000000 R15: 0000000000000004 lock_acquire+0xbe/0x2d0 _raw_spin_lock_irqsave+0x3a/0x60 hugepage_subpool_put_pages.part.0+0xe/0xc0 free_huge_folio+0x253/0x3f0 dissolve_free_huge_page+0x147/0x210 __page_handle_poison+0x9/0x70 memory_failure+0x4e6/0x8c0 hard_offline_page_store+0x55/0xa0 kernfs_fop_write_iter+0x12c/0x1d0 vfs_write+0x380/0x540 ksys_write+0x64/0xe0 do_syscall_64+0xbc/0x1d0 entry_SYSCALL_64_after_hwframe+0x77/0x7f RIP: 0033:0x7ff9f3114887 RSP: 002b:00007ffecbacb458 EFLAGS: 00000246 ORIG_RAX: 0000000000000001 RAX: ffffffffffffffda RBX: 000000000000000c RCX: 00007ff9f3114887 RDX: 000000000000000c RSI: 0000564494164e10 RDI: 0000000000000001 RBP: 0000564494164e10 R08: 00007ff9f31d1460 R09: 000000007fffffff R10: 0000000000000000 R11: 0000000000000246 R12: 000000000000000c R13: 00007ff9f321b780 R14: 00007ff9f3217600 R15: 00007ff9f3216a00 After git bisecting and digging into the code, I believe the root cause is that _deferred_list field of folio is unioned with _hugetlb_subpool field. In __update_and_free_hugetlb_folio(), folio->_deferred_list is initialized leading to corrupted folio->_hugetlb_subpool when folio is hugetlb. Later free_huge_folio() will use _hugetlb_subpool and above warning happens. 
But it is assumed hugetlb flag must have been cleared when calling folio_put() in update_and_free_hugetlb_folio(). This assumption is broken due to below race: CPU1 CPU2 dissolve_free_huge_page update_and_free_pages_bulk update_and_free_hugetlb_folio hugetlb_vmemmap_restore_folios folio_clear_hugetlb_vmemmap_optimized clear_flag = folio_test_hugetlb_vmemmap_optimized if (clear_flag) <-- False, it's already cleared. __folio_clear_hugetlb(folio) <-- Hugetlb is not cleared. folio_put free_huge_folio <-- free_the_page is expected. list_for_each_entry() __folio_clear_hugetlb <-- Too late. Fix this issue by checking whether folio is hugetlb directly instead of checking clear_flag to close the race window. Link: https://lkml.kernel.org/r/20240419085819.1901645-1-linmiaohe@huawei.com Fixes: 32c877191e02 ("hugetlb: do not clear hugetlb dtor until allocating vmemmap") Signed-off-by: Miaohe Lin Reviewed-by: Oscar Salvador Cc: Signed-off-by: Andrew Morton --- mm/hugetlb.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/mm/hugetlb.c b/mm/hugetlb.c index 05371bf54f96..ce7be5c24442 100644 --- a/mm/hugetlb.c +++ b/mm/hugetlb.c @@ -1781,7 +1781,7 @@ static void __update_and_free_hugetlb_folio(struct hstate *h, * If vmemmap pages were allocated above, then we need to clear the * hugetlb destructor under the hugetlb lock. */ - if (clear_dtor) { + if (folio_test_hugetlb(folio)) { spin_lock_irq(&hugetlb_lock); __clear_hugetlb_destructor(h, folio); spin_unlock_irq(&hugetlb_lock); From 6179d4a213006491ff0d50073256f21fad22149b Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Christoph=20M=C3=BCllner?= Date: Sun, 7 Apr 2024 23:32:35 +0200 Subject: [PATCH 300/313] riscv: thead: Rename T-Head PBMT to MAE MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit T-Head's vendor extension to set page attributes has the name MAE (memory attribute extension). Let's rename it, so it is clear what this refers to. 
Link: https://github.com/T-head-Semi/thead-extension-spec/blob/master/xtheadmae.adoc Reviewed-by: Conor Dooley Signed-off-by: Christoph Müllner Link: https://lore.kernel.org/r/20240407213236.2121592-2-christoph.muellner@vrull.eu Signed-off-by: Palmer Dabbelt --- arch/riscv/Kconfig.errata | 8 ++++---- arch/riscv/errata/thead/errata.c | 10 +++++----- arch/riscv/include/asm/errata_list.h | 20 ++++++++++---------- 3 files changed, 19 insertions(+), 19 deletions(-) diff --git a/arch/riscv/Kconfig.errata b/arch/riscv/Kconfig.errata index 910ba8837add..2acc7d876e1f 100644 --- a/arch/riscv/Kconfig.errata +++ b/arch/riscv/Kconfig.errata @@ -82,14 +82,14 @@ config ERRATA_THEAD Otherwise, please say "N" here to avoid unnecessary overhead. -config ERRATA_THEAD_PBMT - bool "Apply T-Head memory type errata" +config ERRATA_THEAD_MAE + bool "Apply T-Head's memory attribute extension (XTheadMae) errata" depends on ERRATA_THEAD && 64BIT && MMU select RISCV_ALTERNATIVE_EARLY default y help - This will apply the memory type errata to handle the non-standard - memory type bits in page-table-entries on T-Head SoCs. + This will apply the memory attribute extension errata to handle the + non-standard PTE utilization on T-Head SoCs (XTheadMae). If you don't know what to do here, say "Y". 
diff --git a/arch/riscv/errata/thead/errata.c b/arch/riscv/errata/thead/errata.c index b1c410bbc1ae..6e7ee1f16bee 100644 --- a/arch/riscv/errata/thead/errata.c +++ b/arch/riscv/errata/thead/errata.c @@ -19,10 +19,10 @@ #include #include -static bool errata_probe_pbmt(unsigned int stage, - unsigned long arch_id, unsigned long impid) +static bool errata_probe_mae(unsigned int stage, + unsigned long arch_id, unsigned long impid) { - if (!IS_ENABLED(CONFIG_ERRATA_THEAD_PBMT)) + if (!IS_ENABLED(CONFIG_ERRATA_THEAD_MAE)) return false; if (arch_id != 0 || impid != 0) @@ -140,8 +140,8 @@ static u32 thead_errata_probe(unsigned int stage, { u32 cpu_req_errata = 0; - if (errata_probe_pbmt(stage, archid, impid)) - cpu_req_errata |= BIT(ERRATA_THEAD_PBMT); + if (errata_probe_mae(stage, archid, impid)) + cpu_req_errata |= BIT(ERRATA_THEAD_MAE); errata_probe_cmo(stage, archid, impid); diff --git a/arch/riscv/include/asm/errata_list.h b/arch/riscv/include/asm/errata_list.h index ea33288f8a25..9bad9dfa2f7a 100644 --- a/arch/riscv/include/asm/errata_list.h +++ b/arch/riscv/include/asm/errata_list.h @@ -23,7 +23,7 @@ #endif #ifdef CONFIG_ERRATA_THEAD -#define ERRATA_THEAD_PBMT 0 +#define ERRATA_THEAD_MAE 0 #define ERRATA_THEAD_PMU 1 #define ERRATA_THEAD_NUMBER 2 #endif @@ -53,20 +53,20 @@ asm(ALTERNATIVE("sfence.vma %0", "sfence.vma", SIFIVE_VENDOR_ID, \ * in the default case. 
*/ #define ALT_SVPBMT_SHIFT 61 -#define ALT_THEAD_PBMT_SHIFT 59 +#define ALT_THEAD_MAE_SHIFT 59 #define ALT_SVPBMT(_val, prot) \ asm(ALTERNATIVE_2("li %0, 0\t\nnop", \ "li %0, %1\t\nslli %0,%0,%3", 0, \ RISCV_ISA_EXT_SVPBMT, CONFIG_RISCV_ISA_SVPBMT, \ "li %0, %2\t\nslli %0,%0,%4", THEAD_VENDOR_ID, \ - ERRATA_THEAD_PBMT, CONFIG_ERRATA_THEAD_PBMT) \ + ERRATA_THEAD_MAE, CONFIG_ERRATA_THEAD_MAE) \ : "=r"(_val) \ : "I"(prot##_SVPBMT >> ALT_SVPBMT_SHIFT), \ - "I"(prot##_THEAD >> ALT_THEAD_PBMT_SHIFT), \ + "I"(prot##_THEAD >> ALT_THEAD_MAE_SHIFT), \ "I"(ALT_SVPBMT_SHIFT), \ - "I"(ALT_THEAD_PBMT_SHIFT)) + "I"(ALT_THEAD_MAE_SHIFT)) -#ifdef CONFIG_ERRATA_THEAD_PBMT +#ifdef CONFIG_ERRATA_THEAD_MAE /* * IO/NOCACHE memory types are handled together with svpbmt, * so on T-Head chips, check if no other memory type is set, @@ -83,11 +83,11 @@ asm volatile(ALTERNATIVE( \ "slli t3, t3, %3\n\t" \ "or %0, %0, t3\n\t" \ "2:", THEAD_VENDOR_ID, \ - ERRATA_THEAD_PBMT, CONFIG_ERRATA_THEAD_PBMT) \ + ERRATA_THEAD_MAE, CONFIG_ERRATA_THEAD_MAE) \ : "+r"(_val) \ - : "I"(_PAGE_MTMASK_THEAD >> ALT_THEAD_PBMT_SHIFT), \ - "I"(_PAGE_PMA_THEAD >> ALT_THEAD_PBMT_SHIFT), \ - "I"(ALT_THEAD_PBMT_SHIFT) \ + : "I"(_PAGE_MTMASK_THEAD >> ALT_THEAD_MAE_SHIFT), \ + "I"(_PAGE_PMA_THEAD >> ALT_THEAD_MAE_SHIFT), \ + "I"(ALT_THEAD_MAE_SHIFT) \ : "t3") #else #define ALT_THEAD_PMA(_val) From 65b71cc35cc6631cb0a5b24f961fe64c085cb40b Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Christoph=20M=C3=BCllner?= Date: Sun, 7 Apr 2024 23:32:36 +0200 Subject: [PATCH 301/313] riscv: T-Head: Test availability bit before enabling MAE errata MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit T-Head's memory attribute extension (XTheadMae) (non-compatible equivalent of RVI's Svpbmt) is currently assumed for all T-Head harts. However, QEMU recently decided to drop acceptance of guests that write reserved bits in PTEs. 
As XTheadMae uses reserved bits in PTEs and Linux applies the MAE errata for all T-Head harts, this broke the Linux startup on QEMU emulations of the C906. This patch attempts to address this issue by testing the MAE-enable bit in the th.sxstatus CSR. This CSR is available in HW and can be emulated in QEMU. This patch also makes the XTheadMae probing mechanism reliable, because a test for the right combination of mvendorid, marchid, and mimpid is not sufficient to enable MAE. Reviewed-by: Conor Dooley Signed-off-by: Christoph Müllner Link: https://lore.kernel.org/r/20240407213236.2121592-3-christoph.muellner@vrull.eu Signed-off-by: Palmer Dabbelt --- arch/riscv/errata/thead/errata.c | 14 ++++++++++---- 1 file changed, 10 insertions(+), 4 deletions(-) diff --git a/arch/riscv/errata/thead/errata.c b/arch/riscv/errata/thead/errata.c index 6e7ee1f16bee..bf6a0a6318ee 100644 --- a/arch/riscv/errata/thead/errata.c +++ b/arch/riscv/errata/thead/errata.c @@ -19,6 +19,9 @@ #include #include +#define CSR_TH_SXSTATUS 0x5c0 +#define SXSTATUS_MAEE _AC(0x200000, UL) + static bool errata_probe_mae(unsigned int stage, unsigned long arch_id, unsigned long impid) { @@ -28,11 +31,14 @@ static bool errata_probe_mae(unsigned int stage, if (arch_id != 0 || impid != 0) return false; - if (stage == RISCV_ALTERNATIVES_EARLY_BOOT || - stage == RISCV_ALTERNATIVES_MODULE) - return true; + if (stage != RISCV_ALTERNATIVES_EARLY_BOOT && + stage != RISCV_ALTERNATIVES_MODULE) + return false; - return false; + if (!(csr_read(CSR_TH_SXSTATUS) & SXSTATUS_MAEE)) + return false; + + return true; } /* From 8094a600245e9b28eb36a13036f202ad67c1f887 Mon Sep 17 00:00:00 2001 From: Steve French Date: Thu, 25 Apr 2024 11:30:16 -0500 Subject: [PATCH 302/313] smb3: missing lock when picking channel Coverity spotted a place where we should have been holding the channel lock when accessing the ses channel index. 
Addresses-Coverity: 1582039 ("Data race condition (MISSING_LOCK)") Cc: stable@vger.kernel.org Reviewed-by: Shyam Prasad N Signed-off-by: Steve French --- fs/smb/client/transport.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/fs/smb/client/transport.c b/fs/smb/client/transport.c index 994d70193432..e1a79e031b28 100644 --- a/fs/smb/client/transport.c +++ b/fs/smb/client/transport.c @@ -1057,9 +1057,11 @@ struct TCP_Server_Info *cifs_pick_channel(struct cifs_ses *ses) index = (uint)atomic_inc_return(&ses->chan_seq); index %= ses->chan_count; } + + server = ses->chans[index].server; spin_unlock(&ses->chan_lock); - return ses->chans[index].server; + return server; } int From 8861fd5180476f45f9e8853db154600469a0284f Mon Sep 17 00:00:00 2001 From: Steve French Date: Thu, 25 Apr 2024 12:49:50 -0500 Subject: [PATCH 303/313] smb3: fix lock ordering potential deadlock in cifs_sync_mid_result Coverity spotted that the cifs_sync_mid_result function could deadlock "Thread deadlock (ORDER_REVERSAL) lock_order: Calling spin_lock acquires lock TCP_Server_Info.srv_lock while holding lock TCP_Server_Info.mid_lock" Addresses-Coverity: 1590401 ("Thread deadlock (ORDER_REVERSAL)") Cc: stable@vger.kernel.org Reviewed-by: Shyam Prasad N Signed-off-by: Steve French --- fs/smb/client/transport.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/fs/smb/client/transport.c b/fs/smb/client/transport.c index e1a79e031b28..ddf1a3aafee5 100644 --- a/fs/smb/client/transport.c +++ b/fs/smb/client/transport.c @@ -909,12 +909,15 @@ cifs_sync_mid_result(struct mid_q_entry *mid, struct TCP_Server_Info *server) list_del_init(&mid->qhead); mid->mid_flags |= MID_DELETED; } + spin_unlock(&server->mid_lock); cifs_server_dbg(VFS, "%s: invalid mid state mid=%llu state=%d\n", __func__, mid->mid, mid->mid_state); rc = -EIO; + goto sync_mid_done; } spin_unlock(&server->mid_lock); +sync_mid_done: release_mid(mid); return rc; } From 1d422e44e17c234cef262599e8e5dce6cd3ce28d Mon Sep 17 
00:00:00 2001 From: Guenter Roeck Date: Thu, 25 Apr 2024 13:57:19 -0700 Subject: [PATCH 304/313] MAINTAINERS: Drop entry for PCA9541 bus master selector I no longer have access to PCA9541 hardware, and I am no longer involved in related development. Listing me as PCA9541 maintainer does not make sense anymore. Remove PCA9541 from MAINTAINERS to let its support default to the generic I2C multiplexer entry. Signed-off-by: Guenter Roeck Acked-by: Peter Rosin Signed-off-by: Wolfram Sang --- MAINTAINERS | 6 ------ 1 file changed, 6 deletions(-) diff --git a/MAINTAINERS b/MAINTAINERS index ebf03f5f0619..652cce499273 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -16799,12 +16799,6 @@ S: Maintained F: drivers/leds/leds-pca9532.c F: include/linux/leds-pca9532.h -PCA9541 I2C BUS MASTER SELECTOR DRIVER -M: Guenter Roeck -L: linux-i2c@vger.kernel.org -S: Maintained -F: drivers/i2c/muxes/i2c-mux-pca9541.c - PCI DRIVER FOR AARDVARK (Marvell Armada 3700) M: Thomas Petazzoni M: Pali Rohár From 190f1f46ede17ca0d7153ac115d6518ec1be2ba3 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Uwe=20Kleine-K=C3=B6nig?= Date: Wed, 24 Apr 2024 23:26:27 +0200 Subject: [PATCH 305/313] MAINTAINERS: Update Uwe's email address, drop SIOX maintenance MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit In the context of changing my career path, my Pengutronix email address will soon stop to be available to me. Update the PWM maintainer entry to my kernel.org identity. I drop my co-maintenance of SIOX. Thorsten will continue to care for it with the support of the Pengutronix kernel team. 
Signed-off-by: Uwe Kleine-König Acked-by: Thorsten Scherer Link: https://lore.kernel.org/r/20240424212626.603631-2-ukleinek@kernel.org Signed-off-by: Uwe Kleine-König --- MAINTAINERS | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/MAINTAINERS b/MAINTAINERS index ebf03f5f0619..85a32423e9ed 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -17873,7 +17873,7 @@ F: Documentation/devicetree/bindings/leds/irled/pwm-ir-tx.yaml F: drivers/media/rc/pwm-ir-tx.c PWM SUBSYSTEM -M: Uwe Kleine-König +M: Uwe Kleine-König L: linux-pwm@vger.kernel.org S: Maintained Q: https://patchwork.ozlabs.org/project/linux-pwm/list/ @@ -20177,7 +20177,6 @@ F: include/linux/platform_data/simplefb.h SIOX M: Thorsten Scherer -M: Uwe Kleine-König R: Pengutronix Kernel Team S: Supported F: drivers/gpio/gpio-siox.c From c97f59e276d4e93480f29a70accbd0d7273cf3f5 Mon Sep 17 00:00:00 2001 From: David Howells Date: Fri, 26 Apr 2024 12:15:15 +0100 Subject: [PATCH 306/313] netfs: Fix the pre-flush when appending to a file in writethrough mode In netfs_perform_write(), when the file is marked NETFS_ICTX_WRITETHROUGH or O_*SYNC or RWF_*SYNC was specified, write-through caching is performed on a buffered file. When setting up for write-through, we flush any conflicting writes in the region and wait for the write to complete, failing if there's a write error to return. The issue arises if we're writing at or above the EOF position because we skip the flush and - more importantly - the wait. This becomes a problem if there's a partial folio at the end of the file that is being written out and we want to make a write to it too. Both the already-running write and the write we start both want to clear the writeback mark, but whoever is second causes a warning looking something like: ------------[ cut here ]------------ R=00000012: folio 11 is not under writeback WARNING: CPU: 34 PID: 654 at fs/netfs/write_collect.c:105 ... CPU: 34 PID: 654 Comm: kworker/u386:27 Tainted: G S ... ... 
Workqueue: events_unbound netfs_write_collection_worker ... RIP: 0010:netfs_writeback_lookup_folio Fix this by making the flush-and-wait unconditional. It will do nothing if there are no folios in the pagecache and will return quickly if there are no folios in the region specified. Further, move the WBC attachment above the flush call as the flush is going to attach a WBC and detach it again if it is not present - and since we need one anyway we might as well share it. Fixes: 41d8e7673a77 ("netfs: Implement a write-through caching option") Reported-by: kernel test robot Closes: https://lore.kernel.org/oe-lkp/202404161031.468b84f-oliver.sang@intel.com Signed-off-by: David Howells Link: https://lore.kernel.org/r/2150448.1714130115@warthog.procyon.org.uk Reviewed-by: Jeffrey Layton cc: Eric Van Hensbergen cc: Latchesar Ionkov cc: Dominique Martinet cc: Christian Schoenebeck cc: Marc Dionne cc: netfs@lists.linux.dev cc: linux-fsdevel@vger.kernel.org cc: linux-mm@kvack.org cc: v9fs@lists.linux.dev cc: linux-afs@lists.infradead.org cc: linux-cifs@vger.kernel.org Signed-off-by: Christian Brauner --- fs/netfs/buffered_write.c | 13 ++++++------- 1 file changed, 6 insertions(+), 7 deletions(-) diff --git a/fs/netfs/buffered_write.c b/fs/netfs/buffered_write.c index 8f13ca8fbc74..267b622d923b 100644 --- a/fs/netfs/buffered_write.c +++ b/fs/netfs/buffered_write.c @@ -172,15 +172,14 @@ ssize_t netfs_perform_write(struct kiocb *iocb, struct iov_iter *iter, if (unlikely(test_bit(NETFS_ICTX_WRITETHROUGH, &ctx->flags) || iocb->ki_flags & (IOCB_DSYNC | IOCB_SYNC)) ) { - if (pos < i_size_read(inode)) { - ret = filemap_write_and_wait_range(mapping, pos, pos + iter->count); - if (ret < 0) { - goto out; - } - } - wbc_attach_fdatawrite_inode(&wbc, mapping->host); + ret = filemap_write_and_wait_range(mapping, pos, pos + iter->count); + if (ret < 0) { + wbc_detach_inode(&wbc); + goto out; + } + wreq = netfs_begin_writethrough(iocb, iter->count); if (IS_ERR(wreq)) { wbc_detach_inode(&wbc); 
From 9c49085d69ec8ca4eea254d0f426676232549f84 Mon Sep 17 00:00:00 2001 From: Ben Zong-You Xie Date: Tue, 5 Mar 2024 20:05:01 +0800 Subject: [PATCH 307/313] perf riscv: Fix the warning due to the incompatible type In the 32-bit platform, the second argument of getline is expected to be 'size_t *' (aka 'unsigned int *'), but line_sz is of type 'unsigned long *'. Therefore, declare line_sz as size_t. Signed-off-by: Ben Zong-You Xie Reviewed-by: Alexandre Ghiti Link: https://lore.kernel.org/r/20240305120501.1785084-3-ben717@andestech.com Signed-off-by: Palmer Dabbelt --- tools/perf/arch/riscv/util/header.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tools/perf/arch/riscv/util/header.c b/tools/perf/arch/riscv/util/header.c index 4a41856938a8..1b29030021ee 100644 --- a/tools/perf/arch/riscv/util/header.c +++ b/tools/perf/arch/riscv/util/header.c @@ -41,7 +41,7 @@ static char *_get_cpuid(void) char *mimpid = NULL; char *cpuid = NULL; int read; - unsigned long line_sz; + size_t line_sz; FILE *cpuinfo; cpuinfo = fopen(CPUINFO, "r"); From 49408400d683ae4f41e414dfcb615166cc93be5c Mon Sep 17 00:00:00 2001 From: Andrew Jones Date: Fri, 22 Mar 2024 14:47:28 +0100 Subject: [PATCH 308/313] RISC-V: selftests: cbo: Ensure asm operands match constraints, take 2 Commit 0de65288d75f ("RISC-V: selftests: cbo: Ensure asm operands match constraints") attempted to ensure MK_CBO() would always provide a compile-time constant when given a constant, but cpu_to_le32() isn't necessarily going to do that. Switch to manually shifting the bytes, when needed, to finally get this right.
Reported-by: Woodrow Shen Closes: https://lore.kernel.org/all/CABquHATcBTUwfLpd9sPObBgNobqQKEAZ2yxk+TWSpyO5xvpXpg@mail.gmail.com/ Fixes: a29e2a48afe3 ("RISC-V: selftests: Add CBO tests") Fixes: 0de65288d75f ("RISC-V: selftests: cbo: Ensure asm operands match constraints") Signed-off-by: Andrew Jones Link: https://lore.kernel.org/r/20240322134728.151255-2-ajones@ventanamicro.com Signed-off-by: Palmer Dabbelt --- tools/testing/selftests/riscv/hwprobe/cbo.c | 2 +- tools/testing/selftests/riscv/hwprobe/hwprobe.h | 10 ++++++++++ 2 files changed, 11 insertions(+), 1 deletion(-) diff --git a/tools/testing/selftests/riscv/hwprobe/cbo.c b/tools/testing/selftests/riscv/hwprobe/cbo.c index c537d52fafc5..a40541bb7c7d 100644 --- a/tools/testing/selftests/riscv/hwprobe/cbo.c +++ b/tools/testing/selftests/riscv/hwprobe/cbo.c @@ -19,7 +19,7 @@ #include "hwprobe.h" #include "../../kselftest.h" -#define MK_CBO(fn) cpu_to_le32((fn) << 20 | 10 << 15 | 2 << 12 | 0 << 7 | 15) +#define MK_CBO(fn) le32_bswap((uint32_t)(fn) << 20 | 10 << 15 | 2 << 12 | 0 << 7 | 15) static char mem[4096] __aligned(4096) = { [0 ... 4095] = 0xa5 }; diff --git a/tools/testing/selftests/riscv/hwprobe/hwprobe.h b/tools/testing/selftests/riscv/hwprobe/hwprobe.h index e3fccb390c4d..f3de970c3222 100644 --- a/tools/testing/selftests/riscv/hwprobe/hwprobe.h +++ b/tools/testing/selftests/riscv/hwprobe/hwprobe.h @@ -4,6 +4,16 @@ #include #include +#if __BYTE_ORDER == __BIG_ENDIAN +# define le32_bswap(_x) \ + ((((_x) & 0x000000ffU) << 24) | \ + (((_x) & 0x0000ff00U) << 8) | \ + (((_x) & 0x00ff0000U) >> 8) | \ + (((_x) & 0xff000000U) >> 24)) +#else +# define le32_bswap(_x) (_x) +#endif + /* * Rather than relying on having a new enough libc to define this, just do it * ourselves. 
This way we don't need to be coupled to a new-enough libc to From 91811a31b68d3765b3065f4bb6d7d6d84a7cfc9f Mon Sep 17 00:00:00 2001 From: Wolfram Sang Date: Fri, 26 Apr 2024 08:44:08 +0200 Subject: [PATCH 309/313] i2c: smbus: fix NULL function pointer dereference Baruch reported an OOPS when using the designware controller as target only. Target-only modes break the assumption of one transfer function always being available. Fix this by always checking the pointer in __i2c_transfer. Reported-by: Baruch Siach Closes: https://lore.kernel.org/r/4269631780e5ba789cf1ae391eec1b959def7d99.1712761976.git.baruch@tkos.co.il Fixes: 4b1acc43331d ("i2c: core changes for slave support") [wsa: dropped the simplification in core-smbus to avoid theoretical regressions] Signed-off-by: Wolfram Sang Tested-by: Baruch Siach --- drivers/i2c/i2c-core-base.c | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/drivers/i2c/i2c-core-base.c b/drivers/i2c/i2c-core-base.c index ff5c486a1dbb..db0d1ac82910 100644 --- a/drivers/i2c/i2c-core-base.c +++ b/drivers/i2c/i2c-core-base.c @@ -2200,13 +2200,18 @@ static int i2c_check_for_quirks(struct i2c_adapter *adap, struct i2c_msg *msgs, * Returns negative errno, else the number of messages executed. * * Adapter lock must be held when calling this function. No debug logging - * takes place. adap->algo->master_xfer existence isn't checked. + * takes place. 
*/ int __i2c_transfer(struct i2c_adapter *adap, struct i2c_msg *msgs, int num) { unsigned long orig_jiffies; int ret, try; + if (!adap->algo->master_xfer) { + dev_dbg(&adap->dev, "I2C level transfers not supported\n"); + return -EOPNOTSUPP; + } + if (WARN_ON(!msgs || num < 1)) return -EINVAL; @@ -2273,11 +2278,6 @@ int i2c_transfer(struct i2c_adapter *adap, struct i2c_msg *msgs, int num) { int ret; - if (!adap->algo->master_xfer) { - dev_dbg(&adap->dev, "I2C level transfers not supported\n"); - return -EOPNOTSUPP; - } - /* REVISIT the fault reporting model here is weak: * * - When we get an error after receiving N bytes from a slave, From 2e5449f4f21a1b0bd9beec4c4b580eb1f9b9ed7f Mon Sep 17 00:00:00 2001 From: Tetsuo Handa Date: Sat, 27 Apr 2024 15:27:58 +0900 Subject: [PATCH 310/313] profiling: Remove create_prof_cpu_mask(). create_prof_cpu_mask() is no longer used after commit 1f44a225777e ("s390: convert interrupt handling to use generic hardirq"). Signed-off-by: Tetsuo Handa Signed-off-by: Linus Torvalds --- include/linux/profile.h | 5 ----- kernel/profile.c | 43 ----------------------------------------- 2 files changed, 48 deletions(-) diff --git a/include/linux/profile.h b/include/linux/profile.h index 11db1ec516e2..04ae5ebcb637 100644 --- a/include/linux/profile.h +++ b/include/linux/profile.h @@ -18,13 +18,8 @@ struct proc_dir_entry; struct notifier_block; #if defined(CONFIG_PROFILING) && defined(CONFIG_PROC_FS) -void create_prof_cpu_mask(void); int create_proc_profile(void); #else -static inline void create_prof_cpu_mask(void) -{ -} - static inline int create_proc_profile(void) { return 0; diff --git a/kernel/profile.c b/kernel/profile.c index 8a77769bc4b4..2b775cc5c28f 100644 --- a/kernel/profile.c +++ b/kernel/profile.c @@ -344,49 +344,6 @@ void profile_tick(int type) #include #include -static int prof_cpu_mask_proc_show(struct seq_file *m, void *v) -{ - seq_printf(m, "%*pb\n", cpumask_pr_args(prof_cpu_mask)); - return 0; -} - -static int 
prof_cpu_mask_proc_open(struct inode *inode, struct file *file) -{ - return single_open(file, prof_cpu_mask_proc_show, NULL); -} - -static ssize_t prof_cpu_mask_proc_write(struct file *file, - const char __user *buffer, size_t count, loff_t *pos) -{ - cpumask_var_t new_value; - int err; - - if (!zalloc_cpumask_var(&new_value, GFP_KERNEL)) - return -ENOMEM; - - err = cpumask_parse_user(buffer, count, new_value); - if (!err) { - cpumask_copy(prof_cpu_mask, new_value); - err = count; - } - free_cpumask_var(new_value); - return err; -} - -static const struct proc_ops prof_cpu_mask_proc_ops = { - .proc_open = prof_cpu_mask_proc_open, - .proc_read = seq_read, - .proc_lseek = seq_lseek, - .proc_release = single_release, - .proc_write = prof_cpu_mask_proc_write, -}; - -void create_prof_cpu_mask(void) -{ - /* create /proc/irq/prof_cpu_mask */ - proc_create("irq/prof_cpu_mask", 0600, NULL, &prof_cpu_mask_proc_ops); -} - /* * This function accesses profiling information. The returned data is * binary: the sampling step and the actual contents of the profile From 5097cbcb38e6e0d2627c9dde1985e91d2c9f880e Mon Sep 17 00:00:00 2001 From: Oleg Nesterov Date: Thu, 11 Apr 2024 16:39:05 +0200 Subject: [PATCH 311/313] sched/isolation: Prevent boot crash when the boot CPU is nohz_full Documentation/timers/no_hz.rst states that the "nohz_full=" mask must not include the boot CPU, which is no longer true after: 08ae95f4fd3b ("nohz_full: Allow the boot CPU to be nohz_full"). However after: aae17ebb53cd ("workqueue: Avoid using isolated cpus' timers on queue_delayed_work") the kernel will crash at boot time in this case; housekeeping_any_cpu() returns an invalid CPU number until smp_init() brings the first housekeeping CPU up. Change housekeeping_any_cpu() to check the result of cpumask_any_and() and return smp_processor_id() in this case. 
This is just the simple and backportable workaround which fixes the symptom, but smp_processor_id() at boot time should be safe at least for type == HK_TYPE_TIMER, this more or less matches the tick_do_timer_boot_cpu logic. There is no worry about cpu_down(); tick_nohz_cpu_down() will not allow to offline tick_do_timer_cpu (the 1st online housekeeping CPU). Fixes: aae17ebb53cd ("workqueue: Avoid using isolated cpus' timers on queue_delayed_work") Reported-by: Chris von Recklinghausen Signed-off-by: Oleg Nesterov Signed-off-by: Thomas Gleixner Signed-off-by: Ingo Molnar Reviewed-by: Phil Auld Acked-by: Frederic Weisbecker Link: https://lore.kernel.org/r/20240411143905.GA19288@redhat.com Closes: https://lore.kernel.org/all/20240402105847.GA24832@redhat.com/ --- Documentation/timers/no_hz.rst | 7 ++----- kernel/sched/isolation.c | 11 ++++++++++- 2 files changed, 12 insertions(+), 6 deletions(-) diff --git a/Documentation/timers/no_hz.rst b/Documentation/timers/no_hz.rst index f8786be15183..7fe8ef9718d8 100644 --- a/Documentation/timers/no_hz.rst +++ b/Documentation/timers/no_hz.rst @@ -129,11 +129,8 @@ adaptive-tick CPUs: At least one non-adaptive-tick CPU must remain online to handle timekeeping tasks in order to ensure that system calls like gettimeofday() returns accurate values on adaptive-tick CPUs. (This is not an issue for CONFIG_NO_HZ_IDLE=y because there are no running -user processes to observe slight drifts in clock rate.) Therefore, the -boot CPU is prohibited from entering adaptive-ticks mode. Specifying a -"nohz_full=" mask that includes the boot CPU will result in a boot-time -error message, and the boot CPU will be removed from the mask. Note that -this means that your system must have at least two CPUs in order for +user processes to observe slight drifts in clock rate.) Note that this +means that your system must have at least two CPUs in order for CONFIG_NO_HZ_FULL=y to do anything for you. 
Finally, adaptive-ticks CPUs must have their RCU callbacks offloaded. diff --git a/kernel/sched/isolation.c b/kernel/sched/isolation.c index 373d42c707bc..2a262d3ecb3d 100644 --- a/kernel/sched/isolation.c +++ b/kernel/sched/isolation.c @@ -46,7 +46,16 @@ int housekeeping_any_cpu(enum hk_type type) if (cpu < nr_cpu_ids) return cpu; - return cpumask_any_and(housekeeping.cpumasks[type], cpu_online_mask); + cpu = cpumask_any_and(housekeeping.cpumasks[type], cpu_online_mask); + if (likely(cpu < nr_cpu_ids)) + return cpu; + /* + * Unless we have another problem this can only happen + * at boot time before start_secondary() brings the 1st + * housekeeping CPU up. + */ + WARN_ON_ONCE(system_state == SYSTEM_RUNNING || + type != HK_TYPE_TIMER); } } return smp_processor_id(); From 257bf89d84121280904800acd25cc2c444c717ae Mon Sep 17 00:00:00 2001 From: Oleg Nesterov Date: Sat, 13 Apr 2024 16:17:46 +0200 Subject: [PATCH 312/313] sched/isolation: Fix boot crash when maxcpus < first housekeeping CPU housekeeping_setup() checks cpumask_intersects(present, online) to ensure that the kernel will have at least one housekeeping CPU after smp_init(), but this doesn't work if the maxcpus= kernel parameter limits the number of processors available after bootup. For example, a kernel with "maxcpus=2 nohz_full=0-2" parameters crashes at boot time on a virtual machine with 4 CPUs. Change housekeeping_setup() to use cpumask_first_and() and check that the returned CPU number is valid and less than setup_max_cpus. Another corner case is "nohz_full=0" on a machine with a single CPU or with the maxcpus=1 kernel argument. In this case non_housekeeping_mask is empty and tick_nohz_full_setup() makes no sense. And indeed, the kernel hits the WARN_ON(tick_nohz_full_running) in tick_sched_do_timer(). And how should the kernel interpret the "nohz_full=" parameter? It should be silently ignored, but currently cpulist_parse() happily returns the empty cpumask and this leads to the same problem. 
Change housekeeping_setup() to check cpumask_empty(non_housekeeping_mask) and do nothing in this case. Signed-off-by: Oleg Nesterov Signed-off-by: Thomas Gleixner Signed-off-by: Ingo Molnar Reviewed-by: Phil Auld Acked-by: Frederic Weisbecker Link: https://lore.kernel.org/r/20240413141746.GA10008@redhat.com --- kernel/sched/isolation.c | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/kernel/sched/isolation.c b/kernel/sched/isolation.c index 2a262d3ecb3d..5891e715f00d 100644 --- a/kernel/sched/isolation.c +++ b/kernel/sched/isolation.c @@ -118,6 +118,7 @@ static void __init housekeeping_setup_type(enum hk_type type, static int __init housekeeping_setup(char *str, unsigned long flags) { cpumask_var_t non_housekeeping_mask, housekeeping_staging; + unsigned int first_cpu; int err = 0; if ((flags & HK_FLAG_TICK) && !(housekeeping.flags & HK_FLAG_TICK)) { @@ -138,7 +139,8 @@ static int __init housekeeping_setup(char *str, unsigned long flags) cpumask_andnot(housekeeping_staging, cpu_possible_mask, non_housekeeping_mask); - if (!cpumask_intersects(cpu_present_mask, housekeeping_staging)) { + first_cpu = cpumask_first_and(cpu_present_mask, housekeeping_staging); + if (first_cpu >= nr_cpu_ids || first_cpu >= setup_max_cpus) { __cpumask_set_cpu(smp_processor_id(), housekeeping_staging); __cpumask_clear_cpu(smp_processor_id(), non_housekeeping_mask); if (!housekeeping.flags) { @@ -147,6 +149,9 @@ static int __init housekeeping_setup(char *str, unsigned long flags) } } + if (cpumask_empty(non_housekeeping_mask)) + goto free_housekeeping_staging; + if (!housekeeping.flags) { /* First setup call ("nohz_full=" or "isolcpus=") */ enum hk_type type; From e67572cd2204894179d89bd7b984072f19313b03 Mon Sep 17 00:00:00 2001 From: Linus Torvalds Date: Sun, 28 Apr 2024 13:47:24 -0700 Subject: [PATCH 313/313] Linux 6.9-rc6 --- Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Makefile b/Makefile index 43b10f3d438c..40fb2ca6fe4c 100644 --- 
a/Makefile +++ b/Makefile @@ -2,7 +2,7 @@ VERSION = 6 PATCHLEVEL = 9 SUBLEVEL = 0 -EXTRAVERSION = -rc5 +EXTRAVERSION = -rc6 NAME = Hurr durr I'ma ninja sloth # *DOCUMENTATION*