From 7d200b283aa049fcda0d43dd6e03e9e783d2799c Mon Sep 17 00:00:00 2001 From: Jonathan Albrieux Date: Wed, 13 Jan 2021 16:18:07 +0100 Subject: [PATCH 01/67] iio:adc:qcom-spmi-vadc: add default scale to LR_MUX2_BAT_ID channel Checking at both msm8909-pm8916.dtsi and msm8916.dtsi from downstream it is indicated that "batt_id" channel has to be scaled with the default function: chan@31 { label = "batt_id"; reg = <0x31>; qcom,decimation = <0>; qcom,pre-div-channel-scaling = <0>; qcom,calibration-type = "ratiometric"; qcom,scale-function = <0>; qcom,hw-settle-time = <0xb>; qcom,fast-avg-setup = <0>; }; Change LR_MUX2_BAT_ID scaling accordingly. Signed-off-by: Jonathan Albrieux Acked-by: Bjorn Andersson Fixes: 7c271eea7b8a ("iio: adc: spmi-vadc: Changes to support different scaling") Link: https://lore.kernel.org/r/20210113151808.4628-2-jonathan.albrieux@gmail.com Cc: Signed-off-by: Jonathan Cameron --- drivers/iio/adc/qcom-spmi-vadc.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/iio/adc/qcom-spmi-vadc.c b/drivers/iio/adc/qcom-spmi-vadc.c index b0388f8a69f4..7e7d408452ec 100644 --- a/drivers/iio/adc/qcom-spmi-vadc.c +++ b/drivers/iio/adc/qcom-spmi-vadc.c @@ -598,7 +598,7 @@ static const struct vadc_channels vadc_chans[] = { VADC_CHAN_NO_SCALE(P_MUX16_1_3, 1) VADC_CHAN_NO_SCALE(LR_MUX1_BAT_THERM, 0) - VADC_CHAN_NO_SCALE(LR_MUX2_BAT_ID, 0) + VADC_CHAN_VOLT(LR_MUX2_BAT_ID, 0, SCALE_DEFAULT) VADC_CHAN_NO_SCALE(LR_MUX3_XO_THERM, 0) VADC_CHAN_NO_SCALE(LR_MUX4_AMUX_THM1, 0) VADC_CHAN_NO_SCALE(LR_MUX5_AMUX_THM2, 0) From d68c592e02f6f49a88e705f13dfc1883432cf300 Mon Sep 17 00:00:00 2001 From: Ye Xiang Date: Sat, 30 Jan 2021 18:25:30 +0800 Subject: [PATCH 02/67] iio: hid-sensor-prox: Fix scale not correct issue Currently, the proxy sensor scale is zero because it just return the exponent directly. To fix this issue, this patch use hid_sensor_format_scale to process the scale first then return the output. Fixes: 39a3a0138f61 ("iio: hid-sensors: Added Proximity Sensor Driver") Signed-off-by: Ye Xiang Link: https://lore.kernel.org/r/20210130102530.31064-1-xiang.ye@intel.com Cc: Signed-off-by: Jonathan Cameron --- drivers/iio/light/hid-sensor-prox.c | 13 +++++++++++-- 1 file changed, 11 insertions(+), 2 deletions(-) diff --git a/drivers/iio/light/hid-sensor-prox.c b/drivers/iio/light/hid-sensor-prox.c index 330cf359e0b8..e9e00ce0c6d4 100644 --- a/drivers/iio/light/hid-sensor-prox.c +++ b/drivers/iio/light/hid-sensor-prox.c @@ -23,6 +23,9 @@ struct prox_state { struct hid_sensor_common common_attributes; struct hid_sensor_hub_attribute_info prox_attr; u32 human_presence; + int scale_pre_decml; + int scale_post_decml; + int scale_precision; }; /* Channel definitions */ @@ -93,8 +96,9 @@ static int prox_read_raw(struct iio_dev *indio_dev, ret_type = IIO_VAL_INT; break; case IIO_CHAN_INFO_SCALE: - *val = prox_state->prox_attr.units; - ret_type = IIO_VAL_INT; + *val = prox_state->scale_pre_decml; + *val2 = prox_state->scale_post_decml; + ret_type = prox_state->scale_precision; break; case IIO_CHAN_INFO_OFFSET: *val = hid_sensor_convert_exponent( @@ -234,6 +238,11 @@ static int prox_parse_report(struct platform_device *pdev, HID_USAGE_SENSOR_HUMAN_PRESENCE, &st->common_attributes.sensitivity); + st->scale_precision = hid_sensor_format_scale( + hsdev->usage, + &st->prox_attr, + &st->scale_pre_decml, &st->scale_post_decml); + return ret; } From f890987fac8153227258121740a9609668c427f3 Mon Sep 17 00:00:00 2001 From: Wilfried Wessner Date: Mon, 8 Feb 2021 15:27:05 +0100 Subject: [PATCH 03/67] iio: adc: ad7949: fix wrong ADC result due to incorrect bit mask Fixes a wrong bit mask used for the ADC's result, which was caused by an improper usage of the GENMASK() macro. The bits higher than ADC's resolution are undefined and if not masked out correctly, a wrong result can be given. The GENMASK() macro indexing is zero based, so the mask has to go from [resolution - 1 , 0]. Fixes: 7f40e0614317f ("iio:adc:ad7949: Add AD7949 ADC driver family") Signed-off-by: Wilfried Wessner Reviewed-by: Andy Shevchenko Reviewed-by: Charles-Antoine Couret Cc: Link: https://lore.kernel.org/r/20210208142705.GA51260@ubuntu Signed-off-by: Jonathan Cameron --- drivers/iio/adc/ad7949.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/iio/adc/ad7949.c b/drivers/iio/adc/ad7949.c index 5d597e5050f6..1b4b3203e428 100644 --- a/drivers/iio/adc/ad7949.c +++ b/drivers/iio/adc/ad7949.c @@ -91,7 +91,7 @@ static int ad7949_spi_read_channel(struct ad7949_adc_chip *ad7949_adc, int *val, int ret; int i; int bits_per_word = ad7949_adc->resolution; - int mask = GENMASK(ad7949_adc->resolution, 0); + int mask = GENMASK(ad7949_adc->resolution - 1, 0); struct spi_message msg; struct spi_transfer tx[] = { { From be24c65e9fa2486bb8ec98d9f592bdcf04bedd88 Mon Sep 17 00:00:00 2001 From: Alexandru Ardelean Date: Wed, 10 Feb 2021 12:50:44 +0200 Subject: [PATCH 04/67] iio: adc: adi-axi-adc: add proper Kconfig dependencies The ADI AXI ADC driver requires IO mem access and OF to work. This change adds these dependencies to the Kconfig symbol of the driver. This was also found via the lkp bot, as the devm_platform_ioremap_resource() symbol was not found at link-time on the S390 architecture. Fixes: ef04070692a21 ("iio: adc: adi-axi-adc: add support for AXI ADC IP core") Reported-by: kernel test robot Signed-off-by: Alexandru Ardelean Cc: Link: https://lore.kernel.org/r/20210210105044.48914-1-alexandru.ardelean@analog.com Signed-off-by: Jonathan Cameron --- drivers/iio/adc/Kconfig | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/iio/adc/Kconfig b/drivers/iio/adc/Kconfig index 15587a1bc80d..8d0be5b3029a 100644 --- a/drivers/iio/adc/Kconfig +++ b/drivers/iio/adc/Kconfig @@ -266,6 +266,8 @@ config ADI_AXI_ADC select IIO_BUFFER select IIO_BUFFER_HW_CONSUMER select IIO_BUFFER_DMAENGINE + depends on HAS_IOMEM + depends on OF help Say yes here to build support for Analog Devices Generic AXI ADC IP core. The IP core is used for interfacing with From a71266e454b5df10d019b06f5ebacd579f76be28 Mon Sep 17 00:00:00 2001 From: Dan Carpenter Date: Tue, 16 Feb 2021 22:42:13 +0300 Subject: [PATCH 05/67] iio: adis16400: Fix an error code in adis16400_initial_setup() This is to silence a new Smatch warning: drivers/iio/imu/adis16400.c:492 adis16400_initial_setup() warn: sscanf doesn't return error codes If the condition "if (st->variant->flags & ADIS16400_HAS_SLOW_MODE) {" is false then we return 1 instead of returning 0 and probe will fail. Fixes: 72a868b38bdd ("iio: imu: check sscanf return value") Signed-off-by: Dan Carpenter Cc: Link: https://lore.kernel.org/r/YCwgFb3JVG6qrlQ+@mwanda Signed-off-by: Jonathan Cameron --- drivers/iio/imu/adis16400.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/drivers/iio/imu/adis16400.c b/drivers/iio/imu/adis16400.c index 54af2ed664f6..785a4ce606d8 100644 --- a/drivers/iio/imu/adis16400.c +++ b/drivers/iio/imu/adis16400.c @@ -462,8 +462,7 @@ static int adis16400_initial_setup(struct iio_dev *indio_dev) if (ret) goto err_ret; - ret = sscanf(indio_dev->name, "adis%u\n", &device_id); - if (ret != 1) { + if (sscanf(indio_dev->name, "adis%u\n", &device_id) != 1) { ret = -EINVAL; goto err_ret; } From 121875b28e3bd7519a675bf8ea2c2e793452c2bd Mon Sep 17 00:00:00 2001 From: Jonathan Cameron Date: Sun, 24 Jan 2021 19:50:34 +0000 Subject: [PATCH 06/67] iio:adc:stm32-adc: Add HAS_IOMEM dependency Seems that there are config combinations in which this driver gets enabled and hence selects the MFD, but with out HAS_IOMEM getting pulled in via some other route. MFD is entirely contained in an if HAS_IOMEM block, leading to the build issue in this bugzilla. https://bugzilla.kernel.org/show_bug.cgi?id=209889 Cc: Signed-off-by: Jonathan Cameron Link: https://lore.kernel.org/r/20210124195034.22576-1-jic23@kernel.org --- drivers/iio/adc/Kconfig | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/iio/adc/Kconfig b/drivers/iio/adc/Kconfig index 8d0be5b3029a..be1f73166a32 100644 --- a/drivers/iio/adc/Kconfig +++ b/drivers/iio/adc/Kconfig @@ -925,6 +925,7 @@ config STM32_ADC_CORE depends on ARCH_STM32 || COMPILE_TEST depends on OF depends on REGULATOR + depends on HAS_IOMEM select IIO_BUFFER select MFD_STM32_TIMERS select IIO_STM32_TIMER_TRIGGER From 4f5434086d9223f20b3128a7dc78b35271e76655 Mon Sep 17 00:00:00 2001 From: Linus Walleij Date: Thu, 24 Dec 2020 02:17:00 +0100 Subject: [PATCH 07/67] iio: adc: ab8500-gpadc: Fix off by 10 to 3 Fix an off by three orders of magnitude error in the AB8500 GPADC driver. Luckily it showed up quite quickly when trying to make use of it. The processed reads were returning microvolts, microamperes and microcelsius instead of millivolts, milliamperes and millicelsius as advertised. Cc: stable@vger.kernel.org Fixes: 07063bbfa98e ("iio: adc: New driver for the AB8500 GPADC") Signed-off-by: Linus Walleij Link: https://lore.kernel.org/r/20201224011700.1059659-1-linus.walleij@linaro.org Signed-off-by: Jonathan Cameron --- drivers/iio/adc/ab8500-gpadc.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/iio/adc/ab8500-gpadc.c b/drivers/iio/adc/ab8500-gpadc.c index 1bb987a4acba..8d81505282dd 100644 --- a/drivers/iio/adc/ab8500-gpadc.c +++ b/drivers/iio/adc/ab8500-gpadc.c @@ -918,7 +918,7 @@ static int ab8500_gpadc_read_raw(struct iio_dev *indio_dev, return processed; /* Return millivolt or milliamps or millicentigrades */ - *val = processed * 1000; + *val = processed; return IIO_VAL_INT; } From fae6f62e6a580b663ecf42c2120a0898deae9137 Mon Sep 17 00:00:00 2001 From: William Breathitt Gray Date: Fri, 26 Feb 2021 10:29:31 +0900 Subject: [PATCH 08/67] counter: stm32-timer-cnt: Report count function when SLAVE_MODE_DISABLED When in SLAVE_MODE_DISABLED mode, the count still increases if the counter is enabled because an internal clock is used. This patch fixes the stm32_count_function_get() and stm32_count_function_set() functions to properly handle this behavior. Fixes: ad29937e206f ("counter: Add STM32 Timer quadrature encoder") Cc: Fabrice Gasnier Cc: Maxime Coquelin Cc: Alexandre Torgue Signed-off-by: William Breathitt Gray Reviewed-by: Fabrice Gasnier Link: https://lore.kernel.org/r/20210226012931.161429-1-vilhelm.gray@gmail.com Signed-off-by: Jonathan Cameron --- drivers/counter/stm32-timer-cnt.c | 39 ++++++++++++++++++++----------- 1 file changed, 25 insertions(+), 14 deletions(-) diff --git a/drivers/counter/stm32-timer-cnt.c b/drivers/counter/stm32-timer-cnt.c index ef2a974a2f10..cd50dc12bd02 100644 --- a/drivers/counter/stm32-timer-cnt.c +++ b/drivers/counter/stm32-timer-cnt.c @@ -44,13 +44,14 @@ struct stm32_timer_cnt { * @STM32_COUNT_ENCODER_MODE_3: counts on both TI1FP1 and TI2FP2 edges */ enum stm32_count_function { - STM32_COUNT_SLAVE_MODE_DISABLED = -1, + STM32_COUNT_SLAVE_MODE_DISABLED, STM32_COUNT_ENCODER_MODE_1, STM32_COUNT_ENCODER_MODE_2, STM32_COUNT_ENCODER_MODE_3, }; static enum counter_count_function stm32_count_functions[] = { + [STM32_COUNT_SLAVE_MODE_DISABLED] = COUNTER_COUNT_FUNCTION_INCREASE, [STM32_COUNT_ENCODER_MODE_1] = COUNTER_COUNT_FUNCTION_QUADRATURE_X2_A, [STM32_COUNT_ENCODER_MODE_2] = COUNTER_COUNT_FUNCTION_QUADRATURE_X2_B, [STM32_COUNT_ENCODER_MODE_3] = COUNTER_COUNT_FUNCTION_QUADRATURE_X4, @@ -90,6 +91,9 @@ static int stm32_count_function_get(struct counter_device *counter, regmap_read(priv->regmap, TIM_SMCR, &smcr); switch (smcr & TIM_SMCR_SMS) { + case 0: + *function = STM32_COUNT_SLAVE_MODE_DISABLED; + return 0; case 1: *function = STM32_COUNT_ENCODER_MODE_1; return 0; @@ -99,9 +103,9 @@ static int stm32_count_function_get(struct counter_device *counter, case 3: *function = STM32_COUNT_ENCODER_MODE_3; return 0; + default: + return -EINVAL; } - - return -EINVAL; } static int stm32_count_function_set(struct counter_device *counter, @@ -112,6 +116,9 @@ static int stm32_count_function_set(struct counter_device *counter, u32 cr1, sms; switch (function) { + case STM32_COUNT_SLAVE_MODE_DISABLED: + sms = 0; + break; case STM32_COUNT_ENCODER_MODE_1: sms = 1; break; @@ -122,8 +129,7 @@ static int stm32_count_function_set(struct counter_device *counter, sms = 3; break; default: - sms = 0; - break; + return -EINVAL; } /* Store enable status */ @@ -274,31 +280,36 @@ static int stm32_action_get(struct counter_device *counter, size_t function; int err; - /* Default action mode (e.g. STM32_COUNT_SLAVE_MODE_DISABLED) */ - *action = STM32_SYNAPSE_ACTION_NONE; - err = stm32_count_function_get(counter, count, &function); if (err) - return 0; + return err; switch (function) { + case STM32_COUNT_SLAVE_MODE_DISABLED: + /* counts on internal clock when CEN=1 */ + *action = STM32_SYNAPSE_ACTION_NONE; + return 0; case STM32_COUNT_ENCODER_MODE_1: /* counts up/down on TI1FP1 edge depending on TI2FP2 level */ if (synapse->signal->id == count->synapses[0].signal->id) *action = STM32_SYNAPSE_ACTION_BOTH_EDGES; - break; + else + *action = STM32_SYNAPSE_ACTION_NONE; + return 0; case STM32_COUNT_ENCODER_MODE_2: /* counts up/down on TI2FP2 edge depending on TI1FP1 level */ if (synapse->signal->id == count->synapses[1].signal->id) *action = STM32_SYNAPSE_ACTION_BOTH_EDGES; - break; + else + *action = STM32_SYNAPSE_ACTION_NONE; + return 0; case STM32_COUNT_ENCODER_MODE_3: /* counts up/down on both TI1FP1 and TI2FP2 edges */ *action = STM32_SYNAPSE_ACTION_BOTH_EDGES; - break; + return 0; + default: + return -EINVAL; } - - return 0; } static const struct counter_ops stm32_timer_cnt_ops = { From e4c3e133294c0a292d21073899b05ebf530169bd Mon Sep 17 00:00:00 2001 From: Fabrice Gasnier Date: Tue, 2 Mar 2021 15:43:55 +0100 Subject: [PATCH 09/67] counter: stm32-timer-cnt: fix ceiling write max value The ceiling value isn't checked before writing it into registers. The user could write a value higher than the counter resolution (e.g. 16 or 32 bits indicated by max_arr). This makes most significant bits to be truncated. Fix it by checking the max_arr to report a range error [1] to the user. [1] https://lkml.org/lkml/2021/2/12/358 Fixes: ad29937e206f ("counter: Add STM32 Timer quadrature encoder") Signed-off-by: Fabrice Gasnier Acked-by: William Breathitt Gray Cc: Link: https://lore.kernel.org/r/1614696235-24088-1-git-send-email-fabrice.gasnier@foss.st.com Signed-off-by: Jonathan Cameron --- drivers/counter/stm32-timer-cnt.c | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/drivers/counter/stm32-timer-cnt.c b/drivers/counter/stm32-timer-cnt.c index cd50dc12bd02..2295be3f309a 100644 --- a/drivers/counter/stm32-timer-cnt.c +++ b/drivers/counter/stm32-timer-cnt.c @@ -32,6 +32,7 @@ struct stm32_timer_cnt { struct regmap *regmap; struct clk *clk; u32 ceiling; + u32 max_arr; bool enabled; struct stm32_timer_regs bak; }; @@ -191,6 +192,9 @@ static ssize_t stm32_count_ceiling_write(struct counter_device *counter, if (ret) return ret; + if (ceiling > priv->max_arr) + return -ERANGE; + /* TIMx_ARR register shouldn't be buffered (ARPE=0) */ regmap_update_bits(priv->regmap, TIM_CR1, TIM_CR1_ARPE, 0); regmap_write(priv->regmap, TIM_ARR, ceiling); @@ -371,6 +375,7 @@ static int stm32_timer_cnt_probe(struct platform_device *pdev) priv->regmap = ddata->regmap; priv->clk = ddata->clk; priv->ceiling = ddata->max_arr; + priv->max_arr = ddata->max_arr; priv->counter.name = dev_name(dev); priv->counter.parent = dev; From b14d72ac731753708a7c1a6b3657b9312b6f0042 Mon Sep 17 00:00:00 2001 From: Fabrice Gasnier Date: Wed, 3 Mar 2021 18:49:49 +0100 Subject: [PATCH 10/67] counter: stm32-timer-cnt: fix ceiling miss-alignment with reload register Ceiling value may be miss-aligned with what's actually configured into the ARR register. This is seen after probe as currently the ARR value is zero, whereas ceiling value is set to the maximum. So: - reading ceiling reports zero - in case the counter gets enabled without any prior configuration, it won't count. - in case the function gets set by the user 1st, (priv->ceiling) is used. Fix it by getting rid of the cached "priv->ceiling" variable. Rather use the ARR register value directly by using regmap read or write when needed. There should be no drawback on performance as priv->ceiling isn't used in performance critical path. There's also no point in writing ARR while setting function (sms), so it can be safely removed. Fixes: ad29937e206f ("counter: Add STM32 Timer quadrature encoder") Suggested-by: William Breathitt Gray Signed-off-by: Fabrice Gasnier Acked-by: William Breathitt Gray Cc: Link: https://lore.kernel.org/r/1614793789-10346-1-git-send-email-fabrice.gasnier@foss.st.com Signed-off-by: Jonathan Cameron --- drivers/counter/stm32-timer-cnt.c | 11 +++-------- 1 file changed, 3 insertions(+), 8 deletions(-) diff --git a/drivers/counter/stm32-timer-cnt.c b/drivers/counter/stm32-timer-cnt.c index 2295be3f309a..75bc401fdd18 100644 --- a/drivers/counter/stm32-timer-cnt.c +++ b/drivers/counter/stm32-timer-cnt.c @@ -31,7 +31,6 @@ struct stm32_timer_cnt { struct counter_device counter; struct regmap *regmap; struct clk *clk; - u32 ceiling; u32 max_arr; bool enabled; struct stm32_timer_regs bak; @@ -75,8 +74,10 @@ static int stm32_count_write(struct counter_device *counter, const unsigned long val) { struct stm32_timer_cnt *const priv = counter->priv; + u32 ceiling; - if (val > priv->ceiling) + regmap_read(priv->regmap, TIM_ARR, &ceiling); + if (val > ceiling) return -EINVAL; return regmap_write(priv->regmap, TIM_CNT, val); @@ -138,10 +139,6 @@ static int stm32_count_function_set(struct counter_device *counter, regmap_update_bits(priv->regmap, TIM_CR1, TIM_CR1_CEN, 0); - /* TIMx_ARR register shouldn't be buffered (ARPE=0) */ - regmap_update_bits(priv->regmap, TIM_CR1, TIM_CR1_ARPE, 0); - regmap_write(priv->regmap, TIM_ARR, priv->ceiling); - regmap_update_bits(priv->regmap, TIM_SMCR, TIM_SMCR_SMS, sms); /* Make sure that registers are updated */ @@ -199,7 +196,6 @@ static ssize_t stm32_count_ceiling_write(struct counter_device *counter, regmap_update_bits(priv->regmap, TIM_CR1, TIM_CR1_ARPE, 0); regmap_write(priv->regmap, TIM_ARR, ceiling); - priv->ceiling = ceiling; return len; } @@ -374,7 +370,6 @@ static int stm32_timer_cnt_probe(struct platform_device *pdev) priv->regmap = ddata->regmap; priv->clk = ddata->clk; - priv->ceiling = ddata->max_arr; priv->max_arr = ddata->max_arr; priv->counter.name = dev_name(dev); From efc61345274d6c7a46a0570efbc916fcbe3e927b Mon Sep 17 00:00:00 2001 From: Eric Whitney Date: Thu, 18 Feb 2021 10:11:32 -0500 Subject: [PATCH 11/67] ext4: shrink race window in ext4_should_retry_alloc() When generic/371 is run on kvm-xfstests using 5.10 and 5.11 kernels, it fails at significant rates on the two test scenarios that disable delayed allocation (ext3conv and data_journal) and force actual block allocation for the fallocate and pwrite functions in the test. The failure rate on 5.10 for both ext3conv and data_journal on one test system typically runs about 85%. On 5.11, the failure rate on ext3conv sometimes drops to as low as 1% while the rate on data_journal increases to nearly 100%. The observed failures are largely due to ext4_should_retry_alloc() cutting off block allocation retries when s_mb_free_pending (used to indicate that a transaction in progress will free blocks) is 0. However, free space is usually available when this occurs during runs of generic/371. It appears that a thread attempting to allocate blocks is just missing transaction commits in other threads that increase the free cluster count and reset s_mb_free_pending while the allocating thread isn't running. Explicitly testing for free space availability avoids this race. The current code uses a post-increment operator in the conditional expression that determines whether the retry limit has been exceeded. This means that the conditional expression uses the value of the retry counter before it's increased, resulting in an extra retry cycle. The current code actually retries twice before hitting its retry limit rather than once. Increasing the retry limit to 3 from the current actual maximum retry count of 2 in combination with the change described above reduces the observed failure rate to less that 0.1% on both ext3conv and data_journal with what should be limited impact on users sensitive to the overhead caused by retries. A per filesystem percpu counter exported via sysfs is added to allow users or developers to track the number of times the retry limit is exceeded without resorting to debugging methods. This should provide some insight into worst case retry behavior. Signed-off-by: Eric Whitney Link: https://lore.kernel.org/r/20210218151132.19678-1-enwlinux@gmail.com Signed-off-by: Theodore Ts'o --- fs/ext4/balloc.c | 38 ++++++++++++++++++++++++++------------ fs/ext4/ext4.h | 1 + fs/ext4/super.c | 5 +++++ fs/ext4/sysfs.c | 7 +++++++ 4 files changed, 39 insertions(+), 12 deletions(-) diff --git a/fs/ext4/balloc.c b/fs/ext4/balloc.c index f45f9feebe59..74a5172c2d83 100644 --- a/fs/ext4/balloc.c +++ b/fs/ext4/balloc.c @@ -626,27 +626,41 @@ int ext4_claim_free_clusters(struct ext4_sb_info *sbi, /** * ext4_should_retry_alloc() - check if a block allocation should be retried - * @sb: super block - * @retries: number of attemps has been made + * @sb: superblock + * @retries: number of retry attempts made so far * - * ext4_should_retry_alloc() is called when ENOSPC is returned, and if - * it is profitable to retry the operation, this function will wait - * for the current or committing transaction to complete, and then - * return TRUE. We will only retry once. + * ext4_should_retry_alloc() is called when ENOSPC is returned while + * attempting to allocate blocks. If there's an indication that a pending + * journal transaction might free some space and allow another attempt to + * succeed, this function will wait for the current or committing transaction + * to complete and then return TRUE. */ int ext4_should_retry_alloc(struct super_block *sb, int *retries) { - if (!ext4_has_free_clusters(EXT4_SB(sb), 1, 0) || - (*retries)++ > 1 || - !EXT4_SB(sb)->s_journal) + struct ext4_sb_info *sbi = EXT4_SB(sb); + + if (!sbi->s_journal) return 0; + if (++(*retries) > 3) { + percpu_counter_inc(&sbi->s_sra_exceeded_retry_limit); + return 0; + } + + /* + * if there's no indication that blocks are about to be freed it's + * possible we just missed a transaction commit that did so + */ smp_mb(); - if (EXT4_SB(sb)->s_mb_free_pending == 0) - return 0; + if (sbi->s_mb_free_pending == 0) + return ext4_has_free_clusters(sbi, 1, 0); + /* + * it's possible we've just missed a transaction commit here, + * so ignore the returned status + */ jbd_debug(1, "%s: retrying operation after ENOSPC\n", sb->s_id); - jbd2_journal_force_commit_nested(EXT4_SB(sb)->s_journal); + (void) jbd2_journal_force_commit_nested(sbi->s_journal); return 1; } diff --git a/fs/ext4/ext4.h b/fs/ext4/ext4.h index 644fd69185d3..ea3b41579f38 100644 --- a/fs/ext4/ext4.h +++ b/fs/ext4/ext4.h @@ -1484,6 +1484,7 @@ struct ext4_sb_info { struct percpu_counter s_freeinodes_counter; struct percpu_counter s_dirs_counter; struct percpu_counter s_dirtyclusters_counter; + struct percpu_counter s_sra_exceeded_retry_limit; struct blockgroup_lock *s_blockgroup_lock; struct proc_dir_entry *s_proc; struct kobject s_kobj; diff --git a/fs/ext4/super.c b/fs/ext4/super.c index ad34a37278cd..a0a256859662 100644 --- a/fs/ext4/super.c +++ b/fs/ext4/super.c @@ -1210,6 +1210,7 @@ static void ext4_put_super(struct super_block *sb) percpu_counter_destroy(&sbi->s_freeinodes_counter); percpu_counter_destroy(&sbi->s_dirs_counter); percpu_counter_destroy(&sbi->s_dirtyclusters_counter); + percpu_counter_destroy(&sbi->s_sra_exceeded_retry_limit); percpu_free_rwsem(&sbi->s_writepages_rwsem); #ifdef CONFIG_QUOTA for (i = 0; i < EXT4_MAXQUOTAS; i++) @@ -5011,6 +5012,9 @@ static int ext4_fill_super(struct super_block *sb, void *data, int silent) if (!err) err = percpu_counter_init(&sbi->s_dirtyclusters_counter, 0, GFP_KERNEL); + if (!err) + err = percpu_counter_init(&sbi->s_sra_exceeded_retry_limit, 0, + GFP_KERNEL); if (!err) err = percpu_init_rwsem(&sbi->s_writepages_rwsem); @@ -5124,6 +5128,7 @@ static int ext4_fill_super(struct super_block *sb, void *data, int silent) percpu_counter_destroy(&sbi->s_freeinodes_counter); percpu_counter_destroy(&sbi->s_dirs_counter); percpu_counter_destroy(&sbi->s_dirtyclusters_counter); + percpu_counter_destroy(&sbi->s_sra_exceeded_retry_limit); percpu_free_rwsem(&sbi->s_writepages_rwsem); failed_mount5: ext4_ext_release(sb); diff --git a/fs/ext4/sysfs.c b/fs/ext4/sysfs.c index 075aa3a19ff5..a3d08276d441 100644 --- a/fs/ext4/sysfs.c +++ b/fs/ext4/sysfs.c @@ -24,6 +24,7 @@ typedef enum { attr_session_write_kbytes, attr_lifetime_write_kbytes, attr_reserved_clusters, + attr_sra_exceeded_retry_limit, attr_inode_readahead, attr_trigger_test_error, attr_first_error_time, @@ -202,6 +203,7 @@ EXT4_ATTR_FUNC(delayed_allocation_blocks, 0444); EXT4_ATTR_FUNC(session_write_kbytes, 0444); EXT4_ATTR_FUNC(lifetime_write_kbytes, 0444); EXT4_ATTR_FUNC(reserved_clusters, 0644); +EXT4_ATTR_FUNC(sra_exceeded_retry_limit, 0444); EXT4_ATTR_OFFSET(inode_readahead_blks, 0644, inode_readahead, ext4_sb_info, s_inode_readahead_blks); @@ -251,6 +253,7 @@ static struct attribute *ext4_attrs[] = { ATTR_LIST(session_write_kbytes), ATTR_LIST(lifetime_write_kbytes), ATTR_LIST(reserved_clusters), + ATTR_LIST(sra_exceeded_retry_limit), ATTR_LIST(inode_readahead_blks), ATTR_LIST(inode_goal), ATTR_LIST(mb_stats), @@ -374,6 +377,10 @@ static ssize_t ext4_attr_show(struct kobject *kobj, return snprintf(buf, PAGE_SIZE, "%llu\n", (unsigned long long) atomic64_read(&sbi->s_resv_clusters)); + case attr_sra_exceeded_retry_limit: + return snprintf(buf, PAGE_SIZE, "%llu\n", + (unsigned long long) + percpu_counter_sum(&sbi->s_sra_exceeded_retry_limit)); case attr_inode_readahead: case attr_pointer_ui: if (!ptr) From 163f0ec1df33cf468509ff38cbcbb5eb0d7fac60 Mon Sep 17 00:00:00 2001 From: Jan Kara Date: Mon, 22 Feb 2021 18:16:26 +0100 Subject: [PATCH 12/67] ext4: add reclaim checks to xattr code Syzbot is reporting that ext4 can enter fs reclaim from kvmalloc() while the transaction is started like: fs_reclaim_acquire+0x117/0x150 mm/page_alloc.c:4340 might_alloc include/linux/sched/mm.h:193 [inline] slab_pre_alloc_hook mm/slab.h:493 [inline] slab_alloc_node mm/slub.c:2817 [inline] __kmalloc_node+0x5f/0x430 mm/slub.c:4015 kmalloc_node include/linux/slab.h:575 [inline] kvmalloc_node+0x61/0xf0 mm/util.c:587 kvmalloc include/linux/mm.h:781 [inline] ext4_xattr_inode_cache_find fs/ext4/xattr.c:1465 [inline] ext4_xattr_inode_lookup_create fs/ext4/xattr.c:1508 [inline] ext4_xattr_set_entry+0x1ce6/0x3780 fs/ext4/xattr.c:1649 ext4_xattr_ibody_set+0x78/0x2b0 fs/ext4/xattr.c:2224 ext4_xattr_set_handle+0x8f4/0x13e0 fs/ext4/xattr.c:2380 ext4_xattr_set+0x13a/0x340 fs/ext4/xattr.c:2493 This should be impossible since transaction start sets PF_MEMALLOC_NOFS. Add some assertions to the code to catch if something isn't working as expected early. Link: https://lore.kernel.org/linux-ext4/000000000000563a0205bafb7970@google.com/ Signed-off-by: Jan Kara Link: https://lore.kernel.org/r/20210222171626.21884-1-jack@suse.cz Signed-off-by: Theodore Ts'o --- fs/ext4/xattr.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/fs/ext4/xattr.c b/fs/ext4/xattr.c index 372208500f4e..083c95126781 100644 --- a/fs/ext4/xattr.c +++ b/fs/ext4/xattr.c @@ -1462,6 +1462,9 @@ ext4_xattr_inode_cache_find(struct inode *inode, const void *value, if (!ce) return NULL; + WARN_ON_ONCE(ext4_handle_valid(journal_current_handle()) && + !(current->flags & PF_MEMALLOC_NOFS)); + ea_data = kvmalloc(value_len, GFP_KERNEL); if (!ea_data) { mb_cache_entry_put(ea_inode_cache, ce); @@ -2327,6 +2330,7 @@ ext4_xattr_set_handle(handle_t *handle, struct inode *inode, int name_index, error = -ENOSPC; goto cleanup; } + WARN_ON_ONCE(!(current->flags & PF_MEMALLOC_NOFS)); } error = ext4_reserve_inode_write(handle, inode, &is.iloc); From f91436d55a279f045987e8b8c1385585dca54be9 Mon Sep 17 00:00:00 2001 From: Sabyrzhan Tasbolatov Date: Wed, 24 Feb 2021 15:58:00 +0600 Subject: [PATCH 13/67] fs/ext4: fix integer overflow in s_log_groups_per_flex syzbot found UBSAN: shift-out-of-bounds in ext4_mb_init [1], when 1 << sbi->s_es->s_log_groups_per_flex is bigger than UINT_MAX, where sbi->s_mb_prefetch is unsigned integer type. 32 is the maximum allowed power of s_log_groups_per_flex. Following if check will also trigger UBSAN shift-out-of-bound: if (1 << sbi->s_es->s_log_groups_per_flex >= UINT_MAX) { So I'm checking it against the raw number, perhaps there is another way to calculate UINT_MAX max power. Also use min_t as to make sure it's uint type. [1] UBSAN: shift-out-of-bounds in fs/ext4/mballoc.c:2713:24 shift exponent 60 is too large for 32-bit type 'int' Call Trace: __dump_stack lib/dump_stack.c:79 [inline] dump_stack+0x137/0x1be lib/dump_stack.c:120 ubsan_epilogue lib/ubsan.c:148 [inline] __ubsan_handle_shift_out_of_bounds+0x432/0x4d0 lib/ubsan.c:395 ext4_mb_init_backend fs/ext4/mballoc.c:2713 [inline] ext4_mb_init+0x19bc/0x19f0 fs/ext4/mballoc.c:2898 ext4_fill_super+0xc2ec/0xfbe0 fs/ext4/super.c:4983 Reported-by: syzbot+a8b4b0c60155e87e9484@syzkaller.appspotmail.com Signed-off-by: Sabyrzhan Tasbolatov Reviewed-by: Jan Kara Link: https://lore.kernel.org/r/20210224095800.3350002-1-snovitoll@gmail.com Signed-off-by: Theodore Ts'o --- fs/ext4/mballoc.c | 11 +++++++++-- 1 file changed, 9 insertions(+), 2 deletions(-) diff --git a/fs/ext4/mballoc.c b/fs/ext4/mballoc.c index 99bf091fee10..a02fadf4fc84 100644 --- a/fs/ext4/mballoc.c +++ b/fs/ext4/mballoc.c @@ -2709,8 +2709,15 @@ static int ext4_mb_init_backend(struct super_block *sb) } if (ext4_has_feature_flex_bg(sb)) { - /* a single flex group is supposed to be read by a single IO */ - sbi->s_mb_prefetch = min(1 << sbi->s_es->s_log_groups_per_flex, + /* a single flex group is supposed to be read by a single IO. + * 2 ^ s_log_groups_per_flex != UINT_MAX as s_mb_prefetch is + * unsigned integer, so the maximum shift is 32. + */ + if (sbi->s_es->s_log_groups_per_flex >= 32) { + ext4_msg(sb, KERN_ERR, "too many log groups per flexible block group"); + goto err_freesgi; + } + sbi->s_mb_prefetch = min_t(uint, 1 << sbi->s_es->s_log_groups_per_flex, BLK_MAX_SEGMENT_SIZE >> (sb->s_blocksize_bits - 9)); sbi->s_mb_prefetch *= 8; /* 8 prefetch IOs in flight at most */ } else { From c915fb80eaa6194fa9bd0a4487705cd5b0dda2f1 Mon Sep 17 00:00:00 2001 From: Zhaolong Zhang Date: Tue, 2 Mar 2021 17:42:31 +0800 Subject: [PATCH 14/67] ext4: fix bh ref count on error paths __ext4_journalled_writepage should drop bhs' ref count on error paths Signed-off-by: Zhaolong Zhang Link: https://lore.kernel.org/r/1614678151-70481-1-git-send-email-zhangzl2013@126.com Signed-off-by: Theodore Ts'o --- fs/ext4/inode.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c index 650c5acd2f2d..a79a9ea58c56 100644 --- a/fs/ext4/inode.c +++ b/fs/ext4/inode.c @@ -1938,13 +1938,13 @@ static int __ext4_journalled_writepage(struct page *page, if (!ret) ret = err; - if (!ext4_has_inline_data(inode)) - ext4_walk_page_buffers(NULL, page_bufs, 0, len, - NULL, bput_one); ext4_set_inode_state(inode, EXT4_STATE_JDATA); out: unlock_page(page); out_no_pagelock: + if (!inline_data && page_bufs) + ext4_walk_page_buffers(NULL, page_bufs, 0, len, + NULL, bput_one); brelse(inode_bh); return ret; } From 37e89e574dc238a4ebe439543c5ab4fbb2f0311b Mon Sep 17 00:00:00 2001 From: Ye Xiang Date: Wed, 3 Mar 2021 14:36:12 +0800 Subject: [PATCH 15/67] iio: hid-sensor-humidity: Fix alignment issue of timestamp channel This patch ensures that, there is sufficient space and correct alignment for the timestamp. Fixes: d7ed89d5aadf ("iio: hid: Add humidity sensor support") Signed-off-by: Ye Xiang Cc: Link: https://lore.kernel.org/r/20210303063615.12130-2-xiang.ye@intel.com Signed-off-by: Jonathan Cameron --- drivers/iio/humidity/hid-sensor-humidity.c | 12 +++++++----- 1 file changed, 7 insertions(+), 5 deletions(-) diff --git a/drivers/iio/humidity/hid-sensor-humidity.c b/drivers/iio/humidity/hid-sensor-humidity.c index 52f605114ef7..d62705448ae2 100644 --- a/drivers/iio/humidity/hid-sensor-humidity.c +++ b/drivers/iio/humidity/hid-sensor-humidity.c @@ -15,7 +15,10 @@ struct hid_humidity_state { struct hid_sensor_common common_attributes; struct hid_sensor_hub_attribute_info humidity_attr; - s32 humidity_data; + struct { + s32 humidity_data; + u64 timestamp __aligned(8); + } scan; int scale_pre_decml; int scale_post_decml; int scale_precision; @@ -125,9 +128,8 @@ static int humidity_proc_event(struct hid_sensor_hub_device *hsdev, struct hid_humidity_state *humid_st = iio_priv(indio_dev); if (atomic_read(&humid_st->common_attributes.data_ready)) - iio_push_to_buffers_with_timestamp(indio_dev, - &humid_st->humidity_data, - iio_get_time_ns(indio_dev)); + iio_push_to_buffers_with_timestamp(indio_dev, &humid_st->scan, + iio_get_time_ns(indio_dev)); return 0; } @@ -142,7 +144,7 @@ static int humidity_capture_sample(struct hid_sensor_hub_device *hsdev, switch (usage_id) { case HID_USAGE_SENSOR_ATMOSPHERIC_HUMIDITY: - humid_st->humidity_data = *(s32 *)raw_data; + humid_st->scan.humidity_data = *(s32 *)raw_data; return 0; default: From 141e7633aa4d2838d1f6ad5c74cccc53547c16ac Mon Sep 17 00:00:00 2001 From: Ye Xiang Date: Wed, 3 Mar 2021 14:36:14 +0800 Subject: [PATCH 16/67] iio: hid-sensor-temperature: Fix issues of timestamp channel This patch fixes 2 issues of timestamp channel: 1. This patch ensures that there is sufficient space and correct alignment for the timestamp. 2. Correct the timestamp channel scan index. Fixes: 59d0f2da3569 ("iio: hid: Add temperature sensor support") Signed-off-by: Ye Xiang Cc: Link: https://lore.kernel.org/r/20210303063615.12130-4-xiang.ye@intel.com Signed-off-by: Jonathan Cameron --- drivers/iio/temperature/hid-sensor-temperature.c | 14 ++++++++------ 1 file changed, 8 insertions(+), 6 deletions(-) diff --git a/drivers/iio/temperature/hid-sensor-temperature.c b/drivers/iio/temperature/hid-sensor-temperature.c index 81688f1b932f..da9a247097fa 100644 --- a/drivers/iio/temperature/hid-sensor-temperature.c +++ b/drivers/iio/temperature/hid-sensor-temperature.c @@ -15,7 +15,10 @@ struct temperature_state { struct hid_sensor_common common_attributes; struct hid_sensor_hub_attribute_info temperature_attr; - s32 temperature_data; + struct { + s32 temperature_data; + u64 timestamp __aligned(8); + } scan; int scale_pre_decml; int scale_post_decml; int scale_precision; @@ -32,7 +35,7 @@ static const struct iio_chan_spec temperature_channels[] = { BIT(IIO_CHAN_INFO_SAMP_FREQ) | BIT(IIO_CHAN_INFO_HYSTERESIS), }, - IIO_CHAN_SOFT_TIMESTAMP(3), + IIO_CHAN_SOFT_TIMESTAMP(1), }; /* Adjust channel real bits based on report descriptor */ @@ -123,9 +126,8 @@ static int temperature_proc_event(struct hid_sensor_hub_device *hsdev, struct temperature_state *temp_st = iio_priv(indio_dev); if (atomic_read(&temp_st->common_attributes.data_ready)) - iio_push_to_buffers_with_timestamp(indio_dev, - &temp_st->temperature_data, - iio_get_time_ns(indio_dev)); + iio_push_to_buffers_with_timestamp(indio_dev, &temp_st->scan, + iio_get_time_ns(indio_dev)); return 0; } @@ -140,7 +142,7 @@ static int temperature_capture_sample(struct hid_sensor_hub_device *hsdev, switch (usage_id) { case HID_USAGE_SENSOR_DATA_ENVIRONMENTAL_TEMPERATURE: - temp_st->temperature_data = *(s32 *)raw_data; + temp_st->scan.temperature_data = *(s32 *)raw_data; return 0; default: return -EINVAL; From 6dbbbe4cfd398704b72b21c1d4a5d3807e909d60 Mon Sep 17 00:00:00 2001 From: Dinghao Liu Date: Mon, 1 Mar 2021 16:04:21 +0800 Subject: [PATCH 17/67] iio: gyro: mpu3050: Fix error handling in mpu3050_trigger_handler There is one regmap_bulk_read() call in mpu3050_trigger_handler that we have caught its return value bug lack further handling. Check and terminate the execution flow just like the other three regmap_bulk_read() calls in this function. Fixes: 3904b28efb2c7 ("iio: gyro: Add driver for the MPU-3050 gyroscope") Signed-off-by: Dinghao Liu Reviewed-by: Linus Walleij Link: https://lore.kernel.org/r/20210301080421.13436-1-dinghao.liu@zju.edu.cn Cc: Signed-off-by: Jonathan Cameron --- drivers/iio/gyro/mpu3050-core.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/iio/gyro/mpu3050-core.c b/drivers/iio/gyro/mpu3050-core.c index dfa31a23500f..ac90be03332a 100644 --- a/drivers/iio/gyro/mpu3050-core.c +++ b/drivers/iio/gyro/mpu3050-core.c @@ -551,6 +551,8 @@ static irqreturn_t mpu3050_trigger_handler(int irq, void *p) MPU3050_FIFO_R, &fifo_values[offset], toread); + if (ret) + goto out_trigger_unlock; dev_dbg(mpu3050->dev, "%04x %04x %04x %04x %04x\n", From 781e14eaa7d168dc07d2a2eea5c55831a5bb46f3 Mon Sep 17 00:00:00 2001 From: Mika Westerberg Date: Wed, 10 Feb 2021 16:06:33 +0200 Subject: [PATCH 18/67] thunderbolt: Initialize HopID IDAs in tb_switch_alloc() If there is a failure before the tb_switch_add() is called the switch object is released by tb_switch_release() but at that point HopID IDAs have not yet been initialized. So we see splat like this: BUG: spinlock bad magic on CPU#2, kworker/u8:5/115 ... Workqueue: thunderbolt0 tb_handle_hotplug Call Trace: dump_stack+0x97/0xdc ? spin_bug+0x9a/0xa7 do_raw_spin_lock+0x68/0x98 _raw_spin_lock_irqsave+0x3f/0x5d ida_destroy+0x4f/0x127 tb_switch_release+0x6d/0xfd device_release+0x2c/0x7d kobject_put+0x9b/0xbc tb_handle_hotplug+0x278/0x452 process_one_work+0x1db/0x396 worker_thread+0x216/0x375 kthread+0x14d/0x155 ? pr_cont_work+0x58/0x58 ? kthread_blkcg+0x2e/0x2e ret_from_fork+0x1f/0x40 Fix this by always initializing HopID IDAs in tb_switch_alloc(). Fixes: 0b2863ac3cfd ("thunderbolt: Add functions for allocating and releasing HopIDs") Cc: stable@vger.kernel.org Reported-by: Chiranjeevi Rapolu Signed-off-by: Mika Westerberg --- drivers/thunderbolt/switch.c | 18 ++++++++---------- 1 file changed, 8 insertions(+), 10 deletions(-) diff --git a/drivers/thunderbolt/switch.c b/drivers/thunderbolt/switch.c index b63fecca6c2a..2a95b4ce06c0 100644 --- a/drivers/thunderbolt/switch.c +++ b/drivers/thunderbolt/switch.c @@ -768,12 +768,6 @@ static int tb_init_port(struct tb_port *port) tb_dump_port(port->sw->tb, &port->config); - /* Control port does not need HopID allocation */ - if (port->port) { - ida_init(&port->in_hopids); - ida_init(&port->out_hopids); - } - INIT_LIST_HEAD(&port->list); return 0; @@ -1842,10 +1836,8 @@ static void tb_switch_release(struct device *dev) dma_port_free(sw->dma_port); tb_switch_for_each_port(sw, port) { - if (!port->disabled) { - ida_destroy(&port->in_hopids); - ida_destroy(&port->out_hopids); - } + ida_destroy(&port->in_hopids); + ida_destroy(&port->out_hopids); } kfree(sw->uuid); @@ -2025,6 +2017,12 @@ struct tb_switch *tb_switch_alloc(struct tb *tb, struct device *parent, /* minimum setup for tb_find_cap and tb_drom_read to work */ sw->ports[i].sw = sw; sw->ports[i].port = i; + + /* Control port does not need HopID allocation */ + if (i) { + ida_init(&sw->ports[i].in_hopids); + ida_init(&sw->ports[i].out_hopids); + } } ret = tb_switch_find_vse_cap(sw, TB_VSE_CAP_PLUG_EVENTS); From c94732bda079ee66b5c3904cbb628d0cb218ab39 Mon Sep 17 00:00:00 2001 From: Mika Westerberg Date: Thu, 10 Dec 2020 14:57:10 +0200 Subject: [PATCH 19/67] thunderbolt: Increase runtime PM reference count on DP tunnel discovery If the driver is unbound and then bound back it goes over the topology and figure out the existing tunnels. However, if it finds DP tunnel it should make sure the domain does not runtime suspend as otherwise it will tear down the DP tunnel unexpectedly. Fixes: 6ac6faee5d7d ("thunderbolt: Add runtime PM for Software CM") Cc: stable@vger.kernel.org Signed-off-by: Mika Westerberg --- drivers/thunderbolt/tb.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/drivers/thunderbolt/tb.c b/drivers/thunderbolt/tb.c index 1f000ac1728b..c348b1fc0efc 100644 --- a/drivers/thunderbolt/tb.c +++ b/drivers/thunderbolt/tb.c @@ -138,6 +138,10 @@ static void tb_discover_tunnels(struct tb_switch *sw) parent->boot = true; parent = tb_switch_parent(parent); } + } else if (tb_tunnel_is_dp(tunnel)) { + /* Keep the domain from powering down */ + pm_runtime_get_sync(&tunnel->src_port->sw->dev); + pm_runtime_get_sync(&tunnel->dst_port->sw->dev); } list_add_tail(&tunnel->list, &tcm->tunnel_list); From f053cf7aa66cd9d592b0fc967f4d887c2abff1b7 Mon Sep 17 00:00:00 2001 From: Eric Biggers Date: Tue, 2 Mar 2021 12:04:19 -0800 Subject: [PATCH 20/67] ext4: fix error handling in ext4_end_enable_verity() ext4 didn't properly clean up if verity failed to be enabled on a file: - It left verity metadata (pages past EOF) in the page cache, which would be exposed to userspace if the file was later extended. - It didn't truncate the verity metadata at all (either from cache or from disk) if an error occurred while setting the verity bit. Fix these bugs by adding a call to truncate_inode_pages() and ensuring that we truncate the verity metadata (both from cache and from disk) in all error paths. Also rework the code to cleanly separate the success path from the error paths, which makes it much easier to understand. Reported-by: Yunlei He Fixes: c93d8f885809 ("ext4: add basic fs-verity support") Cc: stable@vger.kernel.org # v5.4+ Signed-off-by: Eric Biggers Link: https://lore.kernel.org/r/20210302200420.137977-2-ebiggers@kernel.org Signed-off-by: Theodore Ts'o --- fs/ext4/verity.c | 93 +++++++++++++++++++++++++++++------------------- 1 file changed, 57 insertions(+), 36 deletions(-) diff --git a/fs/ext4/verity.c b/fs/ext4/verity.c index 5b7ba8f71153..00e3cbde472e 100644 --- a/fs/ext4/verity.c +++ b/fs/ext4/verity.c @@ -201,55 +201,76 @@ static int ext4_end_enable_verity(struct file *filp, const void *desc, struct inode *inode = file_inode(filp); const int credits = 2; /* superblock and inode for ext4_orphan_del() */ handle_t *handle; + struct ext4_iloc iloc; int err = 0; - int err2; - - if (desc != NULL) { - /* Succeeded; write the verity descriptor. */ - err = ext4_write_verity_descriptor(inode, desc, desc_size, - merkle_tree_size); - - /* Write all pages before clearing VERITY_IN_PROGRESS. */ - if (!err) - err = filemap_write_and_wait(inode->i_mapping); - } - - /* If we failed, truncate anything we wrote past i_size. */ - if (desc == NULL || err) - ext4_truncate(inode); /* - * We must always clean up by clearing EXT4_STATE_VERITY_IN_PROGRESS and - * deleting the inode from the orphan list, even if something failed. - * If everything succeeded, we'll also set the verity bit in the same - * transaction. + * If an error already occurred (which fs/verity/ signals by passing + * desc == NULL), then only clean-up is needed. */ + if (desc == NULL) + goto cleanup; - ext4_clear_inode_state(inode, EXT4_STATE_VERITY_IN_PROGRESS); + /* Append the verity descriptor. */ + err = ext4_write_verity_descriptor(inode, desc, desc_size, + merkle_tree_size); + if (err) + goto cleanup; + + /* + * Write all pages (both data and verity metadata). Note that this must + * happen before clearing EXT4_STATE_VERITY_IN_PROGRESS; otherwise pages + * beyond i_size won't be written properly. For crash consistency, this + * also must happen before the verity inode flag gets persisted. + */ + err = filemap_write_and_wait(inode->i_mapping); + if (err) + goto cleanup; + + /* + * Finally, set the verity inode flag and remove the inode from the + * orphan list (in a single transaction). + */ handle = ext4_journal_start(inode, EXT4_HT_INODE, credits); if (IS_ERR(handle)) { - ext4_orphan_del(NULL, inode); - return PTR_ERR(handle); + err = PTR_ERR(handle); + goto cleanup; } - err2 = ext4_orphan_del(handle, inode); - if (err2) - goto out_stop; + err = ext4_orphan_del(handle, inode); + if (err) + goto stop_and_cleanup; - if (desc != NULL && !err) { - struct ext4_iloc iloc; + err = ext4_reserve_inode_write(handle, inode, &iloc); + if (err) + goto stop_and_cleanup; + + ext4_set_inode_flag(inode, EXT4_INODE_VERITY); + ext4_set_inode_flags(inode, false); + err = ext4_mark_iloc_dirty(handle, inode, &iloc); + if (err) + goto stop_and_cleanup; - err = ext4_reserve_inode_write(handle, inode, &iloc); - if (err) - goto out_stop; - ext4_set_inode_flag(inode, EXT4_INODE_VERITY); - ext4_set_inode_flags(inode, false); - err = ext4_mark_iloc_dirty(handle, inode, &iloc); - } -out_stop: ext4_journal_stop(handle); - return err ?: err2; + + ext4_clear_inode_state(inode, EXT4_STATE_VERITY_IN_PROGRESS); + return 0; + +stop_and_cleanup: + ext4_journal_stop(handle); +cleanup: + /* + * Verity failed to be enabled, so clean up by truncating any verity + * metadata that was written beyond i_size (both from cache and from + * disk), removing the inode from the orphan list (if it wasn't done + * already), and clearing EXT4_STATE_VERITY_IN_PROGRESS. + */ + truncate_inode_pages(inode->i_mapping, inode->i_size); + ext4_truncate(inode); + ext4_orphan_del(NULL, inode); + ext4_clear_inode_state(inode, EXT4_STATE_VERITY_IN_PROGRESS); + return err; } static int ext4_get_verity_descriptor_location(struct inode *inode, From d0dcd90b7f472691de122515eb0d1765808b6d91 Mon Sep 17 00:00:00 2001 From: Pawel Laszczak Date: Mon, 8 Mar 2021 08:31:03 +0100 Subject: [PATCH 21/67] usb: cdnsp: Fixes incorrect value in ISOC TRB Fixes issue with priority of operator. Operator "|" priority is higher then "? :". To improve the readability the operator "? :" has been replaced with "if ()" statement. Fixes: 3d82904559f4 ("usb: cdnsp: cdns3 Add main part of Cadence USBSSP DRD Driver") Reviewed-by: Greg Kroah-Hartman Signed-off-by: Pawel Laszczak Signed-off-by: Peter Chen --- drivers/usb/cdns3/cdnsp-ring.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/drivers/usb/cdns3/cdnsp-ring.c b/drivers/usb/cdns3/cdnsp-ring.c index f9170d177a89..5f0513c96c04 100644 --- a/drivers/usb/cdns3/cdnsp-ring.c +++ b/drivers/usb/cdns3/cdnsp-ring.c @@ -2197,7 +2197,10 @@ static int cdnsp_queue_isoc_tx(struct cdnsp_device *pdev, * inverted in the first TDs isoc TRB. */ field = TRB_TYPE(TRB_ISOC) | TRB_TLBPC(last_burst_pkt) | - start_cycle ? 0 : 1 | TRB_SIA | TRB_TBC(burst_count); + TRB_SIA | TRB_TBC(burst_count); + + if (!start_cycle) + field |= TRB_CYCLE; /* Fill the rest of the TRB fields, and remaining normal TRBs. */ for (i = 0; i < trbs_per_td; i++) { From 08c18b63d9656e0389087d1956d2b37fd7019172 Mon Sep 17 00:00:00 2001 From: Christophe Leroy Date: Tue, 9 Mar 2021 06:19:30 +0000 Subject: [PATCH 22/67] powerpc/vdso32: Add missing _restgpr_31_x to fix build failure With some defconfig including CONFIG_CC_OPTIMIZE_FOR_SIZE, (for instance mvme5100_defconfig and ps3_defconfig), gcc 5 generates a call to _restgpr_31_x. Until recently it went unnoticed, but commit 42ed6d56ade2 ("powerpc/vdso: Block R_PPC_REL24 relocations") made it rise to the surface. Provide that function (copied from lib/crtsavres.S) in gettimeofday.S Fixes: ab037dd87a2f ("powerpc/vdso: Switch VDSO to generic C implementation.") Signed-off-by: Christophe Leroy Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/a7aa198a88bcd33c6e35e99f70f86c7b7f2f9440.1615270757.git.christophe.leroy@csgroup.eu --- arch/powerpc/kernel/vdso32/gettimeofday.S | 11 +++++++++++ 1 file changed, 11 insertions(+) diff --git a/arch/powerpc/kernel/vdso32/gettimeofday.S b/arch/powerpc/kernel/vdso32/gettimeofday.S index a6e29f880e0e..d21d08140a5e 100644 --- a/arch/powerpc/kernel/vdso32/gettimeofday.S +++ b/arch/powerpc/kernel/vdso32/gettimeofday.S @@ -65,3 +65,14 @@ V_FUNCTION_END(__kernel_clock_getres) V_FUNCTION_BEGIN(__kernel_time) cvdso_call_time __c_kernel_time V_FUNCTION_END(__kernel_time) + +/* Routines for restoring integer registers, called by the compiler. */ +/* Called with r11 pointing to the stack header word of the caller of the */ +/* function, just beyond the end of the integer restore area. */ +_GLOBAL(_restgpr_31_x) +_GLOBAL(_rest32gpr_31_x) + lwz r0,4(r11) + lwz r31,-4(r11) + mtlr r0 + mr r1,r11 + blr From eed5fae00593ab9d261a0c1ffc1bdb786a87a55a Mon Sep 17 00:00:00 2001 From: Christophe Leroy Date: Wed, 10 Mar 2021 12:10:34 +0000 Subject: [PATCH 23/67] powerpc: Force inlining of cpu_has_feature() to avoid build failure The code relies on constant folding of cpu_has_feature() based on possible and always true values as defined per CPU_FTRS_ALWAYS and CPU_FTRS_POSSIBLE. Build failure is encountered with for instance book3e_all_defconfig on kisskb in the AMDGPU driver which uses cpu_has_feature(CPU_FTR_VSX_COMP) to decide whether calling kernel_enable_vsx() or not. The failure is due to cpu_has_feature() not being inlined with that configuration with gcc 4.9. In the same way as commit acdad8fb4a15 ("powerpc: Force inlining of mmu_has_feature to fix build failure"), for inlining of cpu_has_feature(). Signed-off-by: Christophe Leroy Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/b231dfa040ce4cc37f702f5c3a595fdeabfe0462.1615378209.git.christophe.leroy@csgroup.eu --- arch/powerpc/include/asm/cpu_has_feature.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/arch/powerpc/include/asm/cpu_has_feature.h b/arch/powerpc/include/asm/cpu_has_feature.h index 7897d16e0990..727d4b321937 100644 --- a/arch/powerpc/include/asm/cpu_has_feature.h +++ b/arch/powerpc/include/asm/cpu_has_feature.h @@ -7,7 +7,7 @@ #include #include -static inline bool early_cpu_has_feature(unsigned long feature) +static __always_inline bool early_cpu_has_feature(unsigned long feature) { return !!((CPU_FTRS_ALWAYS & feature) || (CPU_FTRS_POSSIBLE & cur_cpu_spec->cpu_features & feature)); @@ -46,7 +46,7 @@ static __always_inline bool cpu_has_feature(unsigned long feature) return static_branch_likely(&cpu_feature_keys[i]); } #else -static inline bool cpu_has_feature(unsigned long feature) +static __always_inline bool cpu_has_feature(unsigned long feature) { return early_cpu_has_feature(feature); } From f8d70fd6a5a7a38a95eb8021e00d2e547f88efec Mon Sep 17 00:00:00 2001 From: Greg Kroah-Hartman Date: Sat, 13 Mar 2021 14:58:02 +0100 Subject: [PATCH 24/67] MAINTAINERS: move some real subsystems off of the staging mailing list MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The VME and Android drivers still have their MAINTAINERS entries pointing to the "driverdevel" mailing list, due to them having their codebase move out of the drivers/staging/ directory, but no one remembered to change the mailing list entries. Move them both to linux-kernel for lack of a more specific place at the moment. These are both low-volume areas of the kernel, so this shouldn't be an issue. Cc: Martyn Welch Cc: Manohar Vanga Cc: Arve Hjønnevåg Cc: Todd Kjos Cc: Martijn Coenen Cc: Joel Fernandes Cc: Christian Brauner Cc: Hridya Valsaraju Cc: Suren Baghdasaryan Reported-by: Konstantin Ryabitsev Link: https://lore.kernel.org/r/YEzE6u6U1jkBatmr@kroah.com Signed-off-by: Greg Kroah-Hartman --- MAINTAINERS | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/MAINTAINERS b/MAINTAINERS index aa84121c5611..d7c25c0fc08a 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -1181,7 +1181,7 @@ M: Joel Fernandes M: Christian Brauner M: Hridya Valsaraju M: Suren Baghdasaryan -L: devel@driverdev.osuosl.org +L: linux-kernel@vger.kernel.org S: Supported T: git git://git.kernel.org/pub/scm/linux/kernel/git/gregkh/staging.git F: drivers/android/ @@ -19135,7 +19135,7 @@ VME SUBSYSTEM M: Martyn Welch M: Manohar Vanga M: Greg Kroah-Hartman -L: devel@driverdev.osuosl.org +L: linux-kernel@vger.kernel.org S: Maintained T: git git://git.kernel.org/pub/scm/linux/kernel/git/gregkh/char-misc.git F: Documentation/driver-api/vme.rst From e06da9ea3e3f6746a849edeae1d09ee821f5c2ce Mon Sep 17 00:00:00 2001 From: Greg Kroah-Hartman Date: Tue, 16 Mar 2021 11:23:11 +0100 Subject: [PATCH 25/67] MAINTAINERS: move the staging subsystem to lists.linux.dev The drivers/staging/ tree has a new mailing list, linux-staging@lists.linux.dev, so move the MAINTAINER entry to point to it so that we get patches sent to the proper place. There was no need to specify a list for the hikey9xx driver, the tools pick up the "base" list for drivers/staging/* so remove that line to make the file simpler. Cc: Mauro Carvalho Chehab Link: https://lore.kernel.org/r/20210316102311.182375-1-gregkh@linuxfoundation.org Signed-off-by: Greg Kroah-Hartman --- MAINTAINERS | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/MAINTAINERS b/MAINTAINERS index d7c25c0fc08a..9e876927c60d 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -8116,7 +8116,6 @@ F: drivers/crypto/hisilicon/sec2/sec_main.c HISILICON STAGING DRIVERS FOR HIKEY 960/970 M: Mauro Carvalho Chehab -L: devel@driverdev.osuosl.org S: Maintained F: drivers/staging/hikey9xx/ @@ -17040,7 +17039,7 @@ F: drivers/staging/vt665?/ STAGING SUBSYSTEM M: Greg Kroah-Hartman -L: devel@driverdev.osuosl.org +L: linux-staging@lists.linux.dev S: Supported T: git git://git.kernel.org/pub/scm/linux/kernel/git/gregkh/staging.git F: drivers/staging/ From 2e5848a3d86f03024ae096478bdb892ab3d79131 Mon Sep 17 00:00:00 2001 From: Tong Zhang Date: Mon, 15 Mar 2021 15:59:14 -0400 Subject: [PATCH 26/67] staging: comedi: cb_pcidas: fix request_irq() warn request_irq() wont accept a name which contains slash so we need to repalce it with something else -- otherwise it will trigger a warning and the entry in /proc/irq/ will not be created since the .name might be used by userspace and we don't want to break userspace, so we are changing the parameters passed to request_irq() [ 1.630764] name 'pci-das1602/16' [ 1.630950] WARNING: CPU: 0 PID: 181 at fs/proc/generic.c:180 __xlate_proc_name+0x93/0xb0 [ 1.634009] RIP: 0010:__xlate_proc_name+0x93/0xb0 [ 1.639441] Call Trace: [ 1.639976] proc_mkdir+0x18/0x20 [ 1.641946] request_threaded_irq+0xfe/0x160 [ 1.642186] cb_pcidas_auto_attach+0xf4/0x610 [cb_pcidas] Suggested-by: Ian Abbott Reviewed-by: Ian Abbott Signed-off-by: Tong Zhang Link: https://lore.kernel.org/r/20210315195914.4801-1-ztong0001@gmail.com Signed-off-by: Greg Kroah-Hartman --- drivers/staging/comedi/drivers/cb_pcidas.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/staging/comedi/drivers/cb_pcidas.c b/drivers/staging/comedi/drivers/cb_pcidas.c index d740c4782775..2f20bd56ec6c 100644 --- a/drivers/staging/comedi/drivers/cb_pcidas.c +++ b/drivers/staging/comedi/drivers/cb_pcidas.c @@ -1281,7 +1281,7 @@ static int cb_pcidas_auto_attach(struct comedi_device *dev, devpriv->amcc + AMCC_OP_REG_INTCSR); ret = request_irq(pcidev->irq, cb_pcidas_interrupt, IRQF_SHARED, - dev->board_name, dev); + "cb_pcidas", dev); if (ret) { dev_dbg(dev->class_dev, "unable to allocate irq %d\n", pcidev->irq); From d2d106fe3badfc3bf0dd3899d1c3f210c7203eab Mon Sep 17 00:00:00 2001 From: Tong Zhang Date: Mon, 15 Mar 2021 15:58:12 -0400 Subject: [PATCH 27/67] staging: comedi: cb_pcidas64: fix request_irq() warn request_irq() wont accept a name which contains slash so we need to repalce it with something else -- otherwise it will trigger a warning and the entry in /proc/irq/ will not be created since the .name might be used by userspace and we don't want to break userspace, so we are changing the parameters passed to request_irq() [ 1.565966] name 'pci-das6402/16' [ 1.566149] WARNING: CPU: 0 PID: 184 at fs/proc/generic.c:180 __xlate_proc_name+0x93/0xb0 [ 1.568923] RIP: 0010:__xlate_proc_name+0x93/0xb0 [ 1.574200] Call Trace: [ 1.574722] proc_mkdir+0x18/0x20 [ 1.576629] request_threaded_irq+0xfe/0x160 [ 1.576859] auto_attach+0x60a/0xc40 [cb_pcidas64] Suggested-by: Ian Abbott Reviewed-by: Ian Abbott Signed-off-by: Tong Zhang Link: https://lore.kernel.org/r/20210315195814.4692-1-ztong0001@gmail.com Signed-off-by: Greg Kroah-Hartman --- drivers/staging/comedi/drivers/cb_pcidas64.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/staging/comedi/drivers/cb_pcidas64.c b/drivers/staging/comedi/drivers/cb_pcidas64.c index fa987bb0e7cd..6d3ba399a7f0 100644 --- a/drivers/staging/comedi/drivers/cb_pcidas64.c +++ b/drivers/staging/comedi/drivers/cb_pcidas64.c @@ -4035,7 +4035,7 @@ static int auto_attach(struct comedi_device *dev, init_stc_registers(dev); retval = request_irq(pcidev->irq, handle_interrupt, IRQF_SHARED, - dev->board_name, dev); + "cb_pcidas64", dev); if (retval) { dev_dbg(dev->class_dev, "unable to allocate irq %u\n", pcidev->irq); From ef4cb70a4c22bf301cd757dcc838dc8ca9526477 Mon Sep 17 00:00:00 2001 From: Andy Shevchenko Date: Tue, 2 Mar 2021 18:14:53 +0200 Subject: [PATCH 28/67] genirq/irq_sim: Fix typos in kernel doc (fnode -> fwnode) Fix typos in kernel doc, otherwise validation script complains: .../irq_sim.c:170: warning: Function parameter or member 'fwnode' not described in 'irq_domain_create_sim' .../irq_sim.c:170: warning: Excess function parameter 'fnode' description in 'irq_domain_create_sim' .../irq_sim.c:240: warning: Function parameter or member 'fwnode' not described in 'devm_irq_domain_create_sim' .../irq_sim.c:240: warning: Excess function parameter 'fnode' description in 'devm_irq_domain_create_sim' Signed-off-by: Andy Shevchenko Signed-off-by: Thomas Gleixner Link: https://lore.kernel.org/r/20210302161453.28540-1-andriy.shevchenko@linux.intel.com --- kernel/irq/irq_sim.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/kernel/irq/irq_sim.c b/kernel/irq/irq_sim.c index 48006608baf0..40880c350b95 100644 --- a/kernel/irq/irq_sim.c +++ b/kernel/irq/irq_sim.c @@ -159,7 +159,7 @@ static const struct irq_domain_ops irq_sim_domain_ops = { * irq_domain_create_sim - Create a new interrupt simulator irq_domain and * allocate a range of dummy interrupts. * - * @fnode: struct fwnode_handle to be associated with this domain. + * @fwnode: struct fwnode_handle to be associated with this domain. * @num_irqs: Number of interrupts to allocate. * * On success: return a new irq_domain object. @@ -228,7 +228,7 @@ static void devm_irq_domain_release_sim(struct device *dev, void *res) * a managed device. * * @dev: Device to initialize the simulator object for. - * @fnode: struct fwnode_handle to be associated with this domain. + * @fwnode: struct fwnode_handle to be associated with this domain. * @num_irqs: Number of interrupts to allocate * * On success: return a new irq_domain object. From d88d05a9e0b6d9356e97129d4ff9942d765f46ea Mon Sep 17 00:00:00 2001 From: Kan Liang Date: Fri, 12 Mar 2021 05:21:37 -0800 Subject: [PATCH 29/67] perf/x86/intel: Fix a crash caused by zero PEBS status A repeatable crash can be triggered by the perf_fuzzer on some Haswell system. https://lore.kernel.org/lkml/7170d3b-c17f-1ded-52aa-cc6d9ae999f4@maine.edu/ For some old CPUs (HSW and earlier), the PEBS status in a PEBS record may be mistakenly set to 0. To minimize the impact of the defect, the commit was introduced to try to avoid dropping the PEBS record for some cases. It adds a check in the intel_pmu_drain_pebs_nhm(), and updates the local pebs_status accordingly. However, it doesn't correct the PEBS status in the PEBS record, which may trigger the crash, especially for the large PEBS. It's possible that all the PEBS records in a large PEBS have the PEBS status 0. If so, the first get_next_pebs_record_by_bit() in the __intel_pmu_pebs_event() returns NULL. The at = NULL. Since it's a large PEBS, the 'count' parameter must > 1. The second get_next_pebs_record_by_bit() will crash. Besides the local pebs_status, correct the PEBS status in the PEBS record as well. Fixes: 01330d7288e0 ("perf/x86: Allow zero PEBS status with only single active event") Reported-by: Vince Weaver Suggested-by: Peter Zijlstra (Intel) Signed-off-by: Kan Liang Signed-off-by: Peter Zijlstra (Intel) Cc: stable@vger.kernel.org Link: https://lkml.kernel.org/r/1615555298-140216-1-git-send-email-kan.liang@linux.intel.com --- arch/x86/events/intel/ds.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/x86/events/intel/ds.c b/arch/x86/events/intel/ds.c index 7ebae1826403..d32b302719fe 100644 --- a/arch/x86/events/intel/ds.c +++ b/arch/x86/events/intel/ds.c @@ -2010,7 +2010,7 @@ static void intel_pmu_drain_pebs_nhm(struct pt_regs *iregs, struct perf_sample_d */ if (!pebs_status && cpuc->pebs_enabled && !(cpuc->pebs_enabled & (cpuc->pebs_enabled-1))) - pebs_status = cpuc->pebs_enabled; + pebs_status = p->status = cpuc->pebs_enabled; bit = find_first_bit((unsigned long *)&pebs_status, x86_pmu.max_pebs_events); From 2dc0572f2cef87425147658698dce2600b799bd3 Mon Sep 17 00:00:00 2001 From: Kan Liang Date: Fri, 12 Mar 2021 05:21:38 -0800 Subject: [PATCH 30/67] perf/x86/intel: Fix unchecked MSR access error caused by VLBR_EVENT On a Haswell machine, the perf_fuzzer managed to trigger this message: [117248.075892] unchecked MSR access error: WRMSR to 0x3f1 (tried to write 0x0400000000000000) at rIP: 0xffffffff8106e4f4 (native_write_msr+0x4/0x20) [117248.089957] Call Trace: [117248.092685] intel_pmu_pebs_enable_all+0x31/0x40 [117248.097737] intel_pmu_enable_all+0xa/0x10 [117248.102210] __perf_event_task_sched_in+0x2df/0x2f0 [117248.107511] finish_task_switch.isra.0+0x15f/0x280 [117248.112765] schedule_tail+0xc/0x40 [117248.116562] ret_from_fork+0x8/0x30 A fake event called VLBR_EVENT may use the bit 58 of the PEBS_ENABLE, if the precise_ip is set. The bit 58 is reserved by the HW. Accessing the bit causes the unchecked MSR access error. The fake event doesn't support PEBS. The case should be rejected. Fixes: 097e4311cda9 ("perf/x86: Add constraint to create guest LBR event without hw counter") Reported-by: Vince Weaver Signed-off-by: Kan Liang Signed-off-by: Peter Zijlstra (Intel) Cc: stable@vger.kernel.org Link: https://lkml.kernel.org/r/1615555298-140216-2-git-send-email-kan.liang@linux.intel.com --- arch/x86/events/intel/core.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/arch/x86/events/intel/core.c b/arch/x86/events/intel/core.c index 7bbb5bb98d8c..37ce38403cb8 100644 --- a/arch/x86/events/intel/core.c +++ b/arch/x86/events/intel/core.c @@ -3659,6 +3659,9 @@ static int intel_pmu_hw_config(struct perf_event *event) return ret; if (event->attr.precise_ip) { + if ((event->attr.config & INTEL_ARCH_EVENT_MASK) == INTEL_FIXED_VLBR_EVENT) + return -EINVAL; + if (!(event->attr.freq || (event->attr.wakeup_events && !event->attr.watermark))) { event->hw.flags |= PERF_X86_EVENT_AUTO_RELOAD; if (!(event->attr.sample_type & From 5abbe51a526253b9f003e9a0a195638dc882d660 Mon Sep 17 00:00:00 2001 From: Oleg Nesterov Date: Mon, 1 Feb 2021 18:46:41 +0100 Subject: [PATCH 31/67] kernel, fs: Introduce and use set_restart_fn() and arch_set_restart_data() Preparation for fixing get_nr_restart_syscall() on X86 for COMPAT. Add a new helper which sets restart_block->fn and calls a dummy arch_set_restart_data() helper. Fixes: 609c19a385c8 ("x86/ptrace: Stop setting TS_COMPAT in ptrace code") Signed-off-by: Oleg Nesterov Signed-off-by: Thomas Gleixner Cc: stable@vger.kernel.org Link: https://lore.kernel.org/r/20210201174641.GA17871@redhat.com --- fs/select.c | 10 ++++------ include/linux/thread_info.h | 13 +++++++++++++ kernel/futex.c | 3 +-- kernel/time/alarmtimer.c | 2 +- kernel/time/hrtimer.c | 2 +- kernel/time/posix-cpu-timers.c | 2 +- 6 files changed, 21 insertions(+), 11 deletions(-) diff --git a/fs/select.c b/fs/select.c index 37aaa8317f3a..945896d0ac9e 100644 --- a/fs/select.c +++ b/fs/select.c @@ -1055,10 +1055,9 @@ static long do_restart_poll(struct restart_block *restart_block) ret = do_sys_poll(ufds, nfds, to); - if (ret == -ERESTARTNOHAND) { - restart_block->fn = do_restart_poll; - ret = -ERESTART_RESTARTBLOCK; - } + if (ret == -ERESTARTNOHAND) + ret = set_restart_fn(restart_block, do_restart_poll); + return ret; } @@ -1080,7 +1079,6 @@ SYSCALL_DEFINE3(poll, struct pollfd __user *, ufds, unsigned int, nfds, struct restart_block *restart_block; restart_block = ¤t->restart_block; - restart_block->fn = do_restart_poll; restart_block->poll.ufds = ufds; restart_block->poll.nfds = nfds; @@ -1091,7 +1089,7 @@ SYSCALL_DEFINE3(poll, struct pollfd __user *, ufds, unsigned int, nfds, } else restart_block->poll.has_timeout = 0; - ret = -ERESTART_RESTARTBLOCK; + ret = set_restart_fn(restart_block, do_restart_poll); } return ret; } diff --git a/include/linux/thread_info.h b/include/linux/thread_info.h index 9b2158c69275..157762db9d4b 100644 --- a/include/linux/thread_info.h +++ b/include/linux/thread_info.h @@ -11,6 +11,7 @@ #include #include #include +#include #ifdef CONFIG_THREAD_INFO_IN_TASK /* @@ -59,6 +60,18 @@ enum syscall_work_bit { #ifdef __KERNEL__ +#ifndef arch_set_restart_data +#define arch_set_restart_data(restart) do { } while (0) +#endif + +static inline long set_restart_fn(struct restart_block *restart, + long (*fn)(struct restart_block *)) +{ + restart->fn = fn; + arch_set_restart_data(restart); + return -ERESTART_RESTARTBLOCK; +} + #ifndef THREAD_ALIGN #define THREAD_ALIGN THREAD_SIZE #endif diff --git a/kernel/futex.c b/kernel/futex.c index e68db7745039..00febd6dea9c 100644 --- a/kernel/futex.c +++ b/kernel/futex.c @@ -2728,14 +2728,13 @@ static int futex_wait(u32 __user *uaddr, unsigned int flags, u32 val, goto out; restart = ¤t->restart_block; - restart->fn = futex_wait_restart; restart->futex.uaddr = uaddr; restart->futex.val = val; restart->futex.time = *abs_time; restart->futex.bitset = bitset; restart->futex.flags = flags | FLAGS_HAS_TIMEOUT; - ret = -ERESTART_RESTARTBLOCK; + ret = set_restart_fn(restart, futex_wait_restart); out: if (to) { diff --git a/kernel/time/alarmtimer.c b/kernel/time/alarmtimer.c index 98d7a15e8cf6..4d94e2b5499d 100644 --- a/kernel/time/alarmtimer.c +++ b/kernel/time/alarmtimer.c @@ -854,9 +854,9 @@ static int alarm_timer_nsleep(const clockid_t which_clock, int flags, if (flags == TIMER_ABSTIME) return -ERESTARTNOHAND; - restart->fn = alarm_timer_nsleep_restart; restart->nanosleep.clockid = type; restart->nanosleep.expires = exp; + set_restart_fn(restart, alarm_timer_nsleep_restart); return ret; } diff --git a/kernel/time/hrtimer.c b/kernel/time/hrtimer.c index 788b9d137de4..5c9d968187ae 100644 --- a/kernel/time/hrtimer.c +++ b/kernel/time/hrtimer.c @@ -1957,9 +1957,9 @@ long hrtimer_nanosleep(ktime_t rqtp, const enum hrtimer_mode mode, } restart = ¤t->restart_block; - restart->fn = hrtimer_nanosleep_restart; restart->nanosleep.clockid = t.timer.base->clockid; restart->nanosleep.expires = hrtimer_get_expires_tv64(&t.timer); + set_restart_fn(restart, hrtimer_nanosleep_restart); out: destroy_hrtimer_on_stack(&t.timer); return ret; diff --git a/kernel/time/posix-cpu-timers.c b/kernel/time/posix-cpu-timers.c index a71758e34e45..9abe15255bc4 100644 --- a/kernel/time/posix-cpu-timers.c +++ b/kernel/time/posix-cpu-timers.c @@ -1480,8 +1480,8 @@ static int posix_cpu_nsleep(const clockid_t which_clock, int flags, if (flags & TIMER_ABSTIME) return -ERESTARTNOHAND; - restart_block->fn = posix_cpu_nsleep_restart; restart_block->nanosleep.clockid = which_clock; + set_restart_fn(restart_block, posix_cpu_nsleep_restart); } return error; } From 66c1b6d74cd7035e85c426f0af4aede19e805c8a Mon Sep 17 00:00:00 2001 From: Oleg Nesterov Date: Mon, 1 Feb 2021 18:46:49 +0100 Subject: [PATCH 32/67] x86: Move TS_COMPAT back to asm/thread_info.h Move TS_COMPAT back to asm/thread_info.h, close to TS_I386_REGS_POKED. It was moved to asm/processor.h by b9d989c7218a ("x86/asm: Move the thread_info::status field to thread_struct"), then later 37a8f7c38339 ("x86/asm: Move 'status' from thread_struct to thread_info") moved the 'status' field back but TS_COMPAT was forgotten. Preparatory patch to fix the COMPAT case for get_nr_restart_syscall() Fixes: 609c19a385c8 ("x86/ptrace: Stop setting TS_COMPAT in ptrace code") Signed-off-by: Oleg Nesterov Signed-off-by: Thomas Gleixner Cc: stable@vger.kernel.org Link: https://lore.kernel.org/r/20210201174649.GA17880@redhat.com --- arch/x86/include/asm/processor.h | 9 --------- arch/x86/include/asm/thread_info.h | 9 +++++++++ 2 files changed, 9 insertions(+), 9 deletions(-) diff --git a/arch/x86/include/asm/processor.h b/arch/x86/include/asm/processor.h index dc6d149bf851..f1b9ed5efaa9 100644 --- a/arch/x86/include/asm/processor.h +++ b/arch/x86/include/asm/processor.h @@ -551,15 +551,6 @@ static inline void arch_thread_struct_whitelist(unsigned long *offset, *size = fpu_kernel_xstate_size; } -/* - * Thread-synchronous status. - * - * This is different from the flags in that nobody else - * ever touches our thread-synchronous status, so we don't - * have to worry about atomic accesses. - */ -#define TS_COMPAT 0x0002 /* 32bit syscall active (64BIT)*/ - static inline void native_load_sp0(unsigned long sp0) { diff --git a/arch/x86/include/asm/thread_info.h b/arch/x86/include/asm/thread_info.h index 0d751d5da702..c2dc29e215ea 100644 --- a/arch/x86/include/asm/thread_info.h +++ b/arch/x86/include/asm/thread_info.h @@ -205,6 +205,15 @@ static inline int arch_within_stack_frames(const void * const stack, #endif +/* + * Thread-synchronous status. + * + * This is different from the flags in that nobody else + * ever touches our thread-synchronous status, so we don't + * have to worry about atomic accesses. + */ +#define TS_COMPAT 0x0002 /* 32bit syscall active (64BIT)*/ + #ifdef CONFIG_COMPAT #define TS_I386_REGS_POKED 0x0004 /* regs poked by 32-bit ptracer */ #endif From 8c150ba2fb5995c84a7a43848250d444a3329a7d Mon Sep 17 00:00:00 2001 From: Oleg Nesterov Date: Mon, 1 Feb 2021 18:47:09 +0100 Subject: [PATCH 33/67] x86: Introduce TS_COMPAT_RESTART to fix get_nr_restart_syscall() The comment in get_nr_restart_syscall() says: * The problem is that we can get here when ptrace pokes * syscall-like values into regs even if we're not in a syscall * at all. Yes, but if not in a syscall then the status & (TS_COMPAT|TS_I386_REGS_POKED) check below can't really help: - TS_COMPAT can't be set - TS_I386_REGS_POKED is only set if regs->orig_ax was changed by 32bit debugger; and even in this case get_nr_restart_syscall() is only correct if the tracee is 32bit too. Suppose that a 64bit debugger plays with a 32bit tracee and * Tracee calls sleep(2) // TS_COMPAT is set * User interrupts the tracee by CTRL-C after 1 sec and does "(gdb) call func()" * gdb saves the regs by PTRACE_GETREGS * does PTRACE_SETREGS to set %rip='func' and %orig_rax=-1 * PTRACE_CONT // TS_COMPAT is cleared * func() hits int3. * Debugger catches SIGTRAP. * Restore original regs by PTRACE_SETREGS. * PTRACE_CONT get_nr_restart_syscall() wrongly returns __NR_restart_syscall==219, the tracee calls ia32_sys_call_table[219] == sys_madvise. Add the sticky TS_COMPAT_RESTART flag which survives after return to user mode. It's going to be removed in the next step again by storing the information in the restart block. As a further cleanup it might be possible to remove also TS_I386_REGS_POKED with that. Test-case: $ cvs -d :pserver:anoncvs:anoncvs@sourceware.org:/cvs/systemtap co ptrace-tests $ gcc -o erestartsys-trap-debuggee ptrace-tests/tests/erestartsys-trap-debuggee.c --m32 $ gcc -o erestartsys-trap-debugger ptrace-tests/tests/erestartsys-trap-debugger.c -lutil $ ./erestartsys-trap-debugger Unexpected: retval 1, errno 22 erestartsys-trap-debugger: ptrace-tests/tests/erestartsys-trap-debugger.c:421 Fixes: 609c19a385c8 ("x86/ptrace: Stop setting TS_COMPAT in ptrace code") Reported-by: Jan Kratochvil Signed-off-by: Oleg Nesterov Signed-off-by: Thomas Gleixner Cc: stable@vger.kernel.org Link: https://lore.kernel.org/r/20210201174709.GA17895@redhat.com --- arch/x86/include/asm/thread_info.h | 14 +++++++++++++- arch/x86/kernel/signal.c | 24 +----------------------- 2 files changed, 14 insertions(+), 24 deletions(-) diff --git a/arch/x86/include/asm/thread_info.h b/arch/x86/include/asm/thread_info.h index c2dc29e215ea..30d1d187019f 100644 --- a/arch/x86/include/asm/thread_info.h +++ b/arch/x86/include/asm/thread_info.h @@ -214,10 +214,22 @@ static inline int arch_within_stack_frames(const void * const stack, */ #define TS_COMPAT 0x0002 /* 32bit syscall active (64BIT)*/ +#ifndef __ASSEMBLY__ #ifdef CONFIG_COMPAT #define TS_I386_REGS_POKED 0x0004 /* regs poked by 32-bit ptracer */ +#define TS_COMPAT_RESTART 0x0008 + +#define arch_set_restart_data arch_set_restart_data + +static inline void arch_set_restart_data(struct restart_block *restart) +{ + struct thread_info *ti = current_thread_info(); + if (ti->status & TS_COMPAT) + ti->status |= TS_COMPAT_RESTART; + else + ti->status &= ~TS_COMPAT_RESTART; +} #endif -#ifndef __ASSEMBLY__ #ifdef CONFIG_X86_32 #define in_ia32_syscall() true diff --git a/arch/x86/kernel/signal.c b/arch/x86/kernel/signal.c index ea794a083c44..6c26d2c3a2e4 100644 --- a/arch/x86/kernel/signal.c +++ b/arch/x86/kernel/signal.c @@ -766,30 +766,8 @@ handle_signal(struct ksignal *ksig, struct pt_regs *regs) static inline unsigned long get_nr_restart_syscall(const struct pt_regs *regs) { - /* - * This function is fundamentally broken as currently - * implemented. - * - * The idea is that we want to trigger a call to the - * restart_block() syscall and that we want in_ia32_syscall(), - * in_x32_syscall(), etc. to match whatever they were in the - * syscall being restarted. We assume that the syscall - * instruction at (regs->ip - 2) matches whatever syscall - * instruction we used to enter in the first place. - * - * The problem is that we can get here when ptrace pokes - * syscall-like values into regs even if we're not in a syscall - * at all. - * - * For now, we maintain historical behavior and guess based on - * stored state. We could do better by saving the actual - * syscall arch in restart_block or (with caveats on x32) by - * checking if regs->ip points to 'int $0x80'. The current - * behavior is incorrect if a tracer has a different bitness - * than the tracee. - */ #ifdef CONFIG_IA32_EMULATION - if (current_thread_info()->status & (TS_COMPAT|TS_I386_REGS_POKED)) + if (current_thread_info()->status & TS_COMPAT_RESTART) return __NR_ia32_restart_syscall; #endif #ifdef CONFIG_X86_X32_ABI From b2e9df850c58c2b36e915e7d3bed3f6107cccba6 Mon Sep 17 00:00:00 2001 From: Oleg Nesterov Date: Mon, 1 Feb 2021 18:47:16 +0100 Subject: [PATCH 34/67] x86: Introduce restart_block->arch_data to remove TS_COMPAT_RESTART Save the current_thread_info()->status of X86 in the new restart_block->arch_data field so TS_COMPAT_RESTART can be removed again. Signed-off-by: Oleg Nesterov Signed-off-by: Thomas Gleixner Link: https://lore.kernel.org/r/20210201174716.GA17898@redhat.com --- arch/x86/include/asm/thread_info.h | 12 ++---------- arch/x86/kernel/signal.c | 2 +- include/linux/restart_block.h | 1 + 3 files changed, 4 insertions(+), 11 deletions(-) diff --git a/arch/x86/include/asm/thread_info.h b/arch/x86/include/asm/thread_info.h index 30d1d187019f..06b740bae431 100644 --- a/arch/x86/include/asm/thread_info.h +++ b/arch/x86/include/asm/thread_info.h @@ -217,18 +217,10 @@ static inline int arch_within_stack_frames(const void * const stack, #ifndef __ASSEMBLY__ #ifdef CONFIG_COMPAT #define TS_I386_REGS_POKED 0x0004 /* regs poked by 32-bit ptracer */ -#define TS_COMPAT_RESTART 0x0008 -#define arch_set_restart_data arch_set_restart_data +#define arch_set_restart_data(restart) \ + do { restart->arch_data = current_thread_info()->status; } while (0) -static inline void arch_set_restart_data(struct restart_block *restart) -{ - struct thread_info *ti = current_thread_info(); - if (ti->status & TS_COMPAT) - ti->status |= TS_COMPAT_RESTART; - else - ti->status &= ~TS_COMPAT_RESTART; -} #endif #ifdef CONFIG_X86_32 diff --git a/arch/x86/kernel/signal.c b/arch/x86/kernel/signal.c index 6c26d2c3a2e4..f306e85a08a6 100644 --- a/arch/x86/kernel/signal.c +++ b/arch/x86/kernel/signal.c @@ -767,7 +767,7 @@ handle_signal(struct ksignal *ksig, struct pt_regs *regs) static inline unsigned long get_nr_restart_syscall(const struct pt_regs *regs) { #ifdef CONFIG_IA32_EMULATION - if (current_thread_info()->status & TS_COMPAT_RESTART) + if (current->restart_block.arch_data & TS_COMPAT) return __NR_ia32_restart_syscall; #endif #ifdef CONFIG_X86_X32_ABI diff --git a/include/linux/restart_block.h b/include/linux/restart_block.h index bba2920e9c05..980a65594412 100644 --- a/include/linux/restart_block.h +++ b/include/linux/restart_block.h @@ -23,6 +23,7 @@ enum timespec_type { * System call restart block. */ struct restart_block { + unsigned long arch_data; long (*fn)(struct restart_block *); union { /* For futex_wait and futex_wait_requeue_pi */ From cc7a0bb058b85ea03db87169c60c7cfdd5d34678 Mon Sep 17 00:00:00 2001 From: Tyrel Datwyler Date: Mon, 15 Mar 2021 15:48:21 -0600 Subject: [PATCH 35/67] PCI: rpadlpar: Fix potential drc_name corruption in store functions MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Both add_slot_store() and remove_slot_store() try to fix up the drc_name copied from the store buffer by placing a NUL terminator at nbyte + 1 or in place of a '\n' if present. However, the static buffer that we copy the drc_name data into is not zeroed and can contain anything past the n-th byte. This is problematic if a '\n' byte appears in that buffer after nbytes and the string copied into the store buffer was not NUL terminated to start with as the strchr() search for a '\n' byte will mark this incorrectly as the end of the drc_name string resulting in a drc_name string that contains garbage data after the n-th byte. Additionally it will cause us to overwrite that '\n' byte on the stack with NUL, potentially corrupting data on the stack. The following debugging shows an example of the drmgr utility writing "PHB 4543" to the add_slot sysfs attribute, but add_slot_store() logging a corrupted string value. drmgr: drmgr: -c phb -a -s PHB 4543 -d 1 add_slot_store: drc_name = PHB 4543°|<82>!, rc = -19 Fix this by using strscpy() instead of memcpy() to ensure the string is NUL terminated when copied into the static drc_name buffer. Further, since the string is now NUL terminated the code only needs to change '\n' to '\0' when present. Cc: stable@vger.kernel.org Signed-off-by: Tyrel Datwyler [mpe: Reformat change log and add mention of possible stack corruption] Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20210315214821.452959-1-tyreld@linux.ibm.com --- drivers/pci/hotplug/rpadlpar_sysfs.c | 14 ++++++-------- 1 file changed, 6 insertions(+), 8 deletions(-) diff --git a/drivers/pci/hotplug/rpadlpar_sysfs.c b/drivers/pci/hotplug/rpadlpar_sysfs.c index cdbfa5df3a51..dbfa0b55d31a 100644 --- a/drivers/pci/hotplug/rpadlpar_sysfs.c +++ b/drivers/pci/hotplug/rpadlpar_sysfs.c @@ -34,12 +34,11 @@ static ssize_t add_slot_store(struct kobject *kobj, struct kobj_attribute *attr, if (nbytes >= MAX_DRC_NAME_LEN) return 0; - memcpy(drc_name, buf, nbytes); + strscpy(drc_name, buf, nbytes + 1); end = strchr(drc_name, '\n'); - if (!end) - end = &drc_name[nbytes]; - *end = '\0'; + if (end) + *end = '\0'; rc = dlpar_add_slot(drc_name); if (rc) @@ -65,12 +64,11 @@ static ssize_t remove_slot_store(struct kobject *kobj, if (nbytes >= MAX_DRC_NAME_LEN) return 0; - memcpy(drc_name, buf, nbytes); + strscpy(drc_name, buf, nbytes + 1); end = strchr(drc_name, '\n'); - if (!end) - end = &drc_name[nbytes]; - *end = '\0'; + if (end) + *end = '\0'; rc = dlpar_remove_slot(drc_name); if (rc) From 483028edacab374060d93955382b4865a9e07cba Mon Sep 17 00:00:00 2001 From: Shawn Guo Date: Wed, 17 Mar 2021 14:36:06 +0800 Subject: [PATCH 36/67] efivars: respect EFI_UNSUPPORTED return from firmware As per UEFI spec 2.8B section 8.2, EFI_UNSUPPORTED may be returned by EFI variable runtime services if no variable storage is supported by firmware. In this case, there is no point for kernel to continue efivars initialization. That said, efivar_init() should fail by returning an error code, so that efivarfs will not be mounted on /sys/firmware/efi/efivars at all. Otherwise, user space like efibootmgr will be confused by the EFIVARFS_MAGIC seen there, while EFI variable calls cannot be made successfully. Cc: # v5.10+ Signed-off-by: Shawn Guo Acked-by: Ard Biesheuvel Signed-off-by: Ard Biesheuvel --- drivers/firmware/efi/vars.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/drivers/firmware/efi/vars.c b/drivers/firmware/efi/vars.c index 41c1d00bf933..abdc8a6a3963 100644 --- a/drivers/firmware/efi/vars.c +++ b/drivers/firmware/efi/vars.c @@ -484,6 +484,10 @@ int efivar_init(int (*func)(efi_char16_t *, efi_guid_t, unsigned long, void *), } } + break; + case EFI_UNSUPPORTED: + err = -EOPNOTSUPP; + status = EFI_NOT_FOUND; break; case EFI_NOT_FOUND: break; From 5de2055d31ea88fd9ae9709ac95c372a505a60fa Mon Sep 17 00:00:00 2001 From: Waiman Long Date: Tue, 16 Mar 2021 11:31:16 -0400 Subject: [PATCH 37/67] locking/ww_mutex: Simplify use_ww_ctx & ww_ctx handling The use_ww_ctx flag is passed to mutex_optimistic_spin(), but the function doesn't use it. The frequent use of the (use_ww_ctx && ww_ctx) combination is repetitive. In fact, ww_ctx should not be used at all if !use_ww_ctx. Simplify ww_mutex code by dropping use_ww_ctx from mutex_optimistic_spin() an clear ww_ctx if !use_ww_ctx. In this way, we can replace (use_ww_ctx && ww_ctx) by just (ww_ctx). Signed-off-by: Waiman Long Signed-off-by: Ingo Molnar Acked-by: Davidlohr Bueso Link: https://lore.kernel.org/r/20210316153119.13802-2-longman@redhat.com --- kernel/locking/mutex.c | 25 ++++++++++++++----------- 1 file changed, 14 insertions(+), 11 deletions(-) diff --git a/kernel/locking/mutex.c b/kernel/locking/mutex.c index adb935090768..622ebdfcd083 100644 --- a/kernel/locking/mutex.c +++ b/kernel/locking/mutex.c @@ -626,7 +626,7 @@ static inline int mutex_can_spin_on_owner(struct mutex *lock) */ static __always_inline bool mutex_optimistic_spin(struct mutex *lock, struct ww_acquire_ctx *ww_ctx, - const bool use_ww_ctx, struct mutex_waiter *waiter) + struct mutex_waiter *waiter) { if (!waiter) { /* @@ -702,7 +702,7 @@ mutex_optimistic_spin(struct mutex *lock, struct ww_acquire_ctx *ww_ctx, #else static __always_inline bool mutex_optimistic_spin(struct mutex *lock, struct ww_acquire_ctx *ww_ctx, - const bool use_ww_ctx, struct mutex_waiter *waiter) + struct mutex_waiter *waiter) { return false; } @@ -922,6 +922,9 @@ __mutex_lock_common(struct mutex *lock, long state, unsigned int subclass, struct ww_mutex *ww; int ret; + if (!use_ww_ctx) + ww_ctx = NULL; + might_sleep(); #ifdef CONFIG_DEBUG_MUTEXES @@ -929,7 +932,7 @@ __mutex_lock_common(struct mutex *lock, long state, unsigned int subclass, #endif ww = container_of(lock, struct ww_mutex, base); - if (use_ww_ctx && ww_ctx) { + if (ww_ctx) { if (unlikely(ww_ctx == READ_ONCE(ww->ctx))) return -EALREADY; @@ -946,10 +949,10 @@ __mutex_lock_common(struct mutex *lock, long state, unsigned int subclass, mutex_acquire_nest(&lock->dep_map, subclass, 0, nest_lock, ip); if (__mutex_trylock(lock) || - mutex_optimistic_spin(lock, ww_ctx, use_ww_ctx, NULL)) { + mutex_optimistic_spin(lock, ww_ctx, NULL)) { /* got the lock, yay! */ lock_acquired(&lock->dep_map, ip); - if (use_ww_ctx && ww_ctx) + if (ww_ctx) ww_mutex_set_context_fastpath(ww, ww_ctx); preempt_enable(); return 0; @@ -960,7 +963,7 @@ __mutex_lock_common(struct mutex *lock, long state, unsigned int subclass, * After waiting to acquire the wait_lock, try again. */ if (__mutex_trylock(lock)) { - if (use_ww_ctx && ww_ctx) + if (ww_ctx) __ww_mutex_check_waiters(lock, ww_ctx); goto skip_wait; @@ -1013,7 +1016,7 @@ __mutex_lock_common(struct mutex *lock, long state, unsigned int subclass, goto err; } - if (use_ww_ctx && ww_ctx) { + if (ww_ctx) { ret = __ww_mutex_check_kill(lock, &waiter, ww_ctx); if (ret) goto err; @@ -1026,7 +1029,7 @@ __mutex_lock_common(struct mutex *lock, long state, unsigned int subclass, * ww_mutex needs to always recheck its position since its waiter * list is not FIFO ordered. */ - if ((use_ww_ctx && ww_ctx) || !first) { + if (ww_ctx || !first) { first = __mutex_waiter_is_first(lock, &waiter); if (first) __mutex_set_flag(lock, MUTEX_FLAG_HANDOFF); @@ -1039,7 +1042,7 @@ __mutex_lock_common(struct mutex *lock, long state, unsigned int subclass, * or we must see its unlock and acquire. */ if (__mutex_trylock(lock) || - (first && mutex_optimistic_spin(lock, ww_ctx, use_ww_ctx, &waiter))) + (first && mutex_optimistic_spin(lock, ww_ctx, &waiter))) break; spin_lock(&lock->wait_lock); @@ -1048,7 +1051,7 @@ __mutex_lock_common(struct mutex *lock, long state, unsigned int subclass, acquired: __set_current_state(TASK_RUNNING); - if (use_ww_ctx && ww_ctx) { + if (ww_ctx) { /* * Wound-Wait; we stole the lock (!first_waiter), check the * waiters as anyone might want to wound us. @@ -1068,7 +1071,7 @@ __mutex_lock_common(struct mutex *lock, long state, unsigned int subclass, /* got the lock - cleanup and rejoice! */ lock_acquired(&lock->dep_map, ip); - if (use_ww_ctx && ww_ctx) + if (ww_ctx) ww_mutex_lock_acquired(ww, ww_ctx); spin_unlock(&lock->wait_lock); From bee645788e07eea63055d261d2884ea45c2ba857 Mon Sep 17 00:00:00 2001 From: Waiman Long Date: Tue, 16 Mar 2021 11:31:17 -0400 Subject: [PATCH 38/67] locking/ww_mutex: Fix acquire/release imbalance in ww_acquire_init()/ww_acquire_fini() In ww_acquire_init(), mutex_acquire() is gated by CONFIG_DEBUG_LOCK_ALLOC. The dep_map in the ww_acquire_ctx structure is also gated by the same config. However mutex_release() in ww_acquire_fini() is gated by CONFIG_DEBUG_MUTEXES. It is possible to set CONFIG_DEBUG_MUTEXES without setting CONFIG_DEBUG_LOCK_ALLOC though it is an unlikely configuration. That may cause a compilation error as dep_map isn't defined in this case. Fix this potential problem by enclosing mutex_release() inside CONFIG_DEBUG_LOCK_ALLOC. Signed-off-by: Waiman Long Signed-off-by: Ingo Molnar Link: https://lore.kernel.org/r/20210316153119.13802-3-longman@redhat.com --- include/linux/ww_mutex.h | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/include/linux/ww_mutex.h b/include/linux/ww_mutex.h index 850424e5d030..6ecf2a0220db 100644 --- a/include/linux/ww_mutex.h +++ b/include/linux/ww_mutex.h @@ -173,9 +173,10 @@ static inline void ww_acquire_done(struct ww_acquire_ctx *ctx) */ static inline void ww_acquire_fini(struct ww_acquire_ctx *ctx) { -#ifdef CONFIG_DEBUG_MUTEXES +#ifdef CONFIG_DEBUG_LOCK_ALLOC mutex_release(&ctx->dep_map, _THIS_IP_); - +#endif +#ifdef CONFIG_DEBUG_MUTEXES DEBUG_LOCKS_WARN_ON(ctx->acquired); if (!IS_ENABLED(CONFIG_PROVE_LOCKING)) /* From 9858af27e69247c5d04c3b093190a93ca365f33d Mon Sep 17 00:00:00 2001 From: Colin Ian King Date: Thu, 11 Mar 2021 10:44:45 +0000 Subject: [PATCH 39/67] usbip: Fix incorrect double assignment to udc->ud.tcp_rx Currently udc->ud.tcp_rx is being assigned twice, the second assignment is incorrect, it should be to udc->ud.tcp_tx instead of rx. Fix this. Fixes: 46613c9dfa96 ("usbip: fix vudc usbip_sockfd_store races leading to gpf") Acked-by: Shuah Khan Signed-off-by: Colin Ian King Cc: stable Addresses-Coverity: ("Unused value") Link: https://lore.kernel.org/r/20210311104445.7811-1-colin.king@canonical.com Signed-off-by: Greg Kroah-Hartman --- drivers/usb/usbip/vudc_sysfs.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/usb/usbip/vudc_sysfs.c b/drivers/usb/usbip/vudc_sysfs.c index a3ec39fc6177..7383a543c6d1 100644 --- a/drivers/usb/usbip/vudc_sysfs.c +++ b/drivers/usb/usbip/vudc_sysfs.c @@ -174,7 +174,7 @@ static ssize_t usbip_sockfd_store(struct device *dev, udc->ud.tcp_socket = socket; udc->ud.tcp_rx = tcp_rx; - udc->ud.tcp_rx = tcp_tx; + udc->ud.tcp_tx = tcp_tx; udc->ud.status = SDEV_ST_USED; spin_unlock_irq(&udc->ud.lock); From 98f153a10da403ddd5e9d98a3c8c2bb54bb5a0b6 Mon Sep 17 00:00:00 2001 From: Jim Lin Date: Thu, 11 Mar 2021 14:42:41 +0800 Subject: [PATCH 40/67] usb: gadget: configfs: Fix KASAN use-after-free When gadget is disconnected, running sequence is like this. . composite_disconnect . Call trace: usb_string_copy+0xd0/0x128 gadget_config_name_configuration_store+0x4 gadget_config_name_attr_store+0x40/0x50 configfs_write_file+0x198/0x1f4 vfs_write+0x100/0x220 SyS_write+0x58/0xa8 . configfs_composite_unbind . configfs_composite_bind In configfs_composite_bind, it has "cn->strings.s = cn->configuration;" When usb_string_copy is invoked. it would allocate memory, copy input string, release previous pointed memory space, and use new allocated memory. When gadget is connected, host sends down request to get information. Call trace: usb_gadget_get_string+0xec/0x168 lookup_string+0x64/0x98 composite_setup+0xa34/0x1ee8 If gadget is disconnected and connected quickly, in the failed case, cn->configuration memory has been released by usb_string_copy kfree but configfs_composite_bind hasn't been run in time to assign new allocated "cn->configuration" pointer to "cn->strings.s". When "strlen(s->s) of usb_gadget_get_string is being executed, the dangling memory is accessed, "BUG: KASAN: use-after-free" error occurs. Cc: stable@vger.kernel.org Signed-off-by: Jim Lin Signed-off-by: Macpaul Lin Link: https://lore.kernel.org/r/1615444961-13376-1-git-send-email-macpaul.lin@mediatek.com Signed-off-by: Greg Kroah-Hartman --- drivers/usb/gadget/configfs.c | 14 ++++++++++---- 1 file changed, 10 insertions(+), 4 deletions(-) diff --git a/drivers/usb/gadget/configfs.c b/drivers/usb/gadget/configfs.c index 0d56f33d63c2..15a607ccef8a 100644 --- a/drivers/usb/gadget/configfs.c +++ b/drivers/usb/gadget/configfs.c @@ -97,6 +97,8 @@ struct gadget_config_name { struct list_head list; }; +#define USB_MAX_STRING_WITH_NULL_LEN (USB_MAX_STRING_LEN+1) + static int usb_string_copy(const char *s, char **s_copy) { int ret; @@ -106,12 +108,16 @@ static int usb_string_copy(const char *s, char **s_copy) if (ret > USB_MAX_STRING_LEN) return -EOVERFLOW; - str = kstrdup(s, GFP_KERNEL); - if (!str) - return -ENOMEM; + if (copy) { + str = copy; + } else { + str = kmalloc(USB_MAX_STRING_WITH_NULL_LEN, GFP_KERNEL); + if (!str) + return -ENOMEM; + } + strcpy(str, s); if (str[ret - 1] == '\n') str[ret - 1] = '\0'; - kfree(copy); *s_copy = str; return 0; } From 546aa0e4ea6ed81b6c51baeebc4364542fa3f3a7 Mon Sep 17 00:00:00 2001 From: Alan Stern Date: Wed, 17 Mar 2021 15:06:54 -0400 Subject: [PATCH 41/67] usb-storage: Add quirk to defeat Kindle's automatic unload Matthias reports that the Amazon Kindle automatically removes its emulated media if it doesn't receive another SCSI command within about one second after a SYNCHRONIZE CACHE. It does so even when the host has sent a PREVENT MEDIUM REMOVAL command. The reason for this behavior isn't clear, although it's not hard to make some guesses. At any rate, the results can be unexpected for anyone who tries to access the Kindle in an unusual fashion, and in theory they can lead to data loss (for example, if one file is closed and synchronized while other files are still in the middle of being written). To avoid such problems, this patch creates a new usb-storage quirks flag telling the driver always to issue a REQUEST SENSE following a SYNCHRONIZE CACHE command, and adds an unusual_devs entry for the Kindle with the flag set. This is sufficient to prevent the Kindle from doing its automatic unload, without interfering with proper operation. Another possible way to deal with this would be to increase the frequency of TEST UNIT READY polling that the kernel normally carries out for removable-media storage devices. However that would increase the overall load on the system and it is not as reliable, because the user can override the polling interval. Changing the driver's behavior is safer and has minimal overhead. CC: Reported-and-tested-by: Matthias Schwarzott Signed-off-by: Alan Stern Link: https://lore.kernel.org/r/20210317190654.GA497856@rowland.harvard.edu Signed-off-by: Greg Kroah-Hartman --- drivers/usb/storage/transport.c | 7 +++++++ drivers/usb/storage/unusual_devs.h | 12 ++++++++++++ include/linux/usb_usual.h | 2 ++ 3 files changed, 21 insertions(+) diff --git a/drivers/usb/storage/transport.c b/drivers/usb/storage/transport.c index 5eb895b19c55..f4304ce69350 100644 --- a/drivers/usb/storage/transport.c +++ b/drivers/usb/storage/transport.c @@ -656,6 +656,13 @@ void usb_stor_invoke_transport(struct scsi_cmnd *srb, struct us_data *us) need_auto_sense = 1; } + /* Some devices (Kindle) require another command after SYNC CACHE */ + if ((us->fflags & US_FL_SENSE_AFTER_SYNC) && + srb->cmnd[0] == SYNCHRONIZE_CACHE) { + usb_stor_dbg(us, "-- sense after SYNC CACHE\n"); + need_auto_sense = 1; + } + /* * If we have a failure, we're going to do a REQUEST_SENSE * automatically. Note that we differentiate between a command diff --git a/drivers/usb/storage/unusual_devs.h b/drivers/usb/storage/unusual_devs.h index 5732e9691f08..efa972be2ee3 100644 --- a/drivers/usb/storage/unusual_devs.h +++ b/drivers/usb/storage/unusual_devs.h @@ -2211,6 +2211,18 @@ UNUSUAL_DEV( 0x1908, 0x3335, 0x0200, 0x0200, USB_SC_DEVICE, USB_PR_DEVICE, NULL, US_FL_NO_READ_DISC_INFO ), +/* + * Reported by Matthias Schwarzott + * The Amazon Kindle treats SYNCHRONIZE CACHE as an indication that + * the host may be finished with it, and automatically ejects its + * emulated media unless it receives another command within one second. + */ +UNUSUAL_DEV( 0x1949, 0x0004, 0x0000, 0x9999, + "Amazon", + "Kindle", + USB_SC_DEVICE, USB_PR_DEVICE, NULL, + US_FL_SENSE_AFTER_SYNC ), + /* * Reported by Oliver Neukum * This device morphes spontaneously into another device if the access diff --git a/include/linux/usb_usual.h b/include/linux/usb_usual.h index 6b03fdd69d27..712363c7a2e8 100644 --- a/include/linux/usb_usual.h +++ b/include/linux/usb_usual.h @@ -86,6 +86,8 @@ /* lies about caching, so always sync */ \ US_FLAG(NO_SAME, 0x40000000) \ /* Cannot handle WRITE_SAME */ \ + US_FLAG(SENSE_AFTER_SYNC, 0x80000000) \ + /* Do REQUEST_SENSE after SYNCHRONIZE_CACHE */ \ #define US_FLAG(name, value) US_FL_##name = value , enum { US_DO_ALL_FLAGS }; From 3cac9104bea41099cf622091f0c0538bcb19050d Mon Sep 17 00:00:00 2001 From: Elias Rudberg Date: Thu, 11 Mar 2021 13:47:10 +0100 Subject: [PATCH 42/67] usb: typec: Remove vdo[3] part of tps6598x_rx_identity_reg struct MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Remove the unused "u32 vdo[3]" part in the tps6598x_rx_identity_reg struct. This helps avoid "failed to register partner" errors which happen when tps6598x_read_partner_identity() fails because the amount of data read is 12 bytes smaller than the struct size. Note that vdo[3] is already in usb_pd_identity and hence shouldn't be added to tps6598x_rx_identity_reg as well. Fixes: f6c56ca91b92 ("usb: typec: Add the Product Type VDOs to struct usb_pd_identity") Reviewed-by: Heikki Krogerus Reviewed-by: Guido Günther Signed-off-by: Elias Rudberg Cc: stable Link: https://lore.kernel.org/r/20210311124710.6563-1-mail@eliasrudberg.se Signed-off-by: Greg Kroah-Hartman --- drivers/usb/typec/tps6598x.c | 1 - 1 file changed, 1 deletion(-) diff --git a/drivers/usb/typec/tps6598x.c b/drivers/usb/typec/tps6598x.c index 6e6ef6317523..29bd1c5a283c 100644 --- a/drivers/usb/typec/tps6598x.c +++ b/drivers/usb/typec/tps6598x.c @@ -64,7 +64,6 @@ enum { struct tps6598x_rx_identity_reg { u8 status; struct usb_pd_identity identity; - u32 vdo[3]; } __packed; /* Standard Task return codes */ From 86629e098a077922438efa98dc80917604dfd317 Mon Sep 17 00:00:00 2001 From: Badhri Jagan Sridharan Date: Wed, 17 Mar 2021 11:12:48 -0700 Subject: [PATCH 43/67] usb: typec: tcpm: Invoke power_supply_changed for tcpm-source-psy- tcpm-source-psy- does not invoke power_supply_changed API when one of the published power supply properties is changed. power_supply_changed needs to be called to notify userspace clients(uevents) and kernel clients. Fixes: f2a8aa053c176 ("typec: tcpm: Represent source supply through power_supply") Reviewed-by: Guenter Roeck Reviewed-by: Heikki Krogerus Signed-off-by: Badhri Jagan Sridharan Cc: stable Link: https://lore.kernel.org/r/20210317181249.1062995-1-badhri@google.com Signed-off-by: Greg Kroah-Hartman --- drivers/usb/typec/tcpm/tcpm.c | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) diff --git a/drivers/usb/typec/tcpm/tcpm.c b/drivers/usb/typec/tcpm/tcpm.c index be0b6469dd3d..92093ea12cff 100644 --- a/drivers/usb/typec/tcpm/tcpm.c +++ b/drivers/usb/typec/tcpm/tcpm.c @@ -942,6 +942,7 @@ static int tcpm_set_current_limit(struct tcpm_port *port, u32 max_ma, u32 mv) port->supply_voltage = mv; port->current_limit = max_ma; + power_supply_changed(port->psy); if (port->tcpc->set_current_limit) ret = port->tcpc->set_current_limit(port->tcpc, max_ma, mv); @@ -2928,6 +2929,7 @@ static int tcpm_pd_select_pdo(struct tcpm_port *port, int *sink_pdo, port->pps_data.supported = false; port->usb_type = POWER_SUPPLY_USB_TYPE_PD; + power_supply_changed(port->psy); /* * Select the source PDO providing the most power which has a @@ -2952,6 +2954,7 @@ static int tcpm_pd_select_pdo(struct tcpm_port *port, int *sink_pdo, port->pps_data.supported = true; port->usb_type = POWER_SUPPLY_USB_TYPE_PD_PPS; + power_supply_changed(port->psy); } continue; default: @@ -3109,6 +3112,7 @@ static unsigned int tcpm_pd_select_pps_apdo(struct tcpm_port *port) port->pps_data.out_volt)); port->pps_data.op_curr = min(port->pps_data.max_curr, port->pps_data.op_curr); + power_supply_changed(port->psy); } return src_pdo; @@ -3344,6 +3348,7 @@ static int tcpm_set_charge(struct tcpm_port *port, bool charge) return ret; } port->vbus_charge = charge; + power_supply_changed(port->psy); return 0; } @@ -3523,6 +3528,7 @@ static void tcpm_reset_port(struct tcpm_port *port) port->try_src_count = 0; port->try_snk_count = 0; port->usb_type = POWER_SUPPLY_USB_TYPE_C; + power_supply_changed(port->psy); port->nr_sink_caps = 0; port->sink_cap_done = false; if (port->tcpc->enable_frs) @@ -5905,7 +5911,7 @@ static int tcpm_psy_set_prop(struct power_supply *psy, ret = -EINVAL; break; } - + power_supply_changed(port->psy); return ret; } @@ -6058,6 +6064,7 @@ struct tcpm_port *tcpm_register_port(struct device *dev, struct tcpc_dev *tcpc) err = devm_tcpm_psy_register(port); if (err) goto out_role_sw_put; + power_supply_changed(port->psy); port->typec_port = typec_register_port(port->dev, &port->typec_caps); if (IS_ERR(port->typec_port)) { From f09ddcfcb8c569675066337adac2ac205113471f Mon Sep 17 00:00:00 2001 From: Wesley Cheng Date: Thu, 11 Mar 2021 15:59:02 -0800 Subject: [PATCH 44/67] usb: dwc3: gadget: Prevent EP queuing while stopping transfers In the situations where the DWC3 gadget stops active transfers, once calling the dwc3_gadget_giveback(), there is a chance where a function driver can queue a new USB request in between the time where the dwc3 lock has been released and re-aquired. This occurs after we've already issued an ENDXFER command. When the stop active transfers continues to remove USB requests from all dep lists, the newly added request will also be removed, while controller still has an active TRB for it. This can lead to the controller accessing an unmapped memory address. Fix this by ensuring parameters to prevent EP queuing are set before calling the stop active transfers API. Fixes: ae7e86108b12 ("usb: dwc3: Stop active transfers before halting the controller") Signed-off-by: Wesley Cheng Link: https://lore.kernel.org/r/1615507142-23097-1-git-send-email-wcheng@codeaurora.org Cc: stable Signed-off-by: Greg Kroah-Hartman --- drivers/usb/dwc3/gadget.c | 11 +++++------ 1 file changed, 5 insertions(+), 6 deletions(-) diff --git a/drivers/usb/dwc3/gadget.c b/drivers/usb/dwc3/gadget.c index aebcf8ec0716..4a337f348651 100644 --- a/drivers/usb/dwc3/gadget.c +++ b/drivers/usb/dwc3/gadget.c @@ -783,8 +783,6 @@ static int __dwc3_gadget_ep_disable(struct dwc3_ep *dep) trace_dwc3_gadget_ep_disable(dep); - dwc3_remove_requests(dwc, dep); - /* make sure HW endpoint isn't stalled */ if (dep->flags & DWC3_EP_STALL) __dwc3_gadget_ep_set_halt(dep, 0, false); @@ -803,6 +801,8 @@ static int __dwc3_gadget_ep_disable(struct dwc3_ep *dep) dep->endpoint.desc = NULL; } + dwc3_remove_requests(dwc, dep); + return 0; } @@ -1617,7 +1617,7 @@ static int __dwc3_gadget_ep_queue(struct dwc3_ep *dep, struct dwc3_request *req) { struct dwc3 *dwc = dep->dwc; - if (!dep->endpoint.desc || !dwc->pullups_connected) { + if (!dep->endpoint.desc || !dwc->pullups_connected || !dwc->connected) { dev_err(dwc->dev, "%s: can't queue to disabled endpoint\n", dep->name); return -ESHUTDOWN; @@ -2247,6 +2247,7 @@ static int dwc3_gadget_pullup(struct usb_gadget *g, int is_on) if (!is_on) { u32 count; + dwc->connected = false; /* * In the Synopsis DesignWare Cores USB3 Databook Rev. 3.30a * Section 4.1.8 Table 4-7, it states that for a device-initiated @@ -2271,7 +2272,6 @@ static int dwc3_gadget_pullup(struct usb_gadget *g, int is_on) dwc->ev_buf->lpos = (dwc->ev_buf->lpos + count) % dwc->ev_buf->length; } - dwc->connected = false; } else { __dwc3_gadget_start(dwc); } @@ -3321,8 +3321,6 @@ static void dwc3_gadget_reset_interrupt(struct dwc3 *dwc) { u32 reg; - dwc->connected = true; - /* * WORKAROUND: DWC3 revisions <1.88a have an issue which * would cause a missing Disconnect Event if there's a @@ -3362,6 +3360,7 @@ static void dwc3_gadget_reset_interrupt(struct dwc3 *dwc) * transfers." */ dwc3_stop_active_transfers(dwc); + dwc->connected = true; reg = dwc3_readl(dwc->regs, DWC3_DCTL); reg &= ~DWC3_DCTL_TSTCTRL_MASK; From 2b8c956ea6ba896ec18ae36c2684ecfa04c1f479 Mon Sep 17 00:00:00 2001 From: Badhri Jagan Sridharan Date: Wed, 17 Mar 2021 23:48:05 -0700 Subject: [PATCH 45/67] usb: typec: tcpm: Skip sink_cap query only when VDM sm is busy When port partner responds "Not supported" to the DiscIdentity command, VDM state machine can remain in NVDM_STATE_ERR_TMOUT and this causes querying sink cap to be skipped indefinitely. Hence check for vdm_sm_running instead of checking for VDM_STATE_DONE. Fixes: 8dc4bd073663f ("usb: typec: tcpm: Add support for Sink Fast Role SWAP(FRS)") Acked-by: Heikki Krogerus Signed-off-by: Badhri Jagan Sridharan Cc: stable Link: https://lore.kernel.org/r/20210318064805.3747831-1-badhri@google.com Signed-off-by: Greg Kroah-Hartman --- drivers/usb/typec/tcpm/tcpm.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/usb/typec/tcpm/tcpm.c b/drivers/usb/typec/tcpm/tcpm.c index 92093ea12cff..ce7af398c7c1 100644 --- a/drivers/usb/typec/tcpm/tcpm.c +++ b/drivers/usb/typec/tcpm/tcpm.c @@ -5173,7 +5173,7 @@ static void tcpm_enable_frs_work(struct kthread_work *work) goto unlock; /* Send when the state machine is idle */ - if (port->state != SNK_READY || port->vdm_state != VDM_STATE_DONE || port->send_discover) + if (port->state != SNK_READY || port->vdm_sm_running || port->send_discover) goto resched; port->upcoming_state = GET_SINK_CAP; From 2cafd46a714af1e55354bc6dcea9dcc13f9475b5 Mon Sep 17 00:00:00 2001 From: Edmundo Carmona Antoranz Date: Tue, 16 Mar 2021 12:17:35 -0600 Subject: [PATCH 46/67] staging: vt665x: fix alignment constraints MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Removing 2 instances of alignment warnings drivers/staging/vt6655/rxtx.h:153:1: warning: alignment 1 of ‘struct vnt_cts’ is less than 2 [-Wpacked-not-aligned] drivers/staging/vt6655/rxtx.h:163:1: warning: alignment 1 of ‘struct vnt_cts_fb’ is less than 2 [-Wpacked-not-aligned] The root cause seems to be that _because_ struct ieee80211_cts is marked as __aligned(2), this requires any encapsulating struct to also have an alignment of 2. Fixes: 2faf12c57efe ("staging: vt665x: fix alignment constraints") Reviewed-by: Arnd Bergmann Signed-off-by: Edmundo Carmona Antoranz Link: https://lore.kernel.org/r/20210316181736.2553318-1-eantoranz@gmail.com Signed-off-by: Greg Kroah-Hartman --- drivers/staging/vt6655/rxtx.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/staging/vt6655/rxtx.h b/drivers/staging/vt6655/rxtx.h index e7061d383306..c3c2c1566882 100644 --- a/drivers/staging/vt6655/rxtx.h +++ b/drivers/staging/vt6655/rxtx.h @@ -150,7 +150,7 @@ struct vnt_cts { u16 reserved; struct ieee80211_cts data; u16 reserved2; -} __packed; +} __packed __aligned(2); struct vnt_cts_fb { struct vnt_phy_field b; @@ -160,7 +160,7 @@ struct vnt_cts_fb { __le16 cts_duration_ba_f1; struct ieee80211_cts data; u16 reserved2; -} __packed; +} __packed __aligned(2); struct vnt_tx_fifo_head { u8 tx_key[WLAN_KEY_LEN_CCMP]; From 9ceee7d0841a8f7d7644021ba7d4cc1fbc7966e3 Mon Sep 17 00:00:00 2001 From: Lv Yunlong Date: Wed, 10 Mar 2021 00:31:27 -0800 Subject: [PATCH 47/67] firmware/efi: Fix a use after bug in efi_mem_reserve_persistent In the for loop in efi_mem_reserve_persistent(), prsv = rsv->next use the unmapped rsv. Use the unmapped pages will cause segment fault. Fixes: 18df7577adae6 ("efi/memreserve: deal with memreserve entries in unmapped memory") Signed-off-by: Lv Yunlong Signed-off-by: Ard Biesheuvel --- drivers/firmware/efi/efi.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/firmware/efi/efi.c b/drivers/firmware/efi/efi.c index df3f9bcab581..4b7ee3fa9224 100644 --- a/drivers/firmware/efi/efi.c +++ b/drivers/firmware/efi/efi.c @@ -927,7 +927,7 @@ int __ref efi_mem_reserve_persistent(phys_addr_t addr, u64 size) } /* first try to find a slot in an existing linked list entry */ - for (prsv = efi_memreserve_root->next; prsv; prsv = rsv->next) { + for (prsv = efi_memreserve_root->next; prsv; ) { rsv = memremap(prsv, sizeof(*rsv), MEMREMAP_WB); index = atomic_fetch_add_unless(&rsv->count, 1, rsv->size); if (index < rsv->size) { @@ -937,6 +937,7 @@ int __ref efi_mem_reserve_persistent(phys_addr_t addr, u64 size) memunmap(rsv); return efi_mem_reserve_iomem(addr, size); } + prsv = rsv->next; memunmap(rsv); } From fb98cc0b3af2ba4d87301dff2b381b12eee35d7d Mon Sep 17 00:00:00 2001 From: Ard Biesheuvel Date: Wed, 10 Mar 2021 08:33:19 +0100 Subject: [PATCH 48/67] efi: use 32-bit alignment for efi_guid_t literals Commit 494c704f9af0 ("efi: Use 32-bit alignment for efi_guid_t") updated the type definition of efi_guid_t to ensure that it always appears sufficiently aligned (the UEFI spec is ambiguous about this, but given the fact that its EFI_GUID type is defined in terms of a struct carrying a uint32_t, the natural alignment is definitely >= 32 bits). However, we missed the EFI_GUID() macro which is used to instantiate efi_guid_t literals: that macro is still based on the guid_t type, which does not have a minimum alignment at all. This results in warnings such as In file included from drivers/firmware/efi/mokvar-table.c:35: include/linux/efi.h:1093:34: warning: passing 1-byte aligned argument to 4-byte aligned parameter 2 of 'get_var' may result in an unaligned pointer access [-Walign-mismatch] status = get_var(L"SecureBoot", &EFI_GLOBAL_VARIABLE_GUID, NULL, &size, ^ include/linux/efi.h:1101:24: warning: passing 1-byte aligned argument to 4-byte aligned parameter 2 of 'get_var' may result in an unaligned pointer access [-Walign-mismatch] get_var(L"SetupMode", &EFI_GLOBAL_VARIABLE_GUID, NULL, &size, &setupmode); The distinction only matters on CPUs that do not support misaligned loads fully, but 32-bit ARM's load-multiple instructions fall into that category, and these are likely to be emitted by the compiler that built the firmware for loading word-aligned 128-bit GUIDs from memory So re-implement the initializer in terms of our own efi_guid_t type, so that the alignment becomes a property of the literal's type. Fixes: 494c704f9af0 ("efi: Use 32-bit alignment for efi_guid_t") Reported-by: Nathan Chancellor Reviewed-by: Nick Desaulniers Reviewed-by: Nathan Chancellor Tested-by: Nathan Chancellor Link: https://github.com/ClangBuiltLinux/linux/issues/1327 Signed-off-by: Ard Biesheuvel --- include/linux/efi.h | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/include/linux/efi.h b/include/linux/efi.h index 8710f5710c1d..6b5d36babfcc 100644 --- a/include/linux/efi.h +++ b/include/linux/efi.h @@ -72,8 +72,10 @@ typedef void *efi_handle_t; */ typedef guid_t efi_guid_t __aligned(__alignof__(u32)); -#define EFI_GUID(a,b,c,d0,d1,d2,d3,d4,d5,d6,d7) \ - GUID_INIT(a, b, c, d0, d1, d2, d3, d4, d5, d6, d7) +#define EFI_GUID(a, b, c, d...) (efi_guid_t){ { \ + (a) & 0xff, ((a) >> 8) & 0xff, ((a) >> 16) & 0xff, ((a) >> 24) & 0xff, \ + (b) & 0xff, ((b) >> 8) & 0xff, \ + (c) & 0xff, ((c) >> 8) & 0xff, d } } /* * Generic EFI table header From a501b048a95b79e1e34f03cac3c87ff1e9f229ad Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Thu, 18 Mar 2021 20:26:47 +0100 Subject: [PATCH 49/67] x86/ioapic: Ignore IRQ2 again Vitaly ran into an issue with hotplugging CPU0 on an Amazon instance where the matrix allocator claimed to be out of vectors. He analyzed it down to the point that IRQ2, the PIC cascade interrupt, which is supposed to be not ever routed to the IO/APIC ended up having an interrupt vector assigned which got moved during unplug of CPU0. The underlying issue is that IRQ2 for various reasons (see commit af174783b925 ("x86: I/O APIC: Never configure IRQ2" for details) is treated as a reserved system vector by the vector core code and is not accounted as a regular vector. The Amazon BIOS has an routing entry of pin2 to IRQ2 which causes the IO/APIC setup to claim that interrupt which is granted by the vector domain because there is no sanity check. As a consequence the allocation counter of CPU0 underflows which causes a subsequent unplug to fail with: [ ... ] CPU 0 has 4294967295 vectors, 589 available. Cannot disable CPU There is another sanity check missing in the matrix allocator, but the underlying root cause is that the IO/APIC code lost the IRQ2 ignore logic during the conversion to irqdomains. For almost 6 years nobody complained about this wreckage, which might indicate that this requirement could be lifted, but for any system which actually has a PIC IRQ2 is unusable by design so any routing entry has no effect and the interrupt cannot be connected to a device anyway. Due to that and due to history biased paranoia reasons restore the IRQ2 ignore logic and treat it as non existent despite a routing entry claiming otherwise. Fixes: d32932d02e18 ("x86/irq: Convert IOAPIC to use hierarchical irqdomain interfaces") Reported-by: Vitaly Kuznetsov Signed-off-by: Thomas Gleixner Tested-by: Vitaly Kuznetsov Cc: stable@vger.kernel.org Link: https://lore.kernel.org/r/20210318192819.636943062@linutronix.de --- arch/x86/kernel/apic/io_apic.c | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/arch/x86/kernel/apic/io_apic.c b/arch/x86/kernel/apic/io_apic.c index c3b60c37c728..73ff4dd426a8 100644 --- a/arch/x86/kernel/apic/io_apic.c +++ b/arch/x86/kernel/apic/io_apic.c @@ -1032,6 +1032,16 @@ static int mp_map_pin_to_irq(u32 gsi, int idx, int ioapic, int pin, if (idx >= 0 && test_bit(mp_irqs[idx].srcbus, mp_bus_not_pci)) { irq = mp_irqs[idx].srcbusirq; legacy = mp_is_legacy_irq(irq); + /* + * IRQ2 is unusable for historical reasons on systems which + * have a legacy PIC. See the comment vs. IRQ2 further down. + * + * If this gets removed at some point then the related code + * in lapic_assign_system_vectors() needs to be adjusted as + * well. + */ + if (legacy && irq == PIC_CASCADE_IR) + return -EINVAL; } mutex_lock(&ioapic_mutex); From 68b1eddd421d2b16c6655eceb48918a1e896bbbc Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Thu, 18 Mar 2021 11:27:19 +0100 Subject: [PATCH 50/67] static_call: Fix static_call_set_init() It turns out that static_call_set_init() does not preserve the other flags; IOW. it clears TAIL if it was set. Fixes: 9183c3f9ed710 ("static_call: Add inline static call infrastructure") Reported-by: Sumit Garg Signed-off-by: Peter Zijlstra (Intel) Acked-by: Jarkko Sakkinen Tested-by: Sumit Garg Link: https://lkml.kernel.org/r/20210318113610.519406371@infradead.org --- kernel/static_call.c | 17 ++++++++++------- 1 file changed, 10 insertions(+), 7 deletions(-) diff --git a/kernel/static_call.c b/kernel/static_call.c index ae825295cf68..080c8a9ddfaf 100644 --- a/kernel/static_call.c +++ b/kernel/static_call.c @@ -35,27 +35,30 @@ static inline void *static_call_addr(struct static_call_site *site) return (void *)((long)site->addr + (long)&site->addr); } +static inline unsigned long __static_call_key(const struct static_call_site *site) +{ + return (long)site->key + (long)&site->key; +} static inline struct static_call_key *static_call_key(const struct static_call_site *site) { - return (struct static_call_key *) - (((long)site->key + (long)&site->key) & ~STATIC_CALL_SITE_FLAGS); + return (void *)(__static_call_key(site) & ~STATIC_CALL_SITE_FLAGS); } /* These assume the key is word-aligned. */ static inline bool static_call_is_init(struct static_call_site *site) { - return ((long)site->key + (long)&site->key) & STATIC_CALL_SITE_INIT; + return __static_call_key(site) & STATIC_CALL_SITE_INIT; } static inline bool static_call_is_tail(struct static_call_site *site) { - return ((long)site->key + (long)&site->key) & STATIC_CALL_SITE_TAIL; + return __static_call_key(site) & STATIC_CALL_SITE_TAIL; } static inline void static_call_set_init(struct static_call_site *site) { - site->key = ((long)static_call_key(site) | STATIC_CALL_SITE_INIT) - + site->key = (__static_call_key(site) | STATIC_CALL_SITE_INIT) - (long)&site->key; } @@ -190,7 +193,7 @@ void __static_call_update(struct static_call_key *key, void *tramp, void *func) } arch_static_call_transform(site_addr, NULL, func, - static_call_is_tail(site)); + static_call_is_tail(site)); } } @@ -349,7 +352,7 @@ static int static_call_add_module(struct module *mod) struct static_call_site *site; for (site = start; site != stop; site++) { - unsigned long s_key = (long)site->key + (long)&site->key; + unsigned long s_key = __static_call_key(site); unsigned long addr = s_key & ~STATIC_CALL_SITE_FLAGS; unsigned long key; From 698bacefe993ad2922c9d3b1380591ad489355e9 Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Thu, 18 Mar 2021 11:29:56 +0100 Subject: [PATCH 51/67] static_call: Align static_call_is_init() patching condition The intent is to avoid writing init code after init (because the text might have been freed). The code is needlessly different between jump_label and static_call and not obviously correct. The existing code relies on the fact that the module loader clears the init layout, such that within_module_init() always fails, while jump_label relies on the module state which is more obvious and matches the kernel logic. Signed-off-by: Peter Zijlstra (Intel) Acked-by: Jarkko Sakkinen Tested-by: Sumit Garg Link: https://lkml.kernel.org/r/20210318113610.636651340@infradead.org --- kernel/static_call.c | 14 ++++---------- 1 file changed, 4 insertions(+), 10 deletions(-) diff --git a/kernel/static_call.c b/kernel/static_call.c index 080c8a9ddfaf..fc2259047be2 100644 --- a/kernel/static_call.c +++ b/kernel/static_call.c @@ -149,6 +149,7 @@ void __static_call_update(struct static_call_key *key, void *tramp, void *func) }; for (site_mod = &first; site_mod; site_mod = site_mod->next) { + bool init = system_state < SYSTEM_RUNNING; struct module *mod = site_mod->mod; if (!site_mod->sites) { @@ -168,6 +169,7 @@ void __static_call_update(struct static_call_key *key, void *tramp, void *func) if (mod) { stop = mod->static_call_sites + mod->num_static_call_sites; + init = mod->state == MODULE_STATE_COMING; } #endif @@ -175,16 +177,8 @@ void __static_call_update(struct static_call_key *key, void *tramp, void *func) site < stop && static_call_key(site) == key; site++) { void *site_addr = static_call_addr(site); - if (static_call_is_init(site)) { - /* - * Don't write to call sites which were in - * initmem and have since been freed. - */ - if (!mod && system_state >= SYSTEM_RUNNING) - continue; - if (mod && !within_module_init((unsigned long)site_addr, mod)) - continue; - } + if (!init && static_call_is_init(site)) + continue; if (!kernel_text_address((unsigned long)site_addr)) { WARN_ONCE(1, "can't patch static call site at %pS", From 38c93587375053c5b9ef093f4a5ea754538cba32 Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Thu, 18 Mar 2021 11:31:51 +0100 Subject: [PATCH 52/67] static_call: Fix static_call_update() sanity check Sites that match init_section_contains() get marked as INIT. For built-in code init_sections contains both __init and __exit text. OTOH kernel_text_address() only explicitly includes __init text (and there are no __exit text markers). Match what jump_label already does and ignore the warning for INIT sites. Also see the excellent changelog for commit: 8f35eaa5f2de ("jump_label: Don't warn on __exit jump entries") Fixes: 9183c3f9ed710 ("static_call: Add inline static call infrastructure") Reported-by: Sumit Garg Signed-off-by: Peter Zijlstra (Intel) Acked-by: Jarkko Sakkinen Tested-by: Sumit Garg Link: https://lkml.kernel.org/r/20210318113610.739542434@infradead.org --- kernel/jump_label.c | 8 ++++++++ kernel/static_call.c | 11 ++++++++++- 2 files changed, 18 insertions(+), 1 deletion(-) diff --git a/kernel/jump_label.c b/kernel/jump_label.c index c6a39d662935..ba39fbb1f8e7 100644 --- a/kernel/jump_label.c +++ b/kernel/jump_label.c @@ -407,6 +407,14 @@ static bool jump_label_can_update(struct jump_entry *entry, bool init) return false; if (!kernel_text_address(jump_entry_code(entry))) { + /* + * This skips patching built-in __exit, which + * is part of init_section_contains() but is + * not part of kernel_text_address(). + * + * Skipping built-in __exit is fine since it + * will never be executed. + */ WARN_ONCE(!jump_entry_is_init(entry), "can't patch jump_label at %pS", (void *)jump_entry_code(entry)); diff --git a/kernel/static_call.c b/kernel/static_call.c index fc2259047be2..2c5950b0b90e 100644 --- a/kernel/static_call.c +++ b/kernel/static_call.c @@ -181,7 +181,16 @@ void __static_call_update(struct static_call_key *key, void *tramp, void *func) continue; if (!kernel_text_address((unsigned long)site_addr)) { - WARN_ONCE(1, "can't patch static call site at %pS", + /* + * This skips patching built-in __exit, which + * is part of init_section_contains() but is + * not part of kernel_text_address(). + * + * Skipping built-in __exit is fine since it + * will never be executed. + */ + WARN_ONCE(!static_call_is_init(site), + "can't patch static call site at %pS", site_addr); continue; } From dd926880da8dbbe409e709c1d3c1620729a94732 Mon Sep 17 00:00:00 2001 From: Johan Hovold Date: Fri, 12 Mar 2021 10:20:33 +0100 Subject: [PATCH 53/67] x86/apic/of: Fix CPU devicetree-node lookups Architectures that describe the CPU topology in devicetree and do not have an identity mapping between physical and logical CPU ids must override the default implementation of arch_match_cpu_phys_id(). Failing to do so breaks CPU devicetree-node lookups using of_get_cpu_node() and of_cpu_device_node_get() which several drivers rely on. It also causes the CPU struct devices exported through sysfs to point to the wrong devicetree nodes. On x86, CPUs are described in devicetree using their APIC ids and those do not generally coincide with the logical ids, even if CPU0 typically uses APIC id 0. Add the missing implementation of arch_match_cpu_phys_id() so that CPU-node lookups work also with SMP. Apart from fixing the broken sysfs devicetree-node links this likely does not affect current users of mainline kernels on x86. Fixes: 4e07db9c8db8 ("x86/devicetree: Use CPU description from Device Tree") Signed-off-by: Johan Hovold Signed-off-by: Thomas Gleixner Link: https://lore.kernel.org/r/20210312092033.26317-1-johan@kernel.org --- arch/x86/kernel/apic/apic.c | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/arch/x86/kernel/apic/apic.c b/arch/x86/kernel/apic/apic.c index bda4f2a36868..4f26700f314d 100644 --- a/arch/x86/kernel/apic/apic.c +++ b/arch/x86/kernel/apic/apic.c @@ -2342,6 +2342,11 @@ static int cpuid_to_apicid[] = { [0 ... NR_CPUS - 1] = -1, }; +bool arch_match_cpu_phys_id(int cpu, u64 phys_id) +{ + return phys_id == cpuid_to_apicid[cpu]; +} + #ifdef CONFIG_SMP /** * apic_id_is_primary_thread - Check whether APIC ID belongs to a primary thread From 81e2073c175b887398e5bca6c004efa89983f58d Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Wed, 17 Mar 2021 15:38:52 +0100 Subject: [PATCH 54/67] genirq: Disable interrupts for force threaded handlers With interrupt force threading all device interrupt handlers are invoked from kernel threads. Contrary to hard interrupt context the invocation only disables bottom halfs, but not interrupts. This was an oversight back then because any code like this will have an issue: thread(irq_A) irq_handler(A) spin_lock(&foo->lock); interrupt(irq_B) irq_handler(B) spin_lock(&foo->lock); This has been triggered with networking (NAPI vs. hrtimers) and console drivers where printk() happens from an interrupt which interrupted the force threaded handler. Now people noticed and started to change the spin_lock() in the handler to spin_lock_irqsave() which affects performance or add IRQF_NOTHREAD to the interrupt request which in turn breaks RT. Fix the root cause and not the symptom and disable interrupts before invoking the force threaded handler which preserves the regular semantics and the usefulness of the interrupt force threading as a general debugging tool. For not RT this is not changing much, except that during the execution of the threaded handler interrupts are delayed until the handler returns. Vs. scheduling and softirq processing there is no difference. For RT kernels there is no issue. Fixes: 8d32a307e4fa ("genirq: Provide forced interrupt threading") Reported-by: Johan Hovold Signed-off-by: Thomas Gleixner Reviewed-by: Johan Hovold Acked-by: Sebastian Andrzej Siewior Link: https://lore.kernel.org/r/20210317143859.513307808@linutronix.de --- kernel/irq/manage.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/kernel/irq/manage.c b/kernel/irq/manage.c index dec3f73e8db9..21ea370fccda 100644 --- a/kernel/irq/manage.c +++ b/kernel/irq/manage.c @@ -1142,11 +1142,15 @@ irq_forced_thread_fn(struct irq_desc *desc, struct irqaction *action) irqreturn_t ret; local_bh_disable(); + if (!IS_ENABLED(CONFIG_PREEMPT_RT)) + local_irq_disable(); ret = action->thread_fn(action->irq, action->dev_id); if (ret == IRQ_HANDLED) atomic_inc(&desc->threads_handled); irq_finalize_oneshot(desc, action); + if (!IS_ENABLED(CONFIG_PREEMPT_RT)) + local_irq_enable(); local_bh_enable(); return ret; } From b7ff91fd030dc9d72ed91b1aab36e445a003af4f Mon Sep 17 00:00:00 2001 From: "zhangyi (F)" Date: Wed, 3 Mar 2021 21:17:02 +0800 Subject: [PATCH 55/67] ext4: find old entry again if failed to rename whiteout If we failed to add new entry on rename whiteout, we cannot reset the old->de entry directly, because the old->de could have moved from under us during make indexed dir. So find the old entry again before reset is needed, otherwise it may corrupt the filesystem as below. /dev/sda: Entry '00000001' in ??? (12) has deleted/unused inode 15. CLEARED. /dev/sda: Unattached inode 75 /dev/sda: UNEXPECTED INCONSISTENCY; RUN fsck MANUALLY. Fixes: 6b4b8e6b4ad ("ext4: fix bug for rename with RENAME_WHITEOUT") Cc: stable@vger.kernel.org Signed-off-by: zhangyi (F) Link: https://lore.kernel.org/r/20210303131703.330415-1-yi.zhang@huawei.com Signed-off-by: Theodore Ts'o --- fs/ext4/namei.c | 29 +++++++++++++++++++++++++++-- 1 file changed, 27 insertions(+), 2 deletions(-) diff --git a/fs/ext4/namei.c b/fs/ext4/namei.c index 686bf982c84e..4aae59d55288 100644 --- a/fs/ext4/namei.c +++ b/fs/ext4/namei.c @@ -3613,6 +3613,31 @@ static int ext4_setent(handle_t *handle, struct ext4_renament *ent, return retval; } +static void ext4_resetent(handle_t *handle, struct ext4_renament *ent, + unsigned ino, unsigned file_type) +{ + struct ext4_renament old = *ent; + int retval = 0; + + /* + * old->de could have moved from under us during make indexed dir, + * so the old->de may no longer valid and need to find it again + * before reset old inode info. + */ + old.bh = ext4_find_entry(old.dir, &old.dentry->d_name, &old.de, NULL); + if (IS_ERR(old.bh)) + retval = PTR_ERR(old.bh); + if (!old.bh) + retval = -ENOENT; + if (retval) { + ext4_std_error(old.dir->i_sb, retval); + return; + } + + ext4_setent(handle, &old, ino, file_type); + brelse(old.bh); +} + static int ext4_find_delete_entry(handle_t *handle, struct inode *dir, const struct qstr *d_name) { @@ -3937,8 +3962,8 @@ static int ext4_rename(struct user_namespace *mnt_userns, struct inode *old_dir, end_rename: if (whiteout) { if (retval) { - ext4_setent(handle, &old, - old.inode->i_ino, old_file_type); + ext4_resetent(handle, &old, + old.inode->i_ino, old_file_type); drop_nlink(whiteout); } unlock_new_inode(whiteout); From 5dccdc5a1916d4266edd251f20bbbb113a5c495f Mon Sep 17 00:00:00 2001 From: "zhangyi (F)" Date: Wed, 3 Mar 2021 21:17:03 +0800 Subject: [PATCH 56/67] ext4: do not iput inode under running transaction in ext4_rename() In ext4_rename(), when RENAME_WHITEOUT failed to add new entry into directory, it ends up dropping new created whiteout inode under the running transaction. After commit <9b88f9fb0d2> ("ext4: Do not iput inode under running transaction"), we follow the assumptions that evict() does not get called from a transaction context but in ext4_rename() it breaks this suggestion. Although it's not a real problem, better to obey it, so this patch add inode to orphan list and stop transaction before final iput(). Signed-off-by: zhangyi (F) Link: https://lore.kernel.org/r/20210303131703.330415-2-yi.zhang@huawei.com Signed-off-by: Theodore Ts'o --- fs/ext4/namei.c | 18 +++++++++--------- 1 file changed, 9 insertions(+), 9 deletions(-) diff --git a/fs/ext4/namei.c b/fs/ext4/namei.c index 4aae59d55288..1fe8370dbf0b 100644 --- a/fs/ext4/namei.c +++ b/fs/ext4/namei.c @@ -3799,14 +3799,14 @@ static int ext4_rename(struct user_namespace *mnt_userns, struct inode *old_dir, */ retval = -ENOENT; if (!old.bh || le32_to_cpu(old.de->inode) != old.inode->i_ino) - goto end_rename; + goto release_bh; new.bh = ext4_find_entry(new.dir, &new.dentry->d_name, &new.de, &new.inlined); if (IS_ERR(new.bh)) { retval = PTR_ERR(new.bh); new.bh = NULL; - goto end_rename; + goto release_bh; } if (new.bh) { if (!new.inode) { @@ -3823,15 +3823,13 @@ static int ext4_rename(struct user_namespace *mnt_userns, struct inode *old_dir, handle = ext4_journal_start(old.dir, EXT4_HT_DIR, credits); if (IS_ERR(handle)) { retval = PTR_ERR(handle); - handle = NULL; - goto end_rename; + goto release_bh; } } else { whiteout = ext4_whiteout_for_rename(mnt_userns, &old, credits, &handle); if (IS_ERR(whiteout)) { retval = PTR_ERR(whiteout); - whiteout = NULL; - goto end_rename; + goto release_bh; } } @@ -3965,16 +3963,18 @@ static int ext4_rename(struct user_namespace *mnt_userns, struct inode *old_dir, ext4_resetent(handle, &old, old.inode->i_ino, old_file_type); drop_nlink(whiteout); + ext4_orphan_add(handle, whiteout); } unlock_new_inode(whiteout); + ext4_journal_stop(handle); iput(whiteout); - + } else { + ext4_journal_stop(handle); } +release_bh: brelse(old.dir_bh); brelse(old.bh); brelse(new.bh); - if (handle) - ext4_journal_stop(handle); return retval; } From 6b22489911b726eebbf169caee52fea52013fbdd Mon Sep 17 00:00:00 2001 From: "zhangyi (F)" Date: Fri, 5 Mar 2021 20:05:08 +0800 Subject: [PATCH 57/67] ext4: do not try to set xattr into ea_inode if value is empty Syzbot report a warning that ext4 may create an empty ea_inode if set an empty extent attribute to a file on the file system which is no free blocks left. WARNING: CPU: 6 PID: 10667 at fs/ext4/xattr.c:1640 ext4_xattr_set_entry+0x10f8/0x1114 fs/ext4/xattr.c:1640 ... Call trace: ext4_xattr_set_entry+0x10f8/0x1114 fs/ext4/xattr.c:1640 ext4_xattr_block_set+0x1d0/0x1b1c fs/ext4/xattr.c:1942 ext4_xattr_set_handle+0x8a0/0xf1c fs/ext4/xattr.c:2390 ext4_xattr_set+0x120/0x1f0 fs/ext4/xattr.c:2491 ext4_xattr_trusted_set+0x48/0x5c fs/ext4/xattr_trusted.c:37 __vfs_setxattr+0x208/0x23c fs/xattr.c:177 ... Now, ext4 try to store extent attribute into an external inode if ext4_xattr_block_set() return -ENOSPC, but for the case of store an empty extent attribute, store the extent entry into the extent attribute block is enough. A simple reproduce below. fallocate test.img -l 1M mkfs.ext4 -F -b 2048 -O ea_inode test.img mount test.img /mnt dd if=/dev/zero of=/mnt/foo bs=2048 count=500 setfattr -n "user.test" /mnt/foo Reported-by: syzbot+98b881fdd8ebf45ab4ae@syzkaller.appspotmail.com Fixes: 9c6e7853c531 ("ext4: reserve space for xattr entries/names") Cc: stable@kernel.org Signed-off-by: zhangyi (F) Link: https://lore.kernel.org/r/20210305120508.298465-1-yi.zhang@huawei.com Signed-off-by: Theodore Ts'o --- fs/ext4/xattr.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fs/ext4/xattr.c b/fs/ext4/xattr.c index 083c95126781..6c1018223c54 100644 --- a/fs/ext4/xattr.c +++ b/fs/ext4/xattr.c @@ -2404,7 +2404,7 @@ ext4_xattr_set_handle(handle_t *handle, struct inode *inode, int name_index, * external inode if possible. */ if (ext4_has_feature_ea_inode(inode->i_sb) && - !i.in_inode) { + i.value_len && !i.in_inode) { i.in_inode = 1; goto retry_inode; } From 7d8bd3c76da1d94b85e6c9b7007e20e980bfcfe6 Mon Sep 17 00:00:00 2001 From: Shijie Luo Date: Fri, 12 Mar 2021 01:50:51 -0500 Subject: [PATCH 58/67] ext4: fix potential error in ext4_do_update_inode If set_large_file = 1 and errors occur in ext4_handle_dirty_metadata(), the error code will be overridden, go to out_brelse to avoid this situation. Signed-off-by: Shijie Luo Link: https://lore.kernel.org/r/20210312065051.36314-1-luoshijie1@huawei.com Cc: stable@kernel.org Reviewed-by: Jan Kara Signed-off-by: Theodore Ts'o --- fs/ext4/inode.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c index a79a9ea58c56..927e47db7f00 100644 --- a/fs/ext4/inode.c +++ b/fs/ext4/inode.c @@ -5026,7 +5026,7 @@ static int ext4_do_update_inode(handle_t *handle, struct ext4_inode_info *ei = EXT4_I(inode); struct buffer_head *bh = iloc->bh; struct super_block *sb = inode->i_sb; - int err = 0, rc, block; + int err = 0, block; int need_datasync = 0, set_large_file = 0; uid_t i_uid; gid_t i_gid; @@ -5138,9 +5138,9 @@ static int ext4_do_update_inode(handle_t *handle, bh->b_data); BUFFER_TRACE(bh, "call ext4_handle_dirty_metadata"); - rc = ext4_handle_dirty_metadata(handle, NULL, bh); - if (!err) - err = rc; + err = ext4_handle_dirty_metadata(handle, NULL, bh); + if (err) + goto out_brelse; ext4_clear_inode_state(inode, EXT4_STATE_NEW); if (set_large_file) { BUFFER_TRACE(EXT4_SB(sb)->s_sbh, "get write access"); From 2a4ae3bcdf05b8639406eaa09a2939f3c6dd8e75 Mon Sep 17 00:00:00 2001 From: Jan Kara Date: Mon, 15 Mar 2021 17:59:06 +0100 Subject: [PATCH 59/67] ext4: fix timer use-after-free on failed mount When filesystem mount fails because of corrupted filesystem we first cancel the s_err_report timer reminding fs errors every day and only then we flush s_error_work. However s_error_work may report another fs error and re-arm timer thus resulting in timer use-after-free. Fix the problem by first flushing the work and only after that canceling the s_err_report timer. Reported-by: syzbot+628472a2aac693ab0fcd@syzkaller.appspotmail.com Fixes: 2d01ddc86606 ("ext4: save error info to sb through journal if available") CC: stable@vger.kernel.org Signed-off-by: Jan Kara Link: https://lore.kernel.org/r/20210315165906.2175-1-jack@suse.cz Signed-off-by: Theodore Ts'o --- fs/ext4/super.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fs/ext4/super.c b/fs/ext4/super.c index a0a256859662..b9693680463a 100644 --- a/fs/ext4/super.c +++ b/fs/ext4/super.c @@ -5154,8 +5154,8 @@ static int ext4_fill_super(struct super_block *sb, void *data, int silent) failed_mount3a: ext4_es_unregister_shrinker(sbi); failed_mount3: - del_timer_sync(&sbi->s_err_report); flush_work(&sbi->s_error_work); + del_timer_sync(&sbi->s_err_report); if (sbi->s_mmp_tsk) kthread_stop(sbi->s_mmp_tsk); failed_mount2: From 8210bb29c1b66200cff7b25febcf6e39baf49fbf Mon Sep 17 00:00:00 2001 From: Harshad Shirwadkar Date: Tue, 16 Mar 2021 15:19:21 -0700 Subject: [PATCH 60/67] ext4: fix rename whiteout with fast commit This patch adds rename whiteout support in fast commits. Note that the whiteout object that gets created is actually char device. Which imples, the function ext4_inode_journal_mode(struct inode *inode) would return "JOURNAL_DATA" for this inode. This has a consequence in fast commit code that it will make creation of the whiteout object a fast-commit ineligible behavior and thus will fall back to full commits. With this patch, this can be observed by running fast commits with rename whiteout and seeing the stats generated by ext4_fc_stats tracepoint as follows: ext4_fc_stats: dev 254:32 fc ineligible reasons: XATTR:0, CROSS_RENAME:0, JOURNAL_FLAG_CHANGE:0, NO_MEM:0, SWAP_BOOT:0, RESIZE:0, RENAME_DIR:0, FALLOC_RANGE:0, INODE_JOURNAL_DATA:16; num_commits:6, ineligible: 6, numblks: 3 So in short, this patch guarantees that in case of rename whiteout, we fall back to full commits. Amir mentioned that instead of creating a new whiteout object for every rename, we can create a static whiteout object with irrelevant nlink. That will make fast commits to not fall back to full commit. But until this happens, this patch will ensure correctness by falling back to full commits. Fixes: 8016e29f4362 ("ext4: fast commit recovery path") Cc: stable@kernel.org Signed-off-by: Harshad Shirwadkar Link: https://lore.kernel.org/r/20210316221921.1124955-1-harshadshirwadkar@gmail.com Signed-off-by: Theodore Ts'o --- fs/ext4/ext4.h | 2 ++ fs/ext4/fast_commit.c | 9 +++++++-- fs/ext4/namei.c | 3 +++ 3 files changed, 12 insertions(+), 2 deletions(-) diff --git a/fs/ext4/ext4.h b/fs/ext4/ext4.h index ea3b41579f38..826a56e3bbd2 100644 --- a/fs/ext4/ext4.h +++ b/fs/ext4/ext4.h @@ -2794,6 +2794,8 @@ void __ext4_fc_track_link(handle_t *handle, struct inode *inode, struct dentry *dentry); void ext4_fc_track_unlink(handle_t *handle, struct dentry *dentry); void ext4_fc_track_link(handle_t *handle, struct dentry *dentry); +void __ext4_fc_track_create(handle_t *handle, struct inode *inode, + struct dentry *dentry); void ext4_fc_track_create(handle_t *handle, struct dentry *dentry); void ext4_fc_track_inode(handle_t *handle, struct inode *inode); void ext4_fc_mark_ineligible(struct super_block *sb, int reason); diff --git a/fs/ext4/fast_commit.c b/fs/ext4/fast_commit.c index 6c4f19b0a556..7541d0b5d706 100644 --- a/fs/ext4/fast_commit.c +++ b/fs/ext4/fast_commit.c @@ -513,10 +513,10 @@ void ext4_fc_track_link(handle_t *handle, struct dentry *dentry) __ext4_fc_track_link(handle, d_inode(dentry), dentry); } -void ext4_fc_track_create(handle_t *handle, struct dentry *dentry) +void __ext4_fc_track_create(handle_t *handle, struct inode *inode, + struct dentry *dentry) { struct __track_dentry_update_args args; - struct inode *inode = d_inode(dentry); int ret; args.dentry = dentry; @@ -527,6 +527,11 @@ void ext4_fc_track_create(handle_t *handle, struct dentry *dentry) trace_ext4_fc_track_create(inode, dentry, ret); } +void ext4_fc_track_create(handle_t *handle, struct dentry *dentry) +{ + __ext4_fc_track_create(handle, d_inode(dentry), dentry); +} + /* __track_fn for inode tracking */ static int __track_inode(struct inode *inode, void *arg, bool update) { diff --git a/fs/ext4/namei.c b/fs/ext4/namei.c index 1fe8370dbf0b..883e2a7cd4ab 100644 --- a/fs/ext4/namei.c +++ b/fs/ext4/namei.c @@ -3873,6 +3873,7 @@ static int ext4_rename(struct user_namespace *mnt_userns, struct inode *old_dir, retval = ext4_mark_inode_dirty(handle, whiteout); if (unlikely(retval)) goto end_rename; + } if (!new.bh) { retval = ext4_add_entry(handle, new.dentry, old.inode); @@ -3946,6 +3947,8 @@ static int ext4_rename(struct user_namespace *mnt_userns, struct inode *old_dir, ext4_fc_track_unlink(handle, new.dentry); __ext4_fc_track_link(handle, old.inode, new.dentry); __ext4_fc_track_unlink(handle, old.inode, old.dentry); + if (whiteout) + __ext4_fc_track_create(handle, whiteout, old.dentry); } if (new.inode) { From 512c15ef05d73a04f1aef18a3bc61a8bb516f323 Mon Sep 17 00:00:00 2001 From: Pan Bian Date: Sun, 17 Jan 2021 00:57:32 -0800 Subject: [PATCH 61/67] ext4: stop inode update before return The inode update should be stopped before returing the error code. Signed-off-by: Pan Bian Link: https://lore.kernel.org/r/20210117085732.93788-1-bianpan2016@163.com Fixes: 8016e29f4362 ("ext4: fast commit recovery path") Cc: stable@kernel.org Reviewed-by: Harshad Shirwadkar Signed-off-by: Theodore Ts'o --- fs/ext4/inode.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c index 927e47db7f00..0948a43f1b3d 100644 --- a/fs/ext4/inode.c +++ b/fs/ext4/inode.c @@ -5387,8 +5387,10 @@ int ext4_setattr(struct user_namespace *mnt_userns, struct dentry *dentry, inode->i_gid = attr->ia_gid; error = ext4_mark_inode_dirty(handle, inode); ext4_journal_stop(handle); - if (unlikely(error)) + if (unlikely(error)) { + ext4_fc_stop_update(inode); return error; + } } if (attr->ia_valid & ATTR_SIZE) { From 64395d950bc476106b39341e42ebfd4d2eb71d2c Mon Sep 17 00:00:00 2001 From: Theodore Ts'o Date: Sun, 21 Mar 2021 00:45:37 -0400 Subject: [PATCH 62/67] ext4: initialize ret to suppress smatch warning Signed-off-by: Theodore Ts'o --- fs/ext4/extents.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fs/ext4/extents.c b/fs/ext4/extents.c index 77c7c8a54da7..77c84d6f1af6 100644 --- a/fs/ext4/extents.c +++ b/fs/ext4/extents.c @@ -4382,7 +4382,7 @@ static int ext4_alloc_file_blocks(struct file *file, ext4_lblk_t offset, { struct inode *inode = file_inode(file); handle_t *handle; - int ret, ret2 = 0, ret3 = 0; + int ret = 0, ret2 = 0, ret3 = 0; int retries = 0; int depth = 0; struct ext4_map_blocks map; From 5be28c8f85ce99ed2d329d2ad8bdd18ea19473a5 Mon Sep 17 00:00:00 2001 From: Jens Axboe Date: Fri, 19 Mar 2021 19:25:13 -0600 Subject: [PATCH 63/67] signal: don't allow sending any signals to PF_IO_WORKER threads They don't take signals individually, and even if they share signals with the parent task, don't allow them to be delivered through the worker thread. Linux does allow this kind of behavior for regular threads, but it's really a compatability thing that we need not care about for the IO threads. Reported-by: Stefan Metzmacher Signed-off-by: Jens Axboe --- kernel/signal.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/kernel/signal.c b/kernel/signal.c index ba4d1ef39a9e..11cabcf20e7a 100644 --- a/kernel/signal.c +++ b/kernel/signal.c @@ -833,6 +833,9 @@ static int check_kill_permission(int sig, struct kernel_siginfo *info, if (!valid_signal(sig)) return -EINVAL; + /* PF_IO_WORKER threads don't take any signals */ + if (t->flags & PF_IO_WORKER) + return -ESRCH; if (!si_fromuser(info)) return 0; From 4db4b1a0d1779dc159f7b87feb97030ec0b12597 Mon Sep 17 00:00:00 2001 From: "Eric W. Biederman" Date: Sun, 21 Mar 2021 09:37:48 -0600 Subject: [PATCH 64/67] signal: don't allow STOP on PF_IO_WORKER threads Just like we don't allow normal signals to IO threads, don't deliver a STOP to a task that has PF_IO_WORKER set. The IO threads don't take signals in general, and have no means of flushing out a stop either. Longer term, we may want to look into allowing stop of these threads, as it relates to eg process freezing. For now, this prevents a spin issue if a SIGSTOP is delivered to the parent task. Reported-by: Stefan Metzmacher Signed-off-by: Jens Axboe Signed-off-by: "Eric W. Biederman" --- kernel/signal.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/kernel/signal.c b/kernel/signal.c index 11cabcf20e7a..f2a1b898da29 100644 --- a/kernel/signal.c +++ b/kernel/signal.c @@ -288,7 +288,8 @@ bool task_set_jobctl_pending(struct task_struct *task, unsigned long mask) JOBCTL_STOP_SIGMASK | JOBCTL_TRAPPING)); BUG_ON((mask & JOBCTL_TRAPPING) && !(mask & JOBCTL_PENDING_MASK)); - if (unlikely(fatal_signal_pending(task) || (task->flags & PF_EXITING))) + if (unlikely(fatal_signal_pending(task) || + (task->flags & (PF_EXITING | PF_IO_WORKER)))) return false; if (mask & JOBCTL_STOP_SIGMASK) From 00ddff431a458bbf143ea7c4c42d022676da1b17 Mon Sep 17 00:00:00 2001 From: Jens Axboe Date: Sun, 21 Mar 2021 07:06:56 -0600 Subject: [PATCH 65/67] io-wq: ensure task is running before processing task_work Mark the current task as running if we need to run task_work from the io-wq threads as part of work handling. If that is the case, then return as such so that the caller can appropriately loop back and reset if it was part of a going-to-sleep flush. Fixes: 3bfe6106693b ("io-wq: fork worker threads from original task") Signed-off-by: Jens Axboe --- fs/io-wq.c | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/fs/io-wq.c b/fs/io-wq.c index e05f996d088f..3dc10bfd8c3b 100644 --- a/fs/io-wq.c +++ b/fs/io-wq.c @@ -386,13 +386,16 @@ static struct io_wq_work *io_get_next_work(struct io_wqe *wqe) return NULL; } -static void io_flush_signals(void) +static bool io_flush_signals(void) { if (unlikely(test_tsk_thread_flag(current, TIF_NOTIFY_SIGNAL))) { + __set_current_state(TASK_RUNNING); if (current->task_works) task_work_run(); clear_tsk_thread_flag(current, TIF_NOTIFY_SIGNAL); + return true; } + return false; } static void io_assign_current_work(struct io_worker *worker, @@ -499,7 +502,8 @@ static int io_wqe_worker(void *data) } __io_worker_idle(wqe, worker); raw_spin_unlock_irq(&wqe->lock); - io_flush_signals(); + if (io_flush_signals()) + continue; ret = schedule_timeout(WORKER_IDLE_TIMEOUT); if (try_to_freeze() || ret) continue; From 0031275d119efe16711cd93519b595e6f9b4b330 Mon Sep 17 00:00:00 2001 From: Stefan Metzmacher Date: Sat, 20 Mar 2021 20:33:36 +0100 Subject: [PATCH 66/67] io_uring: call req_set_fail_links() on short send[msg]()/recv[msg]() with MSG_WAITALL Without that it's not safe to use them in a linked combination with others. Now combinations like IORING_OP_SENDMSG followed by IORING_OP_SPLICE should be possible. We already handle short reads and writes for the following opcodes: - IORING_OP_READV - IORING_OP_READ_FIXED - IORING_OP_READ - IORING_OP_WRITEV - IORING_OP_WRITE_FIXED - IORING_OP_WRITE - IORING_OP_SPLICE - IORING_OP_TEE Now we have it for these as well: - IORING_OP_SENDMSG - IORING_OP_SEND - IORING_OP_RECVMSG - IORING_OP_RECV For IORING_OP_RECVMSG we also check for the MSG_TRUNC and MSG_CTRUNC flags in order to call req_set_fail_links(). There might be applications arround depending on the behavior that even short send[msg]()/recv[msg]() retuns continue an IOSQE_IO_LINK chain. It's very unlikely that such applications pass in MSG_WAITALL, which is only defined in 'man 2 recvmsg', but not in 'man 2 sendmsg'. It's expected that the low level sock_sendmsg() call just ignores MSG_WAITALL, as MSG_ZEROCOPY is also ignored without explicitly set SO_ZEROCOPY. We also expect the caller to know about the implicit truncation to MAX_RW_COUNT, which we don't detect. cc: netdev@vger.kernel.org Link: https://lore.kernel.org/r/c4e1a4cc0d905314f4d5dc567e65a7b09621aab3.1615908477.git.metze@samba.org Signed-off-by: Stefan Metzmacher Signed-off-by: Jens Axboe --- fs/io_uring.c | 24 ++++++++++++++++++++---- 1 file changed, 20 insertions(+), 4 deletions(-) diff --git a/fs/io_uring.c b/fs/io_uring.c index ea2d3e120555..543551d70327 100644 --- a/fs/io_uring.c +++ b/fs/io_uring.c @@ -4386,6 +4386,7 @@ static int io_sendmsg(struct io_kiocb *req, unsigned int issue_flags) struct io_async_msghdr iomsg, *kmsg; struct socket *sock; unsigned flags; + int min_ret = 0; int ret; sock = sock_from_file(req->file); @@ -4406,6 +4407,9 @@ static int io_sendmsg(struct io_kiocb *req, unsigned int issue_flags) else if (issue_flags & IO_URING_F_NONBLOCK) flags |= MSG_DONTWAIT; + if (flags & MSG_WAITALL) + min_ret = iov_iter_count(&kmsg->msg.msg_iter); + ret = __sys_sendmsg_sock(sock, &kmsg->msg, flags); if ((issue_flags & IO_URING_F_NONBLOCK) && ret == -EAGAIN) return io_setup_async_msg(req, kmsg); @@ -4416,7 +4420,7 @@ static int io_sendmsg(struct io_kiocb *req, unsigned int issue_flags) if (kmsg->free_iov) kfree(kmsg->free_iov); req->flags &= ~REQ_F_NEED_CLEANUP; - if (ret < 0) + if (ret < min_ret) req_set_fail_links(req); __io_req_complete(req, issue_flags, ret, 0); return 0; @@ -4429,6 +4433,7 @@ static int io_send(struct io_kiocb *req, unsigned int issue_flags) struct iovec iov; struct socket *sock; unsigned flags; + int min_ret = 0; int ret; sock = sock_from_file(req->file); @@ -4450,6 +4455,9 @@ static int io_send(struct io_kiocb *req, unsigned int issue_flags) else if (issue_flags & IO_URING_F_NONBLOCK) flags |= MSG_DONTWAIT; + if (flags & MSG_WAITALL) + min_ret = iov_iter_count(&msg.msg_iter); + msg.msg_flags = flags; ret = sock_sendmsg(sock, &msg); if ((issue_flags & IO_URING_F_NONBLOCK) && ret == -EAGAIN) @@ -4457,7 +4465,7 @@ static int io_send(struct io_kiocb *req, unsigned int issue_flags) if (ret == -ERESTARTSYS) ret = -EINTR; - if (ret < 0) + if (ret < min_ret) req_set_fail_links(req); __io_req_complete(req, issue_flags, ret, 0); return 0; @@ -4609,6 +4617,7 @@ static int io_recvmsg(struct io_kiocb *req, unsigned int issue_flags) struct socket *sock; struct io_buffer *kbuf; unsigned flags; + int min_ret = 0; int ret, cflags = 0; bool force_nonblock = issue_flags & IO_URING_F_NONBLOCK; @@ -4640,6 +4649,9 @@ static int io_recvmsg(struct io_kiocb *req, unsigned int issue_flags) else if (force_nonblock) flags |= MSG_DONTWAIT; + if (flags & MSG_WAITALL) + min_ret = iov_iter_count(&kmsg->msg.msg_iter); + ret = __sys_recvmsg_sock(sock, &kmsg->msg, req->sr_msg.umsg, kmsg->uaddr, flags); if (force_nonblock && ret == -EAGAIN) @@ -4653,7 +4665,7 @@ static int io_recvmsg(struct io_kiocb *req, unsigned int issue_flags) if (kmsg->free_iov) kfree(kmsg->free_iov); req->flags &= ~REQ_F_NEED_CLEANUP; - if (ret < 0) + if (ret < min_ret || ((flags & MSG_WAITALL) && (kmsg->msg.msg_flags & (MSG_TRUNC | MSG_CTRUNC)))) req_set_fail_links(req); __io_req_complete(req, issue_flags, ret, cflags); return 0; @@ -4668,6 +4680,7 @@ static int io_recv(struct io_kiocb *req, unsigned int issue_flags) struct socket *sock; struct iovec iov; unsigned flags; + int min_ret = 0; int ret, cflags = 0; bool force_nonblock = issue_flags & IO_URING_F_NONBLOCK; @@ -4699,6 +4712,9 @@ static int io_recv(struct io_kiocb *req, unsigned int issue_flags) else if (force_nonblock) flags |= MSG_DONTWAIT; + if (flags & MSG_WAITALL) + min_ret = iov_iter_count(&msg.msg_iter); + ret = sock_recvmsg(sock, &msg, flags); if (force_nonblock && ret == -EAGAIN) return -EAGAIN; @@ -4707,7 +4723,7 @@ static int io_recv(struct io_kiocb *req, unsigned int issue_flags) out_free: if (req->flags & REQ_F_BUFFER_SELECTED) cflags = io_put_recv_kbuf(req); - if (ret < 0) + if (ret < min_ret || ((flags & MSG_WAITALL) && (msg.msg_flags & (MSG_TRUNC | MSG_CTRUNC)))) req_set_fail_links(req); __io_req_complete(req, issue_flags, ret, cflags); return 0; From 0d02ec6b3136c73c09e7859f0d0e4e2c4c07b49b Mon Sep 17 00:00:00 2001 From: Linus Torvalds Date: Sun, 21 Mar 2021 14:56:43 -0700 Subject: [PATCH 67/67] Linux 5.12-rc4 --- Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Makefile b/Makefile index a28bb374663d..d4784d181123 100644 --- a/Makefile +++ b/Makefile @@ -2,7 +2,7 @@ VERSION = 5 PATCHLEVEL = 12 SUBLEVEL = 0 -EXTRAVERSION = -rc3 +EXTRAVERSION = -rc4 NAME = Frozen Wasteland # *DOCUMENTATION*