From 1f6ce4445ab5c97aa63583bd6ce4fc20f471e11c Mon Sep 17 00:00:00 2001 From: Richard Weinberger Date: Mon, 15 Jun 2020 13:34:04 +0200 Subject: [PATCH 001/343] nandsim: Fix return code testing of ns_find_operation() ns_find_operation() returns 0 on success. Fixes: 052a7a5374bc ("mtd: rawnand: nandsim: Clean error handling") Signed-off-by: Richard Weinberger Signed-off-by: Miquel Raynal Link: https://lore.kernel.org/linux-mtd/20200615113404.25447-1-richard@nod.at --- drivers/mtd/nand/raw/nandsim.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/mtd/nand/raw/nandsim.c b/drivers/mtd/nand/raw/nandsim.c index 0a5cb77966cc..f5a53aac3c5f 100644 --- a/drivers/mtd/nand/raw/nandsim.c +++ b/drivers/mtd/nand/raw/nandsim.c @@ -1761,7 +1761,7 @@ static void ns_switch_state(struct nandsim *ns) NS_DBG("switch_state: operation is unknown, try to find it\n"); - if (!ns_find_operation(ns, 0)) + if (ns_find_operation(ns, 0)) return; if ((ns->state & ACTION_MASK) && From 630e8d5507d9f55dfa98134bfcadefb6cfba4fbb Mon Sep 17 00:00:00 2001 From: Kamal Dasu Date: Mon, 15 Jun 2020 11:51:34 -0400 Subject: [PATCH 002/343] mtd: set master partition panic write flag Check and set master panic write flag so that low level drivers can use it to take required action to ensure oops data gets written to assigned mtdoops device partition. Fixes: 9f897bfdd89f ("mtd: Add flag to indicate panic_write") Signed-off-by: Kamal Dasu Signed-off-by: Miquel Raynal Link: https://lore.kernel.org/linux-mtd/20200615155134.32007-1-kdasu.kdev@gmail.com --- drivers/mtd/mtdcore.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/mtd/mtdcore.c b/drivers/mtd/mtdcore.c index 76d832a88e0c..7d930569a7df 100644 --- a/drivers/mtd/mtdcore.c +++ b/drivers/mtd/mtdcore.c @@ -1273,8 +1273,8 @@ int mtd_panic_write(struct mtd_info *mtd, loff_t to, size_t len, size_t *retlen, return -EROFS; if (!len) return 0; - if (!mtd->oops_panic_write) - mtd->oops_panic_write = true; + if (!master->oops_panic_write) + master->oops_panic_write = true; return master->_panic_write(master, mtd_get_master_ofs(mtd, to), len, retlen, buf); From e6f390a834b56583e6fc0949822644ce92fbb107 Mon Sep 17 00:00:00 2001 From: Navid Emamdoost Date: Thu, 4 Jun 2020 21:52:07 -0500 Subject: [PATCH 003/343] gpio: arizona: handle pm_runtime_get_sync failure case Calling pm_runtime_get_sync increments the counter even in case of failure, causing incorrect ref count. Call pm_runtime_put if pm_runtime_get_sync fails. Signed-off-by: Navid Emamdoost Acked-by: Charles Keepax Link: https://lore.kernel.org/r/20200605025207.65719-1-navid.emamdoost@gmail.com Signed-off-by: Linus Walleij --- drivers/gpio/gpio-arizona.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/gpio/gpio-arizona.c b/drivers/gpio/gpio-arizona.c index 5640efe5e750..7520a13b4c7c 100644 --- a/drivers/gpio/gpio-arizona.c +++ b/drivers/gpio/gpio-arizona.c @@ -106,6 +106,7 @@ static int arizona_gpio_direction_out(struct gpio_chip *chip, ret = pm_runtime_get_sync(chip->parent); if (ret < 0) { dev_err(chip->parent, "Failed to resume: %d\n", ret); + pm_runtime_put(chip->parent); return ret; } } From 861254d826499944cb4d9b5a15f5a794a6b99a69 Mon Sep 17 00:00:00 2001 From: Navid Emamdoost Date: Thu, 4 Jun 2020 22:00:52 -0500 Subject: [PATCH 004/343] gpio: arizona: put pm_runtime in case of failure Calling pm_runtime_get_sync increments the counter even in case of failure, causing incorrect ref count if pm_runtime_put is not called in error handling paths. Call pm_runtime_put if pm_runtime_get_sync fails. Signed-off-by: Navid Emamdoost Acked-by: Charles Keepax Link: https://lore.kernel.org/r/20200605030052.78235-1-navid.emamdoost@gmail.com Signed-off-by: Linus Walleij --- drivers/gpio/gpio-arizona.c | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/drivers/gpio/gpio-arizona.c b/drivers/gpio/gpio-arizona.c index 7520a13b4c7c..5bda38e0780f 100644 --- a/drivers/gpio/gpio-arizona.c +++ b/drivers/gpio/gpio-arizona.c @@ -64,6 +64,7 @@ static int arizona_gpio_get(struct gpio_chip *chip, unsigned offset) ret = pm_runtime_get_sync(chip->parent); if (ret < 0) { dev_err(chip->parent, "Failed to resume: %d\n", ret); + pm_runtime_put_autosuspend(chip->parent); return ret; } @@ -72,12 +73,15 @@ static int arizona_gpio_get(struct gpio_chip *chip, unsigned offset) if (ret < 0) { dev_err(chip->parent, "Failed to drop cache: %d\n", ret); + pm_runtime_put_autosuspend(chip->parent); return ret; } ret = regmap_read(arizona->regmap, reg, &val); - if (ret < 0) + if (ret < 0) { + pm_runtime_put_autosuspend(chip->parent); return ret; + } pm_runtime_mark_last_busy(chip->parent); pm_runtime_put_autosuspend(chip->parent); From 064c73afe7385de99e5b2785b88c83dc5d84403b Mon Sep 17 00:00:00 2001 From: Andy Shevchenko Date: Fri, 5 Jun 2020 16:40:33 +0300 Subject: [PATCH 005/343] gpio: pca953x: Synchronize interrupt handler properly Since the commit aa58a21ae378 ("gpio: pca953x: disable regmap locking") the locking of regmap is disabled and that immediately introduces a synchronization issue. It's easy to see when we try to monitor more than one interrupt from the same chip. It seems that the problem exists from the day one and even commit 6e20fb18054c ("drivers/gpio/pca953x.c: add a mutex to fix race condition") missed this. Below are the traces and shell reproducers before and after proposed change. Note duplicates in the IRQ events. /proc/interrupts also shows a deviation, i.e. sum of children interrupts higher than parent's one. When locking is disabled for regmap and no protection in IRQ handler ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ ... gpioset-194 regmap_hw_write_start: i2c-INT3491:02 reg=2 count=1 irq/31-i2c-INT3-139 regmap_hw_read_start: i2c-INT3491:02 reg=4c count=2 gpioset-194 regmap_hw_write_done: i2c-INT3491:02 reg=2 count=1 gpioset-194 regmap_reg_read_cache: i2c-INT3491:02 reg=6 val=f5 gpioset-194 regmap_reg_write: i2c-INT3491:02 reg=6 val=f5 gpioset-194 regmap_hw_write_start: i2c-INT3491:02 reg=6 count=1 irq/31-i2c-INT3-139 regmap_hw_read_done: i2c-INT3491:02 reg=4c count=2 ... % gpiomon gpiochip3 0 & % gpioset gpiochip3 1=0 % gpioset gpiochip3 1=1 event: RISING EDGE offset: 0 timestamp: [ 302.782583765] % gpiomon gpiochip3 2 & % gpioset gpiochip3 1=0 event: RISING EDGE offset: 2 timestamp: [ 312.033148829] event: FALLING EDGE offset: 0 timestamp: [ 312.022757525] % gpioset gpiochip3 1=1 event: RISING EDGE offset: 2 timestamp: [ 316.201148473] event: RISING EDGE offset: 0 timestamp: [ 316.191759599] When locking is disabled for regmap and protection in IRQ handler ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ ... gpioset-202 regmap_hw_write_start: i2c-INT3491:02 reg=2 count=1 gpioset-202 regmap_hw_write_done: i2c-INT3491:02 reg=2 count=1 gpioset-202 regmap_reg_read_cache: i2c-INT3491:02 reg=6 val=fd gpioset-202 regmap_reg_write: i2c-INT3491:02 reg=6 val=fd gpioset-202 regmap_hw_write_start: i2c-INT3491:02 reg=6 count=1 gpioset-202 regmap_hw_write_done: i2c-INT3491:02 reg=6 count=1 irq/31-i2c-INT3-139 regmap_hw_read_start: i2c-INT3491:02 reg=4c count=2 irq/31-i2c-INT3-139 regmap_hw_read_done: i2c-INT3491:02 reg=4c count=2 ... % gpiomon gpiochip3 0 & % gpioset gpiochip3 1=0 event: FALLING EDGE offset: 0 timestamp: [ 531.330078107] % gpioset gpiochip3 1=1 event: RISING EDGE offset: 0 timestamp: [ 532.912239128] % gpiomon gpiochip3 2 & % gpioset gpiochip3 1=0 event: FALLING EDGE offset: 0 timestamp: [ 539.633669484] % gpioset gpiochip3 1=1 event: RISING EDGE offset: 0 timestamp: [ 542.256978461] Fixes: 6e20fb18054c ("drivers/gpio/pca953x.c: add a mutex to fix race condition") Depends-on: 35d13d94893f ("gpio: pca953x: convert to use bitmap API") Depends-on: 49427232764d ("gpio: pca953x: Perform basic regmap conversion") Cc: Marek Vasut Cc: Roland Stigge Signed-off-by: Andy Shevchenko Signed-off-by: Bartosz Golaszewski --- drivers/gpio/gpio-pca953x.c | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/drivers/gpio/gpio-pca953x.c b/drivers/gpio/gpio-pca953x.c index 1fca8dd7824f..afe78639ec58 100644 --- a/drivers/gpio/gpio-pca953x.c +++ b/drivers/gpio/gpio-pca953x.c @@ -734,14 +734,16 @@ static irqreturn_t pca953x_irq_handler(int irq, void *devid) struct gpio_chip *gc = &chip->gpio_chip; DECLARE_BITMAP(pending, MAX_LINE); int level; + bool ret; - if (!pca953x_irq_pending(chip, pending)) - return IRQ_NONE; + mutex_lock(&chip->i2c_lock); + ret = pca953x_irq_pending(chip, pending); + mutex_unlock(&chip->i2c_lock); for_each_set_bit(level, pending, gc->ngpio) handle_nested_irq(irq_find_mapping(gc->irq.domain, level)); - return IRQ_HANDLED; + return IRQ_RETVAL(ret); } static int pca953x_irq_setup(struct pca953x_chip *chip, int irq_base) From ba8c90c6184784b397807b72403656085ac2f8c1 Mon Sep 17 00:00:00 2001 From: Andy Shevchenko Date: Fri, 5 Jun 2020 16:40:34 +0300 Subject: [PATCH 006/343] gpio: pca953x: Override IRQ for one of the expanders on Galileo Gen 2 MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit ACPI table on Intel Galileo Gen 2 has wrong pin number for IRQ resource of one of the I²C GPIO expanders. Since we know what that number is and luckily have GPIO bases fixed for SoC's controllers, we may use a simple DMI quirk to match the platform and retrieve GpioInt() pin on it for the expander in question. Mika suggested the way to avoid a quirk in the GPIO ACPI library and here is the second, almost rewritten version of it. Fixes: f32517bf1ae0 ("gpio: pca953x: support ACPI devices found on Galileo Gen2") Depends-on: 25e3ef894eef ("gpio: acpi: Split out acpi_gpio_get_irq_resource() helper") Suggested-by: Mika Westerberg Reviewed-by: Mika Westerberg Signed-off-by: Andy Shevchenko Signed-off-by: Bartosz Golaszewski --- drivers/gpio/gpio-pca953x.c | 79 +++++++++++++++++++++++++++++++++++++ 1 file changed, 79 insertions(+) diff --git a/drivers/gpio/gpio-pca953x.c b/drivers/gpio/gpio-pca953x.c index afe78639ec58..4d3157d8b5cd 100644 --- a/drivers/gpio/gpio-pca953x.c +++ b/drivers/gpio/gpio-pca953x.c @@ -107,6 +107,79 @@ static const struct i2c_device_id pca953x_id[] = { }; MODULE_DEVICE_TABLE(i2c, pca953x_id); +#ifdef CONFIG_GPIO_PCA953X_IRQ + +#include +#include +#include + +static const struct dmi_system_id pca953x_dmi_acpi_irq_info[] = { + { + /* + * On Intel Galileo Gen 2 board the IRQ pin of one of + * the I²C GPIO expanders, which has GpioInt() resource, + * is provided as an absolute number instead of being + * relative. Since first controller (gpio-sch.c) and + * second (gpio-dwapb.c) are at the fixed bases, we may + * safely refer to the number in the global space to get + * an IRQ out of it. + */ + .matches = { + DMI_EXACT_MATCH(DMI_BOARD_NAME, "GalileoGen2"), + }, + }, + {} +}; + +#ifdef CONFIG_ACPI +static int pca953x_acpi_get_pin(struct acpi_resource *ares, void *data) +{ + struct acpi_resource_gpio *agpio; + int *pin = data; + + if (acpi_gpio_get_irq_resource(ares, &agpio)) + *pin = agpio->pin_table[0]; + return 1; +} + +static int pca953x_acpi_find_pin(struct device *dev) +{ + struct acpi_device *adev = ACPI_COMPANION(dev); + int pin = -ENOENT, ret; + LIST_HEAD(r); + + ret = acpi_dev_get_resources(adev, &r, pca953x_acpi_get_pin, &pin); + acpi_dev_free_resource_list(&r); + if (ret < 0) + return ret; + + return pin; +} +#else +static inline int pca953x_acpi_find_pin(struct device *dev) { return -ENXIO; } +#endif + +static int pca953x_acpi_get_irq(struct device *dev) +{ + int pin, ret; + + pin = pca953x_acpi_find_pin(dev); + if (pin < 0) + return pin; + + dev_info(dev, "Applying ACPI interrupt quirk (GPIO %d)\n", pin); + + if (!gpio_is_valid(pin)) + return -EINVAL; + + ret = gpio_request(pin, "pca953x interrupt"); + if (ret) + return ret; + + return gpio_to_irq(pin); +} +#endif + static const struct acpi_device_id pca953x_acpi_ids[] = { { "INT3491", 16 | PCA953X_TYPE | PCA_LATCH_INT, }, { } @@ -754,6 +827,12 @@ static int pca953x_irq_setup(struct pca953x_chip *chip, int irq_base) DECLARE_BITMAP(irq_stat, MAX_LINE); int ret; + if (dmi_first_match(pca953x_dmi_acpi_irq_info)) { + ret = pca953x_acpi_get_irq(&client->dev); + if (ret > 0) + client->irq = ret; + } + if (!client->irq) return 0; From 0b22c25e1b81c5f718e89c4d759e6a359be24417 Mon Sep 17 00:00:00 2001 From: Andy Shevchenko Date: Fri, 5 Jun 2020 16:40:35 +0300 Subject: [PATCH 007/343] gpio: pca953x: Fix direction setting when configure an IRQ The commit 0f25fda840a9 ("gpio: pca953x: Zap ad-hoc reg_direction cache") seems inadvertently made a typo in pca953x_irq_bus_sync_unlock(). When the direction bit is 1 it means input, and the piece of code in question was looking for output ones that should be turned to inputs. Fix direction setting when configure an IRQ by injecting a bitmap complement operation. Fixes: 0f25fda840a9 ("gpio: pca953x: Zap ad-hoc reg_direction cache") Depends-on: 35d13d94893f ("gpio: pca953x: convert to use bitmap API") Cc: Marek Vasut Signed-off-by: Andy Shevchenko Signed-off-by: Bartosz Golaszewski --- drivers/gpio/gpio-pca953x.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/drivers/gpio/gpio-pca953x.c b/drivers/gpio/gpio-pca953x.c index 4d3157d8b5cd..97c9ac31ecb5 100644 --- a/drivers/gpio/gpio-pca953x.c +++ b/drivers/gpio/gpio-pca953x.c @@ -696,8 +696,6 @@ static void pca953x_irq_bus_sync_unlock(struct irq_data *d) DECLARE_BITMAP(reg_direction, MAX_LINE); int level; - pca953x_read_regs(chip, chip->regs->direction, reg_direction); - if (chip->driver_data & PCA_PCAL) { /* Enable latch on interrupt-enabled inputs */ pca953x_write_regs(chip, PCAL953X_IN_LATCH, chip->irq_mask); @@ -708,7 +706,11 @@ static void pca953x_irq_bus_sync_unlock(struct irq_data *d) pca953x_write_regs(chip, PCAL953X_INT_MASK, irq_mask); } + /* Switch direction to input if needed */ + pca953x_read_regs(chip, chip->regs->direction, reg_direction); + bitmap_or(irq_mask, chip->irq_trig_fall, chip->irq_trig_raise, gc->ngpio); + bitmap_complement(reg_direction, reg_direction, gc->ngpio); bitmap_and(irq_mask, irq_mask, reg_direction, gc->ngpio); /* Look for any newly setup interrupt */ From ec3decd21380081e3b5de4ba8d85d90a95f201a0 Mon Sep 17 00:00:00 2001 From: Andy Shevchenko Date: Fri, 5 Jun 2020 16:40:36 +0300 Subject: [PATCH 008/343] gpio: pca953x: disable regmap locking for automatic address incrementing MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit It's a repetition of the commit aa58a21ae378 ("gpio: pca953x: disable regmap locking") which states the following: This driver uses its own locking but regmap silently uses a mutex for all operations too. Add the option to disable locking to the regmap config struct. Fixes: bcf41dc480b1 ("gpio: pca953x: fix handling of automatic address incrementing") Cc: Uwe Kleine-König Signed-off-by: Andy Shevchenko Reviewed-by: Uwe Kleine-König Signed-off-by: Bartosz Golaszewski --- drivers/gpio/gpio-pca953x.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/gpio/gpio-pca953x.c b/drivers/gpio/gpio-pca953x.c index 97c9ac31ecb5..6f409ee0b033 100644 --- a/drivers/gpio/gpio-pca953x.c +++ b/drivers/gpio/gpio-pca953x.c @@ -395,6 +395,7 @@ static const struct regmap_config pca953x_ai_i2c_regmap = { .writeable_reg = pca953x_writeable_register, .volatile_reg = pca953x_volatile_register, + .disable_locking = true, .cache_type = REGCACHE_RBTREE, .max_register = 0x7f, }; From 27a5e7d36d383970affae801d77141deafd536a8 Mon Sep 17 00:00:00 2001 From: Neil Armstrong Date: Mon, 8 Jun 2020 10:44:58 +0200 Subject: [PATCH 009/343] mmc: meson-gx: limit segments to 1 when dram-access-quirk is needed The actual max_segs computation leads to failure while using the broadcom sdio brcmfmac/bcmsdh driver, since the driver tries to make usage of scatter gather. But with the dram-access-quirk we use a 1,5K SRAM bounce buffer, and the max_segs current value of 3 leads to max transfers to 4,5k, which doesn't work. This patch sets max_segs to 1 to better describe the hardware limitation, and fix the SDIO functionality with the brcmfmac/bcmsdh driver on Amlogic G12A/G12B SoCs on boards like SEI510 or Khadas VIM3. Reported-by: Art Nikpal Reported-by: Christian Hewitt Fixes: acdc8e71d9bb ("mmc: meson-gx: add dram-access-quirk") Signed-off-by: Neil Armstrong Reviewed-by: Kevin Hilman Cc: stable@vger.kernel.org Link: https://lore.kernel.org/r/20200608084458.32014-1-narmstrong@baylibre.com Signed-off-by: Ulf Hansson --- drivers/mmc/host/meson-gx-mmc.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/drivers/mmc/host/meson-gx-mmc.c b/drivers/mmc/host/meson-gx-mmc.c index 7eb38d7482c6..08a3b1c05acb 100644 --- a/drivers/mmc/host/meson-gx-mmc.c +++ b/drivers/mmc/host/meson-gx-mmc.c @@ -1146,9 +1146,11 @@ static int meson_mmc_probe(struct platform_device *pdev) mmc->caps |= MMC_CAP_CMD23; if (host->dram_access_quirk) { + /* Limit segments to 1 due to low available sram memory */ + mmc->max_segs = 1; /* Limit to the available sram memory */ - mmc->max_segs = SD_EMMC_SRAM_DATA_BUF_LEN / mmc->max_blk_size; - mmc->max_blk_count = mmc->max_segs; + mmc->max_blk_count = SD_EMMC_SRAM_DATA_BUF_LEN / + mmc->max_blk_size; } else { mmc->max_blk_count = CMD_CFG_LENGTH_MASK; mmc->max_segs = SD_EMMC_DESC_BUF_LEN / From 00fdec98d9881bf5173af09aebd353ab3b9ac729 Mon Sep 17 00:00:00 2001 From: Vineet Gupta Date: Tue, 19 May 2020 22:28:32 -0700 Subject: [PATCH 010/343] ARC: entry: fix potential EFA clobber when TIF_SYSCALL_TRACE Trap handler for syscall tracing reads EFA (Exception Fault Address), in case strace wants PC of trap instruction (EFA is not part of pt_regs as of current code). However this EFA read is racy as it happens after dropping to pure kernel mode (re-enabling interrupts). A taken interrupt could context-switch, trigger a different task's trap, clobbering EFA for this execution context. Fix this by reading EFA early, before re-enabling interrupts. A slight side benefit is de-duplication of FAKE_RET_FROM_EXCPN in trap handler. The trap handler is common to both ARCompact and ARCv2 builds too. This just came out of code rework/review and no real problem was reported but is clearly a potential problem specially for strace. Cc: Signed-off-by: Vineet Gupta --- arch/arc/kernel/entry.S | 16 +++++----------- 1 file changed, 5 insertions(+), 11 deletions(-) diff --git a/arch/arc/kernel/entry.S b/arch/arc/kernel/entry.S index 60406ec62eb8..ea00c8a17f07 100644 --- a/arch/arc/kernel/entry.S +++ b/arch/arc/kernel/entry.S @@ -165,7 +165,6 @@ END(EV_Extension) tracesys: ; save EFA in case tracer wants the PC of traced task ; using ERET won't work since next-PC has already committed - lr r12, [efa] GET_CURR_TASK_FIELD_PTR TASK_THREAD, r11 st r12, [r11, THREAD_FAULT_ADDR] ; thread.fault_address @@ -208,15 +207,9 @@ tracesys_exit: ; Breakpoint TRAP ; --------------------------------------------- trap_with_param: - - ; stop_pc info by gdb needs this info - lr r0, [efa] + mov r0, r12 ; EFA in case ptracer/gdb wants stop_pc mov r1, sp - ; Now that we have read EFA, it is safe to do "fake" rtie - ; and get out of CPU exception mode - FAKE_RET_FROM_EXCPN - ; Save callee regs in case gdb wants to have a look ; SP will grow up by size of CALLEE Reg-File ; NOTE: clobbers r12 @@ -243,6 +236,10 @@ ENTRY(EV_Trap) EXCEPTION_PROLOGUE + lr r12, [efa] + + FAKE_RET_FROM_EXCPN + ;============ TRAP 1 :breakpoints ; Check ECR for trap with arg (PROLOGUE ensures r10 has ECR) bmsk.f 0, r10, 7 @@ -250,9 +247,6 @@ ENTRY(EV_Trap) ;============ TRAP (no param): syscall top level - ; First return from Exception to pure K mode (Exception/IRQs renabled) - FAKE_RET_FROM_EXCPN - ; If syscall tracing ongoing, invoke pre-post-hooks GET_CURR_THR_INFO_FLAGS r10 btst r10, TIF_SYSCALL_TRACE From 33b59f1671f105a6da9c0aa75d7cf6bea126d2c5 Mon Sep 17 00:00:00 2001 From: Vineet Gupta Date: Wed, 13 May 2020 01:08:23 -0700 Subject: [PATCH 011/343] ARC: [arcompact] fix bitrot with 2 levels of interrupt Signed-off-by: Vineet Gupta --- arch/arc/include/asm/irqflags-compact.h | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/arch/arc/include/asm/irqflags-compact.h b/arch/arc/include/asm/irqflags-compact.h index 7fc73fef5e29..863d63ad18d6 100644 --- a/arch/arc/include/asm/irqflags-compact.h +++ b/arch/arc/include/asm/irqflags-compact.h @@ -90,6 +90,9 @@ static inline void arch_local_irq_restore(unsigned long flags) /* * Unconditionally Enable IRQs */ +#ifdef CONFIG_ARC_COMPACT_IRQ_LEVELS +extern void arch_local_irq_enable(void); +#else static inline void arch_local_irq_enable(void) { unsigned long temp; @@ -102,7 +105,7 @@ static inline void arch_local_irq_enable(void) : "n"((STATUS_E1_MASK | STATUS_E2_MASK)) : "cc", "memory"); } - +#endif /* * Unconditionally Disable IRQs From b7faf971081a4e56147f082234bfff55135305cb Mon Sep 17 00:00:00 2001 From: Vineet Gupta Date: Wed, 27 May 2020 14:18:45 -0700 Subject: [PATCH 012/343] ARC: elf: use right ELF_ARCH Cc: Signed-off-by: Vineet Gupta --- arch/arc/include/asm/elf.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/arc/include/asm/elf.h b/arch/arc/include/asm/elf.h index c77a0e3671ac..0284ace0e1ab 100644 --- a/arch/arc/include/asm/elf.h +++ b/arch/arc/include/asm/elf.h @@ -19,7 +19,7 @@ #define R_ARC_32_PCREL 0x31 /*to set parameters in the core dumps */ -#define ELF_ARCH EM_ARCOMPACT +#define ELF_ARCH EM_ARC_INUSE #define ELF_CLASS ELFCLASS32 #ifdef CONFIG_CPU_BIG_ENDIAN From 97d0b5d0b5a99871a983ca9b5c02bfde8bf73cbf Mon Sep 17 00:00:00 2001 From: Vineet Gupta Date: Wed, 10 Jun 2020 12:26:15 -0700 Subject: [PATCH 013/343] ARCv2: boot log: detect newer/upconing HS3x/HS4x releases Signed-off-by: Vineet Gupta --- arch/arc/kernel/setup.c | 19 +++++++------------ 1 file changed, 7 insertions(+), 12 deletions(-) diff --git a/arch/arc/kernel/setup.c b/arch/arc/kernel/setup.c index dad8a656a2f1..41f07b3e594e 100644 --- a/arch/arc/kernel/setup.c +++ b/arch/arc/kernel/setup.c @@ -58,10 +58,12 @@ static const struct id_to_str arc_legacy_rel[] = { { 0x00, NULL } }; -static const struct id_to_str arc_cpu_rel[] = { +static const struct id_to_str arc_hs_ver54_rel[] = { /* UARCH.MAJOR, Release */ { 0, "R3.10a"}, { 1, "R3.50a"}, + { 2, "R3.60a"}, + { 3, "R4.00a"}, { 0xFF, NULL } }; @@ -117,12 +119,6 @@ static void decode_arc_core(struct cpuinfo_arc *cpu) struct bcr_uarch_build_arcv2 uarch; const struct id_to_str *tbl; - /* - * Up until (including) the first core4 release (0x54) things were - * simple: AUX IDENTITY.ARCVER was sufficient to identify arc family - * and release: 0x50 to 0x53 was HS38, 0x54 was HS48 (dual issue) - */ - if (cpu->core.family < 0x54) { /* includes arc700 */ for (tbl = &arc_legacy_rel[0]; tbl->id != 0; tbl++) { @@ -143,11 +139,10 @@ static void decode_arc_core(struct cpuinfo_arc *cpu) } /* - * However the subsequent HS release (same 0x54) allow HS38 or HS48 - * configurations and encode this info in a different BCR. - * The BCR was introduced in 0x54 so can't be read unconditionally. + * Initial HS cores bumped AUX IDENTITY.ARCVER for each release until + * ARCVER 0x54 which introduced AUX MICRO_ARCH_BUILD and subsequent + * releases only update it. */ - READ_BCR(ARC_REG_MICRO_ARCH_BCR, uarch); if (uarch.prod == 4) { @@ -158,7 +153,7 @@ static void decode_arc_core(struct cpuinfo_arc *cpu) cpu->name = "HS38"; } - for (tbl = &arc_cpu_rel[0]; tbl->id != 0xFF; tbl++) { + for (tbl = &arc_hs_ver54_rel[0]; tbl->id != 0xFF; tbl++) { if (uarch.maj == tbl->id) { cpu->release = tbl->str; break; From 0bdd6e7428a2e8971d7c9b8e212056dd0e0001c9 Mon Sep 17 00:00:00 2001 From: Eugeniy Paltsev Date: Thu, 4 Jun 2020 20:39:24 +0300 Subject: [PATCH 014/343] ARC: build: allow users to specify -mcpu kernel build system used to add -mcpu for each ARC ISA as default. These days there are versions and varaints of ARC HS cores some of which have specific -mcpu options to fine tune / optimize generated code. So allow users/external build systems to specify their own -mcpu This will be used in future patches for HSDK-4xD board support which uses specific -mcpu to utilize dual issue scheduling of the core. Signed-off-by: Eugeniy Paltsev Signed-off-by: Vineet Gupta [abrodkin/vgupta: rewrote changelog] --- arch/arc/Kconfig | 9 +++++++++ arch/arc/Makefile | 21 +++++++++++++++++++-- 2 files changed, 28 insertions(+), 2 deletions(-) diff --git a/arch/arc/Kconfig b/arch/arc/Kconfig index fddc70029727..323014149e48 100644 --- a/arch/arc/Kconfig +++ b/arch/arc/Kconfig @@ -170,6 +170,15 @@ config ARC_CPU_HS endchoice +config ARC_TUNE_MCPU + string "Override default -mcpu compiler flag" + default "" + help + Override default -mcpu=xxx compiler flag (which is set depending on + the ISA version) with the specified value. + NOTE: If specified flag isn't supported by current compiler the + ISA default value will be used as a fallback. + config CPU_BIG_ENDIAN bool "Enable Big Endian Mode" help diff --git a/arch/arc/Makefile b/arch/arc/Makefile index 20e9ab6cc521..2b66e8264174 100644 --- a/arch/arc/Makefile +++ b/arch/arc/Makefile @@ -10,8 +10,25 @@ CROSS_COMPILE := $(call cc-cross-prefix, arc-linux- arceb-linux-) endif cflags-y += -fno-common -pipe -fno-builtin -mmedium-calls -D__linux__ -cflags-$(CONFIG_ISA_ARCOMPACT) += -mA7 -cflags-$(CONFIG_ISA_ARCV2) += -mcpu=hs38 + +tune-mcpu-def-$(CONFIG_ISA_ARCOMPACT) := -mA7 +tune-mcpu-def-$(CONFIG_ISA_ARCV2) := -mcpu=hs38 + +ifeq ($(CONFIG_ARC_TUNE_MCPU),"") +cflags-y += $(tune-mcpu-def-y) +else +tune-mcpu := $(shell echo $(CONFIG_ARC_TUNE_MCPU)) +tune-mcpu-ok := $(call cc-option-yn, $(tune-mcpu)) +ifeq ($(tune-mcpu-ok),y) +cflags-y += $(tune-mcpu) +else +# The flag provided by 'CONFIG_ARC_TUNE_MCPU' option isn't known by this compiler +# (probably the compiler is too old). Use ISA default mcpu flag instead as a safe option. +$(warning ** WARNING ** CONFIG_ARC_TUNE_MCPU flag '$(tune-mcpu)' is unknown, fallback to '$(tune-mcpu-def-y)') +cflags-y += $(tune-mcpu-def-y) +endif +endif + ifdef CONFIG_ARC_CURR_IN_REG # For a global register defintion, make sure it gets passed to every file From 040ece2a3c1503c0a7e327034510367747c27a5f Mon Sep 17 00:00:00 2001 From: Vineet Gupta Date: Tue, 16 Jun 2020 15:14:50 -0700 Subject: [PATCH 015/343] ARC: build: remove deprecated toggle for arc700 builds Signed-off-by: Vineet Gupta --- arch/arc/Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/arc/Makefile b/arch/arc/Makefile index 2b66e8264174..d00f8b8afd08 100644 --- a/arch/arc/Makefile +++ b/arch/arc/Makefile @@ -11,7 +11,7 @@ endif cflags-y += -fno-common -pipe -fno-builtin -mmedium-calls -D__linux__ -tune-mcpu-def-$(CONFIG_ISA_ARCOMPACT) := -mA7 +tune-mcpu-def-$(CONFIG_ISA_ARCOMPACT) := -mcpu=arc700 tune-mcpu-def-$(CONFIG_ISA_ARCV2) := -mcpu=hs38 ifeq ($(CONFIG_ARC_TUNE_MCPU),"") From 774911290c589e98e3638e73b24b0a4d4530e97c Mon Sep 17 00:00:00 2001 From: Christian Borntraeger Date: Wed, 17 Jun 2020 10:36:20 +0200 Subject: [PATCH 016/343] KVM: s390: reduce number of IO pins to 1 The current number of KVM_IRQCHIP_NUM_PINS results in an order 3 allocation (32kb) for each guest start/restart. This can result in OOM killer activity even with free swap when the memory is fragmented enough: kernel: qemu-system-s39 invoked oom-killer: gfp_mask=0x440dc0(GFP_KERNEL_ACCOUNT|__GFP_COMP|__GFP_ZERO), order=3, oom_score_adj=0 kernel: CPU: 1 PID: 357274 Comm: qemu-system-s39 Kdump: loaded Not tainted 5.4.0-29-generic #33-Ubuntu kernel: Hardware name: IBM 8562 T02 Z06 (LPAR) kernel: Call Trace: kernel: ([<00000001f848fe2a>] show_stack+0x7a/0xc0) kernel: [<00000001f8d3437a>] dump_stack+0x8a/0xc0 kernel: [<00000001f8687032>] dump_header+0x62/0x258 kernel: [<00000001f8686122>] oom_kill_process+0x172/0x180 kernel: [<00000001f8686abe>] out_of_memory+0xee/0x580 kernel: [<00000001f86e66b8>] __alloc_pages_slowpath+0xd18/0xe90 kernel: [<00000001f86e6ad4>] __alloc_pages_nodemask+0x2a4/0x320 kernel: [<00000001f86b1ab4>] kmalloc_order+0x34/0xb0 kernel: [<00000001f86b1b62>] kmalloc_order_trace+0x32/0xe0 kernel: [<00000001f84bb806>] kvm_set_irq_routing+0xa6/0x2e0 kernel: [<00000001f84c99a4>] kvm_arch_vm_ioctl+0x544/0x9e0 kernel: [<00000001f84b8936>] kvm_vm_ioctl+0x396/0x760 kernel: [<00000001f875df66>] do_vfs_ioctl+0x376/0x690 kernel: [<00000001f875e304>] ksys_ioctl+0x84/0xb0 kernel: [<00000001f875e39a>] __s390x_sys_ioctl+0x2a/0x40 kernel: [<00000001f8d55424>] system_call+0xd8/0x2c8 As far as I can tell s390x does not use the iopins as we bail our for anything other than KVM_IRQ_ROUTING_S390_ADAPTER and the chip/pin is only used for KVM_IRQ_ROUTING_IRQCHIP. So let us use a small number to reduce the memory footprint. Signed-off-by: Christian Borntraeger Reviewed-by: Cornelia Huck Reviewed-by: David Hildenbrand Link: https://lore.kernel.org/r/20200617083620.5409-1-borntraeger@de.ibm.com --- arch/s390/include/asm/kvm_host.h | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/arch/s390/include/asm/kvm_host.h b/arch/s390/include/asm/kvm_host.h index cee3cb6455a2..6ea0820e7c7f 100644 --- a/arch/s390/include/asm/kvm_host.h +++ b/arch/s390/include/asm/kvm_host.h @@ -31,12 +31,12 @@ #define KVM_USER_MEM_SLOTS 32 /* - * These seem to be used for allocating ->chip in the routing table, - * which we don't use. 4096 is an out-of-thin-air value. If we need - * to look at ->chip later on, we'll need to revisit this. + * These seem to be used for allocating ->chip in the routing table, which we + * don't use. 1 is as small as we can get to reduce the needed memory. If we + * need to look at ->chip later on, we'll need to revisit this. */ #define KVM_NR_IRQCHIPS 1 -#define KVM_IRQCHIP_NUM_PINS 4096 +#define KVM_IRQCHIP_NUM_PINS 1 #define KVM_HALT_POLL_NS_DEFAULT 50000 /* s390-specific vcpu->requests bit members */ From 10011f7d95dea311c0f2a3ea6725b5a2e97015a8 Mon Sep 17 00:00:00 2001 From: Eugeniy Paltsev Date: Thu, 4 Jun 2020 20:39:25 +0300 Subject: [PATCH 017/343] ARCv2: support loop buffer (LPB) disabling On HS cores, loop buffer (LPB) is programmable in runtime and can be optionally disabled. Signed-off-by: Eugeniy Paltsev Signed-off-by: Vineet Gupta --- arch/arc/Kconfig | 6 ++++++ arch/arc/kernel/head.S | 8 ++++++++ 2 files changed, 14 insertions(+) diff --git a/arch/arc/Kconfig b/arch/arc/Kconfig index 323014149e48..197896cfbd23 100644 --- a/arch/arc/Kconfig +++ b/arch/arc/Kconfig @@ -474,6 +474,12 @@ config ARC_IRQ_NO_AUTOSAVE This is programmable and can be optionally disabled in which case software INTERRUPT_PROLOGUE/EPILGUE do the needed work +config ARC_LPB_DISABLE + bool "Disable loop buffer (LPB)" + help + On HS cores, loop buffer (LPB) is programmable in runtime and can + be optionally disabled. + endif # ISA_ARCV2 endmenu # "ARC CPU Configuration" diff --git a/arch/arc/kernel/head.S b/arch/arc/kernel/head.S index 6eb23f1545ee..17fd1ed700cc 100644 --- a/arch/arc/kernel/head.S +++ b/arch/arc/kernel/head.S @@ -59,6 +59,14 @@ bclr r5, r5, STATUS_AD_BIT #endif kflag r5 + +#ifdef CONFIG_ARC_LPB_DISABLE + lr r5, [ARC_REG_LPB_BUILD] + breq r5, 0, 1f ; LPB doesn't exist + mov r5, 1 + sr r5, [ARC_REG_LPB_CTRL] +1: +#endif /* CONFIG_ARC_LPB_DISABLE */ #endif ; Config DSP_CTRL properly, so kernel may use integer multiply, ; multiply-accumulate, and divide operations From 69339d083dfb7786b0e0b3fc19eaddcf11fabdfb Mon Sep 17 00:00:00 2001 From: Jacky Hu Date: Tue, 16 Jun 2020 09:50:24 +0800 Subject: [PATCH 018/343] pinctrl: amd: fix npins for uart0 in kerncz_groups uart0_pins is defined as: static const unsigned uart0_pins[] = {135, 136, 137, 138, 139}; which npins is wronly specified as 9 later { .name = "uart0", .pins = uart0_pins, .npins = 9, }, npins should be 5 instead of 9 according to the definition. Signed-off-by: Jacky Hu Link: https://lore.kernel.org/r/20200616015024.287683-1-hengqing.hu@gmail.com Signed-off-by: Linus Walleij --- drivers/pinctrl/pinctrl-amd.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/pinctrl/pinctrl-amd.h b/drivers/pinctrl/pinctrl-amd.h index 3e5760f1a715..d4a192df5fab 100644 --- a/drivers/pinctrl/pinctrl-amd.h +++ b/drivers/pinctrl/pinctrl-amd.h @@ -252,7 +252,7 @@ static const struct amd_pingroup kerncz_groups[] = { { .name = "uart0", .pins = uart0_pins, - .npins = 9, + .npins = 5, }, { .name = "uart1", From 7733306bd593c737c63110175da6c35b4b8bb32c Mon Sep 17 00:00:00 2001 From: Alexandru Elisei Date: Thu, 18 Jun 2020 18:12:54 +0100 Subject: [PATCH 019/343] KVM: arm64: Annotate hyp NMI-related functions as __always_inline The "inline" keyword is a hint for the compiler to inline a function. The functions system_uses_irq_prio_masking() and gic_write_pmr() are used by the code running at EL2 on a non-VHE system, so mark them as __always_inline to make sure they'll always be part of the .hyp.text section. This fixes the following splat when trying to run a VM: [ 47.625273] Kernel panic - not syncing: HYP panic: [ 47.625273] PS:a00003c9 PC:0000ca0b42049fc4 ESR:86000006 [ 47.625273] FAR:0000ca0b42049fc4 HPFAR:0000000010001000 PAR:0000000000000000 [ 47.625273] VCPU:0000000000000000 [ 47.647261] CPU: 1 PID: 217 Comm: kvm-vcpu-0 Not tainted 5.8.0-rc1-ARCH+ #61 [ 47.654508] Hardware name: Globalscale Marvell ESPRESSOBin Board (DT) [ 47.661139] Call trace: [ 47.663659] dump_backtrace+0x0/0x1cc [ 47.667413] show_stack+0x18/0x24 [ 47.670822] dump_stack+0xb8/0x108 [ 47.674312] panic+0x124/0x2f4 [ 47.677446] panic+0x0/0x2f4 [ 47.680407] SMP: stopping secondary CPUs [ 47.684439] Kernel Offset: disabled [ 47.688018] CPU features: 0x240402,20002008 [ 47.692318] Memory Limit: none [ 47.695465] ---[ end Kernel panic - not syncing: HYP panic: [ 47.695465] PS:a00003c9 PC:0000ca0b42049fc4 ESR:86000006 [ 47.695465] FAR:0000ca0b42049fc4 HPFAR:0000000010001000 PAR:0000000000000000 [ 47.695465] VCPU:0000000000000000 ]--- The instruction abort was caused by the code running at EL2 trying to fetch an instruction which wasn't mapped in the EL2 translation tables. Using objdump showed the two functions as separate symbols in the .text section. Fixes: 85738e05dc38 ("arm64: kvm: Unmask PMR before entering guest") Cc: stable@vger.kernel.org Signed-off-by: Alexandru Elisei Signed-off-by: Marc Zyngier Acked-by: James Morse Link: https://lore.kernel.org/r/20200618171254.1596055-1-alexandru.elisei@arm.com --- arch/arm64/include/asm/arch_gicv3.h | 2 +- arch/arm64/include/asm/cpufeature.h | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/arch/arm64/include/asm/arch_gicv3.h b/arch/arm64/include/asm/arch_gicv3.h index a358e97572c1..6647ae4f0231 100644 --- a/arch/arm64/include/asm/arch_gicv3.h +++ b/arch/arm64/include/asm/arch_gicv3.h @@ -109,7 +109,7 @@ static inline u32 gic_read_pmr(void) return read_sysreg_s(SYS_ICC_PMR_EL1); } -static inline void gic_write_pmr(u32 val) +static __always_inline void gic_write_pmr(u32 val) { write_sysreg_s(val, SYS_ICC_PMR_EL1); } diff --git a/arch/arm64/include/asm/cpufeature.h b/arch/arm64/include/asm/cpufeature.h index 5d1f4ae42799..f7c3d1ff091d 100644 --- a/arch/arm64/include/asm/cpufeature.h +++ b/arch/arm64/include/asm/cpufeature.h @@ -675,7 +675,7 @@ static inline bool system_supports_generic_auth(void) cpus_have_const_cap(ARM64_HAS_GENERIC_AUTH); } -static inline bool system_uses_irq_prio_masking(void) +static __always_inline bool system_uses_irq_prio_masking(void) { return IS_ENABLED(CONFIG_ARM64_PSEUDO_NMI) && cpus_have_const_cap(ARM64_HAS_IRQ_PRIO_MASKING); From 66b7e05dc0239c5817859f261098ba9cc2efbd2b Mon Sep 17 00:00:00 2001 From: Steven Price Date: Wed, 17 Jun 2020 11:54:56 +0100 Subject: [PATCH 020/343] KVM: arm64: Fix kvm_reset_vcpu() return code being incorrect with SVE If SVE is enabled then 'ret' can be assigned the return value of kvm_vcpu_enable_sve() which may be 0 causing future "goto out" sites to erroneously return 0 on failure rather than -EINVAL as expected. Remove the initialisation of 'ret' and make setting the return value explicit to avoid this situation in the future. Fixes: 9a3cdf26e336 ("KVM: arm64/sve: Allow userspace to enable SVE for vcpus") Cc: stable@vger.kernel.org Reported-by: James Morse Signed-off-by: Steven Price Signed-off-by: Marc Zyngier Link: https://lore.kernel.org/r/20200617105456.28245-1-steven.price@arm.com --- arch/arm64/kvm/reset.c | 10 +++++++--- 1 file changed, 7 insertions(+), 3 deletions(-) diff --git a/arch/arm64/kvm/reset.c b/arch/arm64/kvm/reset.c index d3b209023727..6ed36be51b4b 100644 --- a/arch/arm64/kvm/reset.c +++ b/arch/arm64/kvm/reset.c @@ -245,7 +245,7 @@ static int kvm_vcpu_enable_ptrauth(struct kvm_vcpu *vcpu) */ int kvm_reset_vcpu(struct kvm_vcpu *vcpu) { - int ret = -EINVAL; + int ret; bool loaded; u32 pstate; @@ -269,15 +269,19 @@ int kvm_reset_vcpu(struct kvm_vcpu *vcpu) if (test_bit(KVM_ARM_VCPU_PTRAUTH_ADDRESS, vcpu->arch.features) || test_bit(KVM_ARM_VCPU_PTRAUTH_GENERIC, vcpu->arch.features)) { - if (kvm_vcpu_enable_ptrauth(vcpu)) + if (kvm_vcpu_enable_ptrauth(vcpu)) { + ret = -EINVAL; goto out; + } } switch (vcpu->arch.target) { default: if (test_bit(KVM_ARM_VCPU_EL1_32BIT, vcpu->arch.features)) { - if (!cpus_have_const_cap(ARM64_HAS_32BIT_EL1)) + if (!cpus_have_const_cap(ARM64_HAS_32BIT_EL1)) { + ret = -EINVAL; goto out; + } pstate = VCPU_RESET_PSTATE_SVC; } else { pstate = VCPU_RESET_PSTATE_EL1; From a25e91028ac2f544e0140aff2c9360a0e995dd86 Mon Sep 17 00:00:00 2001 From: Andrew Jones Date: Mon, 22 Jun 2020 16:27:10 +0200 Subject: [PATCH 021/343] KVM: arm64: pvtime: Ensure task delay accounting is enabled Ensure we're actually accounting run_delay before we claim that we'll expose it to the guest. If we're not, then we just pretend like steal time isn't supported in order to avoid any confusion. Signed-off-by: Andrew Jones Signed-off-by: Marc Zyngier Link: https://lore.kernel.org/r/20200622142710.18677-1-drjones@redhat.com --- arch/arm64/kvm/pvtime.c | 15 ++++++++++++--- 1 file changed, 12 insertions(+), 3 deletions(-) diff --git a/arch/arm64/kvm/pvtime.c b/arch/arm64/kvm/pvtime.c index 1e0f4c284888..f7b52ce1557e 100644 --- a/arch/arm64/kvm/pvtime.c +++ b/arch/arm64/kvm/pvtime.c @@ -3,6 +3,7 @@ #include #include +#include #include #include @@ -73,6 +74,11 @@ gpa_t kvm_init_stolen_time(struct kvm_vcpu *vcpu) return base; } +static bool kvm_arm_pvtime_supported(void) +{ + return !!sched_info_on(); +} + int kvm_arm_pvtime_set_attr(struct kvm_vcpu *vcpu, struct kvm_device_attr *attr) { @@ -82,7 +88,8 @@ int kvm_arm_pvtime_set_attr(struct kvm_vcpu *vcpu, int ret = 0; int idx; - if (attr->attr != KVM_ARM_VCPU_PVTIME_IPA) + if (!kvm_arm_pvtime_supported() || + attr->attr != KVM_ARM_VCPU_PVTIME_IPA) return -ENXIO; if (get_user(ipa, user)) @@ -110,7 +117,8 @@ int kvm_arm_pvtime_get_attr(struct kvm_vcpu *vcpu, u64 __user *user = (u64 __user *)attr->addr; u64 ipa; - if (attr->attr != KVM_ARM_VCPU_PVTIME_IPA) + if (!kvm_arm_pvtime_supported() || + attr->attr != KVM_ARM_VCPU_PVTIME_IPA) return -ENXIO; ipa = vcpu->arch.steal.base; @@ -125,7 +133,8 @@ int kvm_arm_pvtime_has_attr(struct kvm_vcpu *vcpu, { switch (attr->attr) { case KVM_ARM_VCPU_PVTIME_IPA: - return 0; + if (kvm_arm_pvtime_supported()) + return 0; } return -ENXIO; } From c42ad5d4358230be166fc36c87bae0dd1680c570 Mon Sep 17 00:00:00 2001 From: Andreas Gerstmayr Date: Fri, 19 Jun 2020 17:32:31 +0200 Subject: [PATCH 022/343] perf flamegraph: Explicitly set utf-8 encoding On some platforms the default encoding is not utf-8, which causes an UnicodeDecodeError when reading the flamegraph template and writing the flamegraph Signed-off-by: Andreas Gerstmayr Cc: Alexander Shishkin Cc: Jiri Olsa Cc: Mark Rutland Cc: Michael Petlan Cc: Namhyung Kim Cc: Peter Zijlstra Link: http://lore.kernel.org/lkml/20200619153232.203537-1-agerstmayr@redhat.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/scripts/python/flamegraph.py | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/tools/perf/scripts/python/flamegraph.py b/tools/perf/scripts/python/flamegraph.py index 61f3be9add6b..65780013f745 100755 --- a/tools/perf/scripts/python/flamegraph.py +++ b/tools/perf/scripts/python/flamegraph.py @@ -17,6 +17,7 @@ from __future__ import print_function import sys import os +import io import argparse import json @@ -81,7 +82,7 @@ class FlameGraphCLI: if self.args.format == "html": try: - with open(self.args.template) as f: + with io.open(self.args.template, encoding="utf-8") as f: output_str = f.read().replace("/** @flamegraph_json **/", json_str) except IOError as e: @@ -93,11 +94,12 @@ class FlameGraphCLI: output_fn = self.args.output or "stacks.json" if output_fn == "-": - sys.stdout.write(output_str) + with io.open(sys.stdout.fileno(), "w", encoding="utf-8", closefd=False) as out: + out.write(output_str) else: print("dumping data to {}".format(output_fn)) try: - with open(output_fn, "w") as out: + with io.open(output_fn, "w", encoding="utf-8") as out: out.write(output_str) except IOError as e: print("Error writing output file: {}".format(e), file=sys.stderr) From 5d8913504ccfeea6120df5ae1c6f4479ff09b931 Mon Sep 17 00:00:00 2001 From: Andy Shevchenko Date: Thu, 18 Jun 2020 14:49:06 +0300 Subject: [PATCH 023/343] gpio: pca953x: Fix GPIO resource leak on Intel Galileo Gen 2 When adding a quirk for IRQ on Intel Galileo Gen 2 the commit ba8c90c61847 ("gpio: pca953x: Override IRQ for one of the expanders on Galileo Gen 2") missed GPIO resource release. We can safely do this in the same quirk, since IRQ will be locked by GPIO framework when requested and unlocked on freeing. Fixes: ba8c90c61847 ("gpio: pca953x: Override IRQ for one of the expanders on Galileo Gen 2") Signed-off-by: Andy Shevchenko Cc: Mika Westerberg Reviewed-by: Mika Westerberg Reviewed-by: Linus Walleij Signed-off-by: Bartosz Golaszewski --- drivers/gpio/gpio-pca953x.c | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/drivers/gpio/gpio-pca953x.c b/drivers/gpio/gpio-pca953x.c index 6f409ee0b033..a3b9bdedbe44 100644 --- a/drivers/gpio/gpio-pca953x.c +++ b/drivers/gpio/gpio-pca953x.c @@ -176,7 +176,12 @@ static int pca953x_acpi_get_irq(struct device *dev) if (ret) return ret; - return gpio_to_irq(pin); + ret = gpio_to_irq(pin); + + /* When pin is used as an IRQ, no need to keep it requested */ + gpio_free(pin); + + return ret; } #endif From 4e15507fea70c0c312d79610efa46b6853ccf8e0 Mon Sep 17 00:00:00 2001 From: Andrii Nakryiko Date: Sat, 20 Jun 2020 20:11:59 -0700 Subject: [PATCH 024/343] libbpf: Forward-declare bpf_stats_type for systems with outdated UAPI headers Systems that doesn't yet have the very latest linux/bpf.h header, enum bpf_stats_type will be undefined, causing compilation warnings. Prevents this by forward-declaring enum. Fixes: 0bee106716cf ("libbpf: Add support for command BPF_ENABLE_STATS") Signed-off-by: Andrii Nakryiko Signed-off-by: Daniel Borkmann Acked-by: Song Liu Link: https://lore.kernel.org/bpf/20200621031159.2279101-1-andriin@fb.com --- tools/lib/bpf/bpf.h | 2 ++ 1 file changed, 2 insertions(+) diff --git a/tools/lib/bpf/bpf.h b/tools/lib/bpf/bpf.h index 1b6015b21ba8..dbef24ebcfcb 100644 --- a/tools/lib/bpf/bpf.h +++ b/tools/lib/bpf/bpf.h @@ -233,6 +233,8 @@ LIBBPF_API int bpf_load_btf(void *btf, __u32 btf_size, char *log_buf, LIBBPF_API int bpf_task_fd_query(int pid, int fd, __u32 flags, char *buf, __u32 *buf_len, __u32 *prog_id, __u32 *fd_type, __u64 *probe_offset, __u64 *probe_addr); + +enum bpf_stats_type; /* defined in up-to-date linux/bpf.h */ LIBBPF_API int bpf_enable_stats(enum bpf_stats_type type); #ifdef __cplusplus From adf46113a608d9515801997fc96cbfe8ffa89ed3 Mon Sep 17 00:00:00 2001 From: Shengjiu Wang Date: Tue, 23 Jun 2020 14:01:11 +0800 Subject: [PATCH 025/343] ASoC: fsl_mqs: Don't check clock is NULL before calling clk API Because clk_prepare_enable and clk_disable_unprepare should check input clock parameter is NULL or not internally, then we don't need to check them before calling the function. Fixes: 9e28f6532c61 ("ASoC: fsl_mqs: Add MQS component driver") Signed-off-by: Shengjiu Wang Acked-by: Nicolin Chen Link: https://lore.kernel.org/r/743be216bd504c26e8d45d5ce4a84561b67a122b.1592888591.git.shengjiu.wang@nxp.com Signed-off-by: Mark Brown --- sound/soc/fsl/fsl_mqs.c | 13 ++++--------- 1 file changed, 4 insertions(+), 9 deletions(-) diff --git a/sound/soc/fsl/fsl_mqs.c b/sound/soc/fsl/fsl_mqs.c index 0c813a45bba7..b44b134390a3 100644 --- a/sound/soc/fsl/fsl_mqs.c +++ b/sound/soc/fsl/fsl_mqs.c @@ -266,11 +266,9 @@ static int fsl_mqs_runtime_resume(struct device *dev) { struct fsl_mqs *mqs_priv = dev_get_drvdata(dev); - if (mqs_priv->ipg) - clk_prepare_enable(mqs_priv->ipg); + clk_prepare_enable(mqs_priv->ipg); - if (mqs_priv->mclk) - clk_prepare_enable(mqs_priv->mclk); + clk_prepare_enable(mqs_priv->mclk); if (mqs_priv->use_gpr) regmap_write(mqs_priv->regmap, IOMUXC_GPR2, @@ -292,11 +290,8 @@ static int fsl_mqs_runtime_suspend(struct device *dev) regmap_read(mqs_priv->regmap, REG_MQS_CTRL, &mqs_priv->reg_mqs_ctrl); - if (mqs_priv->mclk) - clk_disable_unprepare(mqs_priv->mclk); - - if (mqs_priv->ipg) - clk_disable_unprepare(mqs_priv->ipg); + clk_disable_unprepare(mqs_priv->mclk); + clk_disable_unprepare(mqs_priv->ipg); return 0; } From 15217d170a4461c1d4c1ea7c497e1fc1122e42a9 Mon Sep 17 00:00:00 2001 From: Shengjiu Wang Date: Tue, 23 Jun 2020 14:01:12 +0800 Subject: [PATCH 026/343] ASoC: fsl_mqs: Fix unchecked return value for clk_prepare_enable Fix unchecked return value for clk_prepare_enable, add error handler in fsl_mqs_runtime_resume. Fixes: 9e28f6532c61 ("ASoC: fsl_mqs: Add MQS component driver") Signed-off-by: Shengjiu Wang Acked-by: Nicolin Chen Link: https://lore.kernel.org/r/5edd68d03def367d96268f1a9a00bd528ea5aaf2.1592888591.git.shengjiu.wang@nxp.com Signed-off-by: Mark Brown --- sound/soc/fsl/fsl_mqs.c | 14 ++++++++++++-- 1 file changed, 12 insertions(+), 2 deletions(-) diff --git a/sound/soc/fsl/fsl_mqs.c b/sound/soc/fsl/fsl_mqs.c index b44b134390a3..69aeb0e71844 100644 --- a/sound/soc/fsl/fsl_mqs.c +++ b/sound/soc/fsl/fsl_mqs.c @@ -265,10 +265,20 @@ static int fsl_mqs_remove(struct platform_device *pdev) static int fsl_mqs_runtime_resume(struct device *dev) { struct fsl_mqs *mqs_priv = dev_get_drvdata(dev); + int ret; - clk_prepare_enable(mqs_priv->ipg); + ret = clk_prepare_enable(mqs_priv->ipg); + if (ret) { + dev_err(dev, "failed to enable ipg clock\n"); + return ret; + } - clk_prepare_enable(mqs_priv->mclk); + ret = clk_prepare_enable(mqs_priv->mclk); + if (ret) { + dev_err(dev, "failed to enable mclk clock\n"); + clk_disable_unprepare(mqs_priv->ipg); + return ret; + } if (mqs_priv->use_gpr) regmap_write(mqs_priv->regmap, IOMUXC_GPR2, From a3f574cd65487cd993f79ab235d70229d9302c1e Mon Sep 17 00:00:00 2001 From: Marc Zyngier Date: Tue, 23 Jun 2020 10:44:08 +0100 Subject: [PATCH 027/343] KVM: arm64: vgic-v4: Plug race between non-residency and v4.1 doorbell When making a vPE non-resident because it has hit a blocking WFI, the doorbell can fire at any time after the write to the RD. Crucially, it can fire right between the write to GICR_VPENDBASER and the write to the pending_last field in the its_vpe structure. This means that we would overwrite pending_last with stale data, and potentially not wakeup until some unrelated event (such as a timer interrupt) puts the vPE back on the CPU. GICv4 isn't affected by this as we actively mask the doorbell on entering the guest, while GICv4.1 automatically manages doorbell delivery without any hypervisor-driven masking. Use the vpe_lock to synchronize such update, which solves the problem altogether. Fixes: ae699ad348cdc ("irqchip/gic-v4.1: Move doorbell management to the GICv4 abstraction layer") Reported-by: Zenghui Yu Signed-off-by: Marc Zyngier --- arch/arm64/kvm/vgic/vgic-v4.c | 8 ++++++++ drivers/irqchip/irq-gic-v3-its.c | 8 ++++++++ 2 files changed, 16 insertions(+) diff --git a/arch/arm64/kvm/vgic/vgic-v4.c b/arch/arm64/kvm/vgic/vgic-v4.c index 27ac833e5ec7..b5fa73c9fd35 100644 --- a/arch/arm64/kvm/vgic/vgic-v4.c +++ b/arch/arm64/kvm/vgic/vgic-v4.c @@ -90,7 +90,15 @@ static irqreturn_t vgic_v4_doorbell_handler(int irq, void *info) !irqd_irq_disabled(&irq_to_desc(irq)->irq_data)) disable_irq_nosync(irq); + /* + * The v4.1 doorbell can fire concurrently with the vPE being + * made non-resident. Ensure we only update pending_last + * *after* the non-residency sequence has completed. + */ + raw_spin_lock(&vcpu->arch.vgic_cpu.vgic_v3.its_vpe.vpe_lock); vcpu->arch.vgic_cpu.vgic_v3.its_vpe.pending_last = true; + raw_spin_unlock(&vcpu->arch.vgic_cpu.vgic_v3.its_vpe.vpe_lock); + kvm_make_request(KVM_REQ_IRQ_PENDING, vcpu); kvm_vcpu_kick(vcpu); diff --git a/drivers/irqchip/irq-gic-v3-its.c b/drivers/irqchip/irq-gic-v3-its.c index cd685f521c77..205f69592471 100644 --- a/drivers/irqchip/irq-gic-v3-its.c +++ b/drivers/irqchip/irq-gic-v3-its.c @@ -4054,16 +4054,24 @@ static void its_vpe_4_1_deschedule(struct its_vpe *vpe, u64 val; if (info->req_db) { + unsigned long flags; + /* * vPE is going to block: make the vPE non-resident with * PendingLast clear and DB set. The GIC guarantees that if * we read-back PendingLast clear, then a doorbell will be * delivered when an interrupt comes. + * + * Note the locking to deal with the concurrent update of + * pending_last from the doorbell interrupt handler that can + * run concurrently. */ + raw_spin_lock_irqsave(&vpe->vpe_lock, flags); val = its_clear_vpend_valid(vlpi_base, GICR_VPENDBASER_PendingLast, GICR_VPENDBASER_4_1_DB); vpe->pending_last = !!(val & GICR_VPENDBASER_PendingLast); + raw_spin_unlock_irqrestore(&vpe->vpe_lock, flags); } else { /* * We're not blocking, so just make the vPE non-resident From 9c82a63cf3701b78cd092c69c3e75ff806837194 Mon Sep 17 00:00:00 2001 From: Andrii Nakryiko Date: Fri, 19 Jun 2020 16:04:22 -0700 Subject: [PATCH 028/343] libbpf: Fix CO-RE relocs against .text section bpf_object__find_program_by_title(), used by CO-RE relocation code, doesn't return .text "BPF program", if it is a function storage for sub-programs. Because of that, any CO-RE relocation in helper non-inlined functions will fail. Fix this by searching for .text-corresponding BPF program manually. Adjust one of bpf_iter selftest to exhibit this pattern. Fixes: ddc7c3042614 ("libbpf: implement BPF CO-RE offset relocation algorithm") Reported-by: Yonghong Song Signed-off-by: Andrii Nakryiko Signed-off-by: Yonghong Song Signed-off-by: Alexei Starovoitov Acked-by: Yonghong Song Link: https://lore.kernel.org/bpf/20200619230423.691274-1-andriin@fb.com --- tools/lib/bpf/libbpf.c | 8 +++++++- tools/testing/selftests/bpf/progs/bpf_iter_netlink.c | 2 +- 2 files changed, 8 insertions(+), 2 deletions(-) diff --git a/tools/lib/bpf/libbpf.c b/tools/lib/bpf/libbpf.c index 477c679ed945..f17151d866e6 100644 --- a/tools/lib/bpf/libbpf.c +++ b/tools/lib/bpf/libbpf.c @@ -4818,7 +4818,13 @@ bpf_core_reloc_fields(struct bpf_object *obj, const char *targ_btf_path) err = -EINVAL; goto out; } - prog = bpf_object__find_program_by_title(obj, sec_name); + prog = NULL; + for (i = 0; i < obj->nr_programs; i++) { + if (!strcmp(obj->programs[i].section_name, sec_name)) { + prog = &obj->programs[i]; + break; + } + } if (!prog) { pr_warn("failed to find program '%s' for CO-RE offset relocation\n", sec_name); diff --git a/tools/testing/selftests/bpf/progs/bpf_iter_netlink.c b/tools/testing/selftests/bpf/progs/bpf_iter_netlink.c index e7b8753eac0b..75ecf956a2df 100644 --- a/tools/testing/selftests/bpf/progs/bpf_iter_netlink.c +++ b/tools/testing/selftests/bpf/progs/bpf_iter_netlink.c @@ -25,7 +25,7 @@ struct bpf_iter__netlink { struct netlink_sock *sk; } __attribute__((preserve_access_index)); -static inline struct inode *SOCK_INODE(struct socket *socket) +static __attribute__((noinline)) struct inode *SOCK_INODE(struct socket *socket) { return &container_of(socket, struct socket_alloc, socket)->vfs_inode; } From c4c0bdc0d2d084ed847c7066bdf59fe2cd25aa17 Mon Sep 17 00:00:00 2001 From: Yonghong Song Date: Tue, 23 Jun 2020 17:10:54 -0700 Subject: [PATCH 029/343] bpf: Set the number of exception entries properly for subprograms Currently, if a bpf program has more than one subprograms, each program will be jitted separately. For programs with bpf-to-bpf calls the prog->aux->num_exentries is not setup properly. For example, with bpf_iter_netlink.c modified to force one function to be not inlined and with CONFIG_BPF_JIT_ALWAYS_ON the following error is seen: $ ./test_progs -n 3/3 ... libbpf: failed to load program 'iter/netlink' libbpf: failed to load object 'bpf_iter_netlink' libbpf: failed to load BPF skeleton 'bpf_iter_netlink': -4007 test_netlink:FAIL:bpf_iter_netlink__open_and_load skeleton open_and_load failed #3/3 netlink:FAIL The dmesg shows the following errors: ex gen bug which is triggered by the following code in arch/x86/net/bpf_jit_comp.c: if (excnt >= bpf_prog->aux->num_exentries) { pr_err("ex gen bug\n"); return -EFAULT; } This patch fixes the issue by computing proper num_exentries for each subprogram before calling JIT. Signed-off-by: Yonghong Song Signed-off-by: Alexei Starovoitov --- kernel/bpf/verifier.c | 10 +++++++++- 1 file changed, 9 insertions(+), 1 deletion(-) diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c index 34cde841ab68..8911d0576399 100644 --- a/kernel/bpf/verifier.c +++ b/kernel/bpf/verifier.c @@ -9801,7 +9801,7 @@ static int jit_subprogs(struct bpf_verifier_env *env) int i, j, subprog_start, subprog_end = 0, len, subprog; struct bpf_insn *insn; void *old_bpf_func; - int err; + int err, num_exentries; if (env->subprog_cnt <= 1) return 0; @@ -9876,6 +9876,14 @@ static int jit_subprogs(struct bpf_verifier_env *env) func[i]->aux->nr_linfo = prog->aux->nr_linfo; func[i]->aux->jited_linfo = prog->aux->jited_linfo; func[i]->aux->linfo_idx = env->subprog_info[i].linfo_idx; + num_exentries = 0; + insn = func[i]->insnsi; + for (j = 0; j < func[i]->len; j++, insn++) { + if (BPF_CLASS(insn->code) == BPF_LDX && + BPF_MODE(insn->code) == BPF_PROBE_MEM) + num_exentries++; + } + func[i]->aux->num_exentries = num_exentries; func[i] = bpf_int_jit_compile(func[i]); if (!func[i]->jited) { err = -ENOTSUPP; From b338cb921e6739ff59ce32f43342779fe5ffa732 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Maciej=20=C5=BBenczykowski?= Date: Sat, 20 Jun 2020 14:26:16 -0700 Subject: [PATCH 030/343] bpf: Restore behaviour of CAP_SYS_ADMIN allowing the loading of networking bpf programs MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This is a fix for a regression in commit 2c78ee898d8f ("bpf: Implement CAP_BPF"). Before the above commit it was possible to load network bpf programs with just the CAP_SYS_ADMIN privilege. The Android bpfloader happens to run in such a configuration (it has SYS_ADMIN but not NET_ADMIN) and creates maps and loads bpf programs for later use by Android's netd (which has NET_ADMIN but not SYS_ADMIN). Fixes: 2c78ee898d8f ("bpf: Implement CAP_BPF") Reported-by: John Stultz Signed-off-by: Maciej Żenczykowski Signed-off-by: Alexei Starovoitov Tested-by: John Stultz Link: https://lore.kernel.org/bpf/20200620212616.93894-1-zenczykowski@gmail.com --- kernel/bpf/syscall.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/kernel/bpf/syscall.c b/kernel/bpf/syscall.c index 8da159936bab..7d946435587d 100644 --- a/kernel/bpf/syscall.c +++ b/kernel/bpf/syscall.c @@ -2121,7 +2121,7 @@ static int bpf_prog_load(union bpf_attr *attr, union bpf_attr __user *uattr) !bpf_capable()) return -EPERM; - if (is_net_admin_prog_type(type) && !capable(CAP_NET_ADMIN)) + if (is_net_admin_prog_type(type) && !capable(CAP_NET_ADMIN) && !capable(CAP_SYS_ADMIN)) return -EPERM; if (is_perfmon_prog_type(type) && !perfmon_capable()) return -EPERM; From bcc7f554cfa7e0ac77c7adc4027c16f4a2f99c6f Mon Sep 17 00:00:00 2001 From: Quentin Monnet Date: Tue, 23 Jun 2020 16:39:35 +0100 Subject: [PATCH 031/343] bpf: Fix formatting in documentation for BPF helpers When producing the bpf-helpers.7 man page from the documentation from the BPF user space header file, rst2man complains: :2636: (ERROR/3) Unexpected indentation. :2640: (WARNING/2) Block quote ends without a blank line; unexpected unindent. Let's fix formatting for the relevant chunk (item list in bpf_ringbuf_query()'s description), and for a couple other functions. Signed-off-by: Quentin Monnet Signed-off-by: Alexei Starovoitov Acked-by: Andrii Nakryiko Link: https://lore.kernel.org/bpf/20200623153935.6215-1-quentin@isovalent.com --- include/uapi/linux/bpf.h | 41 +++++++++++++++++----------------- tools/include/uapi/linux/bpf.h | 41 +++++++++++++++++----------------- 2 files changed, 42 insertions(+), 40 deletions(-) diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h index 974a71342aea..8bd33050b7bb 100644 --- a/include/uapi/linux/bpf.h +++ b/include/uapi/linux/bpf.h @@ -3171,13 +3171,12 @@ union bpf_attr { * int bpf_ringbuf_output(void *ringbuf, void *data, u64 size, u64 flags) * Description * Copy *size* bytes from *data* into a ring buffer *ringbuf*. - * If BPF_RB_NO_WAKEUP is specified in *flags*, no notification of - * new data availability is sent. - * IF BPF_RB_FORCE_WAKEUP is specified in *flags*, notification of - * new data availability is sent unconditionally. + * If **BPF_RB_NO_WAKEUP** is specified in *flags*, no notification + * of new data availability is sent. + * If **BPF_RB_FORCE_WAKEUP** is specified in *flags*, notification + * of new data availability is sent unconditionally. * Return - * 0, on success; - * < 0, on error. + * 0 on success, or a negative error in case of failure. * * void *bpf_ringbuf_reserve(void *ringbuf, u64 size, u64 flags) * Description @@ -3189,20 +3188,20 @@ union bpf_attr { * void bpf_ringbuf_submit(void *data, u64 flags) * Description * Submit reserved ring buffer sample, pointed to by *data*. - * If BPF_RB_NO_WAKEUP is specified in *flags*, no notification of - * new data availability is sent. - * IF BPF_RB_FORCE_WAKEUP is specified in *flags*, notification of - * new data availability is sent unconditionally. + * If **BPF_RB_NO_WAKEUP** is specified in *flags*, no notification + * of new data availability is sent. + * If **BPF_RB_FORCE_WAKEUP** is specified in *flags*, notification + * of new data availability is sent unconditionally. * Return * Nothing. Always succeeds. * * void bpf_ringbuf_discard(void *data, u64 flags) * Description * Discard reserved ring buffer sample, pointed to by *data*. - * If BPF_RB_NO_WAKEUP is specified in *flags*, no notification of - * new data availability is sent. - * IF BPF_RB_FORCE_WAKEUP is specified in *flags*, notification of - * new data availability is sent unconditionally. + * If **BPF_RB_NO_WAKEUP** is specified in *flags*, no notification + * of new data availability is sent. + * If **BPF_RB_FORCE_WAKEUP** is specified in *flags*, notification + * of new data availability is sent unconditionally. * Return * Nothing. Always succeeds. * @@ -3210,16 +3209,18 @@ union bpf_attr { * Description * Query various characteristics of provided ring buffer. What * exactly is queries is determined by *flags*: - * - BPF_RB_AVAIL_DATA - amount of data not yet consumed; - * - BPF_RB_RING_SIZE - the size of ring buffer; - * - BPF_RB_CONS_POS - consumer position (can wrap around); - * - BPF_RB_PROD_POS - producer(s) position (can wrap around); - * Data returned is just a momentary snapshots of actual values + * + * * **BPF_RB_AVAIL_DATA**: Amount of data not yet consumed. + * * **BPF_RB_RING_SIZE**: The size of ring buffer. + * * **BPF_RB_CONS_POS**: Consumer position (can wrap around). + * * **BPF_RB_PROD_POS**: Producer(s) position (can wrap around). + * + * Data returned is just a momentary snapshot of actual values * and could be inaccurate, so this facility should be used to * power heuristics and for reporting, not to make 100% correct * calculation. * Return - * Requested value, or 0, if flags are not recognized. + * Requested value, or 0, if *flags* are not recognized. * * int bpf_csum_level(struct sk_buff *skb, u64 level) * Description diff --git a/tools/include/uapi/linux/bpf.h b/tools/include/uapi/linux/bpf.h index 974a71342aea..8bd33050b7bb 100644 --- a/tools/include/uapi/linux/bpf.h +++ b/tools/include/uapi/linux/bpf.h @@ -3171,13 +3171,12 @@ union bpf_attr { * int bpf_ringbuf_output(void *ringbuf, void *data, u64 size, u64 flags) * Description * Copy *size* bytes from *data* into a ring buffer *ringbuf*. - * If BPF_RB_NO_WAKEUP is specified in *flags*, no notification of - * new data availability is sent. - * IF BPF_RB_FORCE_WAKEUP is specified in *flags*, notification of - * new data availability is sent unconditionally. + * If **BPF_RB_NO_WAKEUP** is specified in *flags*, no notification + * of new data availability is sent. + * If **BPF_RB_FORCE_WAKEUP** is specified in *flags*, notification + * of new data availability is sent unconditionally. * Return - * 0, on success; - * < 0, on error. + * 0 on success, or a negative error in case of failure. * * void *bpf_ringbuf_reserve(void *ringbuf, u64 size, u64 flags) * Description @@ -3189,20 +3188,20 @@ union bpf_attr { * void bpf_ringbuf_submit(void *data, u64 flags) * Description * Submit reserved ring buffer sample, pointed to by *data*. - * If BPF_RB_NO_WAKEUP is specified in *flags*, no notification of - * new data availability is sent. - * IF BPF_RB_FORCE_WAKEUP is specified in *flags*, notification of - * new data availability is sent unconditionally. + * If **BPF_RB_NO_WAKEUP** is specified in *flags*, no notification + * of new data availability is sent. + * If **BPF_RB_FORCE_WAKEUP** is specified in *flags*, notification + * of new data availability is sent unconditionally. * Return * Nothing. Always succeeds. * * void bpf_ringbuf_discard(void *data, u64 flags) * Description * Discard reserved ring buffer sample, pointed to by *data*. - * If BPF_RB_NO_WAKEUP is specified in *flags*, no notification of - * new data availability is sent. - * IF BPF_RB_FORCE_WAKEUP is specified in *flags*, notification of - * new data availability is sent unconditionally. + * If **BPF_RB_NO_WAKEUP** is specified in *flags*, no notification + * of new data availability is sent. + * If **BPF_RB_FORCE_WAKEUP** is specified in *flags*, notification + * of new data availability is sent unconditionally. * Return * Nothing. Always succeeds. * @@ -3210,16 +3209,18 @@ union bpf_attr { * Description * Query various characteristics of provided ring buffer. What * exactly is queries is determined by *flags*: - * - BPF_RB_AVAIL_DATA - amount of data not yet consumed; - * - BPF_RB_RING_SIZE - the size of ring buffer; - * - BPF_RB_CONS_POS - consumer position (can wrap around); - * - BPF_RB_PROD_POS - producer(s) position (can wrap around); - * Data returned is just a momentary snapshots of actual values + * + * * **BPF_RB_AVAIL_DATA**: Amount of data not yet consumed. + * * **BPF_RB_RING_SIZE**: The size of ring buffer. + * * **BPF_RB_CONS_POS**: Consumer position (can wrap around). + * * **BPF_RB_PROD_POS**: Producer(s) position (can wrap around). + * + * Data returned is just a momentary snapshot of actual values * and could be inaccurate, so this facility should be used to * power heuristics and for reporting, not to make 100% correct * calculation. * Return - * Requested value, or 0, if flags are not recognized. + * Requested value, or 0, if *flags* are not recognized. * * int bpf_csum_level(struct sk_buff *skb, u64 level) * Description From 9bc5fd71b680ca017d59510b4f402577758b8496 Mon Sep 17 00:00:00 2001 From: Shuming Fan Date: Tue, 23 Jun 2020 20:53:12 +0800 Subject: [PATCH 032/343] ASoC: rt5682: fix the pop noise while OMTP type headset plugin To turn the headphone output switch off during jack type detection, it could avoid the pop noise when jack type switches to OMTP type. Signed-off-by: Shuming Fan Link: https://lore.kernel.org/r/20200623125312.27896-1-shumingf@realtek.com Signed-off-by: Mark Brown --- sound/soc/codecs/rt5682.c | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) diff --git a/sound/soc/codecs/rt5682.c b/sound/soc/codecs/rt5682.c index 3e9d2c6c51f9..7d6670abdb08 100644 --- a/sound/soc/codecs/rt5682.c +++ b/sound/soc/codecs/rt5682.c @@ -932,7 +932,9 @@ int rt5682_headset_detect(struct snd_soc_component *component, int jack_insert) RT5682_PWR_ANLG_1, RT5682_PWR_FV2, RT5682_PWR_FV2); snd_soc_component_update_bits(component, RT5682_PWR_ANLG_3, RT5682_PWR_CBJ, RT5682_PWR_CBJ); - + snd_soc_component_update_bits(component, + RT5682_HP_CHARGE_PUMP_1, + RT5682_OSW_L_MASK | RT5682_OSW_R_MASK, 0); snd_soc_component_update_bits(component, RT5682_CBJ_CTRL_1, RT5682_TRIG_JD_MASK, RT5682_TRIG_JD_HIGH); @@ -956,6 +958,11 @@ int rt5682_headset_detect(struct snd_soc_component *component, int jack_insert) rt5682->jack_type = SND_JACK_HEADPHONE; break; } + + snd_soc_component_update_bits(component, + RT5682_HP_CHARGE_PUMP_1, + RT5682_OSW_L_MASK | RT5682_OSW_R_MASK, + RT5682_OSW_L_EN | RT5682_OSW_R_EN); } else { rt5682_enable_push_button_irq(component, false); snd_soc_component_update_bits(component, RT5682_CBJ_CTRL_1, From 0c1a7f13c9ec1ceb18d97ef4b1dd20ec71ffba31 Mon Sep 17 00:00:00 2001 From: Veerendranath Jakkam Date: Wed, 17 Jun 2020 17:01:32 +0530 Subject: [PATCH 033/343] ieee80211: Add missing and new AKM suite selector definitions Add the definitions for missing AKM selectors defined in IEEE P802.11-REVmd/D3.0, table 9-151. These definitions will be used by various drivers that support these new AKM suites. Signed-off-by: Veerendranath Jakkam Link: https://lore.kernel.org/r/20200617113132.13477-1-vjakkam@codeaurora.org Signed-off-by: Johannes Berg --- include/linux/ieee80211.h | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/include/linux/ieee80211.h b/include/linux/ieee80211.h index fe15f831841b..9f732499ea88 100644 --- a/include/linux/ieee80211.h +++ b/include/linux/ieee80211.h @@ -3333,13 +3333,17 @@ struct ieee80211_multiple_bssid_configuration { #define WLAN_AKM_SUITE_TDLS SUITE(0x000FAC, 7) #define WLAN_AKM_SUITE_SAE SUITE(0x000FAC, 8) #define WLAN_AKM_SUITE_FT_OVER_SAE SUITE(0x000FAC, 9) +#define WLAN_AKM_SUITE_AP_PEER_KEY SUITE(0x000FAC, 10) #define WLAN_AKM_SUITE_8021X_SUITE_B SUITE(0x000FAC, 11) #define WLAN_AKM_SUITE_8021X_SUITE_B_192 SUITE(0x000FAC, 12) +#define WLAN_AKM_SUITE_FT_8021X_SHA384 SUITE(0x000FAC, 13) #define WLAN_AKM_SUITE_FILS_SHA256 SUITE(0x000FAC, 14) #define WLAN_AKM_SUITE_FILS_SHA384 SUITE(0x000FAC, 15) #define WLAN_AKM_SUITE_FT_FILS_SHA256 SUITE(0x000FAC, 16) #define WLAN_AKM_SUITE_FT_FILS_SHA384 SUITE(0x000FAC, 17) #define WLAN_AKM_SUITE_OWE SUITE(0x000FAC, 18) +#define WLAN_AKM_SUITE_FT_PSK_SHA384 SUITE(0x000FAC, 19) +#define WLAN_AKM_SUITE_PSK_SHA384 SUITE(0x000FAC, 20) #define WLAN_MAX_KEY_LEN 32 From 86a1b9d7c275a3dba69e7ab099f8d5f71f69f6a8 Mon Sep 17 00:00:00 2001 From: Markus Theil Date: Mon, 22 Jun 2020 14:35:42 +0200 Subject: [PATCH 034/343] mac80211: fix control port tx status check The initial control port tx status patch assumed, that we have IEEE 802.11 frames, but actually ethernet frames are stored in the ack skb. Fix this by checking for the correct ethertype and skb protocol 802.3. Also allow tx status reports for ETH_P_PREAUTH, as preauth frames can also be send over the nl80211 control port. Fixes: a7528198add8 ("mac80211: support control port TX status reporting") Reported-by: Jouni Malinen Signed-off-by: Markus Theil Reported-by: kernel test robot Link: https://lore.kernel.org/r/20200622123542.173695-1-markus.theil@tu-ilmenau.de Signed-off-by: Johannes Berg --- net/mac80211/status.c | 22 +++++++++++++++------- 1 file changed, 15 insertions(+), 7 deletions(-) diff --git a/net/mac80211/status.c b/net/mac80211/status.c index 7b1bacac39c6..cbc40b358ba2 100644 --- a/net/mac80211/status.c +++ b/net/mac80211/status.c @@ -639,11 +639,23 @@ static void ieee80211_report_ack_skb(struct ieee80211_local *local, u64 cookie = IEEE80211_SKB_CB(skb)->ack.cookie; struct ieee80211_sub_if_data *sdata; struct ieee80211_hdr *hdr = (void *)skb->data; + __be16 ethertype = 0; + + if (skb->len >= ETH_HLEN && skb->protocol == cpu_to_be16(ETH_P_802_3)) + skb_copy_bits(skb, 2 * ETH_ALEN, ðertype, ETH_TLEN); rcu_read_lock(); sdata = ieee80211_sdata_from_skb(local, skb); if (sdata) { - if (ieee80211_is_any_nullfunc(hdr->frame_control)) + if (ethertype == sdata->control_port_protocol || + ethertype == cpu_to_be16(ETH_P_PREAUTH)) + cfg80211_control_port_tx_status(&sdata->wdev, + cookie, + skb->data, + skb->len, + acked, + GFP_ATOMIC); + else if (ieee80211_is_any_nullfunc(hdr->frame_control)) cfg80211_probe_status(sdata->dev, hdr->addr1, cookie, acked, info->status.ack_signal, @@ -654,12 +666,8 @@ static void ieee80211_report_ack_skb(struct ieee80211_local *local, skb->data, skb->len, acked, GFP_ATOMIC); else - cfg80211_control_port_tx_status(&sdata->wdev, - cookie, - skb->data, - skb->len, - acked, - GFP_ATOMIC); + pr_warn("Unknown status report in ack skb\n"); + } rcu_read_unlock(); From 01da2e059dc326d02091a62b81a795a393e3719f Mon Sep 17 00:00:00 2001 From: Pavel Machek Date: Thu, 4 Jun 2020 23:41:57 +0200 Subject: [PATCH 035/343] mac80211: simplify mesh code MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Doing mod_timer() conditionaly is easier than conditionally unlocking and jumping around... Signed-off-by: Pavel Machek (CIP) Acked-by: Linus Lüssing Link: https://lore.kernel.org/r/20200604214157.GA9737@amd Signed-off-by: Johannes Berg --- net/mac80211/mesh_hwmp.c | 7 ++----- 1 file changed, 2 insertions(+), 5 deletions(-) diff --git a/net/mac80211/mesh_hwmp.c b/net/mac80211/mesh_hwmp.c index aa5150929996..02cde0fd08fe 100644 --- a/net/mac80211/mesh_hwmp.c +++ b/net/mac80211/mesh_hwmp.c @@ -1105,11 +1105,8 @@ void mesh_path_start_discovery(struct ieee80211_sub_if_data *sdata) ttl, lifetime, 0, ifmsh->preq_id++, sdata); spin_lock_bh(&mpath->state_lock); - if (mpath->flags & MESH_PATH_DELETED) { - spin_unlock_bh(&mpath->state_lock); - goto enddiscovery; - } - mod_timer(&mpath->timer, jiffies + mpath->discovery_timeout); + if (!(mpath->flags & MESH_PATH_DELETED)) + mod_timer(&mpath->timer, jiffies + mpath->discovery_timeout); spin_unlock_bh(&mpath->state_lock); enddiscovery: From 78fb5b541b7ae57ac39187ccb3097e606004cf9b Mon Sep 17 00:00:00 2001 From: Seevalamuthu Mariappan Date: Tue, 9 Jun 2020 15:45:54 +0530 Subject: [PATCH 036/343] mac80211: Fix dropping broadcast packets in 802.11 encap Broadcast pkts like arp are getting dropped in 'ieee80211_8023_xmit'. Fix this by replacing is_valid_ether_addr api with is_zero_ether_addr. Fixes: 50ff477a8639 ("mac80211: add 802.11 encapsulation offloading support") Signed-off-by: Seevalamuthu Mariappan Link: https://lore.kernel.org/r/1591697754-4975-1-git-send-email-seevalam@codeaurora.org Signed-off-by: Johannes Berg --- net/mac80211/tx.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/mac80211/tx.c b/net/mac80211/tx.c index e9ce658141f5..3374df016c58 100644 --- a/net/mac80211/tx.c +++ b/net/mac80211/tx.c @@ -4206,7 +4206,7 @@ static void ieee80211_8023_xmit(struct ieee80211_sub_if_data *sdata, (!sta || !test_sta_flag(sta, WLAN_STA_TDLS_PEER))) ra = sdata->u.mgd.bssid; - if (!is_valid_ether_addr(ra)) + if (is_zero_ether_addr(ra)) goto out_free; multicast = is_multicast_ether_addr(ra); From 5af7fef39d7952c0f5551afa7b821ee7b6c9dd3d Mon Sep 17 00:00:00 2001 From: Markus Theil Date: Wed, 17 Jun 2020 10:26:36 +0200 Subject: [PATCH 037/343] mac80211: skip mpath lookup also for control port tx When using 802.1X over mesh networks, at first an ordinary mesh peering is established, then the 802.1X EAPOL dialog happens, afterwards an authenticated mesh peering exchange (AMPE) happens, finally the peering is complete and we can set the STA authorized flag. As 802.1X is an intermediate step here and key material is not yet exchanged for stations we have to skip mesh path lookup for these EAPOL frames. Otherwise the already configure mesh group encryption key would be used to send a mesh path request which no one can decipher, because we didn't already establish key material on both peers, like with SAE and directly using AMPE. Signed-off-by: Markus Theil Link: https://lore.kernel.org/r/20200617082637.22670-2-markus.theil@tu-ilmenau.de [remove pointless braces, remove unnecessary local variable, the list can only process one such frame (or its fragments)] Signed-off-by: Johannes Berg --- net/mac80211/tx.c | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/net/mac80211/tx.c b/net/mac80211/tx.c index 3374df016c58..1a2941e5244f 100644 --- a/net/mac80211/tx.c +++ b/net/mac80211/tx.c @@ -3996,6 +3996,9 @@ void __ieee80211_subif_start_xmit(struct sk_buff *skb, skb_list_walk_safe(skb, skb, next) { skb_mark_not_on_list(skb); + if (skb->protocol == sdata->control_port_protocol) + ctrl_flags |= IEEE80211_TX_CTRL_SKIP_MPATH_LOOKUP; + skb = ieee80211_build_hdr(sdata, skb, info_flags, sta, ctrl_flags, cookie); if (IS_ERR(skb)) { @@ -5371,7 +5374,8 @@ int ieee80211_tx_control_port(struct wiphy *wiphy, struct net_device *dev, return -EINVAL; if (proto == sdata->control_port_protocol) - ctrl_flags |= IEEE80211_TX_CTRL_PORT_CTRL_PROTO; + ctrl_flags |= IEEE80211_TX_CTRL_PORT_CTRL_PROTO | + IEEE80211_TX_CTRL_SKIP_MPATH_LOOKUP; if (unencrypted) flags |= IEEE80211_TX_INTFL_DONT_ENCRYPT; From 0b467b63870d9c05c81456aa9bfee894ab2db3b6 Mon Sep 17 00:00:00 2001 From: Markus Theil Date: Thu, 25 Jun 2020 12:42:14 +0200 Subject: [PATCH 038/343] mac80211: allow rx of mesh eapol frames with default rx key Without this patch, eapol frames cannot be received in mesh mode, when 802.1X should be used. Initially only a MGTK is defined, which is found and set as rx->key, when there are no other keys set. ieee80211_drop_unencrypted would then drop these eapol frames, as they are data frames without encryption and there exists some rx->key. Fix this by differentiating between mesh eapol frames and other data frames with existing rx->key. Allow mesh mesh eapol frames only if they are for our vif address. With this patch in-place, ieee80211_rx_h_mesh_fwding continues after the ieee80211_drop_unencrypted check and notices, that these eapol frames have to be delivered locally, as they should. Signed-off-by: Markus Theil Link: https://lore.kernel.org/r/20200625104214.50319-1-markus.theil@tu-ilmenau.de [small code cleanups] Signed-off-by: Johannes Berg --- net/mac80211/rx.c | 26 ++++++++++++++++++++++++++ 1 file changed, 26 insertions(+) diff --git a/net/mac80211/rx.c b/net/mac80211/rx.c index a88ab6fb16f2..5c5af4b5fc08 100644 --- a/net/mac80211/rx.c +++ b/net/mac80211/rx.c @@ -2396,6 +2396,7 @@ static int ieee80211_802_1x_port_control(struct ieee80211_rx_data *rx) static int ieee80211_drop_unencrypted(struct ieee80211_rx_data *rx, __le16 fc) { + struct ieee80211_hdr *hdr = (void *)rx->skb->data; struct sk_buff *skb = rx->skb; struct ieee80211_rx_status *status = IEEE80211_SKB_RXCB(skb); @@ -2406,6 +2407,31 @@ static int ieee80211_drop_unencrypted(struct ieee80211_rx_data *rx, __le16 fc) if (status->flag & RX_FLAG_DECRYPTED) return 0; + /* check mesh EAPOL frames first */ + if (unlikely(rx->sta && ieee80211_vif_is_mesh(&rx->sdata->vif) && + ieee80211_is_data(fc))) { + struct ieee80211s_hdr *mesh_hdr; + u16 hdr_len = ieee80211_hdrlen(fc); + u16 ethertype_offset; + __be16 ethertype; + + if (!ether_addr_equal(hdr->addr1, rx->sdata->vif.addr)) + goto drop_check; + + /* make sure fixed part of mesh header is there, also checks skb len */ + if (!pskb_may_pull(rx->skb, hdr_len + 6)) + goto drop_check; + + mesh_hdr = (struct ieee80211s_hdr *)(skb->data + hdr_len); + ethertype_offset = hdr_len + ieee80211_get_mesh_hdrlen(mesh_hdr) + + sizeof(rfc1042_header); + + if (skb_copy_bits(rx->skb, ethertype_offset, ðertype, 2) == 0 && + ethertype == rx->sdata->control_port_protocol) + return 0; + } + +drop_check: /* Drop unencrypted frames if key is set. */ if (unlikely(!ieee80211_has_protected(fc) && !ieee80211_is_any_nullfunc(fc) && From 6a6ca7881b1ab1c13fe0d70bae29211a65dd90de Mon Sep 17 00:00:00 2001 From: Hui Wang Date: Thu, 25 Jun 2020 16:38:33 +0800 Subject: [PATCH 039/343] ALSA: hda - let hs_mic be picked ahead of hp_mic We have a Dell AIO, there is neither internal speaker nor internal mic, only a multi-function audio jack on it. Users reported that after freshly installing the OS and plug a headset to the audio jack, the headset can't output sound. I reproduced this bug, at that moment, the Input Source is as below: Simple mixer control 'Input Source',0 Capabilities: cenum Items: 'Headphone Mic' 'Headset Mic' Item0: 'Headphone Mic' That is because the patch_realtek will set this audio jack as mic_in mode if Input Source's value is hp_mic. If it is not fresh installing, this issue will not happen since the systemd will run alsactl restore -f /var/lib/alsa/asound.state, this will set the 'Input Source' according to history value. If there is internal speaker or internal mic, this issue will not happen since there is valid sink/source in the pulseaudio, the PA will set the 'Input Source' according to active_port. To fix this issue, change the parser function to let the hs_mic be stored ahead of hp_mic. Cc: stable@vger.kernel.org Signed-off-by: Hui Wang Link: https://lore.kernel.org/r/20200625083833.11264-1-hui.wang@canonical.com Signed-off-by: Takashi Iwai --- sound/pci/hda/hda_auto_parser.c | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/sound/pci/hda/hda_auto_parser.c b/sound/pci/hda/hda_auto_parser.c index 2c6d2becfe1a..824f4ac1a8ce 100644 --- a/sound/pci/hda/hda_auto_parser.c +++ b/sound/pci/hda/hda_auto_parser.c @@ -72,6 +72,12 @@ static int compare_input_type(const void *ap, const void *bp) if (a->type != b->type) return (int)(a->type - b->type); + /* If has both hs_mic and hp_mic, pick the hs_mic ahead of hp_mic. */ + if (a->is_headset_mic && b->is_headphone_mic) + return -1; /* don't swap */ + else if (a->is_headphone_mic && b->is_headset_mic) + return 1; /* swap */ + /* In case one has boost and the other one has not, pick the one with boost first. */ return (int)(b->has_boost_on_pin - a->has_boost_on_pin); From bc5c7f55f5ea91e137fc7939435ed2e2bb6e5a15 Mon Sep 17 00:00:00 2001 From: Jaroslav Kysela Date: Thu, 25 Jun 2020 13:58:29 +0200 Subject: [PATCH 040/343] AsoC: amd: add missing snd- module prefix to the acp3x-rn driver kernel module Signed-off-by: Jaroslav Kysela Acked-by: Alex Deucher Cc: Mark Brown Cc: vijendar.mukunda@amd.com Cc: Alexander.Deucher@amd.com Link: https://lore.kernel.org/r/20200625115829.791750-1-perex@perex.cz Signed-off-by: Mark Brown --- sound/soc/amd/renoir/Makefile | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/sound/soc/amd/renoir/Makefile b/sound/soc/amd/renoir/Makefile index e4371932a55a..4a82690aec16 100644 --- a/sound/soc/amd/renoir/Makefile +++ b/sound/soc/amd/renoir/Makefile @@ -2,6 +2,7 @@ # Renoir platform Support snd-rn-pci-acp3x-objs := rn-pci-acp3x.o snd-acp3x-pdm-dma-objs := acp3x-pdm-dma.o -obj-$(CONFIG_SND_SOC_AMD_RENOIR) += snd-rn-pci-acp3x.o -obj-$(CONFIG_SND_SOC_AMD_RENOIR) += snd-acp3x-pdm-dma.o -obj-$(CONFIG_SND_SOC_AMD_RENOIR_MACH) += acp3x-rn.o +snd-acp3x-rn-objs := acp3x-rn.o +obj-$(CONFIG_SND_SOC_AMD_RENOIR) += snd-rn-pci-acp3x.o +obj-$(CONFIG_SND_SOC_AMD_RENOIR) += snd-acp3x-pdm-dma.o +obj-$(CONFIG_SND_SOC_AMD_RENOIR_MACH) += snd-acp3x-rn.o From a9b59159d338d414acaa8e2f569d129d51c76452 Mon Sep 17 00:00:00 2001 From: John Fastabend Date: Wed, 24 Jun 2020 15:20:39 -0700 Subject: [PATCH 041/343] bpf: Do not allow btf_ctx_access with __int128 types To ensure btf_ctx_access() is safe the verifier checks that the BTF arg type is an int, enum, or pointer. When the function does the BTF arg lookup it uses the calculation 'arg = off / 8' using the fact that registers are 8B. This requires that the first arg is in the first reg, the second in the second, and so on. However, for __int128 the arg will consume two registers by default LLVM implementation. So this will cause the arg layout assumed by the 'arg = off / 8' calculation to be incorrect. Because __int128 is uncommon this patch applies the easiest fix and will force int types to be sizeof(u64) or smaller so that they will fit in a single register. v2: remove unneeded parens per Andrii's feedback Fixes: 9e15db66136a1 ("bpf: Implement accurate raw_tp context access via BTF") Signed-off-by: John Fastabend Signed-off-by: Daniel Borkmann Acked-by: Andrii Nakryiko Link: https://lore.kernel.org/bpf/159303723962.11287.13309537171132420717.stgit@john-Precision-5820-Tower --- include/linux/btf.h | 5 +++++ kernel/bpf/btf.c | 4 ++-- 2 files changed, 7 insertions(+), 2 deletions(-) diff --git a/include/linux/btf.h b/include/linux/btf.h index 5c1ea99b480f..8b81fbb4497c 100644 --- a/include/linux/btf.h +++ b/include/linux/btf.h @@ -82,6 +82,11 @@ static inline bool btf_type_is_int(const struct btf_type *t) return BTF_INFO_KIND(t->info) == BTF_KIND_INT; } +static inline bool btf_type_is_small_int(const struct btf_type *t) +{ + return btf_type_is_int(t) && t->size <= sizeof(u64); +} + static inline bool btf_type_is_enum(const struct btf_type *t) { return BTF_INFO_KIND(t->info) == BTF_KIND_ENUM; diff --git a/kernel/bpf/btf.c b/kernel/bpf/btf.c index 58c9af1d4808..9a1a98dd9e97 100644 --- a/kernel/bpf/btf.c +++ b/kernel/bpf/btf.c @@ -3746,7 +3746,7 @@ bool btf_ctx_access(int off, int size, enum bpf_access_type type, return false; t = btf_type_skip_modifiers(btf, t->type, NULL); - if (!btf_type_is_int(t)) { + if (!btf_type_is_small_int(t)) { bpf_log(log, "ret type %s not allowed for fmod_ret\n", btf_kind_str[BTF_INFO_KIND(t->info)]); @@ -3768,7 +3768,7 @@ bool btf_ctx_access(int off, int size, enum bpf_access_type type, /* skip modifiers */ while (btf_type_is_modifier(t)) t = btf_type_by_id(btf, t->type); - if (btf_type_is_int(t) || btf_type_is_enum(t)) + if (btf_type_is_small_int(t) || btf_type_is_enum(t)) /* accessing a scalar */ return true; if (!btf_type_is_ptr(t)) { From 7a64135f3229a808067e4bd29be15fe6856a9ae6 Mon Sep 17 00:00:00 2001 From: Jesper Dangaard Brouer Date: Thu, 25 Jun 2020 16:26:58 +0200 Subject: [PATCH 042/343] libbpf: Adjust SEC short cut for expected attach type BPF_XDP_DEVMAP Adjust the SEC("xdp_devmap/") prog type prefix to contain a slash "/" for expected attach type BPF_XDP_DEVMAP. This is consistent with other prog types like tracing. Fixes: 2778797037a6 ("libbpf: Add SEC name for xdp programs attached to device map") Suggested-by: Andrii Nakryiko Signed-off-by: Jesper Dangaard Brouer Signed-off-by: Daniel Borkmann Acked-by: Andrii Nakryiko Link: https://lore.kernel.org/bpf/159309521882.821855.6873145686353617509.stgit@firesoul --- tools/lib/bpf/libbpf.c | 2 +- .../testing/selftests/bpf/progs/test_xdp_with_devmap_helpers.c | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/tools/lib/bpf/libbpf.c b/tools/lib/bpf/libbpf.c index f17151d866e6..11e4725b8b1c 100644 --- a/tools/lib/bpf/libbpf.c +++ b/tools/lib/bpf/libbpf.c @@ -6659,7 +6659,7 @@ static const struct bpf_sec_def section_defs[] = { .expected_attach_type = BPF_TRACE_ITER, .is_attach_btf = true, .attach_fn = attach_iter), - BPF_EAPROG_SEC("xdp_devmap", BPF_PROG_TYPE_XDP, + BPF_EAPROG_SEC("xdp_devmap/", BPF_PROG_TYPE_XDP, BPF_XDP_DEVMAP), BPF_PROG_SEC("xdp", BPF_PROG_TYPE_XDP), BPF_PROG_SEC("perf_event", BPF_PROG_TYPE_PERF_EVENT), diff --git a/tools/testing/selftests/bpf/progs/test_xdp_with_devmap_helpers.c b/tools/testing/selftests/bpf/progs/test_xdp_with_devmap_helpers.c index 330811260123..0ac086497722 100644 --- a/tools/testing/selftests/bpf/progs/test_xdp_with_devmap_helpers.c +++ b/tools/testing/selftests/bpf/progs/test_xdp_with_devmap_helpers.c @@ -27,7 +27,7 @@ int xdp_dummy_prog(struct xdp_md *ctx) /* valid program on DEVMAP entry via SEC name; * has access to egress and ingress ifindex */ -SEC("xdp_devmap") +SEC("xdp_devmap/map_prog") int xdp_dummy_dm(struct xdp_md *ctx) { char fmt[] = "devmap redirect: dev %u -> dev %u len %u\n"; From bc7a39b4272b9672d806d422b6850e8c1a09914c Mon Sep 17 00:00:00 2001 From: Luca Coelho Date: Fri, 26 Jun 2020 12:49:39 +0300 Subject: [PATCH 043/343] nl80211: don't return err unconditionally in nl80211_start_ap() When a memory leak was fixed, a return err was changed to goto err, but, accidentally, the if (err) was removed, so now we always exit at this point. Fix it by adding if (err) back. Fixes: 9951ebfcdf2b ("nl80211: fix potential leak in AP start") Signed-off-by: Luca Coelho Link: https://lore.kernel.org/r/iwlwifi.20200626124931.871ba5b31eee.I97340172d92164ee92f3c803fe20a8a6e97714e1@changeid Signed-off-by: Johannes Berg --- net/wireless/nl80211.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/net/wireless/nl80211.c b/net/wireless/nl80211.c index 263ae395ad44..f31698fd4a7e 100644 --- a/net/wireless/nl80211.c +++ b/net/wireless/nl80211.c @@ -5016,7 +5016,8 @@ static int nl80211_start_ap(struct sk_buff *skb, struct genl_info *info) err = nl80211_parse_he_obss_pd( info->attrs[NL80211_ATTR_HE_OBSS_PD], ¶ms.he_obss_pd); - goto out; + if (err) + goto out; } if (info->attrs[NL80211_ATTR_HE_BSS_COLOR]) { From 60a0121f8fa64b0f4297aa6fef8207500483a874 Mon Sep 17 00:00:00 2001 From: Luca Coelho Date: Fri, 26 Jun 2020 12:49:40 +0300 Subject: [PATCH 044/343] nl80211: fix memory leak when parsing NL80211_ATTR_HE_BSS_COLOR If there is an error when parsing the NL80211_ATTR_HE_BSS_COLOR attribute, we return immediately without freeing param.acl. Fit it by using goto out instead of returning immediately. Fixes: 5c5e52d1bb96 ("nl80211: add handling for BSS color") Signed-off-by: Luca Coelho Link: https://lore.kernel.org/r/iwlwifi.20200626124931.7ad2a3eb894f.I60905fb70bd20389a3b170db515a07275e31845e@changeid Signed-off-by: Johannes Berg --- net/wireless/nl80211.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/wireless/nl80211.c b/net/wireless/nl80211.c index f31698fd4a7e..0e07fb8585fb 100644 --- a/net/wireless/nl80211.c +++ b/net/wireless/nl80211.c @@ -5025,7 +5025,7 @@ static int nl80211_start_ap(struct sk_buff *skb, struct genl_info *info) info->attrs[NL80211_ATTR_HE_BSS_COLOR], ¶ms.he_bss_color); if (err) - return err; + goto out; } nl80211_calculate_ap_params(¶ms); From 45c11a927606c612e4898a9484867b71318699f6 Mon Sep 17 00:00:00 2001 From: Hans de Goede Date: Sat, 6 Jun 2020 11:31:50 +0200 Subject: [PATCH 045/343] pinctrl: baytrail: Fix pin being driven low for a while on gpiod_get(..., GPIOD_OUT_HIGH) The pins on the Bay Trail SoC have separate input-buffer and output-buffer enable bits and a read of the level bit of the value register will always return the value from the input-buffer. The BIOS of a device may configure a pin in output-only mode, only enabling the output buffer, and write 1 to the level bit to drive the pin high. This 1 written to the level bit will be stored inside the data-latch of the output buffer. But a subsequent read of the value register will return 0 for the level bit because the input-buffer is disabled. This causes a read-modify-write as done by byt_gpio_set_direction() to write 0 to the level bit, driving the pin low! Before this commit byt_gpio_direction_output() relied on pinctrl_gpio_direction_output() to set the direction, followed by a call to byt_gpio_set() to apply the selected value. This causes the pin to go low between the pinctrl_gpio_direction_output() and byt_gpio_set() calls. Change byt_gpio_direction_output() to directly make the register modifications itself instead. Replacing the 2 subsequent writes to the value register with a single write. Note that the pinctrl code does not keep track internally of the direction, so not going through pinctrl_gpio_direction_output() is not an issue. This issue was noticed on a Trekstor SurfTab Twin 10.1. When the panel is already on at boot (no external monitor connected), then the i915 driver does a gpiod_get(..., GPIOD_OUT_HIGH) for the panel-enable GPIO. The temporarily going low of that GPIO was causing the panel to reset itself after which it would not show an image until it was turned off and back on again (until a full modeset was done on it). This commit fixes this. This commit also updates the byt_gpio_direction_input() to use direct register accesses instead of going through pinctrl_gpio_direction_input(), to keep it consistent with byt_gpio_direction_output(). Note for backporting, this commit depends on: commit e2b74419e5cc ("pinctrl: baytrail: Replace WARN with dev_info_once when setting direct-irq pin to output") Cc: stable@vger.kernel.org Fixes: 86e3ef812fe3 ("pinctrl: baytrail: Update gpio chip operations") Signed-off-by: Hans de Goede Acked-by: Mika Westerberg Signed-off-by: Andy Shevchenko --- drivers/pinctrl/intel/pinctrl-baytrail.c | 67 +++++++++++++++++++----- 1 file changed, 53 insertions(+), 14 deletions(-) diff --git a/drivers/pinctrl/intel/pinctrl-baytrail.c b/drivers/pinctrl/intel/pinctrl-baytrail.c index 0ff7c55173da..615174a9d1e0 100644 --- a/drivers/pinctrl/intel/pinctrl-baytrail.c +++ b/drivers/pinctrl/intel/pinctrl-baytrail.c @@ -800,6 +800,21 @@ static void byt_gpio_disable_free(struct pinctrl_dev *pctl_dev, pm_runtime_put(vg->dev); } +static void byt_gpio_direct_irq_check(struct intel_pinctrl *vg, + unsigned int offset) +{ + void __iomem *conf_reg = byt_gpio_reg(vg, offset, BYT_CONF0_REG); + + /* + * Before making any direction modifications, do a check if gpio is set + * for direct IRQ. On Bay Trail, setting GPIO to output does not make + * sense, so let's at least inform the caller before they shoot + * themselves in the foot. + */ + if (readl(conf_reg) & BYT_DIRECT_IRQ_EN) + dev_info_once(vg->dev, "Potential Error: Setting GPIO with direct_irq_en to output"); +} + static int byt_gpio_set_direction(struct pinctrl_dev *pctl_dev, struct pinctrl_gpio_range *range, unsigned int offset, @@ -807,7 +822,6 @@ static int byt_gpio_set_direction(struct pinctrl_dev *pctl_dev, { struct intel_pinctrl *vg = pinctrl_dev_get_drvdata(pctl_dev); void __iomem *val_reg = byt_gpio_reg(vg, offset, BYT_VAL_REG); - void __iomem *conf_reg = byt_gpio_reg(vg, offset, BYT_CONF0_REG); unsigned long flags; u32 value; @@ -817,14 +831,8 @@ static int byt_gpio_set_direction(struct pinctrl_dev *pctl_dev, value &= ~BYT_DIR_MASK; if (input) value |= BYT_OUTPUT_EN; - else if (readl(conf_reg) & BYT_DIRECT_IRQ_EN) - /* - * Before making any direction modifications, do a check if gpio - * is set for direct IRQ. On baytrail, setting GPIO to output - * does not make sense, so let's at least inform the caller before - * they shoot themselves in the foot. - */ - dev_info_once(vg->dev, "Potential Error: Setting GPIO with direct_irq_en to output"); + else + byt_gpio_direct_irq_check(vg, offset); writel(value, val_reg); @@ -1165,19 +1173,50 @@ static int byt_gpio_get_direction(struct gpio_chip *chip, unsigned int offset) static int byt_gpio_direction_input(struct gpio_chip *chip, unsigned int offset) { - return pinctrl_gpio_direction_input(chip->base + offset); + struct intel_pinctrl *vg = gpiochip_get_data(chip); + void __iomem *val_reg = byt_gpio_reg(vg, offset, BYT_VAL_REG); + unsigned long flags; + u32 reg; + + raw_spin_lock_irqsave(&byt_lock, flags); + + reg = readl(val_reg); + reg &= ~BYT_DIR_MASK; + reg |= BYT_OUTPUT_EN; + writel(reg, val_reg); + + raw_spin_unlock_irqrestore(&byt_lock, flags); + return 0; } +/* + * Note despite the temptation this MUST NOT be converted into a call to + * pinctrl_gpio_direction_output() + byt_gpio_set() that does not work this + * MUST be done as a single BYT_VAL_REG register write. + * See the commit message of the commit adding this comment for details. + */ static int byt_gpio_direction_output(struct gpio_chip *chip, unsigned int offset, int value) { - int ret = pinctrl_gpio_direction_output(chip->base + offset); + struct intel_pinctrl *vg = gpiochip_get_data(chip); + void __iomem *val_reg = byt_gpio_reg(vg, offset, BYT_VAL_REG); + unsigned long flags; + u32 reg; - if (ret) - return ret; + raw_spin_lock_irqsave(&byt_lock, flags); - byt_gpio_set(chip, offset, value); + byt_gpio_direct_irq_check(vg, offset); + reg = readl(val_reg); + reg &= ~BYT_DIR_MASK; + if (value) + reg |= BYT_LEVEL; + else + reg &= ~BYT_LEVEL; + + writel(reg, val_reg); + + raw_spin_unlock_irqrestore(&byt_lock, flags); return 0; } From fa48494cce5f6360b0f8683cdf258fb45c666287 Mon Sep 17 00:00:00 2001 From: Shannon Nelson Date: Thu, 25 Jun 2020 22:58:37 -0700 Subject: [PATCH 046/343] ionic: update the queue count on open Let the network stack know the real number of queues that we are using. v2: added error checking Fixes: 49d3b493673a ("ionic: disable the queues on link down") Signed-off-by: Shannon Nelson Reviewed-by: Jakub Kicinski Signed-off-by: David S. Miller --- drivers/net/ethernet/pensando/ionic/ionic_lif.c | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/drivers/net/ethernet/pensando/ionic/ionic_lif.c b/drivers/net/ethernet/pensando/ionic/ionic_lif.c index aaa00edd9d5b..3c9dde31f3fa 100644 --- a/drivers/net/ethernet/pensando/ionic/ionic_lif.c +++ b/drivers/net/ethernet/pensando/ionic/ionic_lif.c @@ -1673,6 +1673,14 @@ int ionic_open(struct net_device *netdev) if (err) goto err_out; + err = netif_set_real_num_tx_queues(netdev, lif->nxqs); + if (err) + goto err_txrx_deinit; + + err = netif_set_real_num_rx_queues(netdev, lif->nxqs); + if (err) + goto err_txrx_deinit; + /* don't start the queues until we have link */ if (netif_carrier_ok(netdev)) { err = ionic_start_queues(lif); From 0574e2000fc3103cbc69ba82ec1175ce171fdf5e Mon Sep 17 00:00:00 2001 From: Claudiu Manoil Date: Fri, 26 Jun 2020 19:17:29 +0300 Subject: [PATCH 047/343] enetc: Fix tx rings bitmap iteration range, irq handling The rings bitmap of an interrupt vector encodes which of the device's rings were assigned to that interrupt vector. Hence the iteration range of the tx rings bitmap (for_each_set_bit()) should be the total number of Tx rings of that netdevice instead of the number of rings assigned to the interrupt vector. Since there are 2 cores, and one interrupt vector for each core, the number of rings asigned to an interrupt vector is half the number of available rings. The impact of this error is that the upper half of the tx rings could still generate interrupts during napi polling. Fixes: d4fd0404c1c9 ("enetc: Introduce basic PF and VF ENETC ethernet drivers") Signed-off-by: Claudiu Manoil Signed-off-by: David S. Miller --- drivers/net/ethernet/freescale/enetc/enetc.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/net/ethernet/freescale/enetc/enetc.c b/drivers/net/ethernet/freescale/enetc/enetc.c index 96831f49925c..22105d09bc89 100644 --- a/drivers/net/ethernet/freescale/enetc/enetc.c +++ b/drivers/net/ethernet/freescale/enetc/enetc.c @@ -266,7 +266,7 @@ static irqreturn_t enetc_msix(int irq, void *data) /* disable interrupts */ enetc_wr_reg(v->rbier, 0); - for_each_set_bit(i, &v->tx_rings_map, v->count_tx_rings) + for_each_set_bit(i, &v->tx_rings_map, ENETC_MAX_NUM_TXQS) enetc_wr_reg(v->tbier_base + ENETC_BDR_OFF(i), 0); napi_schedule_irqoff(&v->napi); @@ -302,7 +302,7 @@ static int enetc_poll(struct napi_struct *napi, int budget) /* enable interrupts */ enetc_wr_reg(v->rbier, ENETC_RBIER_RXTIE); - for_each_set_bit(i, &v->tx_rings_map, v->count_tx_rings) + for_each_set_bit(i, &v->tx_rings_map, ENETC_MAX_NUM_TXQS) enetc_wr_reg(v->tbier_base + ENETC_BDR_OFF(i), ENETC_TBIER_TXTIE); From e09a7f87e5cb3b564ad1d98d4d2f678504d6df19 Mon Sep 17 00:00:00 2001 From: Hsin-Yi Wang Date: Thu, 9 Apr 2020 14:02:09 +0800 Subject: [PATCH 048/343] drm/mediatek: Fix device passed to cmdq drm device is now probed from mmsys. We need to use mmsys device to get gce nodes. Fix following errors: [ 0.740068] mediatek-drm mediatek-drm.1.auto: error -2 can't parse gce-client-reg property (0) [ 0.748721] mediatek-drm mediatek-drm.1.auto: error -2 can't parse gce-client-reg property (0) ... [ 2.659645] mediatek-drm mediatek-drm.1.auto: failed to request channel [ 2.666270] mediatek-drm mediatek-drm.1.auto: failed to request channel Fixes: 667c769246b0 ("soc / drm: mediatek: Fix mediatek-drm device probing") Signed-off-by: Hsin-Yi Wang Reviewed-by: Enric Balletbo i Serra Tested-by: Enric Balletbo i Serra Signed-off-by: Chun-Kuang Hu --- drivers/gpu/drm/mediatek/mtk_drm_crtc.c | 3 ++- drivers/gpu/drm/mediatek/mtk_drm_drv.c | 3 ++- 2 files changed, 4 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/mediatek/mtk_drm_crtc.c b/drivers/gpu/drm/mediatek/mtk_drm_crtc.c index fe46c4bac64d..feedbac027d9 100644 --- a/drivers/gpu/drm/mediatek/mtk_drm_crtc.c +++ b/drivers/gpu/drm/mediatek/mtk_drm_crtc.c @@ -831,7 +831,8 @@ int mtk_drm_crtc_create(struct drm_device *drm_dev, #if IS_REACHABLE(CONFIG_MTK_CMDQ) mtk_crtc->cmdq_client = - cmdq_mbox_create(dev, drm_crtc_index(&mtk_crtc->base), + cmdq_mbox_create(mtk_crtc->mmsys_dev, + drm_crtc_index(&mtk_crtc->base), 2000); if (IS_ERR(mtk_crtc->cmdq_client)) { dev_dbg(dev, "mtk_crtc %d failed to create mailbox client, writing register by CPU now\n", diff --git a/drivers/gpu/drm/mediatek/mtk_drm_drv.c b/drivers/gpu/drm/mediatek/mtk_drm_drv.c index 6bd369434d9d..f953d3746e61 100644 --- a/drivers/gpu/drm/mediatek/mtk_drm_drv.c +++ b/drivers/gpu/drm/mediatek/mtk_drm_drv.c @@ -514,7 +514,8 @@ static int mtk_drm_probe(struct platform_device *pdev) goto err_node; } - ret = mtk_ddp_comp_init(dev, node, comp, comp_id, NULL); + ret = mtk_ddp_comp_init(dev->parent, node, comp, + comp_id, NULL); if (ret) { of_node_put(node); goto err_node; From a11398ef87ae38e008c69bea143513e9a36a6577 Mon Sep 17 00:00:00 2001 From: Enric Balletbo i Serra Date: Wed, 26 Feb 2020 12:27:23 +0100 Subject: [PATCH 049/343] drm/mediatek: Remove debug messages for function calls Equivalent information can be nowadays obtained using function tracer. Signed-off-by: Enric Balletbo i Serra Reviewed-by: CK Hu Signed-off-by: Chun-Kuang Hu --- drivers/gpu/drm/mediatek/mtk_drm_crtc.c | 5 ----- drivers/gpu/drm/mediatek/mtk_drm_drv.c | 2 -- 2 files changed, 7 deletions(-) diff --git a/drivers/gpu/drm/mediatek/mtk_drm_crtc.c b/drivers/gpu/drm/mediatek/mtk_drm_crtc.c index feedbac027d9..7cd8f415fd02 100644 --- a/drivers/gpu/drm/mediatek/mtk_drm_crtc.c +++ b/drivers/gpu/drm/mediatek/mtk_drm_crtc.c @@ -193,7 +193,6 @@ static int mtk_crtc_ddp_clk_enable(struct mtk_drm_crtc *mtk_crtc) int ret; int i; - DRM_DEBUG_DRIVER("%s\n", __func__); for (i = 0; i < mtk_crtc->ddp_comp_nr; i++) { ret = clk_prepare_enable(mtk_crtc->ddp_comp[i]->clk); if (ret) { @@ -213,7 +212,6 @@ static void mtk_crtc_ddp_clk_disable(struct mtk_drm_crtc *mtk_crtc) { int i; - DRM_DEBUG_DRIVER("%s\n", __func__); for (i = 0; i < mtk_crtc->ddp_comp_nr; i++) clk_disable_unprepare(mtk_crtc->ddp_comp[i]->clk); } @@ -258,7 +256,6 @@ static int mtk_crtc_ddp_hw_init(struct mtk_drm_crtc *mtk_crtc) int ret; int i; - DRM_DEBUG_DRIVER("%s\n", __func__); if (WARN_ON(!crtc->state)) return -EINVAL; @@ -299,7 +296,6 @@ static int mtk_crtc_ddp_hw_init(struct mtk_drm_crtc *mtk_crtc) goto err_mutex_unprepare; } - DRM_DEBUG_DRIVER("mediatek_ddp_ddp_path_setup\n"); for (i = 0; i < mtk_crtc->ddp_comp_nr - 1; i++) { mtk_mmsys_ddp_connect(mtk_crtc->mmsys_dev, mtk_crtc->ddp_comp[i]->id, @@ -349,7 +345,6 @@ static void mtk_crtc_ddp_hw_fini(struct mtk_drm_crtc *mtk_crtc) struct drm_crtc *crtc = &mtk_crtc->base; int i; - DRM_DEBUG_DRIVER("%s\n", __func__); for (i = 0; i < mtk_crtc->ddp_comp_nr; i++) { mtk_ddp_comp_stop(mtk_crtc->ddp_comp[i]); if (i == 1) diff --git a/drivers/gpu/drm/mediatek/mtk_drm_drv.c b/drivers/gpu/drm/mediatek/mtk_drm_drv.c index f953d3746e61..c043ec6c8166 100644 --- a/drivers/gpu/drm/mediatek/mtk_drm_drv.c +++ b/drivers/gpu/drm/mediatek/mtk_drm_drv.c @@ -572,7 +572,6 @@ static int mtk_drm_sys_suspend(struct device *dev) int ret; ret = drm_mode_config_helper_suspend(drm); - DRM_DEBUG_DRIVER("mtk_drm_sys_suspend\n"); return ret; } @@ -584,7 +583,6 @@ static int mtk_drm_sys_resume(struct device *dev) int ret; ret = drm_mode_config_helper_resume(drm); - DRM_DEBUG_DRIVER("mtk_drm_sys_resume\n"); return ret; } From c79484f68b164a4b59c85b7a5008455ddd7af6fb Mon Sep 17 00:00:00 2001 From: YueHaibing Date: Wed, 29 Apr 2020 15:13:37 +0800 Subject: [PATCH 050/343] drm/mediatek: Fix Kconfig warning WARNING: unmet direct dependencies detected for MTK_MMSYS Depends on [n]: (ARCH_MEDIATEK [=y] || COMPILE_TEST [=n]) && COMMON_CLK_MT8173_MMSYS [=n] Selected by [y]: - DRM_MEDIATEK [=y] && HAS_IOMEM [=y] && DRM [=y] && (ARCH_MEDIATEK [=y] || ARM && COMPILE_TEST [=n]) && COMMON_CLK [=y] && HAVE_ARM_SMCCC [=y] && OF [=y] Make DRM_MEDIATEK depend on MTK_MMSYS to fix this. Fixes: 2c758e301ed9 ("soc / drm: mediatek: Move routing control to mmsys device") Signed-off-by: YueHaibing Reviewed-by: Enric Balletbo i Serra Signed-off-by: Chun-Kuang Hu --- drivers/gpu/drm/mediatek/Kconfig | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/mediatek/Kconfig b/drivers/gpu/drm/mediatek/Kconfig index c420f5a3d33b..aa74aac3cbcc 100644 --- a/drivers/gpu/drm/mediatek/Kconfig +++ b/drivers/gpu/drm/mediatek/Kconfig @@ -6,12 +6,12 @@ config DRM_MEDIATEK depends on COMMON_CLK depends on HAVE_ARM_SMCCC depends on OF + depends on MTK_MMSYS select DRM_GEM_CMA_HELPER select DRM_KMS_HELPER select DRM_MIPI_DSI select DRM_PANEL select MEMORY - select MTK_MMSYS select MTK_SMI select VIDEOMODE_HELPERS help From c0b8892e2461b5fa740e47efbb1269a487b04020 Mon Sep 17 00:00:00 2001 From: Hsin-Yi Wang Date: Mon, 22 Jun 2020 23:57:53 +0800 Subject: [PATCH 051/343] drm/mediatek: Check plane visibility in atomic_update Disable the plane if it's not visible. Otherwise mtk_ovl_layer_config() would proceed with invalid plane and we may see vblank timeout. Fixes: 119f5173628a ("drm/mediatek: Add DRM Driver for Mediatek SoC MT8173.") Signed-off-by: Hsin-Yi Wang Reviewed-by: Tomasz Figa Signed-off-by: Chun-Kuang Hu --- drivers/gpu/drm/mediatek/mtk_drm_plane.c | 25 ++++++++++++++---------- 1 file changed, 15 insertions(+), 10 deletions(-) diff --git a/drivers/gpu/drm/mediatek/mtk_drm_plane.c b/drivers/gpu/drm/mediatek/mtk_drm_plane.c index c2bd683a87c8..92141a19681b 100644 --- a/drivers/gpu/drm/mediatek/mtk_drm_plane.c +++ b/drivers/gpu/drm/mediatek/mtk_drm_plane.c @@ -164,6 +164,16 @@ static int mtk_plane_atomic_check(struct drm_plane *plane, true, true); } +static void mtk_plane_atomic_disable(struct drm_plane *plane, + struct drm_plane_state *old_state) +{ + struct mtk_plane_state *state = to_mtk_plane_state(plane->state); + + state->pending.enable = false; + wmb(); /* Make sure the above parameter is set before update */ + state->pending.dirty = true; +} + static void mtk_plane_atomic_update(struct drm_plane *plane, struct drm_plane_state *old_state) { @@ -178,6 +188,11 @@ static void mtk_plane_atomic_update(struct drm_plane *plane, if (!crtc || WARN_ON(!fb)) return; + if (!plane->state->visible) { + mtk_plane_atomic_disable(plane, old_state); + return; + } + gem = fb->obj[0]; mtk_gem = to_mtk_gem_obj(gem); addr = mtk_gem->dma_addr; @@ -200,16 +215,6 @@ static void mtk_plane_atomic_update(struct drm_plane *plane, state->pending.dirty = true; } -static void mtk_plane_atomic_disable(struct drm_plane *plane, - struct drm_plane_state *old_state) -{ - struct mtk_plane_state *state = to_mtk_plane_state(plane->state); - - state->pending.enable = false; - wmb(); /* Make sure the above parameter is set before update */ - state->pending.dirty = true; -} - static const struct drm_plane_helper_funcs mtk_plane_helper_funcs = { .prepare_fb = drm_gem_fb_prepare_fb, .atomic_check = mtk_plane_atomic_check, From 2ce578ca9444bb44da66b9a494f56e7ec12e6466 Mon Sep 17 00:00:00 2001 From: Miaohe Lin Date: Sat, 27 Jun 2020 15:47:51 +0800 Subject: [PATCH 052/343] net: ipv4: Fix wrong type conversion from hint to rt in ip_route_use_hint() We can't cast sk_buff to rtable by (struct rtable *)hint. Use skb_rtable(). Fixes: 02b24941619f ("ipv4: use dst hint for ipv4 list receive") Signed-off-by: Miaohe Lin Signed-off-by: David S. Miller --- net/ipv4/route.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/ipv4/route.c b/net/ipv4/route.c index 1d7076b78e63..a01efa062f6b 100644 --- a/net/ipv4/route.c +++ b/net/ipv4/route.c @@ -2027,7 +2027,7 @@ int ip_route_use_hint(struct sk_buff *skb, __be32 daddr, __be32 saddr, const struct sk_buff *hint) { struct in_device *in_dev = __in_dev_get_rcu(dev); - struct rtable *rt = (struct rtable *)hint; + struct rtable *rt = skb_rtable(hint); struct net *net = dev_net(dev); int err = -EINVAL; u32 tag = 0; From 93dd5f185916b05e931cffae636596f21f98546e Mon Sep 17 00:00:00 2001 From: John Fastabend Date: Thu, 25 Jun 2020 16:12:59 -0700 Subject: [PATCH 053/343] bpf, sockmap: RCU splat with redirect and strparser error or TLS There are two paths to generate the below RCU splat the first and most obvious is the result of the BPF verdict program issuing a redirect on a TLS socket (This is the splat shown below). Unlike the non-TLS case the caller of the *strp_read() hooks does not wrap the call in a rcu_read_lock/unlock. Then if the BPF program issues a redirect action we hit the RCU splat. However, in the non-TLS socket case the splat appears to be relatively rare, because the skmsg caller into the strp_data_ready() is wrapped in a rcu_read_lock/unlock. Shown here, static void sk_psock_strp_data_ready(struct sock *sk) { struct sk_psock *psock; rcu_read_lock(); psock = sk_psock(sk); if (likely(psock)) { if (tls_sw_has_ctx_rx(sk)) { psock->parser.saved_data_ready(sk); } else { write_lock_bh(&sk->sk_callback_lock); strp_data_ready(&psock->parser.strp); write_unlock_bh(&sk->sk_callback_lock); } } rcu_read_unlock(); } If the above was the only way to run the verdict program we would be safe. But, there is a case where the strparser may throw an ENOMEM error while parsing the skb. This is a result of a failed skb_clone, or alloc_skb_for_msg while building a new merged skb when the msg length needed spans multiple skbs. This will in turn put the skb on the strp_wrk workqueue in the strparser code. The skb will later be dequeued and verdict programs run, but now from a different context without the rcu_read_lock()/unlock() critical section in sk_psock_strp_data_ready() shown above. In practice I have not seen this yet, because as far as I know most users of the verdict programs are also only working on single skbs. In this case no merge happens which could trigger the above ENOMEM errors. In addition the system would need to be under memory pressure. For example, we can't hit the above case in selftests because we missed having tests to merge skbs. (Added in later patch) To fix the below splat extend the rcu_read_lock/unnlock block to include the call to sk_psock_tls_verdict_apply(). This will fix both TLS redirect case and non-TLS redirect+error case. Also remove psock from the sk_psock_tls_verdict_apply() function signature its not used there. [ 1095.937597] WARNING: suspicious RCU usage [ 1095.940964] 5.7.0-rc7-02911-g463bac5f1ca79 #1 Tainted: G W [ 1095.944363] ----------------------------- [ 1095.947384] include/linux/skmsg.h:284 suspicious rcu_dereference_check() usage! [ 1095.950866] [ 1095.950866] other info that might help us debug this: [ 1095.950866] [ 1095.957146] [ 1095.957146] rcu_scheduler_active = 2, debug_locks = 1 [ 1095.961482] 1 lock held by test_sockmap/15970: [ 1095.964501] #0: ffff9ea6b25de660 (sk_lock-AF_INET){+.+.}-{0:0}, at: tls_sw_recvmsg+0x13a/0x840 [tls] [ 1095.968568] [ 1095.968568] stack backtrace: [ 1095.975001] CPU: 1 PID: 15970 Comm: test_sockmap Tainted: G W 5.7.0-rc7-02911-g463bac5f1ca79 #1 [ 1095.977883] Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS 1.12.0-1 04/01/2014 [ 1095.980519] Call Trace: [ 1095.982191] dump_stack+0x8f/0xd0 [ 1095.984040] sk_psock_skb_redirect+0xa6/0xf0 [ 1095.986073] sk_psock_tls_strp_read+0x1d8/0x250 [ 1095.988095] tls_sw_recvmsg+0x714/0x840 [tls] v2: Improve commit message to identify non-TLS redirect plus error case condition as well as more common TLS case. In the process I decided doing the rcu_read_unlock followed by the lock/unlock inside branches was unnecessarily complex. We can just extend the current rcu block and get the same effeective without the shuffling and branching. Thanks Martin! Fixes: e91de6afa81c1 ("bpf: Fix running sk_skb program types with ktls") Reported-by: Jakub Sitnicki Reported-by: kernel test robot Signed-off-by: John Fastabend Signed-off-by: Alexei Starovoitov Acked-by: Martin KaFai Lau Acked-by: Jakub Sitnicki Link: https://lore.kernel.org/bpf/159312677907.18340.11064813152758406626.stgit@john-XPS-13-9370 --- net/core/skmsg.c | 13 ++++++------- 1 file changed, 6 insertions(+), 7 deletions(-) diff --git a/net/core/skmsg.c b/net/core/skmsg.c index 351afbf6bfba..c41ab6906b21 100644 --- a/net/core/skmsg.c +++ b/net/core/skmsg.c @@ -683,7 +683,7 @@ static struct sk_psock *sk_psock_from_strp(struct strparser *strp) return container_of(parser, struct sk_psock, parser); } -static void sk_psock_skb_redirect(struct sk_psock *psock, struct sk_buff *skb) +static void sk_psock_skb_redirect(struct sk_buff *skb) { struct sk_psock *psock_other; struct sock *sk_other; @@ -715,12 +715,11 @@ static void sk_psock_skb_redirect(struct sk_psock *psock, struct sk_buff *skb) } } -static void sk_psock_tls_verdict_apply(struct sk_psock *psock, - struct sk_buff *skb, int verdict) +static void sk_psock_tls_verdict_apply(struct sk_buff *skb, int verdict) { switch (verdict) { case __SK_REDIRECT: - sk_psock_skb_redirect(psock, skb); + sk_psock_skb_redirect(skb); break; case __SK_PASS: case __SK_DROP: @@ -741,8 +740,8 @@ int sk_psock_tls_strp_read(struct sk_psock *psock, struct sk_buff *skb) ret = sk_psock_bpf_run(psock, prog, skb); ret = sk_psock_map_verd(ret, tcp_skb_bpf_redirect_fetch(skb)); } + sk_psock_tls_verdict_apply(skb, ret); rcu_read_unlock(); - sk_psock_tls_verdict_apply(psock, skb, ret); return ret; } EXPORT_SYMBOL_GPL(sk_psock_tls_strp_read); @@ -770,7 +769,7 @@ static void sk_psock_verdict_apply(struct sk_psock *psock, } goto out_free; case __SK_REDIRECT: - sk_psock_skb_redirect(psock, skb); + sk_psock_skb_redirect(skb); break; case __SK_DROP: /* fall-through */ @@ -794,8 +793,8 @@ static void sk_psock_strp_read(struct strparser *strp, struct sk_buff *skb) ret = sk_psock_bpf_run(psock, prog, skb); ret = sk_psock_map_verd(ret, tcp_skb_bpf_redirect_fetch(skb)); } - rcu_read_unlock(); sk_psock_verdict_apply(psock, skb, ret); + rcu_read_unlock(); } static int sk_psock_strp_read_done(struct strparser *strp, int err) From 8025751d4d55a2f32be6bdf825b6a80c299875f5 Mon Sep 17 00:00:00 2001 From: John Fastabend Date: Thu, 25 Jun 2020 16:13:18 -0700 Subject: [PATCH 054/343] bpf, sockmap: RCU dereferenced psock may be used outside RCU block If an ingress verdict program specifies message sizes greater than skb->len and there is an ENOMEM error due to memory pressure we may call the rcv_msg handler outside the strp_data_ready() caller context. This is because on an ENOMEM error the strparser will retry from a workqueue. The caller currently protects the use of psock by calling the strp_data_ready() inside a rcu_read_lock/unlock block. But, in above workqueue error case the psock is accessed outside the read_lock/unlock block of the caller. So instead of using psock directly we must do a look up against the sk again to ensure the psock is available. There is an an ugly piece here where we must handle the case where we paused the strp and removed the psock. On psock removal we first pause the strparser and then remove the psock. If the strparser is paused while an skb is scheduled on the workqueue the skb will be dropped on the flow and kfree_skb() is called. If the workqueue manages to get called before we pause the strparser but runs the rcvmsg callback after the psock is removed we will hit the unlikely case where we run the sockmap rcvmsg handler but do not have a psock. For now we will follow strparser logic and drop the skb on the floor with skb_kfree(). This is ugly because the data is dropped. To date this has not caused problems in practice because either the application controlling the sockmap is coordinating with the datapath so that skbs are "flushed" before removal or we simply wait for the sock to be closed before removing it. This patch fixes the describe RCU bug and dropping the skb doesn't make things worse. Future patches will improve this by allowing the normal case where skbs are not merged to skip the strparser altogether. In practice many (most?) use cases have no need to merge skbs so its both a code complexity hit as seen above and a performance issue. For example, in the Cilium case we always set the strparser up to return sbks 1:1 without any merging and have avoided above issues. Fixes: e91de6afa81c1 ("bpf: Fix running sk_skb program types with ktls") Signed-off-by: John Fastabend Signed-off-by: Alexei Starovoitov Acked-by: Martin KaFai Lau Link: https://lore.kernel.org/bpf/159312679888.18340.15248924071966273998.stgit@john-XPS-13-9370 --- net/core/skmsg.c | 10 +++++++++- 1 file changed, 9 insertions(+), 1 deletion(-) diff --git a/net/core/skmsg.c b/net/core/skmsg.c index c41ab6906b21..6a32a1fd34f8 100644 --- a/net/core/skmsg.c +++ b/net/core/skmsg.c @@ -781,11 +781,18 @@ static void sk_psock_verdict_apply(struct sk_psock *psock, static void sk_psock_strp_read(struct strparser *strp, struct sk_buff *skb) { - struct sk_psock *psock = sk_psock_from_strp(strp); + struct sk_psock *psock; struct bpf_prog *prog; int ret = __SK_DROP; + struct sock *sk; rcu_read_lock(); + sk = strp->sk; + psock = sk_psock(sk); + if (unlikely(!psock)) { + kfree_skb(skb); + goto out; + } prog = READ_ONCE(psock->progs.skb_verdict); if (likely(prog)) { skb_orphan(skb); @@ -794,6 +801,7 @@ static void sk_psock_strp_read(struct strparser *strp, struct sk_buff *skb) ret = sk_psock_map_verd(ret, tcp_skb_bpf_redirect_fetch(skb)); } sk_psock_verdict_apply(psock, skb, ret); +out: rcu_read_unlock(); } From 53792fa45b1b17f78f18bcd0bd167674341297e8 Mon Sep 17 00:00:00 2001 From: John Fastabend Date: Thu, 25 Jun 2020 16:13:38 -0700 Subject: [PATCH 055/343] bpf, sockmap: Add ingres skb tests that utilize merge skbs Add a test to check strparser merging skbs is working. Signed-off-by: John Fastabend Signed-off-by: Alexei Starovoitov Acked-by: Martin KaFai Lau Link: https://lore.kernel.org/bpf/159312681884.18340.4922800172600252370.stgit@john-XPS-13-9370 --- .../selftests/bpf/progs/test_sockmap_kern.h | 8 +++++++- tools/testing/selftests/bpf/test_sockmap.c | 18 ++++++++++++++++++ 2 files changed, 25 insertions(+), 1 deletion(-) diff --git a/tools/testing/selftests/bpf/progs/test_sockmap_kern.h b/tools/testing/selftests/bpf/progs/test_sockmap_kern.h index 057036ca1111..3dca4c2e2418 100644 --- a/tools/testing/selftests/bpf/progs/test_sockmap_kern.h +++ b/tools/testing/selftests/bpf/progs/test_sockmap_kern.h @@ -79,7 +79,7 @@ struct { struct { __uint(type, BPF_MAP_TYPE_ARRAY); - __uint(max_entries, 2); + __uint(max_entries, 3); __type(key, int); __type(value, int); } sock_skb_opts SEC(".maps"); @@ -94,6 +94,12 @@ struct { SEC("sk_skb1") int bpf_prog1(struct __sk_buff *skb) { + int *f, two = 2; + + f = bpf_map_lookup_elem(&sock_skb_opts, &two); + if (f && *f) { + return *f; + } return skb->len; } diff --git a/tools/testing/selftests/bpf/test_sockmap.c b/tools/testing/selftests/bpf/test_sockmap.c index 37695fc8096a..78789b27e573 100644 --- a/tools/testing/selftests/bpf/test_sockmap.c +++ b/tools/testing/selftests/bpf/test_sockmap.c @@ -85,6 +85,7 @@ int txmsg_ktls_skb_drop; int txmsg_ktls_skb_redir; int ktls; int peek_flag; +int skb_use_parser; static const struct option long_options[] = { {"help", no_argument, NULL, 'h' }, @@ -174,6 +175,7 @@ static void test_reset(void) txmsg_apply = txmsg_cork = 0; txmsg_ingress = txmsg_redir_skb = 0; txmsg_ktls_skb = txmsg_ktls_skb_drop = txmsg_ktls_skb_redir = 0; + skb_use_parser = 0; } static int test_start_subtest(const struct _test *t, struct sockmap_options *o) @@ -1211,6 +1213,11 @@ static int run_options(struct sockmap_options *options, int cg_fd, int test) } } + if (skb_use_parser) { + i = 2; + err = bpf_map_update_elem(map_fd[7], &i, &skb_use_parser, BPF_ANY); + } + if (txmsg_drop) options->drop_expected = true; @@ -1650,6 +1657,16 @@ static void test_txmsg_cork(int cgrp, struct sockmap_options *opt) test_send(opt, cgrp); } +static void test_txmsg_ingress_parser(int cgrp, struct sockmap_options *opt) +{ + txmsg_pass = 1; + skb_use_parser = 512; + opt->iov_length = 256; + opt->iov_count = 1; + opt->rate = 2; + test_exec(cgrp, opt); +} + char *map_names[] = { "sock_map", "sock_map_txmsg", @@ -1748,6 +1765,7 @@ struct _test test[] = { {"txmsg test pull-data", test_txmsg_pull}, {"txmsg test pop-data", test_txmsg_pop}, {"txmsg test push/pop data", test_txmsg_push_pop}, + {"txmsg text ingress parser", test_txmsg_ingress_parser}, }; static int check_whitelist(struct _test *t, struct sockmap_options *opt) From 9b14d1f8a76682124c5e465196685a9833ff526e Mon Sep 17 00:00:00 2001 From: Horatiu Vultur Date: Sun, 28 Jun 2020 15:45:16 +0200 Subject: [PATCH 056/343] bridge: mrp: Fix endian conversion and some other warnings The following sparse warnings are fixed: net/bridge/br_mrp.c:106:18: warning: incorrect type in assignment (different base types) net/bridge/br_mrp.c:106:18: expected unsigned short [usertype] net/bridge/br_mrp.c:106:18: got restricted __be16 [usertype] net/bridge/br_mrp.c:281:23: warning: incorrect type in argument 1 (different modifiers) net/bridge/br_mrp.c:281:23: expected struct list_head *entry net/bridge/br_mrp.c:281:23: got struct list_head [noderef] * net/bridge/br_mrp.c:332:28: warning: incorrect type in argument 1 (different modifiers) net/bridge/br_mrp.c:332:28: expected struct list_head *new net/bridge/br_mrp.c:332:28: got struct list_head [noderef] * net/bridge/br_mrp.c:332:40: warning: incorrect type in argument 2 (different modifiers) net/bridge/br_mrp.c:332:40: expected struct list_head *head net/bridge/br_mrp.c:332:40: got struct list_head [noderef] * net/bridge/br_mrp.c:682:29: warning: incorrect type in argument 1 (different modifiers) net/bridge/br_mrp.c:682:29: expected struct list_head const *head net/bridge/br_mrp.c:682:29: got struct list_head [noderef] * Reported-by: kernel test robot Fixes: 2f1a11ae11d222 ("bridge: mrp: Add MRP interface.") Fixes: 4b8d7d4c599182 ("bridge: mrp: Extend bridge interface") Fixes: 9a9f26e8f7ea30 ("bridge: mrp: Connect MRP API with the switchdev API") Signed-off-by: Horatiu Vultur Acked-by: Nikolay Aleksandrov Signed-off-by: David S. Miller --- net/bridge/br_mrp.c | 2 +- net/bridge/br_private.h | 2 +- net/bridge/br_private_mrp.h | 2 +- 3 files changed, 3 insertions(+), 3 deletions(-) diff --git a/net/bridge/br_mrp.c b/net/bridge/br_mrp.c index 779e1eb75443..90592af9db61 100644 --- a/net/bridge/br_mrp.c +++ b/net/bridge/br_mrp.c @@ -86,7 +86,7 @@ static struct sk_buff *br_mrp_skb_alloc(struct net_bridge_port *p, { struct ethhdr *eth_hdr; struct sk_buff *skb; - u16 *version; + __be16 *version; skb = dev_alloc_skb(MRP_MAX_FRAME_LENGTH); if (!skb) diff --git a/net/bridge/br_private.h b/net/bridge/br_private.h index 2130fe0194e6..e0ea6dbbc97e 100644 --- a/net/bridge/br_private.h +++ b/net/bridge/br_private.h @@ -430,7 +430,7 @@ struct net_bridge { struct hlist_head fdb_list; #if IS_ENABLED(CONFIG_BRIDGE_MRP) - struct list_head __rcu mrp_list; + struct list_head mrp_list; #endif }; diff --git a/net/bridge/br_private_mrp.h b/net/bridge/br_private_mrp.h index 33b255e38ffe..315eb37d89f0 100644 --- a/net/bridge/br_private_mrp.h +++ b/net/bridge/br_private_mrp.h @@ -8,7 +8,7 @@ struct br_mrp { /* list of mrp instances */ - struct list_head __rcu list; + struct list_head list; struct net_bridge_port __rcu *p_port; struct net_bridge_port __rcu *s_port; From 7dea927f702df030c02bd0c9e6e320a8315e3efa Mon Sep 17 00:00:00 2001 From: Vladimir Oltean Date: Sun, 28 Jun 2020 17:49:35 +0300 Subject: [PATCH 057/343] lib: packing: add documentation for pbuflen argument Fixes sparse warning: Function parameter or member 'pbuflen' not described in 'packing' Fixes: 554aae35007e ("lib: Add support for generic packing operations") Signed-off-by: Vladimir Oltean Signed-off-by: David S. Miller --- lib/packing.c | 1 + 1 file changed, 1 insertion(+) diff --git a/lib/packing.c b/lib/packing.c index 50d1e9f2f5a7..6ed72dccfdb5 100644 --- a/lib/packing.c +++ b/lib/packing.c @@ -73,6 +73,7 @@ static void adjust_for_msb_right_quirk(u64 *to_write, int *box_start_bit, * @endbit: The index (in logical notation, compensated for quirks) where * the packed value ends within pbuf. Must be smaller than, or equal * to, startbit. + * @pbuflen: The length in bytes of the packed buffer pointed to by @pbuf. * @op: If PACK, then uval will be treated as const pointer and copied (packed) * into pbuf, between startbit and endbit. * If UNPACK, then pbuf will be treated as const pointer and the logical From be74294ffa24f5fbc0d6643842e3e095447e17a2 Mon Sep 17 00:00:00 2001 From: Cong Wang Date: Fri, 26 Jun 2020 11:24:22 -0700 Subject: [PATCH 058/343] net: get rid of lockdep_set_class_and_subclass() lockdep_set_class_and_subclass() is meant to reduce the _nested() annotations by assigning a default subclass. For addr_list_lock, we have to compute the subclass at run-time as the netdevice topology changes after creation. So, we should just get rid of these lockdep_set_class_and_subclass() and stick with our _nested() annotations. Fixes: 845e0ebb4408 ("net: change addr_list_lock back to static key") Suggested-by: Taehee Yoo Cc: Dmitry Vyukov Signed-off-by: Cong Wang Signed-off-by: David S. Miller --- drivers/net/macsec.c | 5 ++--- drivers/net/macvlan.c | 5 ++--- net/8021q/vlan_dev.c | 9 ++++----- 3 files changed, 8 insertions(+), 11 deletions(-) diff --git a/drivers/net/macsec.c b/drivers/net/macsec.c index e56547bfdac9..9159846b8b93 100644 --- a/drivers/net/macsec.c +++ b/drivers/net/macsec.c @@ -4052,9 +4052,8 @@ static int macsec_newlink(struct net *net, struct net_device *dev, return err; netdev_lockdep_set_classes(dev); - lockdep_set_class_and_subclass(&dev->addr_list_lock, - &macsec_netdev_addr_lock_key, - dev->lower_level); + lockdep_set_class(&dev->addr_list_lock, + &macsec_netdev_addr_lock_key); err = netdev_upper_dev_link(real_dev, dev, extack); if (err < 0) diff --git a/drivers/net/macvlan.c b/drivers/net/macvlan.c index 6a6cc9f75307..4942f6112e51 100644 --- a/drivers/net/macvlan.c +++ b/drivers/net/macvlan.c @@ -880,9 +880,8 @@ static struct lock_class_key macvlan_netdev_addr_lock_key; static void macvlan_set_lockdep_class(struct net_device *dev) { netdev_lockdep_set_classes(dev); - lockdep_set_class_and_subclass(&dev->addr_list_lock, - &macvlan_netdev_addr_lock_key, - dev->lower_level); + lockdep_set_class(&dev->addr_list_lock, + &macvlan_netdev_addr_lock_key); } static int macvlan_init(struct net_device *dev) diff --git a/net/8021q/vlan_dev.c b/net/8021q/vlan_dev.c index c8d6a07e23c5..3dd7c972677b 100644 --- a/net/8021q/vlan_dev.c +++ b/net/8021q/vlan_dev.c @@ -503,11 +503,10 @@ static void vlan_dev_set_lockdep_one(struct net_device *dev, lockdep_set_class(&txq->_xmit_lock, &vlan_netdev_xmit_lock_key); } -static void vlan_dev_set_lockdep_class(struct net_device *dev, int subclass) +static void vlan_dev_set_lockdep_class(struct net_device *dev) { - lockdep_set_class_and_subclass(&dev->addr_list_lock, - &vlan_netdev_addr_lock_key, - subclass); + lockdep_set_class(&dev->addr_list_lock, + &vlan_netdev_addr_lock_key); netdev_for_each_tx_queue(dev, vlan_dev_set_lockdep_one, NULL); } @@ -601,7 +600,7 @@ static int vlan_dev_init(struct net_device *dev) SET_NETDEV_DEVTYPE(dev, &vlan_type); - vlan_dev_set_lockdep_class(dev, dev->lower_level); + vlan_dev_set_lockdep_class(dev); vlan->vlan_pcpu_stats = netdev_alloc_pcpu_stats(struct vlan_pcpu_stats); if (!vlan->vlan_pcpu_stats) From e8280338c778a3f81477624267c9fa47f931477b Mon Sep 17 00:00:00 2001 From: Cong Wang Date: Fri, 26 Jun 2020 11:25:27 -0700 Subject: [PATCH 059/343] net: explain the lockdep annotations for dev_uc_unsync() The lockdep annotations for dev_uc_unsync() and dev_mc_unsync() are not easy to understand, so add some comments to explain why they are correct. Similar for the rest netif_addr_lock_bh() cases, they don't need nested version. Cc: Taehee Yoo Cc: Dmitry Vyukov Signed-off-by: Cong Wang Signed-off-by: David S. Miller --- net/core/dev_addr_lists.c | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/net/core/dev_addr_lists.c b/net/core/dev_addr_lists.c index 6393ba930097..54cd568e7c2f 100644 --- a/net/core/dev_addr_lists.c +++ b/net/core/dev_addr_lists.c @@ -690,6 +690,15 @@ void dev_uc_unsync(struct net_device *to, struct net_device *from) if (to->addr_len != from->addr_len) return; + /* netif_addr_lock_bh() uses lockdep subclass 0, this is okay for two + * reasons: + * 1) This is always called without any addr_list_lock, so as the + * outermost one here, it must be 0. + * 2) This is called by some callers after unlinking the upper device, + * so the dev->lower_level becomes 1 again. + * Therefore, the subclass for 'from' is 0, for 'to' is either 1 or + * larger. + */ netif_addr_lock_bh(from); netif_addr_lock_nested(to); __hw_addr_unsync(&to->uc, &from->uc, to->addr_len); @@ -911,6 +920,7 @@ void dev_mc_unsync(struct net_device *to, struct net_device *from) if (to->addr_len != from->addr_len) return; + /* See the above comments inside dev_uc_unsync(). */ netif_addr_lock_bh(from); netif_addr_lock_nested(to); __hw_addr_unsync(&to->mc, &from->mc, to->addr_len); From a9b1110162357689a34992d5c925852948e5b9fd Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Sat, 27 Jun 2020 13:31:50 -0700 Subject: [PATCH 060/343] llc: make sure applications use ARPHRD_ETHER syzbot was to trigger a bug by tricking AF_LLC with non sensible addr->sllc_arphrd It seems clear LLC requires an Ethernet device. Back in commit abf9d537fea2 ("llc: add support for SO_BINDTODEVICE") Octavian Purdila added possibility for application to use a zero value for sllc_arphrd, convert it to ARPHRD_ETHER to not cause regressions on existing applications. BUG: KASAN: use-after-free in __read_once_size include/linux/compiler.h:199 [inline] BUG: KASAN: use-after-free in list_empty include/linux/list.h:268 [inline] BUG: KASAN: use-after-free in waitqueue_active include/linux/wait.h:126 [inline] BUG: KASAN: use-after-free in wq_has_sleeper include/linux/wait.h:160 [inline] BUG: KASAN: use-after-free in skwq_has_sleeper include/net/sock.h:2092 [inline] BUG: KASAN: use-after-free in sock_def_write_space+0x642/0x670 net/core/sock.c:2813 Read of size 8 at addr ffff88801e0b4078 by task ksoftirqd/3/27 CPU: 3 PID: 27 Comm: ksoftirqd/3 Not tainted 5.5.0-rc1-syzkaller #0 Hardware name: QEMU Standard PC (Q35 + ICH9, 2009), BIOS rel-1.12.0-59-gc9ba5276e321-prebuilt.qemu.org 04/01/2014 Call Trace: __dump_stack lib/dump_stack.c:77 [inline] dump_stack+0x197/0x210 lib/dump_stack.c:118 print_address_description.constprop.0.cold+0xd4/0x30b mm/kasan/report.c:374 __kasan_report.cold+0x1b/0x41 mm/kasan/report.c:506 kasan_report+0x12/0x20 mm/kasan/common.c:639 __asan_report_load8_noabort+0x14/0x20 mm/kasan/generic_report.c:135 __read_once_size include/linux/compiler.h:199 [inline] list_empty include/linux/list.h:268 [inline] waitqueue_active include/linux/wait.h:126 [inline] wq_has_sleeper include/linux/wait.h:160 [inline] skwq_has_sleeper include/net/sock.h:2092 [inline] sock_def_write_space+0x642/0x670 net/core/sock.c:2813 sock_wfree+0x1e1/0x260 net/core/sock.c:1958 skb_release_head_state+0xeb/0x260 net/core/skbuff.c:652 skb_release_all+0x16/0x60 net/core/skbuff.c:663 __kfree_skb net/core/skbuff.c:679 [inline] consume_skb net/core/skbuff.c:838 [inline] consume_skb+0xfb/0x410 net/core/skbuff.c:832 __dev_kfree_skb_any+0xa4/0xd0 net/core/dev.c:2967 dev_kfree_skb_any include/linux/netdevice.h:3650 [inline] e1000_unmap_and_free_tx_resource.isra.0+0x21b/0x3a0 drivers/net/ethernet/intel/e1000/e1000_main.c:1963 e1000_clean_tx_irq drivers/net/ethernet/intel/e1000/e1000_main.c:3854 [inline] e1000_clean+0x4cc/0x1d10 drivers/net/ethernet/intel/e1000/e1000_main.c:3796 napi_poll net/core/dev.c:6532 [inline] net_rx_action+0x508/0x1120 net/core/dev.c:6600 __do_softirq+0x262/0x98c kernel/softirq.c:292 run_ksoftirqd kernel/softirq.c:603 [inline] run_ksoftirqd+0x8e/0x110 kernel/softirq.c:595 smpboot_thread_fn+0x6a3/0xa40 kernel/smpboot.c:165 kthread+0x361/0x430 kernel/kthread.c:255 ret_from_fork+0x24/0x30 arch/x86/entry/entry_64.S:352 Allocated by task 8247: save_stack+0x23/0x90 mm/kasan/common.c:72 set_track mm/kasan/common.c:80 [inline] __kasan_kmalloc mm/kasan/common.c:513 [inline] __kasan_kmalloc.constprop.0+0xcf/0xe0 mm/kasan/common.c:486 kasan_slab_alloc+0xf/0x20 mm/kasan/common.c:521 slab_post_alloc_hook mm/slab.h:584 [inline] slab_alloc mm/slab.c:3320 [inline] kmem_cache_alloc+0x121/0x710 mm/slab.c:3484 sock_alloc_inode+0x1c/0x1d0 net/socket.c:240 alloc_inode+0x68/0x1e0 fs/inode.c:230 new_inode_pseudo+0x19/0xf0 fs/inode.c:919 sock_alloc+0x41/0x270 net/socket.c:560 __sock_create+0xc2/0x730 net/socket.c:1384 sock_create net/socket.c:1471 [inline] __sys_socket+0x103/0x220 net/socket.c:1513 __do_sys_socket net/socket.c:1522 [inline] __se_sys_socket net/socket.c:1520 [inline] __ia32_sys_socket+0x73/0xb0 net/socket.c:1520 do_syscall_32_irqs_on arch/x86/entry/common.c:337 [inline] do_fast_syscall_32+0x27b/0xe16 arch/x86/entry/common.c:408 entry_SYSENTER_compat+0x70/0x7f arch/x86/entry/entry_64_compat.S:139 Freed by task 17: save_stack+0x23/0x90 mm/kasan/common.c:72 set_track mm/kasan/common.c:80 [inline] kasan_set_free_info mm/kasan/common.c:335 [inline] __kasan_slab_free+0x102/0x150 mm/kasan/common.c:474 kasan_slab_free+0xe/0x10 mm/kasan/common.c:483 __cache_free mm/slab.c:3426 [inline] kmem_cache_free+0x86/0x320 mm/slab.c:3694 sock_free_inode+0x20/0x30 net/socket.c:261 i_callback+0x44/0x80 fs/inode.c:219 __rcu_reclaim kernel/rcu/rcu.h:222 [inline] rcu_do_batch kernel/rcu/tree.c:2183 [inline] rcu_core+0x570/0x1540 kernel/rcu/tree.c:2408 rcu_core_si+0x9/0x10 kernel/rcu/tree.c:2417 __do_softirq+0x262/0x98c kernel/softirq.c:292 The buggy address belongs to the object at ffff88801e0b4000 which belongs to the cache sock_inode_cache of size 1152 The buggy address is located 120 bytes inside of 1152-byte region [ffff88801e0b4000, ffff88801e0b4480) The buggy address belongs to the page: page:ffffea0000782d00 refcount:1 mapcount:0 mapping:ffff88807aa59c40 index:0xffff88801e0b4ffd raw: 00fffe0000000200 ffffea00008e6c88 ffffea0000782d48 ffff88807aa59c40 raw: ffff88801e0b4ffd ffff88801e0b4000 0000000100000003 0000000000000000 page dumped because: kasan: bad access detected Memory state around the buggy address: ffff88801e0b3f00: fb fb fb fb fb fb fb fb fb fb fb fb fc fc fc fc ffff88801e0b3f80: fc fc fc fc fc fc fc fc fc fc fc fc fc fc fc fc >ffff88801e0b4000: fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb ^ ffff88801e0b4080: fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb ffff88801e0b4100: fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb Fixes: abf9d537fea2 ("llc: add support for SO_BINDTODEVICE") Signed-off-by: Eric Dumazet Signed-off-by: David S. Miller --- net/llc/af_llc.c | 10 +++++++--- 1 file changed, 7 insertions(+), 3 deletions(-) diff --git a/net/llc/af_llc.c b/net/llc/af_llc.c index 54fb8d452a7b..6e53e43c1907 100644 --- a/net/llc/af_llc.c +++ b/net/llc/af_llc.c @@ -273,6 +273,10 @@ static int llc_ui_autobind(struct socket *sock, struct sockaddr_llc *addr) if (!sock_flag(sk, SOCK_ZAPPED)) goto out; + if (!addr->sllc_arphrd) + addr->sllc_arphrd = ARPHRD_ETHER; + if (addr->sllc_arphrd != ARPHRD_ETHER) + goto out; rc = -ENODEV; if (sk->sk_bound_dev_if) { llc->dev = dev_get_by_index(&init_net, sk->sk_bound_dev_if); @@ -328,7 +332,9 @@ static int llc_ui_bind(struct socket *sock, struct sockaddr *uaddr, int addrlen) if (unlikely(!sock_flag(sk, SOCK_ZAPPED) || addrlen != sizeof(*addr))) goto out; rc = -EAFNOSUPPORT; - if (unlikely(addr->sllc_family != AF_LLC)) + if (!addr->sllc_arphrd) + addr->sllc_arphrd = ARPHRD_ETHER; + if (unlikely(addr->sllc_family != AF_LLC || addr->sllc_arphrd != ARPHRD_ETHER)) goto out; dprintk("%s: binding %02X\n", __func__, addr->sllc_sap); rc = -ENODEV; @@ -336,8 +342,6 @@ static int llc_ui_bind(struct socket *sock, struct sockaddr *uaddr, int addrlen) if (sk->sk_bound_dev_if) { llc->dev = dev_get_by_index_rcu(&init_net, sk->sk_bound_dev_if); if (llc->dev) { - if (!addr->sllc_arphrd) - addr->sllc_arphrd = llc->dev->type; if (is_zero_ether_addr(addr->sllc_mac)) memcpy(addr->sllc_mac, llc->dev->dev_addr, IFHWADDRLEN); From 71cdec4fab76667dabdbb2ca232b039004ebd40f Mon Sep 17 00:00:00 2001 From: David Rientjes Date: Sun, 21 Jun 2020 13:43:02 -0700 Subject: [PATCH 061/343] dma-mapping: warn when coherent pool is depleted When a DMA coherent pool is depleted, allocation failures may or may not get reported in the kernel log depending on the allocator. The admin does have a workaround, however, by using coherent_pool= on the kernel command line. Provide some guidance on the failure and a recommended minimum size for the pools (double the size). Signed-off-by: David Rientjes Tested-by: Guenter Roeck Signed-off-by: Christoph Hellwig --- kernel/dma/pool.c | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/kernel/dma/pool.c b/kernel/dma/pool.c index 8cfa01243ed2..39ca26fa41b5 100644 --- a/kernel/dma/pool.c +++ b/kernel/dma/pool.c @@ -239,12 +239,16 @@ void *dma_alloc_from_pool(struct device *dev, size_t size, } val = gen_pool_alloc(pool, size); - if (val) { + if (likely(val)) { phys_addr_t phys = gen_pool_virt_to_phys(pool, val); *ret_page = pfn_to_page(__phys_to_pfn(phys)); ptr = (void *)val; memset(ptr, 0, size); + } else { + WARN_ONCE(1, "DMA coherent pool depleted, increase size " + "(recommended min coherent_pool=%zuK)\n", + gen_pool_size(pool) >> 9); } if (gen_pool_avail(pool) < atomic_pool_size) schedule_work(&atomic_pool_work); From 3047766bc6ec9c6bc9ece85b45a41ff401e8d988 Mon Sep 17 00:00:00 2001 From: Niklas Schnelle Date: Thu, 18 Jun 2020 17:16:27 +0200 Subject: [PATCH 062/343] s390/pci: fix enabling a reserved PCI function In usual IPL or hot plug scenarios a zPCI function transitions directly from reserved (invisible to Linux) to configured state or is configured by Linux itself using an SCLP, however it can also first go from reserved to standby and then from standby to configured without Linux initiative. In this scenario we first get a PEC event 0x302 and then 0x301. This may happen for example when the device is deconfigured at another LPAR and made available for this LPAR. It may also happen under z/VM when a device is attached while in some inconsistent state. However when we get the 0x301 the device is already known to zPCI so calling zpci_create() will add it twice resulting in the below BUG. Instead we should only enable the existing device and finally scan it through the PCI subsystem. list_add double add: new=00000000ed5a9008, prev=00000000ed5a9008, next=0000000083502300. kernel BUG at lib/list_debug.c:31! Krnl PSW : 0704c00180000000 0000000082dc2db8 (__list_add_valid+0x70/0xa8) Call Trace: [<0000000082dc2db8>] __list_add_valid+0x70/0xa8 ([<0000000082dc2db4>] __list_add_valid+0x6c/0xa8) [<00000000828ea920>] zpci_create_device+0x60/0x1b0 [<00000000828ef04a>] zpci_event_availability+0x282/0x2f0 [<000000008315f848>] chsc_process_crw+0x2b8/0xa18 [<000000008316735c>] crw_collect_info+0x254/0x348 [<00000000829226ea>] kthread+0x14a/0x168 [<000000008319d5c0>] ret_from_fork+0x24/0x2c Fixes: f606b3ef47c9 ("s390/pci: adapt events for zbus") Reported-by: Alexander Egorenkov Tested-by: Alexander Egorenkov Signed-off-by: Niklas Schnelle Signed-off-by: Heiko Carstens --- arch/s390/pci/pci_event.c | 13 ++++++++++++- 1 file changed, 12 insertions(+), 1 deletion(-) diff --git a/arch/s390/pci/pci_event.c b/arch/s390/pci/pci_event.c index 08e1d619398e..fdebd286f402 100644 --- a/arch/s390/pci/pci_event.c +++ b/arch/s390/pci/pci_event.c @@ -94,7 +94,18 @@ static void __zpci_event_availability(struct zpci_ccdf_avail *ccdf) } zdev->fh = ccdf->fh; zdev->state = ZPCI_FN_STATE_CONFIGURED; - zpci_create_device(zdev); + ret = zpci_enable_device(zdev); + if (ret) + break; + + pdev = pci_scan_single_device(zdev->zbus->bus, zdev->devfn); + if (!pdev) + break; + + pci_bus_add_device(pdev); + pci_lock_rescan_remove(); + pci_bus_add_devices(zdev->zbus->bus); + pci_unlock_rescan_remove(); break; case 0x0302: /* Reserved -> Standby */ if (!zdev) { From d6df52e9996dcc2062c3d9c9123288468bb95b52 Mon Sep 17 00:00:00 2001 From: Vasily Gorbik Date: Wed, 24 Jun 2020 17:39:14 +0200 Subject: [PATCH 063/343] s390/maccess: add no DAT mode to kernel_write To be able to patch kernel code before paging is initialized do plain memcpy if DAT is off. This is required to enable early jump label initialization. Reviewed-by: Heiko Carstens Signed-off-by: Vasily Gorbik Signed-off-by: Heiko Carstens --- arch/s390/mm/maccess.c | 14 +++++++++----- 1 file changed, 9 insertions(+), 5 deletions(-) diff --git a/arch/s390/mm/maccess.c b/arch/s390/mm/maccess.c index 22a0be655f27..1d17413b319a 100644 --- a/arch/s390/mm/maccess.c +++ b/arch/s390/mm/maccess.c @@ -62,11 +62,15 @@ notrace void *s390_kernel_write(void *dst, const void *src, size_t size) long copied; spin_lock_irqsave(&s390_kernel_write_lock, flags); - while (size) { - copied = s390_kernel_write_odd(tmp, src, size); - tmp += copied; - src += copied; - size -= copied; + if (!(flags & PSW_MASK_DAT)) { + memcpy(dst, src, size); + } else { + while (size) { + copied = s390_kernel_write_odd(tmp, src, size); + tmp += copied; + src += copied; + size -= copied; + } } spin_unlock_irqrestore(&s390_kernel_write_lock, flags); From 95e61b1b5d6394b53d147c0fcbe2ae70fbe09446 Mon Sep 17 00:00:00 2001 From: Vasily Gorbik Date: Thu, 18 Jun 2020 17:17:19 +0200 Subject: [PATCH 064/343] s390/setup: init jump labels before command line parsing Command line parameters might set static keys. This is true for s390 at least since commit 6471384af2a6 ("mm: security: introduce init_on_alloc=1 and init_on_free=1 boot options"). To avoid the following WARN: static_key_enable_cpuslocked(): static key 'init_on_alloc+0x0/0x40' used before call to jump_label_init() call jump_label_init() just before parse_early_param(). jump_label_init() is safe to call multiple times (x86 does that), doesn't do any memory allocations and hence should be safe to call that early. Fixes: 6471384af2a6 ("mm: security: introduce init_on_alloc=1 and init_on_free=1 boot options") Cc: # 5.3: d6df52e9996d: s390/maccess: add no DAT mode to kernel_write Cc: # 5.3 Reviewed-by: Heiko Carstens Signed-off-by: Vasily Gorbik Signed-off-by: Heiko Carstens --- arch/s390/kernel/setup.c | 1 + 1 file changed, 1 insertion(+) diff --git a/arch/s390/kernel/setup.c b/arch/s390/kernel/setup.c index 5853c9872dfe..07aa15ba43b3 100644 --- a/arch/s390/kernel/setup.c +++ b/arch/s390/kernel/setup.c @@ -1100,6 +1100,7 @@ void __init setup_arch(char **cmdline_p) if (IS_ENABLED(CONFIG_EXPOLINE_AUTO)) nospec_auto_detect(); + jump_label_init(); parse_early_param(); #ifdef CONFIG_CRASH_DUMP /* Deactivate elfcorehdr= kernel parameter */ From 9d3c447c72fb2337ca39f245c6ae89f2369de216 Mon Sep 17 00:00:00 2001 From: Wanpeng Li Date: Mon, 29 Jun 2020 18:26:31 +0800 Subject: [PATCH 065/343] KVM: X86: Fix async pf caused null-ptr-deref Syzbot reported that: CPU: 1 PID: 6780 Comm: syz-executor153 Not tainted 5.7.0-syzkaller #0 Hardware name: Google Google Compute Engine/Google Compute Engine, BIOS Google 01/01/2011 RIP: 0010:__apic_accept_irq+0x46/0xb80 Call Trace: kvm_arch_async_page_present+0x7de/0x9e0 kvm_check_async_pf_completion+0x18d/0x400 kvm_arch_vcpu_ioctl_run+0x18bf/0x69f0 kvm_vcpu_ioctl+0x46a/0xe20 ksys_ioctl+0x11a/0x180 __x64_sys_ioctl+0x6f/0xb0 do_syscall_64+0xf6/0x7d0 entry_SYSCALL_64_after_hwframe+0x49/0xb3 The testcase enables APF mechanism in MSR_KVM_ASYNC_PF_EN with ASYNC_PF_INT enabled w/o setting MSR_KVM_ASYNC_PF_INT before, what's worse, interrupt based APF 'page ready' event delivery depends on in kernel lapic, however, we didn't bail out when lapic is not in kernel during guest setting MSR_KVM_ASYNC_PF_EN which causes the null-ptr-deref in host later. This patch fixes it. Reported-by: syzbot+1bf777dfdde86d64b89b@syzkaller.appspotmail.com Fixes: 2635b5c4a0 (KVM: x86: interrupt based APF 'page ready' event delivery) Signed-off-by: Wanpeng Li Message-Id: <1593426391-8231-1-git-send-email-wanpengli@tencent.com> Signed-off-by: Paolo Bonzini --- arch/x86/kvm/x86.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index 3b92db412335..a026d926072c 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -2693,6 +2693,9 @@ static int kvm_pv_enable_async_pf(struct kvm_vcpu *vcpu, u64 data) if (data & 0x30) return 1; + if (!lapic_in_kernel(vcpu)) + return 1; + vcpu->arch.apf.msr_en_val = data; if (!kvm_pv_async_pf_enabled(vcpu)) { From bf64ff4c2aac65d680dc639a511c781cf6b6ec08 Mon Sep 17 00:00:00 2001 From: Cong Wang Date: Sat, 27 Jun 2020 00:12:24 -0700 Subject: [PATCH 066/343] genetlink: get rid of family->attrbuf genl_family_rcv_msg_attrs_parse() reuses the global family->attrbuf when family->parallel_ops is false. However, family->attrbuf is not protected by any lock on the genl_family_rcv_msg_doit() code path. This leads to several different consequences, one of them is UAF, like the following: genl_family_rcv_msg_doit(): genl_start(): genl_family_rcv_msg_attrs_parse() attrbuf = family->attrbuf __nlmsg_parse(attrbuf); genl_family_rcv_msg_attrs_parse() attrbuf = family->attrbuf __nlmsg_parse(attrbuf); info->attrs = attrs; cb->data = info; netlink_unicast_kernel(): consume_skb() genl_lock_dumpit(): genl_dumpit_info(cb)->attrs Note family->attrbuf is an array of pointers to the skb data, once the skb is freed, any dereference of family->attrbuf will be a UAF. Maybe we could serialize the family->attrbuf with genl_mutex too, but that would make the locking more complicated. Instead, we can just get rid of family->attrbuf and always allocate attrbuf from heap like the family->parallel_ops==true code path. This may add some performance overhead but comparing with taking the global genl_mutex, it still looks better. Fixes: 75cdbdd08900 ("net: ieee802154: have genetlink code to parse the attrs during dumpit") Fixes: 057af7071344 ("net: tipc: have genetlink code to parse the attrs during dumpit") Reported-and-tested-by: syzbot+3039ddf6d7b13daf3787@syzkaller.appspotmail.com Reported-and-tested-by: syzbot+80cad1e3cb4c41cde6ff@syzkaller.appspotmail.com Reported-and-tested-by: syzbot+736bcbcb11b60d0c0792@syzkaller.appspotmail.com Reported-and-tested-by: syzbot+520f8704db2b68091d44@syzkaller.appspotmail.com Reported-and-tested-by: syzbot+c96e4dfb32f8987fdeed@syzkaller.appspotmail.com Cc: Jiri Pirko Signed-off-by: Cong Wang Signed-off-by: David S. Miller --- include/net/genetlink.h | 2 -- net/netlink/genetlink.c | 48 +++++++++++------------------------------ 2 files changed, 13 insertions(+), 37 deletions(-) diff --git a/include/net/genetlink.h b/include/net/genetlink.h index 74950663bb00..ad71ed4f55ff 100644 --- a/include/net/genetlink.h +++ b/include/net/genetlink.h @@ -41,7 +41,6 @@ struct genl_info; * Note that unbind() will not be called symmetrically if the * generic netlink family is removed while there are still open * sockets. - * @attrbuf: buffer to store parsed attributes (private) * @mcgrps: multicast groups used by this family * @n_mcgrps: number of multicast groups * @mcgrp_offset: starting number of multicast group IDs in this family @@ -66,7 +65,6 @@ struct genl_family { struct genl_info *info); int (*mcast_bind)(struct net *net, int group); void (*mcast_unbind)(struct net *net, int group); - struct nlattr ** attrbuf; /* private */ const struct genl_ops * ops; const struct genl_multicast_group *mcgrps; unsigned int n_ops; diff --git a/net/netlink/genetlink.c b/net/netlink/genetlink.c index 55ee680e9db1..a914b9365a46 100644 --- a/net/netlink/genetlink.c +++ b/net/netlink/genetlink.c @@ -351,22 +351,11 @@ int genl_register_family(struct genl_family *family) start = end = GENL_ID_VFS_DQUOT; } - if (family->maxattr && !family->parallel_ops) { - family->attrbuf = kmalloc_array(family->maxattr + 1, - sizeof(struct nlattr *), - GFP_KERNEL); - if (family->attrbuf == NULL) { - err = -ENOMEM; - goto errout_locked; - } - } else - family->attrbuf = NULL; - family->id = idr_alloc_cyclic(&genl_fam_idr, family, start, end + 1, GFP_KERNEL); if (family->id < 0) { err = family->id; - goto errout_free; + goto errout_locked; } err = genl_validate_assign_mc_groups(family); @@ -385,8 +374,6 @@ int genl_register_family(struct genl_family *family) errout_remove: idr_remove(&genl_fam_idr, family->id); -errout_free: - kfree(family->attrbuf); errout_locked: genl_unlock_all(); return err; @@ -419,8 +406,6 @@ int genl_unregister_family(const struct genl_family *family) atomic_read(&genl_sk_destructing_cnt) == 0); genl_unlock(); - kfree(family->attrbuf); - genl_ctrl_event(CTRL_CMD_DELFAMILY, family, NULL, 0); return 0; @@ -485,30 +470,23 @@ genl_family_rcv_msg_attrs_parse(const struct genl_family *family, if (!family->maxattr) return NULL; - if (family->parallel_ops) { - attrbuf = kmalloc_array(family->maxattr + 1, - sizeof(struct nlattr *), GFP_KERNEL); - if (!attrbuf) - return ERR_PTR(-ENOMEM); - } else { - attrbuf = family->attrbuf; - } + attrbuf = kmalloc_array(family->maxattr + 1, + sizeof(struct nlattr *), GFP_KERNEL); + if (!attrbuf) + return ERR_PTR(-ENOMEM); err = __nlmsg_parse(nlh, hdrlen, attrbuf, family->maxattr, family->policy, validate, extack); if (err) { - if (family->parallel_ops) - kfree(attrbuf); + kfree(attrbuf); return ERR_PTR(err); } return attrbuf; } -static void genl_family_rcv_msg_attrs_free(const struct genl_family *family, - struct nlattr **attrbuf) +static void genl_family_rcv_msg_attrs_free(struct nlattr **attrbuf) { - if (family->parallel_ops) - kfree(attrbuf); + kfree(attrbuf); } struct genl_start_context { @@ -542,7 +520,7 @@ static int genl_start(struct netlink_callback *cb) no_attrs: info = genl_dumpit_info_alloc(); if (!info) { - genl_family_rcv_msg_attrs_free(ctx->family, attrs); + genl_family_rcv_msg_attrs_free(attrs); return -ENOMEM; } info->family = ctx->family; @@ -559,7 +537,7 @@ static int genl_start(struct netlink_callback *cb) } if (rc) { - genl_family_rcv_msg_attrs_free(info->family, info->attrs); + genl_family_rcv_msg_attrs_free(info->attrs); genl_dumpit_info_free(info); cb->data = NULL; } @@ -588,7 +566,7 @@ static int genl_lock_done(struct netlink_callback *cb) rc = ops->done(cb); genl_unlock(); } - genl_family_rcv_msg_attrs_free(info->family, info->attrs); + genl_family_rcv_msg_attrs_free(info->attrs); genl_dumpit_info_free(info); return rc; } @@ -601,7 +579,7 @@ static int genl_parallel_done(struct netlink_callback *cb) if (ops->done) rc = ops->done(cb); - genl_family_rcv_msg_attrs_free(info->family, info->attrs); + genl_family_rcv_msg_attrs_free(info->attrs); genl_dumpit_info_free(info); return rc; } @@ -694,7 +672,7 @@ static int genl_family_rcv_msg_doit(const struct genl_family *family, family->post_doit(ops, skb, &info); out: - genl_family_rcv_msg_attrs_free(family, attrbuf); + genl_family_rcv_msg_attrs_free(attrbuf); return err; } From 5aee52c44d9170591df65fafa1cd408acc1225ce Mon Sep 17 00:00:00 2001 From: Tom Rix Date: Sat, 27 Jun 2020 06:32:42 -0700 Subject: [PATCH 067/343] scsi: scsi_transport_spi: Fix function pointer check clang static analysis flags several null function pointer problems. drivers/scsi/scsi_transport_spi.c:374:1: warning: Called function pointer is null (null dereference) [core.CallAndMessage] spi_transport_max_attr(offset, "%d\n"); ^~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ Reviewing the store_spi_store_max macro if (i->f->set_##field) return -EINVAL; should be if (!i->f->set_##field) return -EINVAL; Link: https://lore.kernel.org/r/20200627133242.21618-1-trix@redhat.com Reviewed-by: James Bottomley Signed-off-by: Tom Rix Signed-off-by: Martin K. Petersen --- drivers/scsi/scsi_transport_spi.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/scsi/scsi_transport_spi.c b/drivers/scsi/scsi_transport_spi.c index f8661062ef95..f3d5b1bbd5aa 100644 --- a/drivers/scsi/scsi_transport_spi.c +++ b/drivers/scsi/scsi_transport_spi.c @@ -339,7 +339,7 @@ store_spi_transport_##field(struct device *dev, \ struct spi_transport_attrs *tp \ = (struct spi_transport_attrs *)&starget->starget_data; \ \ - if (i->f->set_##field) \ + if (!i->f->set_##field) \ return -EINVAL; \ val = simple_strtoul(buf, NULL, 0); \ if (val > tp->max_##field) \ From 5ecad245de2ae23dc4e2dbece92f8ccfbaed2fa7 Mon Sep 17 00:00:00 2001 From: Paolo Bonzini Date: Tue, 30 Jun 2020 07:07:20 -0400 Subject: [PATCH 068/343] KVM: x86: bit 8 of non-leaf PDPEs is not reserved Bit 8 would be the "global" bit, which does not quite make sense for non-leaf page table entries. Intel ignores it; AMD ignores it in PDEs and PDPEs, but reserves it in PML4Es. Probably, earlier versions of the AMD manual documented it as reserved in PDPEs as well, and that behavior made it into KVM as well as kvm-unit-tests; fix it. Cc: stable@vger.kernel.org Reported-by: Nadav Amit Fixes: a0c0feb57992 ("KVM: x86: reserve bit 8 of non-leaf PDPEs and PML4Es in 64-bit mode on AMD", 2014-09-03) Signed-off-by: Paolo Bonzini --- arch/x86/kvm/mmu/mmu.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/x86/kvm/mmu/mmu.c b/arch/x86/kvm/mmu/mmu.c index 76817d13c86e..6d6a0ae7800c 100644 --- a/arch/x86/kvm/mmu/mmu.c +++ b/arch/x86/kvm/mmu/mmu.c @@ -4449,7 +4449,7 @@ __reset_rsvds_bits_mask(struct kvm_vcpu *vcpu, nonleaf_bit8_rsvd | rsvd_bits(7, 7) | rsvd_bits(maxphyaddr, 51); rsvd_check->rsvd_bits_mask[0][2] = exb_bit_rsvd | - nonleaf_bit8_rsvd | gbpages_bit_rsvd | + gbpages_bit_rsvd | rsvd_bits(maxphyaddr, 51); rsvd_check->rsvd_bits_mask[0][1] = exb_bit_rsvd | rsvd_bits(maxphyaddr, 51); From 3aa91625007807bfca4155df1867a5c924a08662 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Mon, 29 Jun 2020 15:03:56 +0200 Subject: [PATCH 069/343] dma-mapping: Add a new dma_need_sync API Add a new API to check if calls to dma_sync_single_for_{device,cpu} are required for a given DMA streaming mapping. Signed-off-by: Christoph Hellwig Signed-off-by: Daniel Borkmann Link: https://lore.kernel.org/bpf/20200629130359.2690853-2-hch@lst.de --- Documentation/core-api/dma-api.rst | 8 ++++++++ include/linux/dma-direct.h | 1 + include/linux/dma-mapping.h | 5 +++++ kernel/dma/direct.c | 6 ++++++ kernel/dma/mapping.c | 10 ++++++++++ 5 files changed, 30 insertions(+) diff --git a/Documentation/core-api/dma-api.rst b/Documentation/core-api/dma-api.rst index 2d8d2fed7317..f41620439ef3 100644 --- a/Documentation/core-api/dma-api.rst +++ b/Documentation/core-api/dma-api.rst @@ -204,6 +204,14 @@ Returns the maximum size of a mapping for the device. The size parameter of the mapping functions like dma_map_single(), dma_map_page() and others should not be larger than the returned value. +:: + + bool + dma_need_sync(struct device *dev, dma_addr_t dma_addr); + +Returns %true if dma_sync_single_for_{device,cpu} calls are required to +transfer memory ownership. Returns %false if those calls can be skipped. + :: unsigned long diff --git a/include/linux/dma-direct.h b/include/linux/dma-direct.h index 136f984df0d9..8b006730687b 100644 --- a/include/linux/dma-direct.h +++ b/include/linux/dma-direct.h @@ -87,4 +87,5 @@ int dma_direct_mmap(struct device *dev, struct vm_area_struct *vma, void *cpu_addr, dma_addr_t dma_addr, size_t size, unsigned long attrs); int dma_direct_supported(struct device *dev, u64 mask); +bool dma_direct_need_sync(struct device *dev, dma_addr_t dma_addr); #endif /* _LINUX_DMA_DIRECT_H */ diff --git a/include/linux/dma-mapping.h b/include/linux/dma-mapping.h index 78f677cf45ab..a33ed3954ed4 100644 --- a/include/linux/dma-mapping.h +++ b/include/linux/dma-mapping.h @@ -461,6 +461,7 @@ int dma_set_mask(struct device *dev, u64 mask); int dma_set_coherent_mask(struct device *dev, u64 mask); u64 dma_get_required_mask(struct device *dev); size_t dma_max_mapping_size(struct device *dev); +bool dma_need_sync(struct device *dev, dma_addr_t dma_addr); unsigned long dma_get_merge_boundary(struct device *dev); #else /* CONFIG_HAS_DMA */ static inline dma_addr_t dma_map_page_attrs(struct device *dev, @@ -571,6 +572,10 @@ static inline size_t dma_max_mapping_size(struct device *dev) { return 0; } +static inline bool dma_need_sync(struct device *dev, dma_addr_t dma_addr) +{ + return false; +} static inline unsigned long dma_get_merge_boundary(struct device *dev) { return 0; diff --git a/kernel/dma/direct.c b/kernel/dma/direct.c index 0a4881e59aa7..ecb922a0bfa0 100644 --- a/kernel/dma/direct.c +++ b/kernel/dma/direct.c @@ -530,3 +530,9 @@ size_t dma_direct_max_mapping_size(struct device *dev) return swiotlb_max_mapping_size(dev); return SIZE_MAX; } + +bool dma_direct_need_sync(struct device *dev, dma_addr_t dma_addr) +{ + return !dev_is_dma_coherent(dev) || + is_swiotlb_buffer(dma_to_phys(dev, dma_addr)); +} diff --git a/kernel/dma/mapping.c b/kernel/dma/mapping.c index 98e3d873792e..a8c18c9a796f 100644 --- a/kernel/dma/mapping.c +++ b/kernel/dma/mapping.c @@ -397,6 +397,16 @@ size_t dma_max_mapping_size(struct device *dev) } EXPORT_SYMBOL_GPL(dma_max_mapping_size); +bool dma_need_sync(struct device *dev, dma_addr_t dma_addr) +{ + const struct dma_map_ops *ops = get_dma_ops(dev); + + if (dma_is_direct(ops)) + return dma_direct_need_sync(dev, dma_addr); + return ops->sync_single_for_cpu || ops->sync_single_for_device; +} +EXPORT_SYMBOL_GPL(dma_need_sync); + unsigned long dma_get_merge_boundary(struct device *dev) { const struct dma_map_ops *ops = get_dma_ops(dev); From 91d5b70273267bbae6f5d1fb4cf3510bd31ef9ff Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Mon, 29 Jun 2020 15:03:57 +0200 Subject: [PATCH 070/343] xsk: Replace the cheap_dma flag with a dma_need_sync flag Invert the polarity and better name the flag so that the use case is properly documented. Signed-off-by: Christoph Hellwig Signed-off-by: Daniel Borkmann Link: https://lore.kernel.org/bpf/20200629130359.2690853-3-hch@lst.de --- include/net/xsk_buff_pool.h | 6 +++--- net/xdp/xsk_buff_pool.c | 5 ++--- 2 files changed, 5 insertions(+), 6 deletions(-) diff --git a/include/net/xsk_buff_pool.h b/include/net/xsk_buff_pool.h index a4ff226505c9..6842990e2712 100644 --- a/include/net/xsk_buff_pool.h +++ b/include/net/xsk_buff_pool.h @@ -40,7 +40,7 @@ struct xsk_buff_pool { u32 headroom; u32 chunk_size; u32 frame_len; - bool cheap_dma; + bool dma_need_sync; bool unaligned; void *addrs; struct device *dev; @@ -80,7 +80,7 @@ static inline dma_addr_t xp_get_frame_dma(struct xdp_buff_xsk *xskb) void xp_dma_sync_for_cpu_slow(struct xdp_buff_xsk *xskb); static inline void xp_dma_sync_for_cpu(struct xdp_buff_xsk *xskb) { - if (xskb->pool->cheap_dma) + if (!xskb->pool->dma_need_sync) return; xp_dma_sync_for_cpu_slow(xskb); @@ -91,7 +91,7 @@ void xp_dma_sync_for_device_slow(struct xsk_buff_pool *pool, dma_addr_t dma, static inline void xp_dma_sync_for_device(struct xsk_buff_pool *pool, dma_addr_t dma, size_t size) { - if (pool->cheap_dma) + if (!pool->dma_need_sync) return; xp_dma_sync_for_device_slow(pool, dma, size); diff --git a/net/xdp/xsk_buff_pool.c b/net/xdp/xsk_buff_pool.c index 540ed75e4482..9fe84c797a70 100644 --- a/net/xdp/xsk_buff_pool.c +++ b/net/xdp/xsk_buff_pool.c @@ -55,7 +55,6 @@ struct xsk_buff_pool *xp_create(struct page **pages, u32 nr_pages, u32 chunks, pool->free_heads_cnt = chunks; pool->headroom = headroom; pool->chunk_size = chunk_size; - pool->cheap_dma = true; pool->unaligned = unaligned; pool->frame_len = chunk_size - headroom - XDP_PACKET_HEADROOM; INIT_LIST_HEAD(&pool->free_list); @@ -195,7 +194,7 @@ int xp_dma_map(struct xsk_buff_pool *pool, struct device *dev, xp_check_dma_contiguity(pool); pool->dev = dev; - pool->cheap_dma = xp_check_cheap_dma(pool); + pool->dma_need_sync = !xp_check_cheap_dma(pool); return 0; } EXPORT_SYMBOL(xp_dma_map); @@ -280,7 +279,7 @@ struct xdp_buff *xp_alloc(struct xsk_buff_pool *pool) xskb->xdp.data = xskb->xdp.data_hard_start + XDP_PACKET_HEADROOM; xskb->xdp.data_meta = xskb->xdp.data; - if (!pool->cheap_dma) { + if (pool->dma_need_sync) { dma_sync_single_range_for_device(pool->dev, xskb->dma, 0, pool->frame_len, DMA_BIDIRECTIONAL); From 53937ff7bc776aac647d0b3004d7cd21861b0f78 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Mon, 29 Jun 2020 15:03:58 +0200 Subject: [PATCH 071/343] xsk: Remove a double pool->dev assignment in xp_dma_map ->dev is already assigned at the top of the function, remove the duplicate one at the end. Signed-off-by: Christoph Hellwig Signed-off-by: Daniel Borkmann Link: https://lore.kernel.org/bpf/20200629130359.2690853-4-hch@lst.de --- net/xdp/xsk_buff_pool.c | 1 - 1 file changed, 1 deletion(-) diff --git a/net/xdp/xsk_buff_pool.c b/net/xdp/xsk_buff_pool.c index 9fe84c797a70..6733e2c59e48 100644 --- a/net/xdp/xsk_buff_pool.c +++ b/net/xdp/xsk_buff_pool.c @@ -193,7 +193,6 @@ int xp_dma_map(struct xsk_buff_pool *pool, struct device *dev, if (pool->unaligned) xp_check_dma_contiguity(pool); - pool->dev = dev; pool->dma_need_sync = !xp_check_cheap_dma(pool); return 0; } From 7e0245753f1794f17de472dcf4694fa5ed527384 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Mon, 29 Jun 2020 15:03:59 +0200 Subject: [PATCH 072/343] xsk: Use dma_need_sync instead of reimplenting it Use the dma_need_sync helper instead of (not always entirely correctly) poking into the dma-mapping internals. Signed-off-by: Christoph Hellwig Signed-off-by: Daniel Borkmann Link: https://lore.kernel.org/bpf/20200629130359.2690853-5-hch@lst.de --- net/xdp/xsk_buff_pool.c | 50 +++-------------------------------------- 1 file changed, 3 insertions(+), 47 deletions(-) diff --git a/net/xdp/xsk_buff_pool.c b/net/xdp/xsk_buff_pool.c index 6733e2c59e48..08b80669f649 100644 --- a/net/xdp/xsk_buff_pool.c +++ b/net/xdp/xsk_buff_pool.c @@ -2,9 +2,6 @@ #include #include -#include -#include -#include #include "xsk_queue.h" @@ -124,48 +121,6 @@ static void xp_check_dma_contiguity(struct xsk_buff_pool *pool) } } -static bool __maybe_unused xp_check_swiotlb_dma(struct xsk_buff_pool *pool) -{ -#if defined(CONFIG_SWIOTLB) - phys_addr_t paddr; - u32 i; - - for (i = 0; i < pool->dma_pages_cnt; i++) { - paddr = dma_to_phys(pool->dev, pool->dma_pages[i]); - if (is_swiotlb_buffer(paddr)) - return false; - } -#endif - return true; -} - -static bool xp_check_cheap_dma(struct xsk_buff_pool *pool) -{ -#if defined(CONFIG_HAS_DMA) - const struct dma_map_ops *ops = get_dma_ops(pool->dev); - - if (ops) { - return !ops->sync_single_for_cpu && - !ops->sync_single_for_device; - } - - if (!dma_is_direct(ops)) - return false; - - if (!xp_check_swiotlb_dma(pool)) - return false; - - if (!dev_is_dma_coherent(pool->dev)) { -#if defined(CONFIG_ARCH_HAS_SYNC_DMA_FOR_CPU) || \ - defined(CONFIG_ARCH_HAS_SYNC_DMA_FOR_CPU_ALL) || \ - defined(CONFIG_ARCH_HAS_SYNC_DMA_FOR_DEVICE) - return false; -#endif - } -#endif - return true; -} - int xp_dma_map(struct xsk_buff_pool *pool, struct device *dev, unsigned long attrs, struct page **pages, u32 nr_pages) { @@ -179,6 +134,7 @@ int xp_dma_map(struct xsk_buff_pool *pool, struct device *dev, pool->dev = dev; pool->dma_pages_cnt = nr_pages; + pool->dma_need_sync = false; for (i = 0; i < pool->dma_pages_cnt; i++) { dma = dma_map_page_attrs(dev, pages[i], 0, PAGE_SIZE, @@ -187,13 +143,13 @@ int xp_dma_map(struct xsk_buff_pool *pool, struct device *dev, xp_dma_unmap(pool, attrs); return -ENOMEM; } + if (dma_need_sync(dev, dma)) + pool->dma_need_sync = true; pool->dma_pages[i] = dma; } if (pool->unaligned) xp_check_dma_contiguity(pool); - - pool->dma_need_sync = !xp_check_cheap_dma(pool); return 0; } EXPORT_SYMBOL(xp_dma_map); From 517bbe1994a3cee29a35c730662277bb5daff582 Mon Sep 17 00:00:00 2001 From: Andrii Nakryiko Date: Mon, 29 Jun 2020 23:15:00 -0700 Subject: [PATCH 073/343] bpf: Enforce BPF ringbuf size to be the power of 2 BPF ringbuf assumes the size to be a multiple of page size and the power of 2 value. The latter is important to avoid division while calculating position inside the ring buffer and using (N-1) mask instead. This patch fixes omission to enforce power-of-2 size rule. Fixes: 457f44363a88 ("bpf: Implement BPF ring buffer and verifier support for it") Signed-off-by: Andrii Nakryiko Signed-off-by: Daniel Borkmann Link: https://lore.kernel.org/bpf/20200630061500.1804799-1-andriin@fb.com --- kernel/bpf/ringbuf.c | 18 ++++++++---------- 1 file changed, 8 insertions(+), 10 deletions(-) diff --git a/kernel/bpf/ringbuf.c b/kernel/bpf/ringbuf.c index 180414bb0d3e..0af88bbc1c15 100644 --- a/kernel/bpf/ringbuf.c +++ b/kernel/bpf/ringbuf.c @@ -132,15 +132,6 @@ static struct bpf_ringbuf *bpf_ringbuf_alloc(size_t data_sz, int numa_node) { struct bpf_ringbuf *rb; - if (!data_sz || !PAGE_ALIGNED(data_sz)) - return ERR_PTR(-EINVAL); - -#ifdef CONFIG_64BIT - /* on 32-bit arch, it's impossible to overflow record's hdr->pgoff */ - if (data_sz > RINGBUF_MAX_DATA_SZ) - return ERR_PTR(-E2BIG); -#endif - rb = bpf_ringbuf_area_alloc(data_sz, numa_node); if (!rb) return ERR_PTR(-ENOMEM); @@ -166,9 +157,16 @@ static struct bpf_map *ringbuf_map_alloc(union bpf_attr *attr) return ERR_PTR(-EINVAL); if (attr->key_size || attr->value_size || - attr->max_entries == 0 || !PAGE_ALIGNED(attr->max_entries)) + !is_power_of_2(attr->max_entries) || + !PAGE_ALIGNED(attr->max_entries)) return ERR_PTR(-EINVAL); +#ifdef CONFIG_64BIT + /* on 32-bit arch, it's impossible to overflow record's hdr->pgoff */ + if (attr->max_entries > RINGBUF_MAX_DATA_SZ) + return ERR_PTR(-E2BIG); +#endif + rb_map = kzalloc(sizeof(*rb_map), GFP_USER); if (!rb_map) return ERR_PTR(-ENOMEM); From c4e8fa9074ad94f80e5c0dcaa16b313e50e958c5 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Mon, 29 Jun 2020 17:04:17 -0700 Subject: [PATCH 074/343] netfilter: ipset: call ip_set_free() instead of kfree() Whenever ip_set_alloc() is used, allocated memory can either use kmalloc() or vmalloc(). We should call kvfree() or ip_set_free() invalid opcode: 0000 [#1] PREEMPT SMP KASAN CPU: 0 PID: 21935 Comm: syz-executor.3 Not tainted 5.8.0-rc2-syzkaller #0 Hardware name: Google Google Compute Engine/Google Compute Engine, BIOS Google 01/01/2011 RIP: 0010:__phys_addr+0xa7/0x110 arch/x86/mm/physaddr.c:28 Code: 1d 7a 09 4c 89 e3 31 ff 48 d3 eb 48 89 de e8 d0 58 3f 00 48 85 db 75 0d e8 26 5c 3f 00 4c 89 e0 5b 5d 41 5c c3 e8 19 5c 3f 00 <0f> 0b e8 12 5c 3f 00 48 c7 c0 10 10 a8 89 48 ba 00 00 00 00 00 fc RSP: 0000:ffffc900018572c0 EFLAGS: 00010046 RAX: 0000000000040000 RBX: 0000000000000001 RCX: ffffc9000fac3000 RDX: 0000000000040000 RSI: ffffffff8133f437 RDI: 0000000000000007 RBP: ffffc90098aff000 R08: 0000000000000000 R09: ffff8880ae636cdb R10: 0000000000000000 R11: 0000000000000000 R12: 0000408018aff000 R13: 0000000000080000 R14: 000000000000001d R15: ffffc900018573d8 FS: 00007fc540c66700(0000) GS:ffff8880ae600000(0000) knlGS:0000000000000000 CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 CR2: 00007fc9dcd67200 CR3: 0000000059411000 CR4: 00000000001406f0 DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000 DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400 Call Trace: virt_to_head_page include/linux/mm.h:841 [inline] virt_to_cache mm/slab.h:474 [inline] kfree+0x77/0x2c0 mm/slab.c:3749 hash_net_create+0xbb2/0xd70 net/netfilter/ipset/ip_set_hash_gen.h:1536 ip_set_create+0x6a2/0x13c0 net/netfilter/ipset/ip_set_core.c:1128 nfnetlink_rcv_msg+0xbe8/0xea0 net/netfilter/nfnetlink.c:230 netlink_rcv_skb+0x15a/0x430 net/netlink/af_netlink.c:2469 nfnetlink_rcv+0x1ac/0x420 net/netfilter/nfnetlink.c:564 netlink_unicast_kernel net/netlink/af_netlink.c:1303 [inline] netlink_unicast+0x533/0x7d0 net/netlink/af_netlink.c:1329 netlink_sendmsg+0x856/0xd90 net/netlink/af_netlink.c:1918 sock_sendmsg_nosec net/socket.c:652 [inline] sock_sendmsg+0xcf/0x120 net/socket.c:672 ____sys_sendmsg+0x6e8/0x810 net/socket.c:2352 ___sys_sendmsg+0xf3/0x170 net/socket.c:2406 __sys_sendmsg+0xe5/0x1b0 net/socket.c:2439 do_syscall_64+0x60/0xe0 arch/x86/entry/common.c:359 entry_SYSCALL_64_after_hwframe+0x44/0xa9 RIP: 0033:0x45cb19 Code: Bad RIP value. RSP: 002b:00007fc540c65c78 EFLAGS: 00000246 ORIG_RAX: 000000000000002e RAX: ffffffffffffffda RBX: 00000000004fed80 RCX: 000000000045cb19 RDX: 0000000000000000 RSI: 0000000020001080 RDI: 0000000000000003 RBP: 000000000078bf00 R08: 0000000000000000 R09: 0000000000000000 R10: 0000000000000000 R11: 0000000000000246 R12: 00000000ffffffff R13: 000000000000095e R14: 00000000004cc295 R15: 00007fc540c666d4 Fixes: f66ee0410b1c ("netfilter: ipset: Fix "INFO: rcu detected stall in hash_xxx" reports") Fixes: 03c8b234e61a ("netfilter: ipset: Generalize extensions support") Signed-off-by: Eric Dumazet Reported-by: syzbot Signed-off-by: Pablo Neira Ayuso --- net/netfilter/ipset/ip_set_bitmap_ip.c | 2 +- net/netfilter/ipset/ip_set_bitmap_ipmac.c | 2 +- net/netfilter/ipset/ip_set_bitmap_port.c | 2 +- net/netfilter/ipset/ip_set_hash_gen.h | 4 ++-- 4 files changed, 5 insertions(+), 5 deletions(-) diff --git a/net/netfilter/ipset/ip_set_bitmap_ip.c b/net/netfilter/ipset/ip_set_bitmap_ip.c index 486959f70cf3..a8ce04a4bb72 100644 --- a/net/netfilter/ipset/ip_set_bitmap_ip.c +++ b/net/netfilter/ipset/ip_set_bitmap_ip.c @@ -326,7 +326,7 @@ bitmap_ip_create(struct net *net, struct ip_set *set, struct nlattr *tb[], set->variant = &bitmap_ip; if (!init_map_ip(set, map, first_ip, last_ip, elements, hosts, netmask)) { - kfree(map); + ip_set_free(map); return -ENOMEM; } if (tb[IPSET_ATTR_TIMEOUT]) { diff --git a/net/netfilter/ipset/ip_set_bitmap_ipmac.c b/net/netfilter/ipset/ip_set_bitmap_ipmac.c index 2310a316e0af..2c625e0f49ec 100644 --- a/net/netfilter/ipset/ip_set_bitmap_ipmac.c +++ b/net/netfilter/ipset/ip_set_bitmap_ipmac.c @@ -363,7 +363,7 @@ bitmap_ipmac_create(struct net *net, struct ip_set *set, struct nlattr *tb[], map->memsize = BITS_TO_LONGS(elements) * sizeof(unsigned long); set->variant = &bitmap_ipmac; if (!init_map_ipmac(set, map, first_ip, last_ip, elements)) { - kfree(map); + ip_set_free(map); return -ENOMEM; } if (tb[IPSET_ATTR_TIMEOUT]) { diff --git a/net/netfilter/ipset/ip_set_bitmap_port.c b/net/netfilter/ipset/ip_set_bitmap_port.c index e56ced66f202..7138e080def4 100644 --- a/net/netfilter/ipset/ip_set_bitmap_port.c +++ b/net/netfilter/ipset/ip_set_bitmap_port.c @@ -274,7 +274,7 @@ bitmap_port_create(struct net *net, struct ip_set *set, struct nlattr *tb[], map->memsize = BITS_TO_LONGS(elements) * sizeof(unsigned long); set->variant = &bitmap_port; if (!init_map_port(set, map, first_port, last_port)) { - kfree(map); + ip_set_free(map); return -ENOMEM; } if (tb[IPSET_ATTR_TIMEOUT]) { diff --git a/net/netfilter/ipset/ip_set_hash_gen.h b/net/netfilter/ipset/ip_set_hash_gen.h index 1ee43752d6d3..521e970be402 100644 --- a/net/netfilter/ipset/ip_set_hash_gen.h +++ b/net/netfilter/ipset/ip_set_hash_gen.h @@ -682,7 +682,7 @@ mtype_resize(struct ip_set *set, bool retried) } t->hregion = ip_set_alloc(ahash_sizeof_regions(htable_bits)); if (!t->hregion) { - kfree(t); + ip_set_free(t); ret = -ENOMEM; goto out; } @@ -1533,7 +1533,7 @@ IPSET_TOKEN(HTYPE, _create)(struct net *net, struct ip_set *set, } t->hregion = ip_set_alloc(ahash_sizeof_regions(hbits)); if (!t->hregion) { - kfree(t); + ip_set_free(t); kfree(h); return -ENOMEM; } From 3b7016996c4c44db5d499d98759b82fb714bb912 Mon Sep 17 00:00:00 2001 From: Jakub Sitnicki Date: Thu, 25 Jun 2020 16:13:54 +0200 Subject: [PATCH 075/343] flow_dissector: Pull BPF program assignment up to bpf-netns Prepare for using bpf_prog_array to store attached programs by moving out code that updates the attached program out of flow dissector. Managing bpf_prog_array is more involved than updating a single bpf_prog pointer. This will let us do it all from one place, bpf/net_namespace.c, in the subsequent patch. No functional change intended. Signed-off-by: Jakub Sitnicki Signed-off-by: Alexei Starovoitov Acked-by: Andrii Nakryiko Acked-by: Martin KaFai Lau Link: https://lore.kernel.org/bpf/20200625141357.910330-2-jakub@cloudflare.com --- include/net/flow_dissector.h | 3 ++- kernel/bpf/net_namespace.c | 20 ++++++++++++++++++-- net/core/flow_dissector.c | 13 ++----------- 3 files changed, 22 insertions(+), 14 deletions(-) diff --git a/include/net/flow_dissector.h b/include/net/flow_dissector.h index a7eba43fe4e4..4b6e36288ddd 100644 --- a/include/net/flow_dissector.h +++ b/include/net/flow_dissector.h @@ -372,7 +372,8 @@ flow_dissector_init_keys(struct flow_dissector_key_control *key_control, } #ifdef CONFIG_BPF_SYSCALL -int flow_dissector_bpf_prog_attach(struct net *net, struct bpf_prog *prog); +int flow_dissector_bpf_prog_attach_check(struct net *net, + struct bpf_prog *prog); #endif /* CONFIG_BPF_SYSCALL */ #endif diff --git a/kernel/bpf/net_namespace.c b/kernel/bpf/net_namespace.c index 78cf061f8179..b951dab2687f 100644 --- a/kernel/bpf/net_namespace.c +++ b/kernel/bpf/net_namespace.c @@ -189,6 +189,7 @@ int netns_bpf_prog_query(const union bpf_attr *attr, int netns_bpf_prog_attach(const union bpf_attr *attr, struct bpf_prog *prog) { enum netns_bpf_attach_type type; + struct bpf_prog *attached; struct net *net; int ret; @@ -207,12 +208,26 @@ int netns_bpf_prog_attach(const union bpf_attr *attr, struct bpf_prog *prog) switch (type) { case NETNS_BPF_FLOW_DISSECTOR: - ret = flow_dissector_bpf_prog_attach(net, prog); + ret = flow_dissector_bpf_prog_attach_check(net, prog); break; default: ret = -EINVAL; break; } + if (ret) + goto out_unlock; + + attached = rcu_dereference_protected(net->bpf.progs[type], + lockdep_is_held(&netns_bpf_mutex)); + if (attached == prog) { + /* The same program cannot be attached twice */ + ret = -EINVAL; + goto out_unlock; + } + rcu_assign_pointer(net->bpf.progs[type], prog); + if (attached) + bpf_prog_put(attached); + out_unlock: mutex_unlock(&netns_bpf_mutex); @@ -277,7 +292,7 @@ static int netns_bpf_link_attach(struct net *net, struct bpf_link *link, switch (type) { case NETNS_BPF_FLOW_DISSECTOR: - err = flow_dissector_bpf_prog_attach(net, link->prog); + err = flow_dissector_bpf_prog_attach_check(net, link->prog); break; default: err = -EINVAL; @@ -286,6 +301,7 @@ static int netns_bpf_link_attach(struct net *net, struct bpf_link *link, if (err) goto out_unlock; + rcu_assign_pointer(net->bpf.progs[type], link->prog); net->bpf.links[type] = link; out_unlock: diff --git a/net/core/flow_dissector.c b/net/core/flow_dissector.c index d02df0b6d0d9..b57fb1359395 100644 --- a/net/core/flow_dissector.c +++ b/net/core/flow_dissector.c @@ -70,10 +70,10 @@ void skb_flow_dissector_init(struct flow_dissector *flow_dissector, EXPORT_SYMBOL(skb_flow_dissector_init); #ifdef CONFIG_BPF_SYSCALL -int flow_dissector_bpf_prog_attach(struct net *net, struct bpf_prog *prog) +int flow_dissector_bpf_prog_attach_check(struct net *net, + struct bpf_prog *prog) { enum netns_bpf_attach_type type = NETNS_BPF_FLOW_DISSECTOR; - struct bpf_prog *attached; if (net == &init_net) { /* BPF flow dissector in the root namespace overrides @@ -97,15 +97,6 @@ int flow_dissector_bpf_prog_attach(struct net *net, struct bpf_prog *prog) return -EEXIST; } - attached = rcu_dereference_protected(net->bpf.progs[type], - lockdep_is_held(&netns_bpf_mutex)); - if (attached == prog) - /* The same program cannot be attached twice */ - return -EINVAL; - - rcu_assign_pointer(net->bpf.progs[type], prog); - if (attached) - bpf_prog_put(attached); return 0; } #endif /* CONFIG_BPF_SYSCALL */ From 695c12147a40181fe9221d321c3f2de33c9574ed Mon Sep 17 00:00:00 2001 From: Jakub Sitnicki Date: Thu, 25 Jun 2020 16:13:55 +0200 Subject: [PATCH 076/343] bpf, netns: Keep attached programs in bpf_prog_array Prepare for having multi-prog attachments for new netns attach types by storing programs to run in a bpf_prog_array, which is well suited for iterating over programs and running them in sequence. After this change bpf(PROG_QUERY) may block to allocate memory in bpf_prog_array_copy_to_user() for collected program IDs. This forces a change in how we protect access to the attached program in the query callback. Because bpf_prog_array_copy_to_user() can sleep, we switch from an RCU read lock to holding a mutex that serializes updaters. Because we allow only one BPF flow_dissector program to be attached to netns at all times, the bpf_prog_array pointed by net->bpf.run_array is always either detached (null) or one element long. No functional changes intended. Signed-off-by: Jakub Sitnicki Signed-off-by: Alexei Starovoitov Acked-by: Martin KaFai Lau Acked-by: Andrii Nakryiko Link: https://lore.kernel.org/bpf/20200625141357.910330-3-jakub@cloudflare.com --- include/net/netns/bpf.h | 5 +- kernel/bpf/net_namespace.c | 120 +++++++++++++++++++++++++------------ net/core/flow_dissector.c | 19 +++--- 3 files changed, 96 insertions(+), 48 deletions(-) diff --git a/include/net/netns/bpf.h b/include/net/netns/bpf.h index a8dce2a380c8..a5015bda9979 100644 --- a/include/net/netns/bpf.h +++ b/include/net/netns/bpf.h @@ -9,9 +9,12 @@ #include struct bpf_prog; +struct bpf_prog_array; struct netns_bpf { - struct bpf_prog __rcu *progs[MAX_NETNS_BPF_ATTACH_TYPE]; + /* Array of programs to run compiled from progs or links */ + struct bpf_prog_array __rcu *run_array[MAX_NETNS_BPF_ATTACH_TYPE]; + struct bpf_prog *progs[MAX_NETNS_BPF_ATTACH_TYPE]; struct bpf_link *links[MAX_NETNS_BPF_ATTACH_TYPE]; }; diff --git a/kernel/bpf/net_namespace.c b/kernel/bpf/net_namespace.c index b951dab2687f..0dba97202357 100644 --- a/kernel/bpf/net_namespace.c +++ b/kernel/bpf/net_namespace.c @@ -33,6 +33,17 @@ static void __net_exit bpf_netns_link_auto_detach(struct bpf_link *link) net_link->net = NULL; } +/* Must be called with netns_bpf_mutex held. */ +static void netns_bpf_run_array_detach(struct net *net, + enum netns_bpf_attach_type type) +{ + struct bpf_prog_array *run_array; + + run_array = rcu_replace_pointer(net->bpf.run_array[type], NULL, + lockdep_is_held(&netns_bpf_mutex)); + bpf_prog_array_free(run_array); +} + static void bpf_netns_link_release(struct bpf_link *link) { struct bpf_netns_link *net_link = @@ -54,8 +65,8 @@ static void bpf_netns_link_release(struct bpf_link *link) if (!net) goto out_unlock; + netns_bpf_run_array_detach(net, type); net->bpf.links[type] = NULL; - RCU_INIT_POINTER(net->bpf.progs[type], NULL); out_unlock: mutex_unlock(&netns_bpf_mutex); @@ -76,6 +87,7 @@ static int bpf_netns_link_update_prog(struct bpf_link *link, struct bpf_netns_link *net_link = container_of(link, struct bpf_netns_link, link); enum netns_bpf_attach_type type = net_link->netns_type; + struct bpf_prog_array *run_array; struct net *net; int ret = 0; @@ -93,8 +105,11 @@ static int bpf_netns_link_update_prog(struct bpf_link *link, goto out_unlock; } + run_array = rcu_dereference_protected(net->bpf.run_array[type], + lockdep_is_held(&netns_bpf_mutex)); + WRITE_ONCE(run_array->items[0].prog, new_prog); + old_prog = xchg(&link->prog, new_prog); - rcu_assign_pointer(net->bpf.progs[type], new_prog); bpf_prog_put(old_prog); out_unlock: @@ -142,14 +157,38 @@ static const struct bpf_link_ops bpf_netns_link_ops = { .show_fdinfo = bpf_netns_link_show_fdinfo, }; +/* Must be called with netns_bpf_mutex held. */ +static int __netns_bpf_prog_query(const union bpf_attr *attr, + union bpf_attr __user *uattr, + struct net *net, + enum netns_bpf_attach_type type) +{ + __u32 __user *prog_ids = u64_to_user_ptr(attr->query.prog_ids); + struct bpf_prog_array *run_array; + u32 prog_cnt = 0, flags = 0; + + run_array = rcu_dereference_protected(net->bpf.run_array[type], + lockdep_is_held(&netns_bpf_mutex)); + if (run_array) + prog_cnt = bpf_prog_array_length(run_array); + + if (copy_to_user(&uattr->query.attach_flags, &flags, sizeof(flags))) + return -EFAULT; + if (copy_to_user(&uattr->query.prog_cnt, &prog_cnt, sizeof(prog_cnt))) + return -EFAULT; + if (!attr->query.prog_cnt || !prog_ids || !prog_cnt) + return 0; + + return bpf_prog_array_copy_to_user(run_array, prog_ids, + attr->query.prog_cnt); +} + int netns_bpf_prog_query(const union bpf_attr *attr, union bpf_attr __user *uattr) { - __u32 __user *prog_ids = u64_to_user_ptr(attr->query.prog_ids); - u32 prog_id, prog_cnt = 0, flags = 0; enum netns_bpf_attach_type type; - struct bpf_prog *attached; struct net *net; + int ret; if (attr->query.query_flags) return -EINVAL; @@ -162,32 +201,17 @@ int netns_bpf_prog_query(const union bpf_attr *attr, if (IS_ERR(net)) return PTR_ERR(net); - rcu_read_lock(); - attached = rcu_dereference(net->bpf.progs[type]); - if (attached) { - prog_cnt = 1; - prog_id = attached->aux->id; - } - rcu_read_unlock(); + mutex_lock(&netns_bpf_mutex); + ret = __netns_bpf_prog_query(attr, uattr, net, type); + mutex_unlock(&netns_bpf_mutex); put_net(net); - - if (copy_to_user(&uattr->query.attach_flags, &flags, sizeof(flags))) - return -EFAULT; - if (copy_to_user(&uattr->query.prog_cnt, &prog_cnt, sizeof(prog_cnt))) - return -EFAULT; - - if (!attr->query.prog_cnt || !prog_ids || !prog_cnt) - return 0; - - if (copy_to_user(prog_ids, &prog_id, sizeof(u32))) - return -EFAULT; - - return 0; + return ret; } int netns_bpf_prog_attach(const union bpf_attr *attr, struct bpf_prog *prog) { + struct bpf_prog_array *run_array; enum netns_bpf_attach_type type; struct bpf_prog *attached; struct net *net; @@ -217,14 +241,28 @@ int netns_bpf_prog_attach(const union bpf_attr *attr, struct bpf_prog *prog) if (ret) goto out_unlock; - attached = rcu_dereference_protected(net->bpf.progs[type], - lockdep_is_held(&netns_bpf_mutex)); + attached = net->bpf.progs[type]; if (attached == prog) { /* The same program cannot be attached twice */ ret = -EINVAL; goto out_unlock; } - rcu_assign_pointer(net->bpf.progs[type], prog); + + run_array = rcu_dereference_protected(net->bpf.run_array[type], + lockdep_is_held(&netns_bpf_mutex)); + if (run_array) { + WRITE_ONCE(run_array->items[0].prog, prog); + } else { + run_array = bpf_prog_array_alloc(1, GFP_KERNEL); + if (!run_array) { + ret = -ENOMEM; + goto out_unlock; + } + run_array->items[0].prog = prog; + rcu_assign_pointer(net->bpf.run_array[type], run_array); + } + + net->bpf.progs[type] = prog; if (attached) bpf_prog_put(attached); @@ -244,11 +282,11 @@ static int __netns_bpf_prog_detach(struct net *net, if (net->bpf.links[type]) return -EINVAL; - attached = rcu_dereference_protected(net->bpf.progs[type], - lockdep_is_held(&netns_bpf_mutex)); + attached = net->bpf.progs[type]; if (!attached) return -ENOENT; - RCU_INIT_POINTER(net->bpf.progs[type], NULL); + netns_bpf_run_array_detach(net, type); + net->bpf.progs[type] = NULL; bpf_prog_put(attached); return 0; } @@ -272,7 +310,7 @@ int netns_bpf_prog_detach(const union bpf_attr *attr) static int netns_bpf_link_attach(struct net *net, struct bpf_link *link, enum netns_bpf_attach_type type) { - struct bpf_prog *prog; + struct bpf_prog_array *run_array; int err; mutex_lock(&netns_bpf_mutex); @@ -283,9 +321,7 @@ static int netns_bpf_link_attach(struct net *net, struct bpf_link *link, goto out_unlock; } /* Links are not compatible with attaching prog directly */ - prog = rcu_dereference_protected(net->bpf.progs[type], - lockdep_is_held(&netns_bpf_mutex)); - if (prog) { + if (net->bpf.progs[type]) { err = -EEXIST; goto out_unlock; } @@ -301,7 +337,14 @@ static int netns_bpf_link_attach(struct net *net, struct bpf_link *link, if (err) goto out_unlock; - rcu_assign_pointer(net->bpf.progs[type], link->prog); + run_array = bpf_prog_array_alloc(1, GFP_KERNEL); + if (!run_array) { + err = -ENOMEM; + goto out_unlock; + } + run_array->items[0].prog = link->prog; + rcu_assign_pointer(net->bpf.run_array[type], run_array); + net->bpf.links[type] = link; out_unlock: @@ -368,11 +411,12 @@ static void __net_exit netns_bpf_pernet_pre_exit(struct net *net) mutex_lock(&netns_bpf_mutex); for (type = 0; type < MAX_NETNS_BPF_ATTACH_TYPE; type++) { + netns_bpf_run_array_detach(net, type); link = net->bpf.links[type]; if (link) bpf_netns_link_auto_detach(link); - else - __netns_bpf_prog_detach(net, type); + else if (net->bpf.progs[type]) + bpf_prog_put(net->bpf.progs[type]); } mutex_unlock(&netns_bpf_mutex); } diff --git a/net/core/flow_dissector.c b/net/core/flow_dissector.c index b57fb1359395..142a8824f0a8 100644 --- a/net/core/flow_dissector.c +++ b/net/core/flow_dissector.c @@ -86,14 +86,14 @@ int flow_dissector_bpf_prog_attach_check(struct net *net, for_each_net(ns) { if (ns == &init_net) continue; - if (rcu_access_pointer(ns->bpf.progs[type])) + if (rcu_access_pointer(ns->bpf.run_array[type])) return -EEXIST; } } else { /* Make sure root flow dissector is not attached * when attaching to the non-root namespace. */ - if (rcu_access_pointer(init_net.bpf.progs[type])) + if (rcu_access_pointer(init_net.bpf.run_array[type])) return -EEXIST; } @@ -894,7 +894,6 @@ bool __skb_flow_dissect(const struct net *net, struct flow_dissector_key_addrs *key_addrs; struct flow_dissector_key_tags *key_tags; struct flow_dissector_key_vlan *key_vlan; - struct bpf_prog *attached = NULL; enum flow_dissect_ret fdret; enum flow_dissector_key_id dissector_vlan = FLOW_DISSECTOR_KEY_MAX; bool mpls_el = false; @@ -951,14 +950,14 @@ bool __skb_flow_dissect(const struct net *net, WARN_ON_ONCE(!net); if (net) { enum netns_bpf_attach_type type = NETNS_BPF_FLOW_DISSECTOR; + struct bpf_prog_array *run_array; rcu_read_lock(); - attached = rcu_dereference(init_net.bpf.progs[type]); + run_array = rcu_dereference(init_net.bpf.run_array[type]); + if (!run_array) + run_array = rcu_dereference(net->bpf.run_array[type]); - if (!attached) - attached = rcu_dereference(net->bpf.progs[type]); - - if (attached) { + if (run_array) { struct bpf_flow_keys flow_keys; struct bpf_flow_dissector ctx = { .flow_keys = &flow_keys, @@ -966,6 +965,7 @@ bool __skb_flow_dissect(const struct net *net, .data_end = data + hlen, }; __be16 n_proto = proto; + struct bpf_prog *prog; if (skb) { ctx.skb = skb; @@ -976,7 +976,8 @@ bool __skb_flow_dissect(const struct net *net, n_proto = skb->protocol; } - ret = bpf_flow_dissect(attached, &ctx, n_proto, nhoff, + prog = READ_ONCE(run_array->items[0].prog); + ret = bpf_flow_dissect(prog, &ctx, n_proto, nhoff, hlen, flags); __skb_flow_bpf_to_target(&flow_keys, flow_dissector, target_container); From ab53cad90eb10c9991f501ba08904680a074ef3d Mon Sep 17 00:00:00 2001 From: Jakub Sitnicki Date: Thu, 25 Jun 2020 16:13:56 +0200 Subject: [PATCH 077/343] bpf, netns: Keep a list of attached bpf_link's To support multi-prog link-based attachments for new netns attach types, we need to keep track of more than one bpf_link per attach type. Hence, convert net->bpf.links into a list, that currently can be either empty or have just one item. Instead of reusing bpf_prog_list from bpf-cgroup, we link together bpf_netns_link's themselves. This makes list management simpler as we don't have to allocate, initialize, and later release list elements. We can do this because multi-prog attachment will be available only for bpf_link, and we don't need to build a list of programs attached directly and indirectly via links. No functional changes intended. Signed-off-by: Jakub Sitnicki Signed-off-by: Alexei Starovoitov Acked-by: Andrii Nakryiko Acked-by: Martin KaFai Lau Link: https://lore.kernel.org/bpf/20200625141357.910330-4-jakub@cloudflare.com --- include/net/netns/bpf.h | 2 +- kernel/bpf/net_namespace.c | 42 +++++++++++++++++++++----------------- 2 files changed, 24 insertions(+), 20 deletions(-) diff --git a/include/net/netns/bpf.h b/include/net/netns/bpf.h index a5015bda9979..0ca6a1b87185 100644 --- a/include/net/netns/bpf.h +++ b/include/net/netns/bpf.h @@ -15,7 +15,7 @@ struct netns_bpf { /* Array of programs to run compiled from progs or links */ struct bpf_prog_array __rcu *run_array[MAX_NETNS_BPF_ATTACH_TYPE]; struct bpf_prog *progs[MAX_NETNS_BPF_ATTACH_TYPE]; - struct bpf_link *links[MAX_NETNS_BPF_ATTACH_TYPE]; + struct list_head links[MAX_NETNS_BPF_ATTACH_TYPE]; }; #endif /* __NETNS_BPF_H__ */ diff --git a/kernel/bpf/net_namespace.c b/kernel/bpf/net_namespace.c index 0dba97202357..7a34a8caf954 100644 --- a/kernel/bpf/net_namespace.c +++ b/kernel/bpf/net_namespace.c @@ -19,20 +19,12 @@ struct bpf_netns_link { * with netns_bpf_mutex held. */ struct net *net; + struct list_head node; /* node in list of links attached to net */ }; /* Protects updates to netns_bpf */ DEFINE_MUTEX(netns_bpf_mutex); -/* Must be called with netns_bpf_mutex held. */ -static void __net_exit bpf_netns_link_auto_detach(struct bpf_link *link) -{ - struct bpf_netns_link *net_link = - container_of(link, struct bpf_netns_link, link); - - net_link->net = NULL; -} - /* Must be called with netns_bpf_mutex held. */ static void netns_bpf_run_array_detach(struct net *net, enum netns_bpf_attach_type type) @@ -66,7 +58,7 @@ static void bpf_netns_link_release(struct bpf_link *link) goto out_unlock; netns_bpf_run_array_detach(net, type); - net->bpf.links[type] = NULL; + list_del(&net_link->node); out_unlock: mutex_unlock(&netns_bpf_mutex); @@ -225,7 +217,7 @@ int netns_bpf_prog_attach(const union bpf_attr *attr, struct bpf_prog *prog) mutex_lock(&netns_bpf_mutex); /* Attaching prog directly is not compatible with links */ - if (net->bpf.links[type]) { + if (!list_empty(&net->bpf.links[type])) { ret = -EEXIST; goto out_unlock; } @@ -279,7 +271,7 @@ static int __netns_bpf_prog_detach(struct net *net, struct bpf_prog *attached; /* Progs attached via links cannot be detached */ - if (net->bpf.links[type]) + if (!list_empty(&net->bpf.links[type])) return -EINVAL; attached = net->bpf.progs[type]; @@ -310,13 +302,15 @@ int netns_bpf_prog_detach(const union bpf_attr *attr) static int netns_bpf_link_attach(struct net *net, struct bpf_link *link, enum netns_bpf_attach_type type) { + struct bpf_netns_link *net_link = + container_of(link, struct bpf_netns_link, link); struct bpf_prog_array *run_array; int err; mutex_lock(&netns_bpf_mutex); /* Allow attaching only one prog or link for now */ - if (net->bpf.links[type]) { + if (!list_empty(&net->bpf.links[type])) { err = -E2BIG; goto out_unlock; } @@ -345,7 +339,7 @@ static int netns_bpf_link_attach(struct net *net, struct bpf_link *link, run_array->items[0].prog = link->prog; rcu_assign_pointer(net->bpf.run_array[type], run_array); - net->bpf.links[type] = link; + list_add_tail(&net_link->node, &net->bpf.links[type]); out_unlock: mutex_unlock(&netns_bpf_mutex); @@ -404,24 +398,34 @@ int netns_bpf_link_create(const union bpf_attr *attr, struct bpf_prog *prog) return err; } +static int __net_init netns_bpf_pernet_init(struct net *net) +{ + int type; + + for (type = 0; type < MAX_NETNS_BPF_ATTACH_TYPE; type++) + INIT_LIST_HEAD(&net->bpf.links[type]); + + return 0; +} + static void __net_exit netns_bpf_pernet_pre_exit(struct net *net) { enum netns_bpf_attach_type type; - struct bpf_link *link; + struct bpf_netns_link *net_link; mutex_lock(&netns_bpf_mutex); for (type = 0; type < MAX_NETNS_BPF_ATTACH_TYPE; type++) { netns_bpf_run_array_detach(net, type); - link = net->bpf.links[type]; - if (link) - bpf_netns_link_auto_detach(link); - else if (net->bpf.progs[type]) + list_for_each_entry(net_link, &net->bpf.links[type], node) + net_link->net = NULL; /* auto-detach link */ + if (net->bpf.progs[type]) bpf_prog_put(net->bpf.progs[type]); } mutex_unlock(&netns_bpf_mutex); } static struct pernet_operations netns_bpf_pernet_ops __net_initdata = { + .init = netns_bpf_pernet_init, .pre_exit = netns_bpf_pernet_pre_exit, }; From 6ebb85c83aaf6ae75b920ef45d2a9eee42079265 Mon Sep 17 00:00:00 2001 From: Jakub Sitnicki Date: Thu, 25 Jun 2020 16:13:57 +0200 Subject: [PATCH 078/343] selftests/bpf: Test updating flow_dissector link with same program This case, while not particularly useful, is worth covering because we expect the operation to succeed as opposed when re-attaching the same program directly with PROG_ATTACH. While at it, update the tests summary that fell out of sync when tests extended to cover links. Signed-off-by: Jakub Sitnicki Signed-off-by: Alexei Starovoitov Acked-by: Martin KaFai Lau Link: https://lore.kernel.org/bpf/20200625141357.910330-5-jakub@cloudflare.com --- .../bpf/prog_tests/flow_dissector_reattach.c | 32 ++++++++++++++++--- 1 file changed, 28 insertions(+), 4 deletions(-) diff --git a/tools/testing/selftests/bpf/prog_tests/flow_dissector_reattach.c b/tools/testing/selftests/bpf/prog_tests/flow_dissector_reattach.c index 15cb554a66d8..a2db3b0f84db 100644 --- a/tools/testing/selftests/bpf/prog_tests/flow_dissector_reattach.c +++ b/tools/testing/selftests/bpf/prog_tests/flow_dissector_reattach.c @@ -1,9 +1,6 @@ // SPDX-License-Identifier: GPL-2.0 /* - * Test that the flow_dissector program can be updated with a single - * syscall by attaching a new program that replaces the existing one. - * - * Corner case - the same program cannot be attached twice. + * Tests for attaching, detaching, and replacing flow_dissector BPF program. */ #define _GNU_SOURCE @@ -308,6 +305,31 @@ static void test_link_update_replace_old_prog(int netns, int prog1, int prog2) CHECK_FAIL(prog_is_attached(netns)); } +static void test_link_update_same_prog(int netns, int prog1, int prog2) +{ + DECLARE_LIBBPF_OPTS(bpf_link_create_opts, create_opts); + DECLARE_LIBBPF_OPTS(bpf_link_update_opts, update_opts); + int err, link; + + link = bpf_link_create(prog1, netns, BPF_FLOW_DISSECTOR, &create_opts); + if (CHECK_FAIL(link < 0)) { + perror("bpf_link_create(prog1)"); + return; + } + CHECK_FAIL(query_attached_prog_id(netns) != query_prog_id(prog1)); + + /* Expect success updating the prog with the same one */ + update_opts.flags = 0; + update_opts.old_prog_fd = 0; + err = bpf_link_update(link, prog1, &update_opts); + if (CHECK_FAIL(err)) + perror("bpf_link_update"); + CHECK_FAIL(query_attached_prog_id(netns) != query_prog_id(prog1)); + + close(link); + CHECK_FAIL(prog_is_attached(netns)); +} + static void test_link_update_invalid_opts(int netns, int prog1, int prog2) { DECLARE_LIBBPF_OPTS(bpf_link_create_opts, create_opts); @@ -571,6 +593,8 @@ static void run_tests(int netns) test_link_update_no_old_prog }, { "link update with replace old prog", test_link_update_replace_old_prog }, + { "link update with same prog", + test_link_update_same_prog }, { "link update invalid opts", test_link_update_invalid_opts }, { "link update invalid prog", From 1b514239e85965cc4df085180a73dd91733135f7 Mon Sep 17 00:00:00 2001 From: Lorenz Bauer Date: Mon, 29 Jun 2020 10:56:25 +0100 Subject: [PATCH 079/343] bpf: flow_dissector: Check value of unused flags to BPF_PROG_ATTACH Using BPF_PROG_ATTACH on a flow dissector program supports neither target_fd, attach_flags or replace_bpf_fd but accepts any value. Enforce that all of them are zero. This is fine for replace_bpf_fd since its presence is indicated by BPF_F_REPLACE. It's more problematic for target_fd, since zero is a valid fd. Should we want to use the flag later on we'd have to add an exception for fd 0. The alternative is to force a value like -1. This requires more changes to tests. There is also precedent for using 0, since bpf_iter uses this for target_fd as well. Fixes: b27f7bb590ba ("flow_dissector: Move out netns_bpf prog callbacks") Signed-off-by: Lorenz Bauer Signed-off-by: Alexei Starovoitov Link: https://lore.kernel.org/bpf/20200629095630.7933-2-lmb@cloudflare.com --- kernel/bpf/net_namespace.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/kernel/bpf/net_namespace.c b/kernel/bpf/net_namespace.c index 7a34a8caf954..03045f45afec 100644 --- a/kernel/bpf/net_namespace.c +++ b/kernel/bpf/net_namespace.c @@ -209,6 +209,9 @@ int netns_bpf_prog_attach(const union bpf_attr *attr, struct bpf_prog *prog) struct net *net; int ret; + if (attr->target_fd || attr->attach_flags || attr->replace_bpf_fd) + return -EINVAL; + type = to_netns_bpf_attach_type(attr->attach_type); if (type < 0) return -EINVAL; From 4ac2add65974e4efafb8d4ccd8fc5660417ea312 Mon Sep 17 00:00:00 2001 From: Lorenz Bauer Date: Mon, 29 Jun 2020 10:56:26 +0100 Subject: [PATCH 080/343] bpf: flow_dissector: Check value of unused flags to BPF_PROG_DETACH Using BPF_PROG_DETACH on a flow dissector program supports neither attach_flags nor attach_bpf_fd. Yet no value is enforced for them. Enforce that attach_flags are zero, and require the current program to be passed via attach_bpf_fd. This allows us to remove the check for CAP_SYS_ADMIN, since userspace can now no longer remove arbitrary flow dissector programs. Fixes: b27f7bb590ba ("flow_dissector: Move out netns_bpf prog callbacks") Signed-off-by: Lorenz Bauer Signed-off-by: Alexei Starovoitov Link: https://lore.kernel.org/bpf/20200629095630.7933-3-lmb@cloudflare.com --- include/linux/bpf-netns.h | 5 +++-- kernel/bpf/net_namespace.c | 19 +++++++++++++++---- kernel/bpf/syscall.c | 4 +--- 3 files changed, 19 insertions(+), 9 deletions(-) diff --git a/include/linux/bpf-netns.h b/include/linux/bpf-netns.h index 4052d649f36d..47d5b0c708c9 100644 --- a/include/linux/bpf-netns.h +++ b/include/linux/bpf-netns.h @@ -33,7 +33,7 @@ int netns_bpf_prog_query(const union bpf_attr *attr, union bpf_attr __user *uattr); int netns_bpf_prog_attach(const union bpf_attr *attr, struct bpf_prog *prog); -int netns_bpf_prog_detach(const union bpf_attr *attr); +int netns_bpf_prog_detach(const union bpf_attr *attr, enum bpf_prog_type ptype); int netns_bpf_link_create(const union bpf_attr *attr, struct bpf_prog *prog); #else @@ -49,7 +49,8 @@ static inline int netns_bpf_prog_attach(const union bpf_attr *attr, return -EOPNOTSUPP; } -static inline int netns_bpf_prog_detach(const union bpf_attr *attr) +static inline int netns_bpf_prog_detach(const union bpf_attr *attr, + enum bpf_prog_type ptype) { return -EOPNOTSUPP; } diff --git a/kernel/bpf/net_namespace.c b/kernel/bpf/net_namespace.c index 03045f45afec..3dbc29b6f51d 100644 --- a/kernel/bpf/net_namespace.c +++ b/kernel/bpf/net_namespace.c @@ -269,7 +269,8 @@ int netns_bpf_prog_attach(const union bpf_attr *attr, struct bpf_prog *prog) /* Must be called with netns_bpf_mutex held. */ static int __netns_bpf_prog_detach(struct net *net, - enum netns_bpf_attach_type type) + enum netns_bpf_attach_type type, + struct bpf_prog *old) { struct bpf_prog *attached; @@ -278,7 +279,7 @@ static int __netns_bpf_prog_detach(struct net *net, return -EINVAL; attached = net->bpf.progs[type]; - if (!attached) + if (!attached || attached != old) return -ENOENT; netns_bpf_run_array_detach(net, type); net->bpf.progs[type] = NULL; @@ -286,19 +287,29 @@ static int __netns_bpf_prog_detach(struct net *net, return 0; } -int netns_bpf_prog_detach(const union bpf_attr *attr) +int netns_bpf_prog_detach(const union bpf_attr *attr, enum bpf_prog_type ptype) { enum netns_bpf_attach_type type; + struct bpf_prog *prog; int ret; + if (attr->target_fd) + return -EINVAL; + type = to_netns_bpf_attach_type(attr->attach_type); if (type < 0) return -EINVAL; + prog = bpf_prog_get_type(attr->attach_bpf_fd, ptype); + if (IS_ERR(prog)) + return PTR_ERR(prog); + mutex_lock(&netns_bpf_mutex); - ret = __netns_bpf_prog_detach(current->nsproxy->net_ns, type); + ret = __netns_bpf_prog_detach(current->nsproxy->net_ns, type, prog); mutex_unlock(&netns_bpf_mutex); + bpf_prog_put(prog); + return ret; } diff --git a/kernel/bpf/syscall.c b/kernel/bpf/syscall.c index 7d946435587d..28c6ef759037 100644 --- a/kernel/bpf/syscall.c +++ b/kernel/bpf/syscall.c @@ -2897,9 +2897,7 @@ static int bpf_prog_detach(const union bpf_attr *attr) case BPF_PROG_TYPE_LIRC_MODE2: return lirc_prog_detach(attr); case BPF_PROG_TYPE_FLOW_DISSECTOR: - if (!capable(CAP_NET_ADMIN)) - return -EPERM; - return netns_bpf_prog_detach(attr); + return netns_bpf_prog_detach(attr, ptype); case BPF_PROG_TYPE_CGROUP_DEVICE: case BPF_PROG_TYPE_CGROUP_SKB: case BPF_PROG_TYPE_CGROUP_SOCK: From 9b2b09717e1812e450782a43ca0c2790651cf380 Mon Sep 17 00:00:00 2001 From: Lorenz Bauer Date: Mon, 29 Jun 2020 10:56:27 +0100 Subject: [PATCH 081/343] bpf: sockmap: Check value of unused args to BPF_PROG_ATTACH Using BPF_PROG_ATTACH on a sockmap program currently understands no flags or replace_bpf_fd, but accepts any value. Return EINVAL instead. Fixes: 604326b41a6f ("bpf, sockmap: convert to generic sk_msg interface") Signed-off-by: Lorenz Bauer Signed-off-by: Alexei Starovoitov Link: https://lore.kernel.org/bpf/20200629095630.7933-4-lmb@cloudflare.com --- net/core/sock_map.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/net/core/sock_map.c b/net/core/sock_map.c index 4059f94e9bb5..58016a5c63ff 100644 --- a/net/core/sock_map.c +++ b/net/core/sock_map.c @@ -70,6 +70,9 @@ int sock_map_get_from_fd(const union bpf_attr *attr, struct bpf_prog *prog) struct fd f; int ret; + if (attr->attach_flags || attr->replace_bpf_fd) + return -EINVAL; + f = fdget(ufd); map = __bpf_map_get(f); if (IS_ERR(map)) From bb0de3131f4c60a9bf976681e0fe4d1e55c7a821 Mon Sep 17 00:00:00 2001 From: Lorenz Bauer Date: Mon, 29 Jun 2020 10:56:28 +0100 Subject: [PATCH 082/343] bpf: sockmap: Require attach_bpf_fd when detaching a program The sockmap code currently ignores the value of attach_bpf_fd when detaching a program. This is contrary to the usual behaviour of checking that attach_bpf_fd represents the currently attached program. Ensure that attach_bpf_fd is indeed the currently attached program. It turns out that all sockmap selftests already do this, which indicates that this is unlikely to cause breakage. Fixes: 604326b41a6f ("bpf, sockmap: convert to generic sk_msg interface") Signed-off-by: Lorenz Bauer Signed-off-by: Alexei Starovoitov Link: https://lore.kernel.org/bpf/20200629095630.7933-5-lmb@cloudflare.com --- include/linux/bpf.h | 13 +++++++++-- include/linux/skmsg.h | 13 +++++++++++ kernel/bpf/syscall.c | 2 +- net/core/sock_map.c | 50 ++++++++++++++++++++++++++++++++++++++----- 4 files changed, 70 insertions(+), 8 deletions(-) diff --git a/include/linux/bpf.h b/include/linux/bpf.h index 07052d44bca1..9750a1902ee5 100644 --- a/include/linux/bpf.h +++ b/include/linux/bpf.h @@ -1543,13 +1543,16 @@ static inline void bpf_map_offload_map_free(struct bpf_map *map) #endif /* CONFIG_NET && CONFIG_BPF_SYSCALL */ #if defined(CONFIG_BPF_STREAM_PARSER) -int sock_map_prog_update(struct bpf_map *map, struct bpf_prog *prog, u32 which); +int sock_map_prog_update(struct bpf_map *map, struct bpf_prog *prog, + struct bpf_prog *old, u32 which); int sock_map_get_from_fd(const union bpf_attr *attr, struct bpf_prog *prog); +int sock_map_prog_detach(const union bpf_attr *attr, enum bpf_prog_type ptype); void sock_map_unhash(struct sock *sk); void sock_map_close(struct sock *sk, long timeout); #else static inline int sock_map_prog_update(struct bpf_map *map, - struct bpf_prog *prog, u32 which) + struct bpf_prog *prog, + struct bpf_prog *old, u32 which) { return -EOPNOTSUPP; } @@ -1559,6 +1562,12 @@ static inline int sock_map_get_from_fd(const union bpf_attr *attr, { return -EINVAL; } + +static inline int sock_map_prog_detach(const union bpf_attr *attr, + enum bpf_prog_type ptype) +{ + return -EOPNOTSUPP; +} #endif /* CONFIG_BPF_STREAM_PARSER */ #if defined(CONFIG_INET) && defined(CONFIG_BPF_SYSCALL) diff --git a/include/linux/skmsg.h b/include/linux/skmsg.h index 08674cd14d5a..1e9ed840b9fc 100644 --- a/include/linux/skmsg.h +++ b/include/linux/skmsg.h @@ -430,6 +430,19 @@ static inline void psock_set_prog(struct bpf_prog **pprog, bpf_prog_put(prog); } +static inline int psock_replace_prog(struct bpf_prog **pprog, + struct bpf_prog *prog, + struct bpf_prog *old) +{ + if (cmpxchg(pprog, old, prog) != old) + return -ENOENT; + + if (old) + bpf_prog_put(old); + + return 0; +} + static inline void psock_progs_drop(struct sk_psock_progs *progs) { psock_set_prog(&progs->msg_parser, NULL); diff --git a/kernel/bpf/syscall.c b/kernel/bpf/syscall.c index 28c6ef759037..a74fce8ce043 100644 --- a/kernel/bpf/syscall.c +++ b/kernel/bpf/syscall.c @@ -2893,7 +2893,7 @@ static int bpf_prog_detach(const union bpf_attr *attr) switch (ptype) { case BPF_PROG_TYPE_SK_MSG: case BPF_PROG_TYPE_SK_SKB: - return sock_map_get_from_fd(attr, NULL); + return sock_map_prog_detach(attr, ptype); case BPF_PROG_TYPE_LIRC_MODE2: return lirc_prog_detach(attr); case BPF_PROG_TYPE_FLOW_DISSECTOR: diff --git a/net/core/sock_map.c b/net/core/sock_map.c index 58016a5c63ff..0971f17e8e54 100644 --- a/net/core/sock_map.c +++ b/net/core/sock_map.c @@ -77,7 +77,42 @@ int sock_map_get_from_fd(const union bpf_attr *attr, struct bpf_prog *prog) map = __bpf_map_get(f); if (IS_ERR(map)) return PTR_ERR(map); - ret = sock_map_prog_update(map, prog, attr->attach_type); + ret = sock_map_prog_update(map, prog, NULL, attr->attach_type); + fdput(f); + return ret; +} + +int sock_map_prog_detach(const union bpf_attr *attr, enum bpf_prog_type ptype) +{ + u32 ufd = attr->target_fd; + struct bpf_prog *prog; + struct bpf_map *map; + struct fd f; + int ret; + + if (attr->attach_flags || attr->replace_bpf_fd) + return -EINVAL; + + f = fdget(ufd); + map = __bpf_map_get(f); + if (IS_ERR(map)) + return PTR_ERR(map); + + prog = bpf_prog_get(attr->attach_bpf_fd); + if (IS_ERR(prog)) { + ret = PTR_ERR(prog); + goto put_map; + } + + if (prog->type != ptype) { + ret = -EINVAL; + goto put_prog; + } + + ret = sock_map_prog_update(map, NULL, prog, attr->attach_type); +put_prog: + bpf_prog_put(prog); +put_map: fdput(f); return ret; } @@ -1206,27 +1241,32 @@ static struct sk_psock_progs *sock_map_progs(struct bpf_map *map) } int sock_map_prog_update(struct bpf_map *map, struct bpf_prog *prog, - u32 which) + struct bpf_prog *old, u32 which) { struct sk_psock_progs *progs = sock_map_progs(map); + struct bpf_prog **pprog; if (!progs) return -EOPNOTSUPP; switch (which) { case BPF_SK_MSG_VERDICT: - psock_set_prog(&progs->msg_parser, prog); + pprog = &progs->msg_parser; break; case BPF_SK_SKB_STREAM_PARSER: - psock_set_prog(&progs->skb_parser, prog); + pprog = &progs->skb_parser; break; case BPF_SK_SKB_STREAM_VERDICT: - psock_set_prog(&progs->skb_verdict, prog); + pprog = &progs->skb_verdict; break; default: return -EOPNOTSUPP; } + if (old) + return psock_replace_prog(pprog, prog, old); + + psock_set_prog(pprog, prog); return 0; } From 0434296c72486881c2a71cd33876e4e6342001b5 Mon Sep 17 00:00:00 2001 From: Lorenz Bauer Date: Mon, 29 Jun 2020 10:56:29 +0100 Subject: [PATCH 083/343] selftests: bpf: Pass program and target_fd in flow_dissector_reattach Pass 0 as target_fd when attaching and detaching flow dissector. Additionally, pass the expected program when detaching. Fixes: 1f043f87bb59 ("selftests/bpf: Add tests for attaching bpf_link to netns") Signed-off-by: Lorenz Bauer Signed-off-by: Alexei Starovoitov Link: https://lore.kernel.org/bpf/20200629095630.7933-6-lmb@cloudflare.com --- .../bpf/prog_tests/flow_dissector_reattach.c | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/tools/testing/selftests/bpf/prog_tests/flow_dissector_reattach.c b/tools/testing/selftests/bpf/prog_tests/flow_dissector_reattach.c index a2db3b0f84db..172c586b6996 100644 --- a/tools/testing/selftests/bpf/prog_tests/flow_dissector_reattach.c +++ b/tools/testing/selftests/bpf/prog_tests/flow_dissector_reattach.c @@ -113,7 +113,7 @@ static void test_prog_attach_prog_attach(int netns, int prog1, int prog2) CHECK_FAIL(query_attached_prog_id(netns) != query_prog_id(prog2)); out_detach: - err = bpf_prog_detach(0, BPF_FLOW_DISSECTOR); + err = bpf_prog_detach2(prog2, 0, BPF_FLOW_DISSECTOR); if (CHECK_FAIL(err)) perror("bpf_prog_detach"); CHECK_FAIL(prog_is_attached(netns)); @@ -149,7 +149,7 @@ static void test_prog_attach_link_create(int netns, int prog1, int prog2) DECLARE_LIBBPF_OPTS(bpf_link_create_opts, opts); int err, link; - err = bpf_prog_attach(prog1, -1, BPF_FLOW_DISSECTOR, 0); + err = bpf_prog_attach(prog1, 0, BPF_FLOW_DISSECTOR, 0); if (CHECK_FAIL(err)) { perror("bpf_prog_attach(prog1)"); return; @@ -165,7 +165,7 @@ static void test_prog_attach_link_create(int netns, int prog1, int prog2) close(link); CHECK_FAIL(query_attached_prog_id(netns) != query_prog_id(prog1)); - err = bpf_prog_detach(-1, BPF_FLOW_DISSECTOR); + err = bpf_prog_detach2(prog1, 0, BPF_FLOW_DISSECTOR); if (CHECK_FAIL(err)) perror("bpf_prog_detach"); CHECK_FAIL(prog_is_attached(netns)); @@ -185,7 +185,7 @@ static void test_link_create_prog_attach(int netns, int prog1, int prog2) /* Expect failure attaching prog when link exists */ errno = 0; - err = bpf_prog_attach(prog2, -1, BPF_FLOW_DISSECTOR, 0); + err = bpf_prog_attach(prog2, 0, BPF_FLOW_DISSECTOR, 0); if (CHECK_FAIL(!err || errno != EEXIST)) perror("bpf_prog_attach(prog2) expected EEXIST"); CHECK_FAIL(query_attached_prog_id(netns) != query_prog_id(prog1)); @@ -208,7 +208,7 @@ static void test_link_create_prog_detach(int netns, int prog1, int prog2) /* Expect failure detaching prog when link exists */ errno = 0; - err = bpf_prog_detach(-1, BPF_FLOW_DISSECTOR); + err = bpf_prog_detach2(prog1, 0, BPF_FLOW_DISSECTOR); if (CHECK_FAIL(!err || errno != EINVAL)) perror("bpf_prog_detach expected EINVAL"); CHECK_FAIL(query_attached_prog_id(netns) != query_prog_id(prog1)); @@ -228,7 +228,7 @@ static void test_prog_attach_detach_query(int netns, int prog1, int prog2) } CHECK_FAIL(query_attached_prog_id(netns) != query_prog_id(prog1)); - err = bpf_prog_detach(0, BPF_FLOW_DISSECTOR); + err = bpf_prog_detach2(prog1, 0, BPF_FLOW_DISSECTOR); if (CHECK_FAIL(err)) { perror("bpf_prog_detach"); return; From 1a1ad3c20a6fe0e8a4b570fbf835d7cc6e87a9d8 Mon Sep 17 00:00:00 2001 From: Lorenz Bauer Date: Mon, 29 Jun 2020 10:56:30 +0100 Subject: [PATCH 084/343] selftests: bpf: Pass program to bpf_prog_detach in flow_dissector Calling bpf_prog_detach is incorrect, since it takes target_fd as its argument. The intention here is to pass it as attach_bpf_fd, so use bpf_prog_detach2 and pass zero for target_fd. Fixes: 06716e04a043 ("selftests/bpf: Extend test_flow_dissector to cover link creation") Signed-off-by: Lorenz Bauer Signed-off-by: Alexei Starovoitov Link: https://lore.kernel.org/bpf/20200629095630.7933-7-lmb@cloudflare.com --- tools/testing/selftests/bpf/prog_tests/flow_dissector.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tools/testing/selftests/bpf/prog_tests/flow_dissector.c b/tools/testing/selftests/bpf/prog_tests/flow_dissector.c index ea14e3ece812..f11f187990e9 100644 --- a/tools/testing/selftests/bpf/prog_tests/flow_dissector.c +++ b/tools/testing/selftests/bpf/prog_tests/flow_dissector.c @@ -527,8 +527,8 @@ static void test_skb_less_prog_attach(struct bpf_flow *skel, int tap_fd) run_tests_skb_less(tap_fd, skel->maps.last_dissection); - err = bpf_prog_detach(prog_fd, BPF_FLOW_DISSECTOR); - CHECK(err, "bpf_prog_detach", "err %d errno %d\n", err, errno); + err = bpf_prog_detach2(prog_fd, 0, BPF_FLOW_DISSECTOR); + CHECK(err, "bpf_prog_detach2", "err %d errno %d\n", err, errno); } static void test_skb_less_link_create(struct bpf_flow *skel, int tap_fd) From 695cf5ab401c1a368fed228ee4a624784cd17fc5 Mon Sep 17 00:00:00 2001 From: Alexander Tsoy Date: Mon, 29 Jun 2020 05:59:33 +0300 Subject: [PATCH 085/343] ALSA: usb-audio: Fix packet size calculation Commit f0bd62b64016 ("ALSA: usb-audio: Improve frames size computation") introduced a regression for devices which have playback endpoints with bInterval > 1. Fix this by taking ep->datainterval into account. Note that frame and fps are actually mean packet and packets per second in the code introduces by the mentioned commit. This will be fixed in a follow-up patch. Fixes: f0bd62b64016 ("ALSA: usb-audio: Improve frames size computation") BugLink: https://bugzilla.kernel.org/show_bug.cgi?id=208353 Signed-off-by: Alexander Tsoy Link: https://lore.kernel.org/r/20200629025934.154288-1-alexander@tsoy.me Signed-off-by: Takashi Iwai --- sound/usb/endpoint.c | 1 + 1 file changed, 1 insertion(+) diff --git a/sound/usb/endpoint.c b/sound/usb/endpoint.c index 9bea7d3f99f8..11f23778f0a5 100644 --- a/sound/usb/endpoint.c +++ b/sound/usb/endpoint.c @@ -1093,6 +1093,7 @@ int snd_usb_endpoint_set_params(struct snd_usb_endpoint *ep, ep->freqn = get_usb_high_speed_rate(rate); ep->fps = 8000; } + ep->fps >>= ep->datainterval; ep->sample_rem = rate % ep->fps; ep->framesize[0] = rate / ep->fps; From b9fd2007c97413154e16bda01a6d5d5fc0c3bd44 Mon Sep 17 00:00:00 2001 From: Alexander Tsoy Date: Mon, 29 Jun 2020 05:59:34 +0300 Subject: [PATCH 086/343] ALSA: usb-audio: Replace s/frame/packet/ where appropriate Replace several occurences of "frame" with a "packet" where appropriate. Signed-off-by: Alexander Tsoy Link: https://lore.kernel.org/r/20200629025934.154288-2-alexander@tsoy.me Signed-off-by: Takashi Iwai --- sound/usb/card.h | 6 +++--- sound/usb/endpoint.c | 19 +++++++++---------- 2 files changed, 12 insertions(+), 13 deletions(-) diff --git a/sound/usb/card.h b/sound/usb/card.h index d6219fba9699..de43267b9c8a 100644 --- a/sound/usb/card.h +++ b/sound/usb/card.h @@ -84,10 +84,10 @@ struct snd_usb_endpoint { dma_addr_t sync_dma; /* DMA address of syncbuf */ unsigned int pipe; /* the data i/o pipe */ - unsigned int framesize[2]; /* small/large frame sizes in samples */ - unsigned int sample_rem; /* remainder from division fs/fps */ + unsigned int packsize[2]; /* small/large packet sizes in samples */ + unsigned int sample_rem; /* remainder from division fs/pps */ unsigned int sample_accum; /* sample accumulator */ - unsigned int fps; /* frames per second */ + unsigned int pps; /* packets per second */ unsigned int freqn; /* nominal sampling rate in fs/fps in Q16.16 format */ unsigned int freqm; /* momentary sampling rate in fs/fps in Q16.16 format */ int freqshift; /* how much to shift the feedback value to get Q16.16 */ diff --git a/sound/usb/endpoint.c b/sound/usb/endpoint.c index 11f23778f0a5..88760268fb55 100644 --- a/sound/usb/endpoint.c +++ b/sound/usb/endpoint.c @@ -159,11 +159,11 @@ int snd_usb_endpoint_next_packet_size(struct snd_usb_endpoint *ep) return ep->maxframesize; ep->sample_accum += ep->sample_rem; - if (ep->sample_accum >= ep->fps) { - ep->sample_accum -= ep->fps; - ret = ep->framesize[1]; + if (ep->sample_accum >= ep->pps) { + ep->sample_accum -= ep->pps; + ret = ep->packsize[1]; } else { - ret = ep->framesize[0]; + ret = ep->packsize[0]; } return ret; @@ -1088,16 +1088,15 @@ int snd_usb_endpoint_set_params(struct snd_usb_endpoint *ep, if (snd_usb_get_speed(ep->chip->dev) == USB_SPEED_FULL) { ep->freqn = get_usb_full_speed_rate(rate); - ep->fps = 1000; + ep->pps = 1000 >> ep->datainterval; } else { ep->freqn = get_usb_high_speed_rate(rate); - ep->fps = 8000; + ep->pps = 8000 >> ep->datainterval; } - ep->fps >>= ep->datainterval; - ep->sample_rem = rate % ep->fps; - ep->framesize[0] = rate / ep->fps; - ep->framesize[1] = (rate + (ep->fps - 1)) / ep->fps; + ep->sample_rem = rate % ep->pps; + ep->packsize[0] = rate / ep->pps; + ep->packsize[1] = (rate + (ep->pps - 1)) / ep->pps; /* calculate the frequency in 16.16 format */ ep->freqm = ep->freqn; From 2576f87066dc08a11cb1c05f11d1eaa02148ef9e Mon Sep 17 00:00:00 2001 From: Jakub Sitnicki Date: Tue, 30 Jun 2020 18:45:41 +0200 Subject: [PATCH 087/343] bpf, netns: Fix use-after-free in pernet pre_exit callback Iterating over BPF links attached to network namespace in pre_exit hook is not safe, even if there is just one. Once link gets auto-detached, that is its back-pointer to net object is set to NULL, the link can be released and freed without waiting on netns_bpf_mutex, effectively causing the list element we are operating on to be freed. This leads to use-after-free when trying to access the next element on the list, as reported by KASAN. Bug can be triggered by destroying a network namespace, while also releasing a link attached to this network namespace. | ================================================================== | BUG: KASAN: use-after-free in netns_bpf_pernet_pre_exit+0xd9/0x130 | Read of size 8 at addr ffff888119e0d778 by task kworker/u8:2/177 | | CPU: 3 PID: 177 Comm: kworker/u8:2 Not tainted 5.8.0-rc1-00197-ga0c04c9d1008-dirty #776 | Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS ?-20190727_073836-buildvm-ppc64le-16.ppc.fedoraproject.org-3.fc31 04/01/2014 | Workqueue: netns cleanup_net | Call Trace: | dump_stack+0x9e/0xe0 | print_address_description.constprop.0+0x3a/0x60 | ? netns_bpf_pernet_pre_exit+0xd9/0x130 | kasan_report.cold+0x1f/0x40 | ? netns_bpf_pernet_pre_exit+0xd9/0x130 | netns_bpf_pernet_pre_exit+0xd9/0x130 | cleanup_net+0x30b/0x5b0 | ? unregister_pernet_device+0x50/0x50 | ? rcu_read_lock_bh_held+0xb0/0xb0 | ? _raw_spin_unlock_irq+0x24/0x50 | process_one_work+0x4d1/0xa10 | ? lock_release+0x3e0/0x3e0 | ? pwq_dec_nr_in_flight+0x110/0x110 | ? rwlock_bug.part.0+0x60/0x60 | worker_thread+0x7a/0x5c0 | ? process_one_work+0xa10/0xa10 | kthread+0x1e3/0x240 | ? kthread_create_on_node+0xd0/0xd0 | ret_from_fork+0x1f/0x30 | | Allocated by task 280: | save_stack+0x1b/0x40 | __kasan_kmalloc.constprop.0+0xc2/0xd0 | netns_bpf_link_create+0xfe/0x650 | __do_sys_bpf+0x153a/0x2a50 | do_syscall_64+0x59/0x300 | entry_SYSCALL_64_after_hwframe+0x44/0xa9 | | Freed by task 198: | save_stack+0x1b/0x40 | __kasan_slab_free+0x12f/0x180 | kfree+0xed/0x350 | process_one_work+0x4d1/0xa10 | worker_thread+0x7a/0x5c0 | kthread+0x1e3/0x240 | ret_from_fork+0x1f/0x30 | | The buggy address belongs to the object at ffff888119e0d700 | which belongs to the cache kmalloc-192 of size 192 | The buggy address is located 120 bytes inside of | 192-byte region [ffff888119e0d700, ffff888119e0d7c0) | The buggy address belongs to the page: | page:ffffea0004678340 refcount:1 mapcount:0 mapping:0000000000000000 index:0x0 | flags: 0x2fffe0000000200(slab) | raw: 02fffe0000000200 ffffea00045ba8c0 0000000600000006 ffff88811a80ea80 | raw: 0000000000000000 0000000000100010 00000001ffffffff 0000000000000000 | page dumped because: kasan: bad access detected | | Memory state around the buggy address: | ffff888119e0d600: fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb | ffff888119e0d680: fb fb fb fb fb fb fb fb fc fc fc fc fc fc fc fc | >ffff888119e0d700: fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb | ^ | ffff888119e0d780: fb fb fb fb fb fb fb fb fc fc fc fc fc fc fc fc | ffff888119e0d800: fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb | ================================================================== Remove the "fast-path" for releasing a link that got auto-detached by a dying network namespace to fix it. This way as long as link is on the list and netns_bpf mutex is held, we have a guarantee that link memory can be accessed. An alternative way to fix this issue would be to safely iterate over the list of links and ensure there is no access to link object after detaching it. But, at the moment, optimizing synchronization overhead on link release without a workload in mind seems like an overkill. Fixes: ab53cad90eb1 ("bpf, netns: Keep a list of attached bpf_link's") Signed-off-by: Jakub Sitnicki Signed-off-by: Alexei Starovoitov Acked-by: Yonghong Song Link: https://lore.kernel.org/bpf/20200630164541.1329993-1-jakub@cloudflare.com --- kernel/bpf/net_namespace.c | 10 +++------- 1 file changed, 3 insertions(+), 7 deletions(-) diff --git a/kernel/bpf/net_namespace.c b/kernel/bpf/net_namespace.c index 3dbc29b6f51d..310241ca7991 100644 --- a/kernel/bpf/net_namespace.c +++ b/kernel/bpf/net_namespace.c @@ -43,15 +43,11 @@ static void bpf_netns_link_release(struct bpf_link *link) enum netns_bpf_attach_type type = net_link->netns_type; struct net *net; - /* Link auto-detached by dying netns. */ - if (!net_link->net) - return; - mutex_lock(&netns_bpf_mutex); - /* Recheck after potential sleep. We can race with cleanup_net - * here, but if we see a non-NULL struct net pointer pre_exit - * has not happened yet and will block on netns_bpf_mutex. + /* We can race with cleanup_net, but if we see a non-NULL + * struct net pointer, pre_exit has not run yet and wait for + * netns_bpf_mutex. */ net = net_link->net; if (!net) From 2606aff916854b61234bf85001be9777bab2d5f8 Mon Sep 17 00:00:00 2001 From: "Jason A. Donenfeld" Date: Mon, 29 Jun 2020 19:06:18 -0600 Subject: [PATCH 088/343] net: ip_tunnel: add header_ops for layer 3 devices Some devices that take straight up layer 3 packets benefit from having a shared header_ops so that AF_PACKET sockets can inject packets that are recognized. This shared infrastructure will be used by other drivers that currently can't inject packets using AF_PACKET. It also exposes the parser function, as it is useful in standalone form too. Signed-off-by: Jason A. Donenfeld Acked-by: Willem de Bruijn Signed-off-by: David S. Miller --- include/net/ip_tunnels.h | 3 +++ net/ipv4/ip_tunnel_core.c | 18 ++++++++++++++++++ 2 files changed, 21 insertions(+) diff --git a/include/net/ip_tunnels.h b/include/net/ip_tunnels.h index 076e5d7db7d3..36025dea7612 100644 --- a/include/net/ip_tunnels.h +++ b/include/net/ip_tunnels.h @@ -290,6 +290,9 @@ int ip_tunnel_newlink(struct net_device *dev, struct nlattr *tb[], struct ip_tunnel_parm *p, __u32 fwmark); void ip_tunnel_setup(struct net_device *dev, unsigned int net_id); +extern const struct header_ops ip_tunnel_header_ops; +__be16 ip_tunnel_parse_protocol(const struct sk_buff *skb); + struct ip_tunnel_encap_ops { size_t (*encap_hlen)(struct ip_tunnel_encap *e); int (*build_header)(struct sk_buff *skb, struct ip_tunnel_encap *e, diff --git a/net/ipv4/ip_tunnel_core.c b/net/ipv4/ip_tunnel_core.c index 181b7a2a0247..f8b419e2475c 100644 --- a/net/ipv4/ip_tunnel_core.c +++ b/net/ipv4/ip_tunnel_core.c @@ -844,3 +844,21 @@ void ip_tunnel_unneed_metadata(void) static_branch_dec(&ip_tunnel_metadata_cnt); } EXPORT_SYMBOL_GPL(ip_tunnel_unneed_metadata); + +/* Returns either the correct skb->protocol value, or 0 if invalid. */ +__be16 ip_tunnel_parse_protocol(const struct sk_buff *skb) +{ + if (skb_network_header(skb) >= skb->head && + (skb_network_header(skb) + sizeof(struct iphdr)) <= skb_tail_pointer(skb) && + ip_hdr(skb)->version == 4) + return htons(ETH_P_IP); + if (skb_network_header(skb) >= skb->head && + (skb_network_header(skb) + sizeof(struct ipv6hdr)) <= skb_tail_pointer(skb) && + ipv6_hdr(skb)->version == 6) + return htons(ETH_P_IPV6); + return 0; +} +EXPORT_SYMBOL(ip_tunnel_parse_protocol); + +const struct header_ops ip_tunnel_header_ops = { .parse_protocol = ip_tunnel_parse_protocol }; +EXPORT_SYMBOL(ip_tunnel_header_ops); From e53ac93220e002fdf26b2874af6a74f393cd3872 Mon Sep 17 00:00:00 2001 From: "Jason A. Donenfeld" Date: Mon, 29 Jun 2020 19:06:19 -0600 Subject: [PATCH 089/343] net: ipip: implement header_ops->parse_protocol for AF_PACKET Ipip uses skb->protocol to determine packet type, and bails out if it's not set. For AF_PACKET injection, we need to support its call chain of: packet_sendmsg -> packet_snd -> packet_parse_headers -> dev_parse_header_protocol -> parse_protocol Without a valid parse_protocol, this returns zero, and ipip rejects the skb. So, this wires up the ip_tunnel handler for layer 3 packets for that case. Signed-off-by: Jason A. Donenfeld Acked-by: Willem de Bruijn Signed-off-by: David S. Miller --- net/ipv4/ipip.c | 1 + net/ipv6/ip6_tunnel.c | 1 + 2 files changed, 2 insertions(+) diff --git a/net/ipv4/ipip.c b/net/ipv4/ipip.c index 40fea52c8277..75d35e76bec2 100644 --- a/net/ipv4/ipip.c +++ b/net/ipv4/ipip.c @@ -361,6 +361,7 @@ static const struct net_device_ops ipip_netdev_ops = { static void ipip_tunnel_setup(struct net_device *dev) { dev->netdev_ops = &ipip_netdev_ops; + dev->header_ops = &ip_tunnel_header_ops; dev->type = ARPHRD_TUNNEL; dev->flags = IFF_NOARP; diff --git a/net/ipv6/ip6_tunnel.c b/net/ipv6/ip6_tunnel.c index 821d96c720b9..a18c378ca5f4 100644 --- a/net/ipv6/ip6_tunnel.c +++ b/net/ipv6/ip6_tunnel.c @@ -1846,6 +1846,7 @@ static const struct net_device_ops ip6_tnl_netdev_ops = { static void ip6_tnl_dev_setup(struct net_device *dev) { dev->netdev_ops = &ip6_tnl_netdev_ops; + dev->header_ops = &ip_tunnel_header_ops; dev->needs_free_netdev = true; dev->priv_destructor = ip6_dev_free; From 01a4967c71c004f8ecad4ab57021348636502fa9 Mon Sep 17 00:00:00 2001 From: "Jason A. Donenfeld" Date: Mon, 29 Jun 2020 19:06:20 -0600 Subject: [PATCH 090/343] wireguard: implement header_ops->parse_protocol for AF_PACKET WireGuard uses skb->protocol to determine packet type, and bails out if it's not set or set to something it's not expecting. For AF_PACKET injection, we need to support its call chain of: packet_sendmsg -> packet_snd -> packet_parse_headers -> dev_parse_header_protocol -> parse_protocol Without a valid parse_protocol, this returns zero, and wireguard then rejects the skb. So, this wires up the ip_tunnel handler for layer 3 packets for that case. Reported-by: Hans Wippel Signed-off-by: Jason A. Donenfeld Signed-off-by: David S. Miller --- drivers/net/wireguard/device.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/net/wireguard/device.c b/drivers/net/wireguard/device.c index a8f151b1b5fa..c9f65e96ccb0 100644 --- a/drivers/net/wireguard/device.c +++ b/drivers/net/wireguard/device.c @@ -262,6 +262,7 @@ static void wg_setup(struct net_device *dev) max(sizeof(struct ipv6hdr), sizeof(struct iphdr)); dev->netdev_ops = &netdev_ops; + dev->header_ops = &ip_tunnel_header_ops; dev->hard_header_len = 0; dev->addr_len = 0; dev->needed_headroom = DATA_PACKET_HEAD_ROOM; From 1a574074ae7d1d745c16f7710655f38a53174c27 Mon Sep 17 00:00:00 2001 From: "Jason A. Donenfeld" Date: Mon, 29 Jun 2020 19:06:21 -0600 Subject: [PATCH 091/343] wireguard: queueing: make use of ip_tunnel_parse_protocol Now that wg_examine_packet_protocol has been added for general consumption as ip_tunnel_parse_protocol, it's possible to remove wg_examine_packet_protocol and simply use the new ip_tunnel_parse_protocol function directly. Signed-off-by: Jason A. Donenfeld Signed-off-by: David S. Miller --- drivers/net/wireguard/queueing.h | 19 ++----------------- drivers/net/wireguard/receive.c | 2 +- 2 files changed, 3 insertions(+), 18 deletions(-) diff --git a/drivers/net/wireguard/queueing.h b/drivers/net/wireguard/queueing.h index c58df439dbbe..dfb674e03076 100644 --- a/drivers/net/wireguard/queueing.h +++ b/drivers/net/wireguard/queueing.h @@ -11,6 +11,7 @@ #include #include #include +#include struct wg_device; struct wg_peer; @@ -65,25 +66,9 @@ struct packet_cb { #define PACKET_CB(skb) ((struct packet_cb *)((skb)->cb)) #define PACKET_PEER(skb) (PACKET_CB(skb)->keypair->entry.peer) -/* Returns either the correct skb->protocol value, or 0 if invalid. */ -static inline __be16 wg_examine_packet_protocol(struct sk_buff *skb) -{ - if (skb_network_header(skb) >= skb->head && - (skb_network_header(skb) + sizeof(struct iphdr)) <= - skb_tail_pointer(skb) && - ip_hdr(skb)->version == 4) - return htons(ETH_P_IP); - if (skb_network_header(skb) >= skb->head && - (skb_network_header(skb) + sizeof(struct ipv6hdr)) <= - skb_tail_pointer(skb) && - ipv6_hdr(skb)->version == 6) - return htons(ETH_P_IPV6); - return 0; -} - static inline bool wg_check_packet_protocol(struct sk_buff *skb) { - __be16 real_protocol = wg_examine_packet_protocol(skb); + __be16 real_protocol = ip_tunnel_parse_protocol(skb); return real_protocol && skb->protocol == real_protocol; } diff --git a/drivers/net/wireguard/receive.c b/drivers/net/wireguard/receive.c index 9b2ab6fc91cd..2c9551ea6dc7 100644 --- a/drivers/net/wireguard/receive.c +++ b/drivers/net/wireguard/receive.c @@ -387,7 +387,7 @@ static void wg_packet_consume_data_done(struct wg_peer *peer, */ skb->ip_summed = CHECKSUM_UNNECESSARY; skb->csum_level = ~0; /* All levels */ - skb->protocol = wg_examine_packet_protocol(skb); + skb->protocol = ip_tunnel_parse_protocol(skb); if (skb->protocol == htons(ETH_P_IP)) { len = ntohs(ip_hdr(skb)->tot_len); if (unlikely(len < sizeof(struct iphdr))) From b9815eb1d13f0dc088ee8afb6e6d0683ea551098 Mon Sep 17 00:00:00 2001 From: "Jason A. Donenfeld" Date: Mon, 29 Jun 2020 19:06:22 -0600 Subject: [PATCH 092/343] tun: implement header_ops->parse_protocol for AF_PACKET The tun driver passes up skb->protocol to userspace in the form of PI headers. For AF_PACKET injection, we need to support its call chain of: packet_sendmsg -> packet_snd -> packet_parse_headers -> dev_parse_header_protocol -> parse_protocol Without a valid parse_protocol, this returns zero, and the tun driver then gives userspace bogus values that it can't deal with. Note that this isn't the case with tap, because tap already benefits from the shared infrastructure for ethernet headers. But with tun, there's nothing. Signed-off-by: Jason A. Donenfeld Signed-off-by: David S. Miller --- drivers/net/tun.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/net/tun.c b/drivers/net/tun.c index 858b012074bd..7adeb91bd368 100644 --- a/drivers/net/tun.c +++ b/drivers/net/tun.c @@ -62,6 +62,7 @@ #include #include #include +#include #include #include #include @@ -1351,6 +1352,7 @@ static void tun_net_init(struct net_device *dev) switch (tun->flags & TUN_TYPE_MASK) { case IFF_TUN: dev->netdev_ops = &tun_netdev_ops; + dev->header_ops = &ip_tunnel_header_ops; /* Point-to-Point TUN Device */ dev->hard_header_len = 0; From ab59d2b6982b69a9728296ee3a1f330a72c0383e Mon Sep 17 00:00:00 2001 From: "Jason A. Donenfeld" Date: Mon, 29 Jun 2020 19:06:23 -0600 Subject: [PATCH 093/343] net: vti: implement header_ops->parse_protocol for AF_PACKET Vti uses skb->protocol to determine packet type, and bails out if it's not set. For AF_PACKET injection, we need to support its call chain of: packet_sendmsg -> packet_snd -> packet_parse_headers -> dev_parse_header_protocol -> parse_protocol Without a valid parse_protocol, this returns zero, and vti rejects the skb. So, this wires up the ip_tunnel handler for layer 3 packets for that case. Signed-off-by: Jason A. Donenfeld Signed-off-by: David S. Miller --- net/ipv4/ip_vti.c | 1 + net/ipv6/ip6_vti.c | 1 + 2 files changed, 2 insertions(+) diff --git a/net/ipv4/ip_vti.c b/net/ipv4/ip_vti.c index 1d9c8cff5ac3..460ca1099e8a 100644 --- a/net/ipv4/ip_vti.c +++ b/net/ipv4/ip_vti.c @@ -441,6 +441,7 @@ static const struct net_device_ops vti_netdev_ops = { static void vti_tunnel_setup(struct net_device *dev) { dev->netdev_ops = &vti_netdev_ops; + dev->header_ops = &ip_tunnel_header_ops; dev->type = ARPHRD_TUNNEL; ip_tunnel_setup(dev, vti_net_id); } diff --git a/net/ipv6/ip6_vti.c b/net/ipv6/ip6_vti.c index 1147f647b9a0..0d964160a9dd 100644 --- a/net/ipv6/ip6_vti.c +++ b/net/ipv6/ip6_vti.c @@ -905,6 +905,7 @@ static const struct net_device_ops vti6_netdev_ops = { static void vti6_dev_setup(struct net_device *dev) { dev->netdev_ops = &vti6_netdev_ops; + dev->header_ops = &ip_tunnel_header_ops; dev->needs_free_netdev = true; dev->priv_destructor = vti6_dev_free; From 75ea1f4773c09730bf8a364a367f6e7211484e12 Mon Sep 17 00:00:00 2001 From: "Jason A. Donenfeld" Date: Mon, 29 Jun 2020 19:06:24 -0600 Subject: [PATCH 094/343] net: sit: implement header_ops->parse_protocol for AF_PACKET Sit uses skb->protocol to determine packet type, and bails out if it's not set. For AF_PACKET injection, we need to support its call chain of: packet_sendmsg -> packet_snd -> packet_parse_headers -> dev_parse_header_protocol -> parse_protocol Without a valid parse_protocol, this returns zero, and sit rejects the skb. So, this wires up the ip_tunnel handler for layer 3 packets for that case. Reported-by: Willem de Bruijn Signed-off-by: Jason A. Donenfeld Signed-off-by: David S. Miller --- net/ipv6/sit.c | 1 + 1 file changed, 1 insertion(+) diff --git a/net/ipv6/sit.c b/net/ipv6/sit.c index 1fbb4dfbb191..5e2c34c0ac97 100644 --- a/net/ipv6/sit.c +++ b/net/ipv6/sit.c @@ -1421,6 +1421,7 @@ static void ipip6_tunnel_setup(struct net_device *dev) int t_hlen = tunnel->hlen + sizeof(struct iphdr); dev->netdev_ops = &ipip6_netdev_ops; + dev->header_ops = &ip_tunnel_header_ops; dev->needs_free_netdev = true; dev->priv_destructor = ipip6_dev_free; From 8f9a1fa4308363944ba94a961f69646c4b0ff26b Mon Sep 17 00:00:00 2001 From: "Jason A. Donenfeld" Date: Mon, 29 Jun 2020 19:06:25 -0600 Subject: [PATCH 095/343] net: xfrmi: implement header_ops->parse_protocol for AF_PACKET The xfrm interface uses skb->protocol to determine packet type, and bails out if it's not set. For AF_PACKET injection, we need to support its call chain of: packet_sendmsg -> packet_snd -> packet_parse_headers -> dev_parse_header_protocol -> parse_protocol Without a valid parse_protocol, this returns zero, and xfrmi rejects the skb. So, this wires up the ip_tunnel handler for layer 3 packets for that case. Reported-by: Willem de Bruijn Signed-off-by: Jason A. Donenfeld Signed-off-by: David S. Miller --- net/xfrm/xfrm_interface.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/net/xfrm/xfrm_interface.c b/net/xfrm/xfrm_interface.c index c407ecbc5d46..b615729812e5 100644 --- a/net/xfrm/xfrm_interface.c +++ b/net/xfrm/xfrm_interface.c @@ -37,6 +37,7 @@ #include #include #include +#include #include #include #include @@ -581,6 +582,7 @@ static const struct net_device_ops xfrmi_netdev_ops = { static void xfrmi_dev_setup(struct net_device *dev) { dev->netdev_ops = &xfrmi_netdev_ops; + dev->header_ops = &ip_tunnel_header_ops; dev->type = ARPHRD_NONE; dev->mtu = ETH_DATA_LEN; dev->min_mtu = ETH_MIN_MTU; From f2ca673d2cd5df9a76247b670e9ffd4d63682b3f Mon Sep 17 00:00:00 2001 From: Russell King Date: Tue, 30 Jun 2020 11:04:40 +0100 Subject: [PATCH 096/343] net: mvneta: fix use of state->speed When support for short preambles was added, it incorrectly keyed its decision off state->speed instead of state->interface. state->speed is not guaranteed to be correct for in-band modes, which can lead to short preambles being unexpectedly disabled. Fix this by keying off the interface mode, which is the only way that mvneta can operate at 2.5Gbps. Fixes: da58a931f248 ("net: mvneta: Add support for 2500Mbps SGMII") Signed-off-by: Russell King Signed-off-by: David S. Miller --- drivers/net/ethernet/marvell/mvneta.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/ethernet/marvell/mvneta.c b/drivers/net/ethernet/marvell/mvneta.c index c639e3a29302..7d5d9d34f4e4 100644 --- a/drivers/net/ethernet/marvell/mvneta.c +++ b/drivers/net/ethernet/marvell/mvneta.c @@ -3959,7 +3959,7 @@ static void mvneta_mac_config(struct phylink_config *config, unsigned int mode, /* When at 2.5G, the link partner can send frames with shortened * preambles. */ - if (state->speed == SPEED_2500) + if (state->interface == PHY_INTERFACE_MODE_2500BASEX) new_ctrl4 |= MVNETA_GMAC4_SHORT_PREAMBLE_ENABLE; if (pp->phy_interface != state->interface) { From 5468cbcddf47f674829c6ada190283108a63d7b5 Mon Sep 17 00:00:00 2001 From: Alex Elder Date: Tue, 30 Jun 2020 07:44:42 -0500 Subject: [PATCH 097/343] net: ipa: always check for stopped channel In gsi_channel_stop(), there's a check to see if the channel might have entered STOPPED state since a previous call, which might have timed out before stopping completed. That check actually belongs in gsi_channel_stop_command(), which is called repeatedly by gsi_channel_stop() for RX channels. Fixes: 650d1603825d ("soc: qcom: ipa: the generic software interface") Signed-off-by: Alex Elder Signed-off-by: David S. Miller --- drivers/net/ipa/gsi.c | 16 +++++++--------- 1 file changed, 7 insertions(+), 9 deletions(-) diff --git a/drivers/net/ipa/gsi.c b/drivers/net/ipa/gsi.c index 55226b264e3c..ac7e5a04c8ac 100644 --- a/drivers/net/ipa/gsi.c +++ b/drivers/net/ipa/gsi.c @@ -500,6 +500,13 @@ static int gsi_channel_stop_command(struct gsi_channel *channel) int ret; state = gsi_channel_state(channel); + + /* Channel could have entered STOPPED state since last call + * if it timed out. If so, we're done. + */ + if (state == GSI_CHANNEL_STATE_STOPPED) + return 0; + if (state != GSI_CHANNEL_STATE_STARTED && state != GSI_CHANNEL_STATE_STOP_IN_PROC) return -EINVAL; @@ -789,20 +796,11 @@ int gsi_channel_start(struct gsi *gsi, u32 channel_id) int gsi_channel_stop(struct gsi *gsi, u32 channel_id) { struct gsi_channel *channel = &gsi->channel[channel_id]; - enum gsi_channel_state state; u32 retries; int ret; gsi_channel_freeze(channel); - /* Channel could have entered STOPPED state since last call if the - * STOP command timed out. We won't stop a channel if stopping it - * was successful previously (so we still want the freeze above). - */ - state = gsi_channel_state(channel); - if (state == GSI_CHANNEL_STATE_STOPPED) - return 0; - /* RX channels might require a little time to enter STOPPED state */ retries = channel->toward_ipa ? 0 : GSI_CHANNEL_STOP_RX_RETRIES; From 41af5436e857ec64f302fcc9b6e4a8c526b6b402 Mon Sep 17 00:00:00 2001 From: Alex Elder Date: Tue, 30 Jun 2020 07:44:43 -0500 Subject: [PATCH 098/343] net: ipa: no checksum offload for SDM845 LAN RX The AP LAN RX endpoint should not have download checksum offload enabled. The receive handler does properly accommodate the trailer that's added by the hardware, but we ignore it. Fixes: 1ed7d0c0fdba ("soc: qcom: ipa: configuration data") Signed-off-by: Alex Elder Signed-off-by: David S. Miller --- drivers/net/ipa/ipa_data-sdm845.c | 1 - 1 file changed, 1 deletion(-) diff --git a/drivers/net/ipa/ipa_data-sdm845.c b/drivers/net/ipa/ipa_data-sdm845.c index 52d4b84e0dac..de2768d71ab5 100644 --- a/drivers/net/ipa/ipa_data-sdm845.c +++ b/drivers/net/ipa/ipa_data-sdm845.c @@ -44,7 +44,6 @@ static const struct ipa_gsi_endpoint_data ipa_gsi_endpoint_data[] = { .endpoint = { .seq_type = IPA_SEQ_INVALID, .config = { - .checksum = true, .aggregation = true, .status_enable = true, .rx = { From 6cb63ea6a39eac9640d109f274a237b34350c183 Mon Sep 17 00:00:00 2001 From: Alex Elder Date: Tue, 30 Jun 2020 07:44:44 -0500 Subject: [PATCH 099/343] net: ipa: introduce ipa_cmd_tag_process() Create a new function ipa_cmd_tag_process() that simply allocates a transaction, adds a tag process command to it to clear the hardware pipeline, and commits the transaction. Call it in from ipa_endpoint_suspend(), after suspending the modem endpoints but before suspending the AP command TX and AP LAN RX endpoints (which are used by the tag sequence). Signed-off-by: Alex Elder Signed-off-by: David S. Miller --- drivers/net/ipa/ipa_cmd.c | 15 +++++++++++++++ drivers/net/ipa/ipa_cmd.h | 8 ++++++++ drivers/net/ipa/ipa_endpoint.c | 2 ++ 3 files changed, 25 insertions(+) diff --git a/drivers/net/ipa/ipa_cmd.c b/drivers/net/ipa/ipa_cmd.c index c9ab865e7290..d92dd3f09b73 100644 --- a/drivers/net/ipa/ipa_cmd.c +++ b/drivers/net/ipa/ipa_cmd.c @@ -586,6 +586,21 @@ u32 ipa_cmd_tag_process_count(void) return 4; } +void ipa_cmd_tag_process(struct ipa *ipa) +{ + u32 count = ipa_cmd_tag_process_count(); + struct gsi_trans *trans; + + trans = ipa_cmd_trans_alloc(ipa, count); + if (trans) { + ipa_cmd_tag_process_add(trans); + gsi_trans_commit_wait(trans); + } else { + dev_err(&ipa->pdev->dev, + "error allocating %u entry tag transaction\n", count); + } +} + static struct ipa_cmd_info * ipa_cmd_info_alloc(struct ipa_endpoint *endpoint, u32 tre_count) { diff --git a/drivers/net/ipa/ipa_cmd.h b/drivers/net/ipa/ipa_cmd.h index e440aa69c8b5..1a646e0264a0 100644 --- a/drivers/net/ipa/ipa_cmd.h +++ b/drivers/net/ipa/ipa_cmd.h @@ -171,6 +171,14 @@ void ipa_cmd_tag_process_add(struct gsi_trans *trans); */ u32 ipa_cmd_tag_process_count(void); +/** + * ipa_cmd_tag_process() - Perform a tag process + * + * @Return: The number of elements to allocate in a transaction + * to hold tag process commands + */ +void ipa_cmd_tag_process(struct ipa *ipa); + /** * ipa_cmd_trans_alloc() - Allocate a transaction for the command TX endpoint * @ipa: IPA pointer diff --git a/drivers/net/ipa/ipa_endpoint.c b/drivers/net/ipa/ipa_endpoint.c index 9f50d0d11704..9e58e495d373 100644 --- a/drivers/net/ipa/ipa_endpoint.c +++ b/drivers/net/ipa/ipa_endpoint.c @@ -1450,6 +1450,8 @@ void ipa_endpoint_suspend(struct ipa *ipa) if (ipa->modem_netdev) ipa_modem_suspend(ipa->modem_netdev); + ipa_cmd_tag_process(ipa); + ipa_endpoint_suspend_one(ipa->name_map[IPA_ENDPOINT_AP_LAN_RX]); ipa_endpoint_suspend_one(ipa->name_map[IPA_ENDPOINT_AP_COMMAND_TX]); } From 01c66c48d4f0825a202d4163800b706a1d2ec7ad Mon Sep 17 00:00:00 2001 From: Yonghong Song Date: Tue, 30 Jun 2020 10:12:40 -0700 Subject: [PATCH 100/343] bpf: Fix an incorrect branch elimination by verifier Wenbo reported an issue in [1] where a checking of null pointer is evaluated as always false. In this particular case, the program type is tp_btf and the pointer to compare is a PTR_TO_BTF_ID. The current verifier considers PTR_TO_BTF_ID always reprents a non-null pointer, hence all PTR_TO_BTF_ID compares to 0 will be evaluated as always not-equal, which resulted in the branch elimination. For example, struct bpf_fentry_test_t { struct bpf_fentry_test_t *a; }; int BPF_PROG(test7, struct bpf_fentry_test_t *arg) { if (arg == 0) test7_result = 1; return 0; } int BPF_PROG(test8, struct bpf_fentry_test_t *arg) { if (arg->a == 0) test8_result = 1; return 0; } In above bpf programs, both branch arg == 0 and arg->a == 0 are removed. This may not be what developer expected. The bug is introduced by Commit cac616db39c2 ("bpf: Verifier track null pointer branch_taken with JNE and JEQ"), where PTR_TO_BTF_ID is considered to be non-null when evaluting pointer vs. scalar comparison. This may be added considering we have PTR_TO_BTF_ID_OR_NULL in the verifier as well. PTR_TO_BTF_ID_OR_NULL is added to explicitly requires a non-NULL testing in selective cases. The current generic pointer tracing framework in verifier always assigns PTR_TO_BTF_ID so users does not need to check NULL pointer at every pointer level like a->b->c->d. We may not want to assign every PTR_TO_BTF_ID as PTR_TO_BTF_ID_OR_NULL as this will require a null test before pointer dereference which may cause inconvenience for developers. But we could avoid branch elimination to preserve original code intention. This patch simply removed PTR_TO_BTD_ID from reg_type_not_null() in verifier, which prevented the above branches from being eliminated. [1]: https://lore.kernel.org/bpf/79dbb7c0-449d-83eb-5f4f-7af0cc269168@fb.com/T/ Fixes: cac616db39c2 ("bpf: Verifier track null pointer branch_taken with JNE and JEQ") Reported-by: Wenbo Zhang Signed-off-by: Yonghong Song Signed-off-by: Daniel Borkmann Acked-by: John Fastabend Acked-by: Andrii Nakryiko Link: https://lore.kernel.org/bpf/20200630171240.2523722-1-yhs@fb.com --- kernel/bpf/verifier.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c index 8911d0576399..94cead5a43e5 100644 --- a/kernel/bpf/verifier.c +++ b/kernel/bpf/verifier.c @@ -399,8 +399,7 @@ static bool reg_type_not_null(enum bpf_reg_type type) return type == PTR_TO_SOCKET || type == PTR_TO_TCP_SOCK || type == PTR_TO_MAP_VALUE || - type == PTR_TO_SOCK_COMMON || - type == PTR_TO_BTF_ID; + type == PTR_TO_SOCK_COMMON; } static bool reg_type_may_be_null(enum bpf_reg_type type) From d923021c2ce12acb50dc7086a1bf66eed82adf6a Mon Sep 17 00:00:00 2001 From: Yonghong Song Date: Tue, 30 Jun 2020 10:12:41 -0700 Subject: [PATCH 101/343] bpf: Add tests for PTR_TO_BTF_ID vs. null comparison Add two tests for PTR_TO_BTF_ID vs. null ptr comparison, one for PTR_TO_BTF_ID in the ctx structure and the other for PTR_TO_BTF_ID after one level pointer chasing. In both cases, the test ensures condition is not removed. For example, for this test struct bpf_fentry_test_t { struct bpf_fentry_test_t *a; }; int BPF_PROG(test7, struct bpf_fentry_test_t *arg) { if (arg == 0) test7_result = 1; return 0; } Before the previous verifier change, we have xlated codes: int test7(long long unsigned int * ctx): ; int BPF_PROG(test7, struct bpf_fentry_test_t *arg) 0: (79) r1 = *(u64 *)(r1 +0) ; int BPF_PROG(test7, struct bpf_fentry_test_t *arg) 1: (b4) w0 = 0 2: (95) exit After the previous verifier change, we have: int test7(long long unsigned int * ctx): ; int BPF_PROG(test7, struct bpf_fentry_test_t *arg) 0: (79) r1 = *(u64 *)(r1 +0) ; if (arg == 0) 1: (55) if r1 != 0x0 goto pc+4 ; test7_result = 1; 2: (18) r1 = map[id:6][0]+48 4: (b7) r2 = 1 5: (7b) *(u64 *)(r1 +0) = r2 ; int BPF_PROG(test7, struct bpf_fentry_test_t *arg) 6: (b4) w0 = 0 7: (95) exit Signed-off-by: Yonghong Song Signed-off-by: Daniel Borkmann Acked-by: John Fastabend Acked-by: Andrii Nakryiko Link: https://lore.kernel.org/bpf/20200630171241.2523875-1-yhs@fb.com --- net/bpf/test_run.c | 19 +++++++++++++++- .../selftests/bpf/prog_tests/fentry_fexit.c | 2 +- .../testing/selftests/bpf/progs/fentry_test.c | 22 +++++++++++++++++++ .../testing/selftests/bpf/progs/fexit_test.c | 22 +++++++++++++++++++ 4 files changed, 63 insertions(+), 2 deletions(-) diff --git a/net/bpf/test_run.c b/net/bpf/test_run.c index bfd4ccd80847..b03c469cd01f 100644 --- a/net/bpf/test_run.c +++ b/net/bpf/test_run.c @@ -147,6 +147,20 @@ int noinline bpf_fentry_test6(u64 a, void *b, short c, int d, void *e, u64 f) return a + (long)b + c + d + (long)e + f; } +struct bpf_fentry_test_t { + struct bpf_fentry_test_t *a; +}; + +int noinline bpf_fentry_test7(struct bpf_fentry_test_t *arg) +{ + return (long)arg; +} + +int noinline bpf_fentry_test8(struct bpf_fentry_test_t *arg) +{ + return (long)arg->a; +} + int noinline bpf_modify_return_test(int a, int *b) { *b += 1; @@ -185,6 +199,7 @@ int bpf_prog_test_run_tracing(struct bpf_prog *prog, const union bpf_attr *kattr, union bpf_attr __user *uattr) { + struct bpf_fentry_test_t arg = {}; u16 side_effect = 0, ret = 0; int b = 2, err = -EFAULT; u32 retval = 0; @@ -197,7 +212,9 @@ int bpf_prog_test_run_tracing(struct bpf_prog *prog, bpf_fentry_test3(4, 5, 6) != 15 || bpf_fentry_test4((void *)7, 8, 9, 10) != 34 || bpf_fentry_test5(11, (void *)12, 13, 14, 15) != 65 || - bpf_fentry_test6(16, (void *)17, 18, 19, (void *)20, 21) != 111) + bpf_fentry_test6(16, (void *)17, 18, 19, (void *)20, 21) != 111 || + bpf_fentry_test7((struct bpf_fentry_test_t *)0) != 0 || + bpf_fentry_test8(&arg) != 0) goto out; break; case BPF_MODIFY_RETURN: diff --git a/tools/testing/selftests/bpf/prog_tests/fentry_fexit.c b/tools/testing/selftests/bpf/prog_tests/fentry_fexit.c index 83493bd5745c..109d0345a2be 100644 --- a/tools/testing/selftests/bpf/prog_tests/fentry_fexit.c +++ b/tools/testing/selftests/bpf/prog_tests/fentry_fexit.c @@ -36,7 +36,7 @@ void test_fentry_fexit(void) fentry_res = (__u64 *)fentry_skel->bss; fexit_res = (__u64 *)fexit_skel->bss; printf("%lld\n", fentry_skel->bss->test1_result); - for (i = 0; i < 6; i++) { + for (i = 0; i < 8; i++) { CHECK(fentry_res[i] != 1, "result", "fentry_test%d failed err %lld\n", i + 1, fentry_res[i]); CHECK(fexit_res[i] != 1, "result", diff --git a/tools/testing/selftests/bpf/progs/fentry_test.c b/tools/testing/selftests/bpf/progs/fentry_test.c index 9365b686f84b..5f645fdaba6f 100644 --- a/tools/testing/selftests/bpf/progs/fentry_test.c +++ b/tools/testing/selftests/bpf/progs/fentry_test.c @@ -55,3 +55,25 @@ int BPF_PROG(test6, __u64 a, void *b, short c, int d, void * e, __u64 f) e == (void *)20 && f == 21; return 0; } + +struct bpf_fentry_test_t { + struct bpf_fentry_test_t *a; +}; + +__u64 test7_result = 0; +SEC("fentry/bpf_fentry_test7") +int BPF_PROG(test7, struct bpf_fentry_test_t *arg) +{ + if (arg == 0) + test7_result = 1; + return 0; +} + +__u64 test8_result = 0; +SEC("fentry/bpf_fentry_test8") +int BPF_PROG(test8, struct bpf_fentry_test_t *arg) +{ + if (arg->a == 0) + test8_result = 1; + return 0; +} diff --git a/tools/testing/selftests/bpf/progs/fexit_test.c b/tools/testing/selftests/bpf/progs/fexit_test.c index bd1e17d8024c..0952affb22a6 100644 --- a/tools/testing/selftests/bpf/progs/fexit_test.c +++ b/tools/testing/selftests/bpf/progs/fexit_test.c @@ -56,3 +56,25 @@ int BPF_PROG(test6, __u64 a, void *b, short c, int d, void *e, __u64 f, int ret) e == (void *)20 && f == 21 && ret == 111; return 0; } + +struct bpf_fentry_test_t { + struct bpf_fentry_test *a; +}; + +__u64 test7_result = 0; +SEC("fexit/bpf_fentry_test7") +int BPF_PROG(test7, struct bpf_fentry_test_t *arg) +{ + if (arg == 0) + test7_result = 1; + return 0; +} + +__u64 test8_result = 0; +SEC("fexit/bpf_fentry_test8") +int BPF_PROG(test8, struct bpf_fentry_test_t *arg) +{ + if (arg->a == 0) + test8_result = 1; + return 0; +} From 8a259e6b73ad8181b0b2ef338b35043433db1075 Mon Sep 17 00:00:00 2001 From: Li Heng Date: Mon, 29 Jun 2020 18:49:51 +0800 Subject: [PATCH 102/343] net: cxgb4: fix return error value in t4_prep_fw t4_prep_fw goto bye tag with positive return value when something bad happened and which can not free resource in adap_init0. so fix it to return negative value. Fixes: 16e47624e76b ("cxgb4: Add new scheme to update T4/T5 firmware") Reported-by: Hulk Robot Signed-off-by: Li Heng Signed-off-by: David S. Miller --- drivers/net/ethernet/chelsio/cxgb4/t4_hw.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/drivers/net/ethernet/chelsio/cxgb4/t4_hw.c b/drivers/net/ethernet/chelsio/cxgb4/t4_hw.c index 1aa6dc10dc0b..ad522f822cc2 100644 --- a/drivers/net/ethernet/chelsio/cxgb4/t4_hw.c +++ b/drivers/net/ethernet/chelsio/cxgb4/t4_hw.c @@ -3493,7 +3493,7 @@ int t4_prep_fw(struct adapter *adap, struct fw_info *fw_info, drv_fw = &fw_info->fw_hdr; /* Read the header of the firmware on the card */ - ret = -t4_read_flash(adap, FLASH_FW_START, + ret = t4_read_flash(adap, FLASH_FW_START, sizeof(*card_fw) / sizeof(uint32_t), (uint32_t *)card_fw, 1); if (ret == 0) { @@ -3522,8 +3522,8 @@ int t4_prep_fw(struct adapter *adap, struct fw_info *fw_info, should_install_fs_fw(adap, card_fw_usable, be32_to_cpu(fs_fw->fw_ver), be32_to_cpu(card_fw->fw_ver))) { - ret = -t4_fw_upgrade(adap, adap->mbox, fw_data, - fw_size, 0); + ret = t4_fw_upgrade(adap, adap->mbox, fw_data, + fw_size, 0); if (ret != 0) { dev_err(adap->pdev_dev, "failed to install firmware: %d\n", ret); @@ -3554,7 +3554,7 @@ int t4_prep_fw(struct adapter *adap, struct fw_info *fw_info, FW_HDR_FW_VER_MICRO_G(c), FW_HDR_FW_VER_BUILD_G(c), FW_HDR_FW_VER_MAJOR_G(k), FW_HDR_FW_VER_MINOR_G(k), FW_HDR_FW_VER_MICRO_G(k), FW_HDR_FW_VER_BUILD_G(k)); - ret = EINVAL; + ret = -EINVAL; goto bye; } From 28541f3d324f6de1e545e2875283b6cef95c5d36 Mon Sep 17 00:00:00 2001 From: Carl Huang Date: Tue, 30 Jun 2020 14:52:51 +0800 Subject: [PATCH 103/343] net: qrtr: free flow in __qrtr_node_release The flow is allocated in qrtr_tx_wait, but not freed when qrtr node is released. (*slot) becomes NULL after radix_tree_iter_delete is called in __qrtr_node_release. The fix is to save (*slot) to a vairable and then free it. This memory leak is catched when kmemleak is enabled in kernel, the report looks like below: unreferenced object 0xffffa0de69e08420 (size 32): comm "kworker/u16:3", pid 176, jiffies 4294918275 (age 82858.876s) hex dump (first 32 bytes): 00 00 00 00 00 00 00 00 28 84 e0 69 de a0 ff ff ........(..i.... 28 84 e0 69 de a0 ff ff 03 00 00 00 00 00 00 00 (..i............ backtrace: [<00000000e252af0a>] qrtr_node_enqueue+0x38e/0x400 [qrtr] [<000000009cea437f>] qrtr_sendmsg+0x1e0/0x2a0 [qrtr] [<000000008bddbba4>] sock_sendmsg+0x5b/0x60 [<0000000003beb43a>] qmi_send_message.isra.3+0xbe/0x110 [qmi_helpers] [<000000009c9ae7de>] qmi_send_request+0x1c/0x20 [qmi_helpers] Signed-off-by: Carl Huang Signed-off-by: David S. Miller --- net/qrtr/qrtr.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/net/qrtr/qrtr.c b/net/qrtr/qrtr.c index 2d8d6131bc5f..059881330788 100644 --- a/net/qrtr/qrtr.c +++ b/net/qrtr/qrtr.c @@ -166,6 +166,7 @@ static void __qrtr_node_release(struct kref *kref) { struct qrtr_node *node = container_of(kref, struct qrtr_node, ref); struct radix_tree_iter iter; + struct qrtr_tx_flow *flow; unsigned long flags; void __rcu **slot; @@ -181,8 +182,9 @@ static void __qrtr_node_release(struct kref *kref) /* Free tx flow counters */ radix_tree_for_each_slot(slot, &node->qrtr_tx_flow, &iter, 0) { + flow = *slot; radix_tree_iter_delete(&node->qrtr_tx_flow, &iter, slot); - kfree(*slot); + kfree(flow); } kfree(node); } From 6a2febec338df7e7699a52d00b2e1207dcf65b28 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Tue, 30 Jun 2020 16:41:01 -0700 Subject: [PATCH 104/343] tcp: md5: add missing memory barriers in tcp_md5_do_add()/tcp_md5_hash_key() MD5 keys are read with RCU protection, and tcp_md5_do_add() might update in-place a prior key. Normally, typical RCU updates would allocate a new piece of memory. In this case only key->key and key->keylen might be updated, and we do not care if an incoming packet could see the old key, the new one, or some intermediate value, since changing the key on a live flow is known to be problematic anyway. We only want to make sure that in the case key->keylen is changed, cpus in tcp_md5_hash_key() wont try to use uninitialized data, or crash because key->keylen was read twice to feed sg_init_one() and ahash_request_set_crypt() Fixes: 9ea88a153001 ("tcp: md5: check md5 signature without socket lock") Signed-off-by: Eric Dumazet Cc: Mathieu Desnoyers Signed-off-by: David S. Miller --- net/ipv4/tcp.c | 7 +++++-- net/ipv4/tcp_ipv4.c | 3 +++ 2 files changed, 8 insertions(+), 2 deletions(-) diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c index 810cc164f795..f11166045324 100644 --- a/net/ipv4/tcp.c +++ b/net/ipv4/tcp.c @@ -4033,10 +4033,13 @@ EXPORT_SYMBOL(tcp_md5_hash_skb_data); int tcp_md5_hash_key(struct tcp_md5sig_pool *hp, const struct tcp_md5sig_key *key) { + u8 keylen = key->keylen; struct scatterlist sg; - sg_init_one(&sg, key->key, key->keylen); - ahash_request_set_crypt(hp->md5_req, &sg, NULL, key->keylen); + smp_rmb(); /* paired with smp_wmb() in tcp_md5_do_add() */ + + sg_init_one(&sg, key->key, keylen); + ahash_request_set_crypt(hp->md5_req, &sg, NULL, keylen); return crypto_ahash_update(hp->md5_req); } EXPORT_SYMBOL(tcp_md5_hash_key); diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c index ad6435ba6d72..99916fcc15ca 100644 --- a/net/ipv4/tcp_ipv4.c +++ b/net/ipv4/tcp_ipv4.c @@ -1113,6 +1113,9 @@ int tcp_md5_do_add(struct sock *sk, const union tcp_md5_addr *addr, if (key) { /* Pre-existing entry - just update that one. */ memcpy(key->key, newkey, newkeylen); + + smp_wmb(); /* pairs with smp_rmb() in tcp_md5_hash_key() */ + key->keylen = newkeylen; return 0; } From 8ff41cc21714704ef0158a546c3c4d07fae2c952 Mon Sep 17 00:00:00 2001 From: Dan Carpenter Date: Tue, 30 Jun 2020 14:46:15 +0300 Subject: [PATCH 105/343] net: qrtr: Fix an out of bounds read qrtr_endpoint_post() This code assumes that the user passed in enough data for a qrtr_hdr_v1 or qrtr_hdr_v2 struct, but it's not necessarily true. If the buffer is too small then it will read beyond the end. Reported-by: Manivannan Sadhasivam Reported-by: syzbot+b8fe393f999a291a9ea6@syzkaller.appspotmail.com Fixes: 194ccc88297a ("net: qrtr: Support decoding incoming v2 packets") Signed-off-by: Dan Carpenter Signed-off-by: David S. Miller --- net/qrtr/qrtr.c | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/net/qrtr/qrtr.c b/net/qrtr/qrtr.c index 059881330788..24a8c3c6da0d 100644 --- a/net/qrtr/qrtr.c +++ b/net/qrtr/qrtr.c @@ -429,7 +429,7 @@ int qrtr_endpoint_post(struct qrtr_endpoint *ep, const void *data, size_t len) unsigned int ver; size_t hdrlen; - if (len & 3) + if (len == 0 || len & 3) return -EINVAL; skb = netdev_alloc_skb(NULL, len); @@ -443,6 +443,8 @@ int qrtr_endpoint_post(struct qrtr_endpoint *ep, const void *data, size_t len) switch (ver) { case QRTR_PROTO_VER_1: + if (len < sizeof(*v1)) + goto err; v1 = data; hdrlen = sizeof(*v1); @@ -456,6 +458,8 @@ int qrtr_endpoint_post(struct qrtr_endpoint *ep, const void *data, size_t len) size = le32_to_cpu(v1->size); break; case QRTR_PROTO_VER_2: + if (len < sizeof(*v2)) + goto err; v2 = data; hdrlen = sizeof(*v2) + v2->optlen; From 5aa98879efe77d33d1639e006d4b0c1579cde9f6 Mon Sep 17 00:00:00 2001 From: Thomas Richter Date: Fri, 26 Jun 2020 11:24:34 +0200 Subject: [PATCH 106/343] s390/cpum_sf: prohibit callchain data collection CPU Measurement sampling facility on s390 does not support perf tool collection of callchain data using --call-graph option. The sampling facility collects samples in a ring buffer which includes only the instruction address the samples were taken. When the ring buffer hits a watermark, a measurement alert interrupt is triggered and handled by the performance measurement unit (PMU) device driver. It collects the samples and feeds each sample to the perf ring buffer in the common code via functions perf_prepare_sample()/perf_output_sample(). When function perf_prepare_sample() is called to collect sample data's callchain, user register values or stack area, invalid data is picked, because the context of the collected information does not match the context when the sample was taken. There is currently no way to provide the callchain and other information, because the hardware sampler does not collect this information. Therefore prohibit sampling when the user requests a callchain graph from the hardware sampler. Return -EOPNOTSUPP to the user in this case. If call chains are really wanted, users need to specify software event cpu-clock to get the callchain information from a software event. Signed-off-by: Thomas Richter Reviewed-by: Sumanth Korikkar Signed-off-by: Heiko Carstens --- arch/s390/kernel/perf_cpum_sf.c | 11 ++++++++++- 1 file changed, 10 insertions(+), 1 deletion(-) diff --git a/arch/s390/kernel/perf_cpum_sf.c b/arch/s390/kernel/perf_cpum_sf.c index 85a711d783eb..4f9e4626df55 100644 --- a/arch/s390/kernel/perf_cpum_sf.c +++ b/arch/s390/kernel/perf_cpum_sf.c @@ -881,12 +881,21 @@ static int __hw_perf_event_init(struct perf_event *event) return err; } +static bool is_callchain_event(struct perf_event *event) +{ + u64 sample_type = event->attr.sample_type; + + return sample_type & (PERF_SAMPLE_CALLCHAIN | PERF_SAMPLE_REGS_USER | + PERF_SAMPLE_STACK_USER); +} + static int cpumsf_pmu_event_init(struct perf_event *event) { int err; /* No support for taken branch sampling */ - if (has_branch_stack(event)) + /* No support for callchain, stacks and registers */ + if (has_branch_stack(event) || is_callchain_event(event)) return -EOPNOTSUPP; switch (event->attr.type) { From 9e9f85e029a2ee4167aacf3ff04e4288a5e5c74e Mon Sep 17 00:00:00 2001 From: Heiko Carstens Date: Wed, 1 Jul 2020 07:32:13 +0200 Subject: [PATCH 107/343] s390: update defconfigs Signed-off-by: Heiko Carstens --- arch/s390/configs/debug_defconfig | 44 ++++++++++++++++++++++------ arch/s390/configs/defconfig | 43 ++++++++++++++++++++------- arch/s390/configs/zfcpdump_defconfig | 5 ++++ 3 files changed, 73 insertions(+), 19 deletions(-) diff --git a/arch/s390/configs/debug_defconfig b/arch/s390/configs/debug_defconfig index 46038bc58c9e..0cf9a82326a8 100644 --- a/arch/s390/configs/debug_defconfig +++ b/arch/s390/configs/debug_defconfig @@ -1,5 +1,6 @@ CONFIG_SYSVIPC=y CONFIG_POSIX_MQUEUE=y +CONFIG_WATCH_QUEUE=y CONFIG_AUDIT=y CONFIG_NO_HZ_IDLE=y CONFIG_HIGH_RES_TIMERS=y @@ -14,7 +15,6 @@ CONFIG_IKCONFIG=y CONFIG_IKCONFIG_PROC=y CONFIG_NUMA_BALANCING=y CONFIG_MEMCG=y -CONFIG_MEMCG_SWAP=y CONFIG_BLK_CGROUP=y CONFIG_CFS_BANDWIDTH=y CONFIG_RT_GROUP_SCHED=y @@ -31,9 +31,9 @@ CONFIG_NAMESPACES=y CONFIG_USER_NS=y CONFIG_CHECKPOINT_RESTORE=y CONFIG_SCHED_AUTOGROUP=y -CONFIG_BLK_DEV_INITRD=y CONFIG_EXPERT=y # CONFIG_SYSFS_SYSCALL is not set +CONFIG_BPF_LSM=y CONFIG_BPF_SYSCALL=y CONFIG_USERFAULTFD=y # CONFIG_COMPAT_BRK is not set @@ -51,14 +51,11 @@ CONFIG_CHSC_SCH=y CONFIG_VFIO_CCW=m CONFIG_VFIO_AP=m CONFIG_CRASH_DUMP=y -CONFIG_HIBERNATION=y -CONFIG_PM_DEBUG=y CONFIG_PROTECTED_VIRTUALIZATION_GUEST=y CONFIG_CMM=m CONFIG_APPLDATA_BASE=y CONFIG_KVM=m -CONFIG_VHOST_NET=m -CONFIG_VHOST_VSOCK=m +CONFIG_S390_UNWIND_SELFTEST=y CONFIG_OPROFILE=m CONFIG_KPROBES=y CONFIG_JUMP_LABEL=y @@ -77,6 +74,8 @@ CONFIG_BLK_DEV_THROTTLING=y CONFIG_BLK_WBT=y CONFIG_BLK_CGROUP_IOLATENCY=y CONFIG_BLK_CGROUP_IOCOST=y +CONFIG_BLK_INLINE_ENCRYPTION=y +CONFIG_BLK_INLINE_ENCRYPTION_FALLBACK=y CONFIG_PARTITION_ADVANCED=y CONFIG_IBM_PARTITION=y CONFIG_BSD_DISKLABEL=y @@ -96,7 +95,6 @@ CONFIG_CMA_DEBUG=y CONFIG_CMA_DEBUGFS=y CONFIG_MEM_SOFT_DIRTY=y CONFIG_ZSWAP=y -CONFIG_ZBUD=m CONFIG_ZSMALLOC=m CONFIG_ZSMALLOC_STAT=y CONFIG_DEFERRED_STRUCT_PAGE_INIT=y @@ -130,6 +128,7 @@ CONFIG_SYN_COOKIES=y CONFIG_NET_IPVTI=m CONFIG_INET_AH=m CONFIG_INET_ESP=m +CONFIG_INET_ESPINTCP=y CONFIG_INET_IPCOMP=m CONFIG_INET_DIAG=m CONFIG_INET_UDP_DIAG=m @@ -144,6 +143,7 @@ CONFIG_TCP_CONG_ILLINOIS=m CONFIG_IPV6_ROUTER_PREF=y CONFIG_INET6_AH=m CONFIG_INET6_ESP=m +CONFIG_INET6_ESPINTCP=y CONFIG_INET6_IPCOMP=m CONFIG_IPV6_MIP6=m CONFIG_IPV6_VTI=m @@ -151,7 +151,10 @@ CONFIG_IPV6_SIT=m CONFIG_IPV6_GRE=m CONFIG_IPV6_MULTIPLE_TABLES=y CONFIG_IPV6_SUBTREES=y +CONFIG_IPV6_RPL_LWTUNNEL=y +CONFIG_MPTCP=y CONFIG_NETFILTER=y +CONFIG_BRIDGE_NETFILTER=m CONFIG_NF_CONNTRACK=m CONFIG_NF_CONNTRACK_SECMARK=y CONFIG_NF_CONNTRACK_EVENTS=y @@ -317,6 +320,7 @@ CONFIG_L2TP_V3=y CONFIG_L2TP_IP=m CONFIG_L2TP_ETH=m CONFIG_BRIDGE=m +CONFIG_BRIDGE_MRP=y CONFIG_VLAN_8021Q=m CONFIG_VLAN_8021Q_GVRP=y CONFIG_NET_SCHED=y @@ -341,6 +345,7 @@ CONFIG_NET_SCH_CODEL=m CONFIG_NET_SCH_FQ_CODEL=m CONFIG_NET_SCH_INGRESS=m CONFIG_NET_SCH_PLUG=m +CONFIG_NET_SCH_ETS=m CONFIG_NET_CLS_BASIC=m CONFIG_NET_CLS_TCINDEX=m CONFIG_NET_CLS_ROUTE4=m @@ -364,6 +369,7 @@ CONFIG_NET_ACT_PEDIT=m CONFIG_NET_ACT_SIMP=m CONFIG_NET_ACT_SKBEDIT=m CONFIG_NET_ACT_CSUM=m +CONFIG_NET_ACT_GATE=m CONFIG_DNS_RESOLVER=y CONFIG_OPENVSWITCH=m CONFIG_VSOCKETS=m @@ -374,6 +380,7 @@ CONFIG_BPF_JIT=y CONFIG_NET_PKTGEN=m # CONFIG_NET_DROP_MONITOR is not set CONFIG_PCI=y +# CONFIG_PCIEASPM is not set CONFIG_PCI_DEBUG=y CONFIG_HOTPLUG_PCI=y CONFIG_HOTPLUG_PCI_S390=y @@ -435,6 +442,7 @@ CONFIG_DM_ZERO=m CONFIG_DM_MULTIPATH=m CONFIG_DM_MULTIPATH_QL=m CONFIG_DM_MULTIPATH_ST=m +CONFIG_DM_MULTIPATH_HST=m CONFIG_DM_DELAY=m CONFIG_DM_UEVENT=y CONFIG_DM_FLAKEY=m @@ -448,6 +456,8 @@ CONFIG_EQUALIZER=m CONFIG_IFB=m CONFIG_MACVLAN=m CONFIG_MACVTAP=m +CONFIG_VXLAN=m +CONFIG_BAREUDP=m CONFIG_TUN=m CONFIG_VETH=m CONFIG_VIRTIO_NET=m @@ -481,7 +491,6 @@ CONFIG_NLMON=m CONFIG_MLX4_EN=m CONFIG_MLX5_CORE=m CONFIG_MLX5_CORE_EN=y -# CONFIG_MLXFW is not set # CONFIG_NET_VENDOR_MICREL is not set # CONFIG_NET_VENDOR_MICROCHIP is not set # CONFIG_NET_VENDOR_MICROSEMI is not set @@ -514,6 +523,7 @@ CONFIG_MLX5_CORE_EN=y # CONFIG_NET_VENDOR_TI is not set # CONFIG_NET_VENDOR_VIA is not set # CONFIG_NET_VENDOR_WIZNET is not set +# CONFIG_NET_VENDOR_XILINX is not set CONFIG_PPP=m CONFIG_PPP_BSDCOMP=m CONFIG_PPP_DEFLATE=m @@ -561,6 +571,8 @@ CONFIG_VFIO_MDEV_DEVICE=m CONFIG_VIRTIO_PCI=m CONFIG_VIRTIO_BALLOON=m CONFIG_VIRTIO_INPUT=y +CONFIG_VHOST_NET=m +CONFIG_VHOST_VSOCK=m CONFIG_S390_CCW_IOMMU=y CONFIG_S390_AP_IOMMU=y CONFIG_EXT4_FS=y @@ -608,6 +620,7 @@ CONFIG_ZISOFS=y CONFIG_UDF_FS=m CONFIG_MSDOS_FS=m CONFIG_VFAT_FS=m +CONFIG_EXFAT_FS=m CONFIG_NTFS_FS=m CONFIG_NTFS_RW=y CONFIG_PROC_KCORE=y @@ -650,8 +663,8 @@ CONFIG_NLS_UTF8=m CONFIG_DLM=m CONFIG_UNICODE=y CONFIG_PERSISTENT_KEYRINGS=y -CONFIG_BIG_KEYS=y CONFIG_ENCRYPTED_KEYS=m +CONFIG_KEY_NOTIFICATIONS=y CONFIG_SECURITY=y CONFIG_SECURITY_NETWORK=y CONFIG_FORTIFY_SOURCE=y @@ -675,8 +688,11 @@ CONFIG_CRYPTO_TEST=m CONFIG_CRYPTO_DH=m CONFIG_CRYPTO_ECDH=m CONFIG_CRYPTO_ECRDSA=m +CONFIG_CRYPTO_CURVE25519=m +CONFIG_CRYPTO_GCM=y CONFIG_CRYPTO_CHACHA20POLY1305=m CONFIG_CRYPTO_AEGIS128=m +CONFIG_CRYPTO_SEQIV=y CONFIG_CRYPTO_CFB=m CONFIG_CRYPTO_LRW=m CONFIG_CRYPTO_PCBC=m @@ -685,6 +701,7 @@ CONFIG_CRYPTO_ADIANTUM=m CONFIG_CRYPTO_XCBC=m CONFIG_CRYPTO_VMAC=m CONFIG_CRYPTO_CRC32=m +CONFIG_CRYPTO_BLAKE2S=m CONFIG_CRYPTO_MICHAEL_MIC=m CONFIG_CRYPTO_RMD128=m CONFIG_CRYPTO_RMD160=m @@ -701,6 +718,7 @@ CONFIG_CRYPTO_BLOWFISH=m CONFIG_CRYPTO_CAMELLIA=m CONFIG_CRYPTO_CAST5=m CONFIG_CRYPTO_CAST6=m +CONFIG_CRYPTO_DES=m CONFIG_CRYPTO_FCRYPT=m CONFIG_CRYPTO_KHAZAD=m CONFIG_CRYPTO_SALSA20=m @@ -719,6 +737,9 @@ CONFIG_CRYPTO_USER_API_SKCIPHER=m CONFIG_CRYPTO_USER_API_RNG=m CONFIG_CRYPTO_USER_API_AEAD=m CONFIG_CRYPTO_STATS=y +CONFIG_CRYPTO_LIB_BLAKE2S=m +CONFIG_CRYPTO_LIB_CURVE25519=m +CONFIG_CRYPTO_LIB_CHACHA20POLY1305=m CONFIG_ZCRYPT=m CONFIG_PKEY=m CONFIG_CRYPTO_PAES_S390=m @@ -774,6 +795,7 @@ CONFIG_DEBUG_SHIRQ=y CONFIG_PANIC_ON_OOPS=y CONFIG_DETECT_HUNG_TASK=y CONFIG_WQ_WATCHDOG=y +CONFIG_TEST_LOCKUP=m CONFIG_DEBUG_TIMEKEEPING=y CONFIG_PROVE_LOCKING=y CONFIG_LOCK_STAT=y @@ -786,7 +808,9 @@ CONFIG_BUG_ON_DATA_CORRUPTION=y CONFIG_DEBUG_CREDENTIALS=y CONFIG_RCU_TORTURE_TEST=m CONFIG_RCU_CPU_STALL_TIMEOUT=300 +# CONFIG_RCU_TRACE is not set CONFIG_LATENCYTOP=y +CONFIG_BOOTTIME_TRACING=y CONFIG_FUNCTION_PROFILER=y CONFIG_STACK_TRACER=y CONFIG_IRQSOFF_TRACER=y @@ -808,10 +832,12 @@ CONFIG_FAULT_INJECTION_DEBUG_FS=y CONFIG_FAULT_INJECTION_STACKTRACE_FILTER=y CONFIG_LKDTM=m CONFIG_TEST_LIST_SORT=y +CONFIG_TEST_MIN_HEAP=y CONFIG_TEST_SORT=y CONFIG_KPROBES_SANITY_TEST=y CONFIG_RBTREE_TEST=y CONFIG_INTERVAL_TREE_TEST=m CONFIG_PERCPU_TEST=m CONFIG_ATOMIC64_SELFTEST=y +CONFIG_TEST_BITOPS=m CONFIG_TEST_BPF=m diff --git a/arch/s390/configs/defconfig b/arch/s390/configs/defconfig index 7cd0648c1f4e..5df9759e8ff6 100644 --- a/arch/s390/configs/defconfig +++ b/arch/s390/configs/defconfig @@ -1,5 +1,6 @@ CONFIG_SYSVIPC=y CONFIG_POSIX_MQUEUE=y +CONFIG_WATCH_QUEUE=y CONFIG_AUDIT=y CONFIG_NO_HZ_IDLE=y CONFIG_HIGH_RES_TIMERS=y @@ -13,7 +14,6 @@ CONFIG_IKCONFIG=y CONFIG_IKCONFIG_PROC=y CONFIG_NUMA_BALANCING=y CONFIG_MEMCG=y -CONFIG_MEMCG_SWAP=y CONFIG_BLK_CGROUP=y CONFIG_CFS_BANDWIDTH=y CONFIG_RT_GROUP_SCHED=y @@ -30,9 +30,9 @@ CONFIG_NAMESPACES=y CONFIG_USER_NS=y CONFIG_CHECKPOINT_RESTORE=y CONFIG_SCHED_AUTOGROUP=y -CONFIG_BLK_DEV_INITRD=y CONFIG_EXPERT=y # CONFIG_SYSFS_SYSCALL is not set +CONFIG_BPF_LSM=y CONFIG_BPF_SYSCALL=y CONFIG_USERFAULTFD=y # CONFIG_COMPAT_BRK is not set @@ -41,7 +41,6 @@ CONFIG_LIVEPATCH=y CONFIG_TUNE_ZEC12=y CONFIG_NR_CPUS=512 CONFIG_NUMA=y -# CONFIG_NUMA_EMU is not set CONFIG_HZ_100=y CONFIG_KEXEC_FILE=y CONFIG_KEXEC_SIG=y @@ -51,14 +50,11 @@ CONFIG_CHSC_SCH=y CONFIG_VFIO_CCW=m CONFIG_VFIO_AP=m CONFIG_CRASH_DUMP=y -CONFIG_HIBERNATION=y -CONFIG_PM_DEBUG=y CONFIG_PROTECTED_VIRTUALIZATION_GUEST=y CONFIG_CMM=m CONFIG_APPLDATA_BASE=y CONFIG_KVM=m -CONFIG_VHOST_NET=m -CONFIG_VHOST_VSOCK=m +CONFIG_S390_UNWIND_SELFTEST=m CONFIG_OPROFILE=m CONFIG_KPROBES=y CONFIG_JUMP_LABEL=y @@ -74,6 +70,8 @@ CONFIG_BLK_DEV_THROTTLING=y CONFIG_BLK_WBT=y CONFIG_BLK_CGROUP_IOLATENCY=y CONFIG_BLK_CGROUP_IOCOST=y +CONFIG_BLK_INLINE_ENCRYPTION=y +CONFIG_BLK_INLINE_ENCRYPTION_FALLBACK=y CONFIG_PARTITION_ADVANCED=y CONFIG_IBM_PARTITION=y CONFIG_BSD_DISKLABEL=y @@ -91,7 +89,6 @@ CONFIG_CLEANCACHE=y CONFIG_FRONTSWAP=y CONFIG_MEM_SOFT_DIRTY=y CONFIG_ZSWAP=y -CONFIG_ZBUD=m CONFIG_ZSMALLOC=m CONFIG_ZSMALLOC_STAT=y CONFIG_DEFERRED_STRUCT_PAGE_INIT=y @@ -125,6 +122,7 @@ CONFIG_SYN_COOKIES=y CONFIG_NET_IPVTI=m CONFIG_INET_AH=m CONFIG_INET_ESP=m +CONFIG_INET_ESPINTCP=y CONFIG_INET_IPCOMP=m CONFIG_INET_DIAG=m CONFIG_INET_UDP_DIAG=m @@ -139,6 +137,7 @@ CONFIG_TCP_CONG_ILLINOIS=m CONFIG_IPV6_ROUTER_PREF=y CONFIG_INET6_AH=m CONFIG_INET6_ESP=m +CONFIG_INET6_ESPINTCP=y CONFIG_INET6_IPCOMP=m CONFIG_IPV6_MIP6=m CONFIG_IPV6_VTI=m @@ -146,7 +145,10 @@ CONFIG_IPV6_SIT=m CONFIG_IPV6_GRE=m CONFIG_IPV6_MULTIPLE_TABLES=y CONFIG_IPV6_SUBTREES=y +CONFIG_IPV6_RPL_LWTUNNEL=y +CONFIG_MPTCP=y CONFIG_NETFILTER=y +CONFIG_BRIDGE_NETFILTER=m CONFIG_NF_CONNTRACK=m CONFIG_NF_CONNTRACK_SECMARK=y CONFIG_NF_CONNTRACK_EVENTS=y @@ -311,6 +313,7 @@ CONFIG_L2TP_V3=y CONFIG_L2TP_IP=m CONFIG_L2TP_ETH=m CONFIG_BRIDGE=m +CONFIG_BRIDGE_MRP=y CONFIG_VLAN_8021Q=m CONFIG_VLAN_8021Q_GVRP=y CONFIG_NET_SCHED=y @@ -335,6 +338,7 @@ CONFIG_NET_SCH_CODEL=m CONFIG_NET_SCH_FQ_CODEL=m CONFIG_NET_SCH_INGRESS=m CONFIG_NET_SCH_PLUG=m +CONFIG_NET_SCH_ETS=m CONFIG_NET_CLS_BASIC=m CONFIG_NET_CLS_TCINDEX=m CONFIG_NET_CLS_ROUTE4=m @@ -358,6 +362,7 @@ CONFIG_NET_ACT_PEDIT=m CONFIG_NET_ACT_SIMP=m CONFIG_NET_ACT_SKBEDIT=m CONFIG_NET_ACT_CSUM=m +CONFIG_NET_ACT_GATE=m CONFIG_DNS_RESOLVER=y CONFIG_OPENVSWITCH=m CONFIG_VSOCKETS=m @@ -368,6 +373,7 @@ CONFIG_BPF_JIT=y CONFIG_NET_PKTGEN=m # CONFIG_NET_DROP_MONITOR is not set CONFIG_PCI=y +# CONFIG_PCIEASPM is not set CONFIG_HOTPLUG_PCI=y CONFIG_HOTPLUG_PCI_S390=y CONFIG_UEVENT_HELPER=y @@ -430,6 +436,7 @@ CONFIG_DM_ZERO=m CONFIG_DM_MULTIPATH=m CONFIG_DM_MULTIPATH_QL=m CONFIG_DM_MULTIPATH_ST=m +CONFIG_DM_MULTIPATH_HST=m CONFIG_DM_DELAY=m CONFIG_DM_UEVENT=y CONFIG_DM_FLAKEY=m @@ -444,6 +451,8 @@ CONFIG_EQUALIZER=m CONFIG_IFB=m CONFIG_MACVLAN=m CONFIG_MACVTAP=m +CONFIG_VXLAN=m +CONFIG_BAREUDP=m CONFIG_TUN=m CONFIG_VETH=m CONFIG_VIRTIO_NET=m @@ -477,7 +486,6 @@ CONFIG_NLMON=m CONFIG_MLX4_EN=m CONFIG_MLX5_CORE=m CONFIG_MLX5_CORE_EN=y -# CONFIG_MLXFW is not set # CONFIG_NET_VENDOR_MICREL is not set # CONFIG_NET_VENDOR_MICROCHIP is not set # CONFIG_NET_VENDOR_MICROSEMI is not set @@ -510,6 +518,7 @@ CONFIG_MLX5_CORE_EN=y # CONFIG_NET_VENDOR_TI is not set # CONFIG_NET_VENDOR_VIA is not set # CONFIG_NET_VENDOR_WIZNET is not set +# CONFIG_NET_VENDOR_XILINX is not set CONFIG_PPP=m CONFIG_PPP_BSDCOMP=m CONFIG_PPP_DEFLATE=m @@ -557,6 +566,8 @@ CONFIG_VFIO_MDEV_DEVICE=m CONFIG_VIRTIO_PCI=m CONFIG_VIRTIO_BALLOON=m CONFIG_VIRTIO_INPUT=y +CONFIG_VHOST_NET=m +CONFIG_VHOST_VSOCK=m CONFIG_S390_CCW_IOMMU=y CONFIG_S390_AP_IOMMU=y CONFIG_EXT4_FS=y @@ -600,6 +611,7 @@ CONFIG_ZISOFS=y CONFIG_UDF_FS=m CONFIG_MSDOS_FS=m CONFIG_VFAT_FS=m +CONFIG_EXFAT_FS=m CONFIG_NTFS_FS=m CONFIG_NTFS_RW=y CONFIG_PROC_KCORE=y @@ -642,8 +654,8 @@ CONFIG_NLS_UTF8=m CONFIG_DLM=m CONFIG_UNICODE=y CONFIG_PERSISTENT_KEYRINGS=y -CONFIG_BIG_KEYS=y CONFIG_ENCRYPTED_KEYS=m +CONFIG_KEY_NOTIFICATIONS=y CONFIG_SECURITY=y CONFIG_SECURITY_NETWORK=y CONFIG_SECURITY_SELINUX=y @@ -667,8 +679,11 @@ CONFIG_CRYPTO_TEST=m CONFIG_CRYPTO_DH=m CONFIG_CRYPTO_ECDH=m CONFIG_CRYPTO_ECRDSA=m +CONFIG_CRYPTO_CURVE25519=m +CONFIG_CRYPTO_GCM=y CONFIG_CRYPTO_CHACHA20POLY1305=m CONFIG_CRYPTO_AEGIS128=m +CONFIG_CRYPTO_SEQIV=y CONFIG_CRYPTO_CFB=m CONFIG_CRYPTO_LRW=m CONFIG_CRYPTO_OFB=m @@ -678,6 +693,7 @@ CONFIG_CRYPTO_ADIANTUM=m CONFIG_CRYPTO_XCBC=m CONFIG_CRYPTO_VMAC=m CONFIG_CRYPTO_CRC32=m +CONFIG_CRYPTO_BLAKE2S=m CONFIG_CRYPTO_MICHAEL_MIC=m CONFIG_CRYPTO_RMD128=m CONFIG_CRYPTO_RMD160=m @@ -694,6 +710,7 @@ CONFIG_CRYPTO_BLOWFISH=m CONFIG_CRYPTO_CAMELLIA=m CONFIG_CRYPTO_CAST5=m CONFIG_CRYPTO_CAST6=m +CONFIG_CRYPTO_DES=m CONFIG_CRYPTO_FCRYPT=m CONFIG_CRYPTO_KHAZAD=m CONFIG_CRYPTO_SALSA20=m @@ -712,6 +729,9 @@ CONFIG_CRYPTO_USER_API_SKCIPHER=m CONFIG_CRYPTO_USER_API_RNG=m CONFIG_CRYPTO_USER_API_AEAD=m CONFIG_CRYPTO_STATS=y +CONFIG_CRYPTO_LIB_BLAKE2S=m +CONFIG_CRYPTO_LIB_CURVE25519=m +CONFIG_CRYPTO_LIB_CHACHA20POLY1305=m CONFIG_ZCRYPT=m CONFIG_PKEY=m CONFIG_CRYPTO_PAES_S390=m @@ -725,6 +745,7 @@ CONFIG_CRYPTO_AES_S390=m CONFIG_CRYPTO_GHASH_S390=m CONFIG_CRYPTO_CRC32_S390=y CONFIG_CORDIC=m +CONFIG_PRIME_NUMBERS=m CONFIG_CRC4=m CONFIG_CRC7=m CONFIG_CRC8=m @@ -739,10 +760,12 @@ CONFIG_DEBUG_SECTION_MISMATCH=y CONFIG_MAGIC_SYSRQ=y CONFIG_DEBUG_MEMORY_INIT=y CONFIG_PANIC_ON_OOPS=y +CONFIG_TEST_LOCKUP=m CONFIG_BUG_ON_DATA_CORRUPTION=y CONFIG_RCU_TORTURE_TEST=m CONFIG_RCU_CPU_STALL_TIMEOUT=60 CONFIG_LATENCYTOP=y +CONFIG_BOOTTIME_TRACING=y CONFIG_FUNCTION_PROFILER=y CONFIG_STACK_TRACER=y CONFIG_SCHED_TRACER=y diff --git a/arch/s390/configs/zfcpdump_defconfig b/arch/s390/configs/zfcpdump_defconfig index 20c51e5d9353..4091c50449cd 100644 --- a/arch/s390/configs/zfcpdump_defconfig +++ b/arch/s390/configs/zfcpdump_defconfig @@ -30,6 +30,7 @@ CONFIG_IBM_PARTITION=y # CONFIG_BOUNCE is not set CONFIG_NET=y # CONFIG_IUCV is not set +# CONFIG_ETHTOOL_NETLINK is not set CONFIG_DEVTMPFS=y CONFIG_BLK_DEV_RAM=y # CONFIG_BLK_DEV_XPRAM is not set @@ -55,6 +56,8 @@ CONFIG_RAW_DRIVER=y # CONFIG_MONWRITER is not set # CONFIG_S390_VMUR is not set # CONFIG_HID is not set +# CONFIG_VIRTIO_MENU is not set +# CONFIG_VHOST_MENU is not set # CONFIG_IOMMU_SUPPORT is not set # CONFIG_DNOTIFY is not set # CONFIG_INOTIFY_USER is not set @@ -62,7 +65,9 @@ CONFIG_CONFIGFS_FS=y # CONFIG_MISC_FILESYSTEMS is not set # CONFIG_NETWORK_FILESYSTEMS is not set CONFIG_LSM="yama,loadpin,safesetid,integrity" +# CONFIG_ZLIB_DFLTCC is not set CONFIG_PRINTK_TIME=y +# CONFIG_SYMBOLIC_ERRNAME is not set CONFIG_DEBUG_INFO=y CONFIG_DEBUG_FS=y CONFIG_DEBUG_KERNEL=y From d3c54f7f18cb82283777998c4bec52e49241c783 Mon Sep 17 00:00:00 2001 From: Luo bin Date: Wed, 1 Jul 2020 11:16:33 +0800 Subject: [PATCH 108/343] hinic: fix passing non negative value to ERR_PTR get_dev_cap and set_resources_state functions may return a positive value because of hardware failure, and the positive return value can not be passed to ERR_PTR directly. Fixes: 7dd29ee12865 ("hinic: add sriov feature support") Signed-off-by: Luo bin Signed-off-by: David S. Miller --- drivers/net/ethernet/huawei/hinic/hinic_hw_dev.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/net/ethernet/huawei/hinic/hinic_hw_dev.c b/drivers/net/ethernet/huawei/hinic/hinic_hw_dev.c index 0245da02efbb..b735bc537508 100644 --- a/drivers/net/ethernet/huawei/hinic/hinic_hw_dev.c +++ b/drivers/net/ethernet/huawei/hinic/hinic_hw_dev.c @@ -814,6 +814,8 @@ struct hinic_hwdev *hinic_init_hwdev(struct pci_dev *pdev) err_init_msix: err_pfhwdev_alloc: hinic_free_hwif(hwif); + if (err > 0) + err = -EIO; return ERR_PTR(err); } From 1e82a62fec613844da9e558f3493540a5b7a7b67 Mon Sep 17 00:00:00 2001 From: Sean Tranchetti Date: Tue, 30 Jun 2020 11:50:17 -0600 Subject: [PATCH 109/343] genetlink: remove genl_bind A potential deadlock can occur during registering or unregistering a new generic netlink family between the main nl_table_lock and the cb_lock where each thread wants the lock held by the other, as demonstrated below. 1) Thread 1 is performing a netlink_bind() operation on a socket. As part of this call, it will call netlink_lock_table(), incrementing the nl_table_users count to 1. 2) Thread 2 is registering (or unregistering) a genl_family via the genl_(un)register_family() API. The cb_lock semaphore will be taken for writing. 3) Thread 1 will call genl_bind() as part of the bind operation to handle subscribing to GENL multicast groups at the request of the user. It will attempt to take the cb_lock semaphore for reading, but it will fail and be scheduled away, waiting for Thread 2 to finish the write. 4) Thread 2 will call netlink_table_grab() during the (un)registration call. However, as Thread 1 has incremented nl_table_users, it will not be able to proceed, and both threads will be stuck waiting for the other. genl_bind() is a noop, unless a genl_family implements the mcast_bind() function to handle setting up family-specific multicast operations. Since no one in-tree uses this functionality as Cong pointed out, simply removing the genl_bind() function will remove the possibility for deadlock, as there is no attempt by Thread 1 above to take the cb_lock semaphore. Fixes: c380d9a7afff ("genetlink: pass multicast bind/unbind to families") Suggested-by: Cong Wang Acked-by: Johannes Berg Reported-by: kernel test robot Signed-off-by: Sean Tranchetti Signed-off-by: David S. Miller --- include/net/genetlink.h | 8 ------- net/netlink/genetlink.c | 49 ----------------------------------------- 2 files changed, 57 deletions(-) diff --git a/include/net/genetlink.h b/include/net/genetlink.h index ad71ed4f55ff..6e5f1e1aa822 100644 --- a/include/net/genetlink.h +++ b/include/net/genetlink.h @@ -35,12 +35,6 @@ struct genl_info; * do additional, common, filtering and return an error * @post_doit: called after an operation's doit callback, it may * undo operations done by pre_doit, for example release locks - * @mcast_bind: a socket bound to the given multicast group (which - * is given as the offset into the groups array) - * @mcast_unbind: a socket was unbound from the given multicast group. - * Note that unbind() will not be called symmetrically if the - * generic netlink family is removed while there are still open - * sockets. * @mcgrps: multicast groups used by this family * @n_mcgrps: number of multicast groups * @mcgrp_offset: starting number of multicast group IDs in this family @@ -63,8 +57,6 @@ struct genl_family { void (*post_doit)(const struct genl_ops *ops, struct sk_buff *skb, struct genl_info *info); - int (*mcast_bind)(struct net *net, int group); - void (*mcast_unbind)(struct net *net, int group); const struct genl_ops * ops; const struct genl_multicast_group *mcgrps; unsigned int n_ops; diff --git a/net/netlink/genetlink.c b/net/netlink/genetlink.c index a914b9365a46..9395ee8a868d 100644 --- a/net/netlink/genetlink.c +++ b/net/netlink/genetlink.c @@ -1144,60 +1144,11 @@ static struct genl_family genl_ctrl __ro_after_init = { .netnsok = true, }; -static int genl_bind(struct net *net, int group) -{ - struct genl_family *f; - int err = -ENOENT; - unsigned int id; - - down_read(&cb_lock); - - idr_for_each_entry(&genl_fam_idr, f, id) { - if (group >= f->mcgrp_offset && - group < f->mcgrp_offset + f->n_mcgrps) { - int fam_grp = group - f->mcgrp_offset; - - if (!f->netnsok && net != &init_net) - err = -ENOENT; - else if (f->mcast_bind) - err = f->mcast_bind(net, fam_grp); - else - err = 0; - break; - } - } - up_read(&cb_lock); - - return err; -} - -static void genl_unbind(struct net *net, int group) -{ - struct genl_family *f; - unsigned int id; - - down_read(&cb_lock); - - idr_for_each_entry(&genl_fam_idr, f, id) { - if (group >= f->mcgrp_offset && - group < f->mcgrp_offset + f->n_mcgrps) { - int fam_grp = group - f->mcgrp_offset; - - if (f->mcast_unbind) - f->mcast_unbind(net, fam_grp); - break; - } - } - up_read(&cb_lock); -} - static int __net_init genl_pernet_init(struct net *net) { struct netlink_kernel_cfg cfg = { .input = genl_rcv, .flags = NL_CFG_F_NONROOT_RECV, - .bind = genl_bind, - .unbind = genl_unbind, }; /* we'll bump the group number right afterwards */ From e6ced831ef11a2a06e8d00aad9d4fc05b610bf38 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Wed, 1 Jul 2020 11:43:04 -0700 Subject: [PATCH 110/343] tcp: md5: refine tcp_md5_do_add()/tcp_md5_hash_key() barriers My prior fix went a bit too far, according to Herbert and Mathieu. Since we accept that concurrent TCP MD5 lookups might see inconsistent keys, we can use READ_ONCE()/WRITE_ONCE() instead of smp_rmb()/smp_wmb() Clearing all key->key[] is needed to avoid possible KMSAN reports, if key->keylen is increased. Since tcp_md5_do_add() is not fast path, using __GFP_ZERO to clear all struct tcp_md5sig_key is simpler. data_race() was added in linux-5.8 and will prevent KCSAN reports, this can safely be removed in stable backports, if data_race() is not yet backported. v2: use data_race() both in tcp_md5_hash_key() and tcp_md5_do_add() Fixes: 6a2febec338d ("tcp: md5: add missing memory barriers in tcp_md5_do_add()/tcp_md5_hash_key()") Signed-off-by: Eric Dumazet Cc: Mathieu Desnoyers Cc: Herbert Xu Cc: Marco Elver Reviewed-by: Mathieu Desnoyers Acked-by: Herbert Xu Signed-off-by: David S. Miller --- net/ipv4/tcp.c | 8 ++++---- net/ipv4/tcp_ipv4.c | 19 ++++++++++++++----- 2 files changed, 18 insertions(+), 9 deletions(-) diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c index f11166045324..c33f7c6aff8e 100644 --- a/net/ipv4/tcp.c +++ b/net/ipv4/tcp.c @@ -4033,14 +4033,14 @@ EXPORT_SYMBOL(tcp_md5_hash_skb_data); int tcp_md5_hash_key(struct tcp_md5sig_pool *hp, const struct tcp_md5sig_key *key) { - u8 keylen = key->keylen; + u8 keylen = READ_ONCE(key->keylen); /* paired with WRITE_ONCE() in tcp_md5_do_add */ struct scatterlist sg; - smp_rmb(); /* paired with smp_wmb() in tcp_md5_do_add() */ - sg_init_one(&sg, key->key, keylen); ahash_request_set_crypt(hp->md5_req, &sg, NULL, keylen); - return crypto_ahash_update(hp->md5_req); + + /* We use data_race() because tcp_md5_do_add() might change key->key under us */ + return data_race(crypto_ahash_update(hp->md5_req)); } EXPORT_SYMBOL(tcp_md5_hash_key); diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c index 99916fcc15ca..04bfcbbfee83 100644 --- a/net/ipv4/tcp_ipv4.c +++ b/net/ipv4/tcp_ipv4.c @@ -1111,12 +1111,21 @@ int tcp_md5_do_add(struct sock *sk, const union tcp_md5_addr *addr, key = tcp_md5_do_lookup_exact(sk, addr, family, prefixlen, l3index); if (key) { - /* Pre-existing entry - just update that one. */ - memcpy(key->key, newkey, newkeylen); + /* Pre-existing entry - just update that one. + * Note that the key might be used concurrently. + * data_race() is telling kcsan that we do not care of + * key mismatches, since changing MD5 key on live flows + * can lead to packet drops. + */ + data_race(memcpy(key->key, newkey, newkeylen)); - smp_wmb(); /* pairs with smp_rmb() in tcp_md5_hash_key() */ + /* Pairs with READ_ONCE() in tcp_md5_hash_key(). + * Also note that a reader could catch new key->keylen value + * but old key->key[], this is the reason we use __GFP_ZERO + * at sock_kmalloc() time below these lines. + */ + WRITE_ONCE(key->keylen, newkeylen); - key->keylen = newkeylen; return 0; } @@ -1132,7 +1141,7 @@ int tcp_md5_do_add(struct sock *sk, const union tcp_md5_addr *addr, rcu_assign_pointer(tp->md5sig_info, md5sig); } - key = sock_kmalloc(sk, sizeof(*key), gfp); + key = sock_kmalloc(sk, sizeof(*key), gfp | __GFP_ZERO); if (!key) return -ENOMEM; if (!tcp_alloc_md5sig_pool()) { From 9ef845f894c93416a1cbcbc6ec42525fb06aaf4e Mon Sep 17 00:00:00 2001 From: Rao Shoaib Date: Wed, 1 Jul 2020 12:23:38 -0700 Subject: [PATCH 111/343] rds: If one path needs re-connection, check all and re-connect In testing with mprds enabled, Oracle Cluster nodes after reboot were not able to communicate with others nodes and so failed to rejoin the cluster. Peers with lower IP address initiated connection but the node could not respond as it choose a different path and could not initiate a connection as it had a higher IP address. With this patch, when a node sends out a packet and the selected path is down, all other paths are also checked and any down paths are re-connected. Reviewed-by: Ka-cheong Poon Reviewed-by: David Edmondson Signed-off-by: Somasundaram Krishnasamy Signed-off-by: Rao Shoaib Signed-off-by: David S. Miller --- net/rds/connection.c | 11 +++++++++++ net/rds/rds.h | 7 +++++++ net/rds/send.c | 3 ++- 3 files changed, 20 insertions(+), 1 deletion(-) diff --git a/net/rds/connection.c b/net/rds/connection.c index ed7f2133acc2..f2fcab182095 100644 --- a/net/rds/connection.c +++ b/net/rds/connection.c @@ -905,6 +905,17 @@ void rds_conn_path_connect_if_down(struct rds_conn_path *cp) } EXPORT_SYMBOL_GPL(rds_conn_path_connect_if_down); +/* Check connectivity of all paths + */ +void rds_check_all_paths(struct rds_connection *conn) +{ + int i = 0; + + do { + rds_conn_path_connect_if_down(&conn->c_path[i]); + } while (++i < conn->c_npaths); +} + void rds_conn_connect_if_down(struct rds_connection *conn) { WARN_ON(conn->c_trans->t_mp_capable); diff --git a/net/rds/rds.h b/net/rds/rds.h index 6019b0c004a9..106e862996b9 100644 --- a/net/rds/rds.h +++ b/net/rds/rds.h @@ -778,6 +778,7 @@ void rds_conn_drop(struct rds_connection *conn); void rds_conn_path_drop(struct rds_conn_path *cpath, bool destroy); void rds_conn_connect_if_down(struct rds_connection *conn); void rds_conn_path_connect_if_down(struct rds_conn_path *cp); +void rds_check_all_paths(struct rds_connection *conn); void rds_for_each_conn_info(struct socket *sock, unsigned int len, struct rds_info_iterator *iter, struct rds_info_lengths *lens, @@ -822,6 +823,12 @@ rds_conn_path_up(struct rds_conn_path *cp) return atomic_read(&cp->cp_state) == RDS_CONN_UP; } +static inline int +rds_conn_path_down(struct rds_conn_path *cp) +{ + return atomic_read(&cp->cp_state) == RDS_CONN_DOWN; +} + static inline int rds_conn_up(struct rds_connection *conn) { diff --git a/net/rds/send.c b/net/rds/send.c index 68e2bdb08fd0..9a529a01cdc6 100644 --- a/net/rds/send.c +++ b/net/rds/send.c @@ -1340,7 +1340,8 @@ int rds_sendmsg(struct socket *sock, struct msghdr *msg, size_t payload_len) goto out; } - rds_conn_path_connect_if_down(cpath); + if (rds_conn_path_down(cpath)) + rds_check_all_paths(conn); ret = rds_cong_wait(conn->c_fcong, dport, nonblock, rs); if (ret) { From e114e1e8ac9d31f25b9dd873bab5d80c1fc482ca Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Wed, 1 Jul 2020 12:41:23 -0700 Subject: [PATCH 112/343] tcp: md5: do not send silly options in SYNCOOKIES Whenever cookie_init_timestamp() has been used to encode ECN,SACK,WSCALE options, we can not remove the TS option in the SYNACK. Otherwise, tcp_synack_options() will still advertize options like WSCALE that we can not deduce later when receiving the packet from the client to complete 3WHS. Note that modern linux TCP stacks wont use MD5+TS+SACK in a SYN packet, but we can not know for sure that all TCP stacks have the same logic. Before the fix a tcpdump would exhibit this wrong exchange : 10:12:15.464591 IP C > S: Flags [S], seq 4202415601, win 65535, options [nop,nop,md5 valid,mss 1400,sackOK,TS val 456965269 ecr 0,nop,wscale 8], length 0 10:12:15.464602 IP S > C: Flags [S.], seq 253516766, ack 4202415602, win 65535, options [nop,nop,md5 valid,mss 1400,nop,nop,sackOK,nop,wscale 8], length 0 10:12:15.464611 IP C > S: Flags [.], ack 1, win 256, options [nop,nop,md5 valid], length 0 10:12:15.464678 IP C > S: Flags [P.], seq 1:13, ack 1, win 256, options [nop,nop,md5 valid], length 12 10:12:15.464685 IP S > C: Flags [.], ack 13, win 65535, options [nop,nop,md5 valid], length 0 After this patch the exchange looks saner : 11:59:59.882990 IP C > S: Flags [S], seq 517075944, win 65535, options [nop,nop,md5 valid,mss 1400,sackOK,TS val 1751508483 ecr 0,nop,wscale 8], length 0 11:59:59.883002 IP S > C: Flags [S.], seq 1902939253, ack 517075945, win 65535, options [nop,nop,md5 valid,mss 1400,sackOK,TS val 1751508479 ecr 1751508483,nop,wscale 8], length 0 11:59:59.883012 IP C > S: Flags [.], ack 1, win 256, options [nop,nop,md5 valid,nop,nop,TS val 1751508483 ecr 1751508479], length 0 11:59:59.883114 IP C > S: Flags [P.], seq 1:13, ack 1, win 256, options [nop,nop,md5 valid,nop,nop,TS val 1751508483 ecr 1751508479], length 12 11:59:59.883122 IP S > C: Flags [.], ack 13, win 256, options [nop,nop,md5 valid,nop,nop,TS val 1751508483 ecr 1751508483], length 0 11:59:59.883152 IP S > C: Flags [P.], seq 1:13, ack 13, win 256, options [nop,nop,md5 valid,nop,nop,TS val 1751508484 ecr 1751508483], length 12 11:59:59.883170 IP C > S: Flags [.], ack 13, win 256, options [nop,nop,md5 valid,nop,nop,TS val 1751508484 ecr 1751508484], length 0 Of course, no SACK block will ever be added later, but nothing should break. Technically, we could remove the 4 nops included in MD5+TS options, but again some stacks could break seeing not conventional alignment. Fixes: 4957faade11b ("TCPCT part 1g: Responder Cookie => Initiator") Signed-off-by: Eric Dumazet Cc: Florian Westphal Cc: Mathieu Desnoyers Signed-off-by: David S. Miller --- net/ipv4/tcp_output.c | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c index a50e1990a845..5f5b2f0b0e60 100644 --- a/net/ipv4/tcp_output.c +++ b/net/ipv4/tcp_output.c @@ -700,7 +700,8 @@ static unsigned int tcp_synack_options(const struct sock *sk, unsigned int mss, struct sk_buff *skb, struct tcp_out_options *opts, const struct tcp_md5sig_key *md5, - struct tcp_fastopen_cookie *foc) + struct tcp_fastopen_cookie *foc, + enum tcp_synack_type synack_type) { struct inet_request_sock *ireq = inet_rsk(req); unsigned int remaining = MAX_TCP_OPTION_SPACE; @@ -715,7 +716,8 @@ static unsigned int tcp_synack_options(const struct sock *sk, * rather than TS in order to fit in better with old, * buggy kernels, but that was deemed to be unnecessary. */ - ireq->tstamp_ok &= !ireq->sack_ok; + if (synack_type != TCP_SYNACK_COOKIE) + ireq->tstamp_ok &= !ireq->sack_ok; } #endif @@ -3394,7 +3396,7 @@ struct sk_buff *tcp_make_synack(const struct sock *sk, struct dst_entry *dst, #endif skb_set_hash(skb, tcp_rsk(req)->txhash, PKT_HASH_TYPE_L4); tcp_header_size = tcp_synack_options(sk, req, mss, skb, &opts, md5, - foc) + sizeof(*th); + foc, synack_type) + sizeof(*th); skb_push(skb, tcp_header_size); skb_reset_transport_header(skb); From 0da7536fb47f51df89ccfcb1fa09f249d9accec5 Mon Sep 17 00:00:00 2001 From: Willem de Bruijn Date: Wed, 1 Jul 2020 16:00:06 -0400 Subject: [PATCH 113/343] ip: Fix SO_MARK in RST, ACK and ICMP packets When no full socket is available, skbs are sent over a per-netns control socket. Its sk_mark is temporarily adjusted to match that of the real (request or timewait) socket or to reflect an incoming skb, so that the outgoing skb inherits this in __ip_make_skb. Introduction of the socket cookie mark field broke this. Now the skb is set through the cookie and cork: # init sockc.mark from sk_mark or cmsg ip_append_data ip_setup_cork # convert sockc.mark to cork mark ip_push_pending_frames ip_finish_skb __ip_make_skb # set skb->mark to cork mark But I missed these special control sockets. Update all callers of __ip(6)_make_skb that were originally missed. For IPv6, the same two icmp(v6) paths are affected. The third case is not, as commit 92e55f412cff ("tcp: don't annotate mark on control socket from tcp_v6_send_response()") replaced the ctl_sk->sk_mark with passing the mark field directly as a function argument. That commit predates the commit that introduced the bug. Fixes: c6af0c227a22 ("ip: support SO_MARK cmsg") Signed-off-by: Willem de Bruijn Reported-by: Martin KaFai Lau Reviewed-by: Martin KaFai Lau Signed-off-by: David S. Miller --- net/ipv4/icmp.c | 4 ++-- net/ipv4/ip_output.c | 2 +- net/ipv6/icmp.c | 4 ++-- 3 files changed, 5 insertions(+), 5 deletions(-) diff --git a/net/ipv4/icmp.c b/net/ipv4/icmp.c index 956a806649f7..e30515f89802 100644 --- a/net/ipv4/icmp.c +++ b/net/ipv4/icmp.c @@ -427,7 +427,7 @@ static void icmp_reply(struct icmp_bxm *icmp_param, struct sk_buff *skb) ipcm_init(&ipc); inet->tos = ip_hdr(skb)->tos; - sk->sk_mark = mark; + ipc.sockc.mark = mark; daddr = ipc.addr = ip_hdr(skb)->saddr; saddr = fib_compute_spec_dst(skb); @@ -710,10 +710,10 @@ void __icmp_send(struct sk_buff *skb_in, int type, int code, __be32 info, icmp_param.skb = skb_in; icmp_param.offset = skb_network_offset(skb_in); inet_sk(sk)->tos = tos; - sk->sk_mark = mark; ipcm_init(&ipc); ipc.addr = iph->saddr; ipc.opt = &icmp_param.replyopts.opt; + ipc.sockc.mark = mark; rt = icmp_route_lookup(net, &fl4, skb_in, iph, saddr, tos, mark, type, code, &icmp_param); diff --git a/net/ipv4/ip_output.c b/net/ipv4/ip_output.c index 090d3097ee15..17206677d503 100644 --- a/net/ipv4/ip_output.c +++ b/net/ipv4/ip_output.c @@ -1702,7 +1702,7 @@ void ip_send_unicast_reply(struct sock *sk, struct sk_buff *skb, sk->sk_protocol = ip_hdr(skb)->protocol; sk->sk_bound_dev_if = arg->bound_dev_if; sk->sk_sndbuf = sysctl_wmem_default; - sk->sk_mark = fl4.flowi4_mark; + ipc.sockc.mark = fl4.flowi4_mark; err = ip_append_data(sk, &fl4, ip_reply_glue_bits, arg->iov->iov_base, len, 0, &ipc, &rt, MSG_DONTWAIT); if (unlikely(err)) { diff --git a/net/ipv6/icmp.c b/net/ipv6/icmp.c index fc5000370030..9df8737ae0d3 100644 --- a/net/ipv6/icmp.c +++ b/net/ipv6/icmp.c @@ -566,7 +566,6 @@ static void icmp6_send(struct sk_buff *skb, u8 type, u8 code, __u32 info, fl6.mp_hash = rt6_multipath_hash(net, &fl6, skb, NULL); security_skb_classify_flow(skb, flowi6_to_flowi(&fl6)); - sk->sk_mark = mark; np = inet6_sk(sk); if (!icmpv6_xrlim_allow(sk, type, &fl6)) @@ -583,6 +582,7 @@ static void icmp6_send(struct sk_buff *skb, u8 type, u8 code, __u32 info, fl6.flowi6_oif = np->ucast_oif; ipcm6_init_sk(&ipc6, np); + ipc6.sockc.mark = mark; fl6.flowlabel = ip6_make_flowinfo(ipc6.tclass, fl6.flowlabel); dst = icmpv6_route_lookup(net, skb, sk, &fl6); @@ -751,7 +751,6 @@ static void icmpv6_echo_reply(struct sk_buff *skb) sk = icmpv6_xmit_lock(net); if (!sk) goto out_bh_enable; - sk->sk_mark = mark; np = inet6_sk(sk); if (!fl6.flowi6_oif && ipv6_addr_is_multicast(&fl6.daddr)) @@ -779,6 +778,7 @@ static void icmpv6_echo_reply(struct sk_buff *skb) ipcm6_init_sk(&ipc6, np); ipc6.hlimit = ip6_sk_dst_hoplimit(np, &fl6, dst); ipc6.tclass = ipv6_get_dsfield(ipv6_hdr(skb)); + ipc6.sockc.mark = mark; if (ip6_append_data(sk, icmpv6_getfrag, &msg, skb->len + sizeof(struct icmp6hdr), From ba3bb0e76ccd464bb66665a1941fabe55dadb3ba Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Tue, 30 Jun 2020 13:51:28 -0700 Subject: [PATCH 114/343] tcp: fix SO_RCVLOWAT possible hangs under high mem pressure Whenever tcp_try_rmem_schedule() returns an error, we are under trouble and should make sure to wakeup readers so that they can drain socket queues and eventually make room. Fixes: 03f45c883c6f ("tcp: avoid extra wakeups for SO_RCVLOWAT users") Signed-off-by: Eric Dumazet Signed-off-by: David S. Miller --- net/ipv4/tcp_input.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c index f3a0eb139b76..9615e72656d1 100644 --- a/net/ipv4/tcp_input.c +++ b/net/ipv4/tcp_input.c @@ -4582,6 +4582,7 @@ static void tcp_data_queue_ofo(struct sock *sk, struct sk_buff *skb) if (unlikely(tcp_try_rmem_schedule(sk, skb, skb->truesize))) { NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPOFODROP); + sk->sk_data_ready(sk); tcp_drop(sk, skb); return; } @@ -4828,6 +4829,7 @@ static void tcp_data_queue(struct sock *sk, struct sk_buff *skb) sk_forced_mem_schedule(sk, skb->truesize); else if (tcp_try_rmem_schedule(sk, skb, skb->truesize)) { NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPRCVQDROP); + sk->sk_data_ready(sk); goto drop; } From e4b9a72d76a47c11677b4bb0dee24a1fb6efb3e9 Mon Sep 17 00:00:00 2001 From: Helmut Grohne Date: Wed, 1 Jul 2020 13:22:20 +0200 Subject: [PATCH 115/343] net: dsa: microchip: enable ksz9893 via i2c in the ksz9477 driver MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The KSZ9893 3-Port Gigabit Ethernet Switch can be controlled via SPI, I²C or MDIO (very limited and not supported by this driver). While there is already a compatible entry for the SPI bus, it was missing for I²C. Signed-off-by: Helmut Grohne Reviewed-by: Andrew Lunn Reviewed-by: Florian Fainelli Signed-off-by: David S. Miller --- drivers/net/dsa/microchip/ksz9477_i2c.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/net/dsa/microchip/ksz9477_i2c.c b/drivers/net/dsa/microchip/ksz9477_i2c.c index 7d050fab0889..7951f52d860d 100644 --- a/drivers/net/dsa/microchip/ksz9477_i2c.c +++ b/drivers/net/dsa/microchip/ksz9477_i2c.c @@ -79,6 +79,7 @@ MODULE_DEVICE_TABLE(i2c, ksz9477_i2c_id); static const struct of_device_id ksz9477_dt_ids[] = { { .compatible = "microchip,ksz9477" }, { .compatible = "microchip,ksz9897" }, + { .compatible = "microchip,ksz9893" }, { .compatible = "microchip,ksz9567" }, {}, }; From c730ae0c6bb3125ccb776fb2ab6abbdff500c02c Mon Sep 17 00:00:00 2001 From: Marcos Paulo de Souza Date: Tue, 16 Jun 2020 15:54:29 -0300 Subject: [PATCH 116/343] btrfs: convert comments to fallthrough annotations Convert fall through comments to the pseudo-keyword which is now the preferred way. Signed-off-by: Marcos Paulo de Souza Reviewed-by: David Sterba Signed-off-by: David Sterba --- fs/btrfs/ctree.c | 2 +- fs/btrfs/ref-verify.c | 2 +- fs/btrfs/super.c | 6 +++--- fs/btrfs/volumes.h | 2 +- 4 files changed, 6 insertions(+), 6 deletions(-) diff --git a/fs/btrfs/ctree.c b/fs/btrfs/ctree.c index 3a7648bff42c..82ab6e5a386d 100644 --- a/fs/btrfs/ctree.c +++ b/fs/btrfs/ctree.c @@ -1196,7 +1196,7 @@ __tree_mod_log_rewind(struct btrfs_fs_info *fs_info, struct extent_buffer *eb, switch (tm->op) { case MOD_LOG_KEY_REMOVE_WHILE_FREEING: BUG_ON(tm->slot < n); - /* Fallthrough */ + fallthrough; case MOD_LOG_KEY_REMOVE_WHILE_MOVING: case MOD_LOG_KEY_REMOVE: btrfs_set_node_key(eb, &tm->key, tm->slot); diff --git a/fs/btrfs/ref-verify.c b/fs/btrfs/ref-verify.c index 7887317033c9..af92525dbb16 100644 --- a/fs/btrfs/ref-verify.c +++ b/fs/btrfs/ref-verify.c @@ -509,7 +509,7 @@ static int process_leaf(struct btrfs_root *root, switch (key.type) { case BTRFS_EXTENT_ITEM_KEY: *num_bytes = key.offset; - /* fall through */ + fallthrough; case BTRFS_METADATA_ITEM_KEY: *bytenr = key.objectid; ret = process_extent_item(fs_info, path, &key, i, diff --git a/fs/btrfs/super.c b/fs/btrfs/super.c index bc73fd670702..c3826ae883f0 100644 --- a/fs/btrfs/super.c +++ b/fs/btrfs/super.c @@ -523,7 +523,7 @@ int btrfs_parse_options(struct btrfs_fs_info *info, char *options, case Opt_compress_force: case Opt_compress_force_type: compress_force = true; - /* Fallthrough */ + fallthrough; case Opt_compress: case Opt_compress_type: saved_compress_type = btrfs_test_opt(info, @@ -622,7 +622,7 @@ int btrfs_parse_options(struct btrfs_fs_info *info, char *options, btrfs_set_opt(info->mount_opt, NOSSD); btrfs_clear_and_info(info, SSD, "not using ssd optimizations"); - /* Fallthrough */ + fallthrough; case Opt_nossd_spread: btrfs_clear_and_info(info, SSD_SPREAD, "not using spread ssd allocation scheme"); @@ -793,7 +793,7 @@ int btrfs_parse_options(struct btrfs_fs_info *info, char *options, case Opt_recovery: btrfs_warn(info, "'recovery' is deprecated, use 'usebackuproot' instead"); - /* fall through */ + fallthrough; case Opt_usebackuproot: btrfs_info(info, "trying to use backup root at mount time"); diff --git a/fs/btrfs/volumes.h b/fs/btrfs/volumes.h index f067b5934c46..75af2334b2e3 100644 --- a/fs/btrfs/volumes.h +++ b/fs/btrfs/volumes.h @@ -408,7 +408,7 @@ static inline enum btrfs_map_op btrfs_op(struct bio *bio) return BTRFS_MAP_WRITE; default: WARN_ON_ONCE(1); - /* fall through */ + fallthrough; case REQ_OP_READ: return BTRFS_MAP_READ; } From 6bf9cd2eed9aee6d742bb9296c994a91f5316949 Mon Sep 17 00:00:00 2001 From: Boris Burkov Date: Wed, 17 Jun 2020 11:35:19 -0700 Subject: [PATCH 117/343] btrfs: fix fatal extent_buffer readahead vs releasepage race Under somewhat convoluted conditions, it is possible to attempt to release an extent_buffer that is under io, which triggers a BUG_ON in btrfs_release_extent_buffer_pages. This relies on a few different factors. First, extent_buffer reads done as readahead for searching use WAIT_NONE, so they free the local extent buffer reference while the io is outstanding. However, they should still be protected by TREE_REF. However, if the system is doing signficant reclaim, and simultaneously heavily accessing the extent_buffers, it is possible for releasepage to race with two concurrent readahead attempts in a way that leaves TREE_REF unset when the readahead extent buffer is released. Essentially, if two tasks race to allocate a new extent_buffer, but the winner who attempts the first io is rebuffed by a page being locked (likely by the reclaim itself) then the loser will still go ahead with issuing the readahead. The loser's call to find_extent_buffer must also race with the reclaim task reading the extent_buffer's refcount as 1 in a way that allows the reclaim to re-clear the TREE_REF checked by find_extent_buffer. The following represents an example execution demonstrating the race: CPU0 CPU1 CPU2 reada_for_search reada_for_search readahead_tree_block readahead_tree_block find_create_tree_block find_create_tree_block alloc_extent_buffer alloc_extent_buffer find_extent_buffer // not found allocates eb lock pages associate pages to eb insert eb into radix tree set TREE_REF, refs == 2 unlock pages read_extent_buffer_pages // WAIT_NONE not uptodate (brand new eb) lock_page if !trylock_page goto unlock_exit // not an error free_extent_buffer release_extent_buffer atomic_dec_and_test refs to 1 find_extent_buffer // found try_release_extent_buffer take refs_lock reads refs == 1; no io atomic_inc_not_zero refs to 2 mark_buffer_accessed check_buffer_tree_ref // not STALE, won't take refs_lock refs == 2; TREE_REF set // no action read_extent_buffer_pages // WAIT_NONE clear TREE_REF release_extent_buffer atomic_dec_and_test refs to 1 unlock_page still not uptodate (CPU1 read failed on trylock_page) locks pages set io_pages > 0 submit io return free_extent_buffer release_extent_buffer dec refs to 0 delete from radix tree btrfs_release_extent_buffer_pages BUG_ON(io_pages > 0)!!! We observe this at a very low rate in production and were also able to reproduce it in a test environment by introducing some spurious delays and by introducing probabilistic trylock_page failures. To fix it, we apply check_tree_ref at a point where it could not possibly be unset by a competing task: after io_pages has been incremented. All the codepaths that clear TREE_REF check for io, so they would not be able to clear it after this point until the io is done. Stack trace, for reference: [1417839.424739] ------------[ cut here ]------------ [1417839.435328] kernel BUG at fs/btrfs/extent_io.c:4841! [1417839.447024] invalid opcode: 0000 [#1] SMP [1417839.502972] RIP: 0010:btrfs_release_extent_buffer_pages+0x20/0x1f0 [1417839.517008] Code: ed e9 ... [1417839.558895] RSP: 0018:ffffc90020bcf798 EFLAGS: 00010202 [1417839.570816] RAX: 0000000000000002 RBX: ffff888102d6def0 RCX: 0000000000000028 [1417839.586962] RDX: 0000000000000002 RSI: ffff8887f0296482 RDI: ffff888102d6def0 [1417839.603108] RBP: ffff88885664a000 R08: 0000000000000046 R09: 0000000000000238 [1417839.619255] R10: 0000000000000028 R11: ffff88885664af68 R12: 0000000000000000 [1417839.635402] R13: 0000000000000000 R14: ffff88875f573ad0 R15: ffff888797aafd90 [1417839.651549] FS: 00007f5a844fa700(0000) GS:ffff88885f680000(0000) knlGS:0000000000000000 [1417839.669810] CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 [1417839.682887] CR2: 00007f7884541fe0 CR3: 000000049f609002 CR4: 00000000003606e0 [1417839.699037] DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000 [1417839.715187] DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400 [1417839.731320] Call Trace: [1417839.737103] release_extent_buffer+0x39/0x90 [1417839.746913] read_block_for_search.isra.38+0x2a3/0x370 [1417839.758645] btrfs_search_slot+0x260/0x9b0 [1417839.768054] btrfs_lookup_file_extent+0x4a/0x70 [1417839.778427] btrfs_get_extent+0x15f/0x830 [1417839.787665] ? submit_extent_page+0xc4/0x1c0 [1417839.797474] ? __do_readpage+0x299/0x7a0 [1417839.806515] __do_readpage+0x33b/0x7a0 [1417839.815171] ? btrfs_releasepage+0x70/0x70 [1417839.824597] extent_readpages+0x28f/0x400 [1417839.833836] read_pages+0x6a/0x1c0 [1417839.841729] ? startup_64+0x2/0x30 [1417839.849624] __do_page_cache_readahead+0x13c/0x1a0 [1417839.860590] filemap_fault+0x6c7/0x990 [1417839.869252] ? xas_load+0x8/0x80 [1417839.876756] ? xas_find+0x150/0x190 [1417839.884839] ? filemap_map_pages+0x295/0x3b0 [1417839.894652] __do_fault+0x32/0x110 [1417839.902540] __handle_mm_fault+0xacd/0x1000 [1417839.912156] handle_mm_fault+0xaa/0x1c0 [1417839.921004] __do_page_fault+0x242/0x4b0 [1417839.930044] ? page_fault+0x8/0x30 [1417839.937933] page_fault+0x1e/0x30 [1417839.945631] RIP: 0033:0x33c4bae [1417839.952927] Code: Bad RIP value. [1417839.960411] RSP: 002b:00007f5a844f7350 EFLAGS: 00010206 [1417839.972331] RAX: 000000000000006e RBX: 1614b3ff6a50398a RCX: 0000000000000000 [1417839.988477] RDX: 0000000000000000 RSI: 0000000000000000 RDI: 0000000000000002 [1417840.004626] RBP: 00007f5a844f7420 R08: 000000000000006e R09: 00007f5a94aeccb8 [1417840.020784] R10: 00007f5a844f7350 R11: 0000000000000000 R12: 00007f5a94aecc79 [1417840.036932] R13: 00007f5a94aecc78 R14: 00007f5a94aecc90 R15: 00007f5a94aecc40 CC: stable@vger.kernel.org # 4.4+ Reviewed-by: Filipe Manana Signed-off-by: Boris Burkov Signed-off-by: David Sterba --- fs/btrfs/extent_io.c | 40 ++++++++++++++++++++++++---------------- 1 file changed, 24 insertions(+), 16 deletions(-) diff --git a/fs/btrfs/extent_io.c b/fs/btrfs/extent_io.c index c59e07360083..95313bb7fe40 100644 --- a/fs/btrfs/extent_io.c +++ b/fs/btrfs/extent_io.c @@ -5086,25 +5086,28 @@ struct extent_buffer *alloc_dummy_extent_buffer(struct btrfs_fs_info *fs_info, static void check_buffer_tree_ref(struct extent_buffer *eb) { int refs; - /* the ref bit is tricky. We have to make sure it is set - * if we have the buffer dirty. Otherwise the - * code to free a buffer can end up dropping a dirty - * page + /* + * The TREE_REF bit is first set when the extent_buffer is added + * to the radix tree. It is also reset, if unset, when a new reference + * is created by find_extent_buffer. * - * Once the ref bit is set, it won't go away while the - * buffer is dirty or in writeback, and it also won't - * go away while we have the reference count on the - * eb bumped. + * It is only cleared in two cases: freeing the last non-tree + * reference to the extent_buffer when its STALE bit is set or + * calling releasepage when the tree reference is the only reference. * - * We can't just set the ref bit without bumping the - * ref on the eb because free_extent_buffer might - * see the ref bit and try to clear it. If this happens - * free_extent_buffer might end up dropping our original - * ref by mistake and freeing the page before we are able - * to add one more ref. + * In both cases, care is taken to ensure that the extent_buffer's + * pages are not under io. However, releasepage can be concurrently + * called with creating new references, which is prone to race + * conditions between the calls to check_buffer_tree_ref in those + * codepaths and clearing TREE_REF in try_release_extent_buffer. * - * So bump the ref count first, then set the bit. If someone - * beat us to it, drop the ref we added. + * The actual lifetime of the extent_buffer in the radix tree is + * adequately protected by the refcount, but the TREE_REF bit and + * its corresponding reference are not. To protect against this + * class of races, we call check_buffer_tree_ref from the codepaths + * which trigger io after they set eb->io_pages. Note that once io is + * initiated, TREE_REF can no longer be cleared, so that is the + * moment at which any such race is best fixed. */ refs = atomic_read(&eb->refs); if (refs >= 2 && test_bit(EXTENT_BUFFER_TREE_REF, &eb->bflags)) @@ -5555,6 +5558,11 @@ int read_extent_buffer_pages(struct extent_buffer *eb, int wait, int mirror_num) clear_bit(EXTENT_BUFFER_READ_ERR, &eb->bflags); eb->read_mirror = 0; atomic_set(&eb->io_pages, num_reads); + /* + * It is possible for releasepage to clear the TREE_REF bit before we + * set io_pages. See check_buffer_tree_ref for a more detailed comment. + */ + check_buffer_tree_ref(eb); for (i = 0; i < num_pages; i++) { page = eb->pages[i]; From 6d548b9e5d56067cff17ff77585167cd65375e4b Mon Sep 17 00:00:00 2001 From: Filipe Manana Date: Sat, 27 Jun 2020 11:40:44 +0100 Subject: [PATCH 118/343] btrfs: fix reclaim_size counter leak after stealing from global reserve Commit 7f9fe614407692 ("btrfs: improve global reserve stealing logic"), added in the 5.8 merge window, introduced another leak for the space_info's reclaim_size counter. This is very often triggered by the test cases generic/269 and generic/416 from fstests, producing a stack trace like the following during unmount: [37079.155499] ------------[ cut here ]------------ [37079.156844] WARNING: CPU: 2 PID: 2000423 at fs/btrfs/block-group.c:3422 btrfs_free_block_groups+0x2eb/0x300 [btrfs] [37079.158090] Modules linked in: dm_snapshot btrfs dm_thin_pool (...) [37079.164440] CPU: 2 PID: 2000423 Comm: umount Tainted: G W 5.7.0-rc7-btrfs-next-62 #1 [37079.165422] Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), (...) [37079.167384] RIP: 0010:btrfs_free_block_groups+0x2eb/0x300 [btrfs] [37079.168375] Code: bd 58 ff ff ff 00 4c 8d (...) [37079.170199] RSP: 0018:ffffaa53875c7de0 EFLAGS: 00010206 [37079.171120] RAX: ffff98099e701cf8 RBX: ffff98099e2d4000 RCX: 0000000000000000 [37079.172057] RDX: 0000000000000001 RSI: ffffffffc0acc5b1 RDI: 00000000ffffffff [37079.173002] RBP: ffff98099e701cf8 R08: 0000000000000000 R09: 0000000000000000 [37079.173886] R10: 0000000000000000 R11: 0000000000000000 R12: ffff98099e701c00 [37079.174730] R13: ffff98099e2d5100 R14: dead000000000122 R15: dead000000000100 [37079.175578] FS: 00007f4d7d0a5840(0000) GS:ffff9809ec600000(0000) knlGS:0000000000000000 [37079.176434] CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 [37079.177289] CR2: 0000559224dcc000 CR3: 000000012207a004 CR4: 00000000003606e0 [37079.178152] DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000 [37079.178935] DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400 [37079.179675] Call Trace: [37079.180419] close_ctree+0x291/0x2d1 [btrfs] [37079.181162] generic_shutdown_super+0x6c/0x100 [37079.181898] kill_anon_super+0x14/0x30 [37079.182641] btrfs_kill_super+0x12/0x20 [btrfs] [37079.183371] deactivate_locked_super+0x31/0x70 [37079.184012] cleanup_mnt+0x100/0x160 [37079.184650] task_work_run+0x68/0xb0 [37079.185284] exit_to_usermode_loop+0xf9/0x100 [37079.185920] do_syscall_64+0x20d/0x260 [37079.186556] entry_SYSCALL_64_after_hwframe+0x49/0xb3 [37079.187197] RIP: 0033:0x7f4d7d2d9357 [37079.187836] Code: eb 0b 00 f7 d8 64 89 01 48 (...) [37079.189180] RSP: 002b:00007ffee4e0d368 EFLAGS: 00000246 ORIG_RAX: 00000000000000a6 [37079.189845] RAX: 0000000000000000 RBX: 00007f4d7d3fb224 RCX: 00007f4d7d2d9357 [37079.190515] RDX: ffffffffffffff78 RSI: 0000000000000000 RDI: 0000559224dc5c90 [37079.191173] RBP: 0000559224dc1970 R08: 0000000000000000 R09: 00007ffee4e0c0e0 [37079.191815] R10: 0000559224dc7b00 R11: 0000000000000246 R12: 0000000000000000 [37079.192451] R13: 0000559224dc5c90 R14: 0000559224dc1a80 R15: 0000559224dc1ba0 [37079.193096] irq event stamp: 0 [37079.193729] hardirqs last enabled at (0): [<0000000000000000>] 0x0 [37079.194379] hardirqs last disabled at (0): [] copy_process+0x755/0x1ea0 [37079.195033] softirqs last enabled at (0): [] copy_process+0x755/0x1ea0 [37079.195700] softirqs last disabled at (0): [<0000000000000000>] 0x0 [37079.196318] ---[ end trace b32710d864dea887 ]--- In the past commit d611add48b717a ("btrfs: fix reclaim counter leak of space_info objects") fixed similar cases. That commit however has a date more recent (April 7 2020) then the commit mentioned before (March 13 2020), however it was merged in kernel 5.7 while the older commit, which introduces a new leak, was merged only in the 5.8 merge window. So the leak sneaked in unnoticed. Fix this by making steal_from_global_rsv() remove the ticket using the helper remove_ticket(), which decrements the reclaim_size counter of the space_info object. Fixes: 7f9fe614407692 ("btrfs: improve global reserve stealing logic") Signed-off-by: Filipe Manana Reviewed-by: David Sterba Signed-off-by: David Sterba --- fs/btrfs/space-info.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fs/btrfs/space-info.c b/fs/btrfs/space-info.c index 41ee88633769..c7bd3fdd7792 100644 --- a/fs/btrfs/space-info.c +++ b/fs/btrfs/space-info.c @@ -879,8 +879,8 @@ static bool steal_from_global_rsv(struct btrfs_fs_info *fs_info, return false; } global_rsv->reserved -= ticket->bytes; + remove_ticket(space_info, ticket); ticket->bytes = 0; - list_del_init(&ticket->list); wake_up(&ticket->wait); space_info->tickets_id++; if (global_rsv->reserved < global_rsv->size) From 0465337c5599bbe360cdcff452992a1a6b7ed2d4 Mon Sep 17 00:00:00 2001 From: Josef Bacik Date: Tue, 30 Jun 2020 14:53:02 -0400 Subject: [PATCH 119/343] btrfs: reset tree root pointer after error in init_tree_roots Eric reported an issue where mounting -o recovery with a fuzzed fs resulted in a kernel panic. This is because we tried to free the tree node, except it was an error from the read. Fix this by properly resetting the tree_root->node == NULL in this case. The panic was the following BTRFS warning (device loop0): failed to read tree root BUG: kernel NULL pointer dereference, address: 000000000000001f RIP: 0010:free_extent_buffer+0xe/0x90 [btrfs] Call Trace: free_root_extent_buffers.part.0+0x11/0x30 [btrfs] free_root_pointers+0x1a/0xa2 [btrfs] open_ctree+0x1776/0x18a5 [btrfs] btrfs_mount_root.cold+0x13/0xfa [btrfs] ? selinux_fs_context_parse_param+0x37/0x80 legacy_get_tree+0x27/0x40 vfs_get_tree+0x25/0xb0 fc_mount+0xe/0x30 vfs_kern_mount.part.0+0x71/0x90 btrfs_mount+0x147/0x3e0 [btrfs] ? cred_has_capability+0x7c/0x120 ? legacy_get_tree+0x27/0x40 legacy_get_tree+0x27/0x40 vfs_get_tree+0x25/0xb0 do_mount+0x735/0xa40 __x64_sys_mount+0x8e/0xd0 do_syscall_64+0x4d/0x90 entry_SYSCALL_64_after_hwframe+0x44/0xa9 Nik says: this is problematic only if we fail on the last iteration of the loop as this results in init_tree_roots returning err value with tree_root->node = -ERR. Subsequently the caller does: fail_tree_roots which calls free_root_pointers on the bogus value. Reported-by: Eric Sandeen Fixes: b8522a1e5f42 ("btrfs: Factor out tree roots initialization during mount") CC: stable@vger.kernel.org # 5.5+ Reviewed-by: Nikolay Borisov Signed-off-by: Josef Bacik Reviewed-by: David Sterba [ add details how the pointer gets dereferenced ] Signed-off-by: David Sterba --- fs/btrfs/disk-io.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c index f8ec2d8606fd..3d4bb77680e7 100644 --- a/fs/btrfs/disk-io.c +++ b/fs/btrfs/disk-io.c @@ -2595,10 +2595,12 @@ static int __cold init_tree_roots(struct btrfs_fs_info *fs_info) !extent_buffer_uptodate(tree_root->node)) { handle_error = true; - if (IS_ERR(tree_root->node)) + if (IS_ERR(tree_root->node)) { ret = PTR_ERR(tree_root->node); - else if (!extent_buffer_uptodate(tree_root->node)) + tree_root->node = NULL; + } else if (!extent_buffer_uptodate(tree_root->node)) { ret = -EUCLEAN; + } btrfs_warn(fs_info, "failed to read tree root"); continue; From 17f64701ea6f541db7eb5d7423a830cb929b3052 Mon Sep 17 00:00:00 2001 From: Martin Blumenstingl Date: Sat, 20 Jun 2020 17:57:52 +0200 Subject: [PATCH 120/343] drm/meson: viu: fix setting the OSD burst length in VIU_OSD1_FIFO_CTRL_STAT The burst length is configured in VIU_OSD1_FIFO_CTRL_STAT[31] and VIU_OSD1_FIFO_CTRL_STAT[11:10]. The public S905D3 datasheet describes this as: - 0x0 = up to 24 per burst - 0x1 = up to 32 per burst - 0x2 = up to 48 per burst - 0x3 = up to 64 per burst - 0x4 = up to 96 per burst - 0x5 = up to 128 per burst The lower two bits map to VIU_OSD1_FIFO_CTRL_STAT[11:10] while the upper bit maps to VIU_OSD1_FIFO_CTRL_STAT[31]. Replace meson_viu_osd_burst_length_reg() with pre-defined macros which set these values. meson_viu_osd_burst_length_reg() always returned 0 (for the two used values: 32 and 64 at least) and thus incorrectly set the burst size to 24. Fixes: 147ae1cbaa1842 ("drm: meson: viu: use proper macros instead of magic constants") Signed-off-by: Martin Blumenstingl Signed-off-by: Neil Armstrong Reviewed-by: Neil Armstrong Tested-by: Christian Hewitt Link: https://patchwork.freedesktop.org/patch/msgid/20200620155752.21065-1-martin.blumenstingl@googlemail.com --- drivers/gpu/drm/meson/meson_registers.h | 6 ++++++ drivers/gpu/drm/meson/meson_viu.c | 11 ++--------- 2 files changed, 8 insertions(+), 9 deletions(-) diff --git a/drivers/gpu/drm/meson/meson_registers.h b/drivers/gpu/drm/meson/meson_registers.h index 8ea00546cd4e..049c4bfe2a3a 100644 --- a/drivers/gpu/drm/meson/meson_registers.h +++ b/drivers/gpu/drm/meson/meson_registers.h @@ -261,6 +261,12 @@ #define VIU_OSD_FIFO_DEPTH_VAL(val) ((val & 0x7f) << 12) #define VIU_OSD_WORDS_PER_BURST(words) (((words & 0x4) >> 1) << 22) #define VIU_OSD_FIFO_LIMITS(size) ((size & 0xf) << 24) +#define VIU_OSD_BURST_LENGTH_24 (0x0 << 31 | 0x0 << 10) +#define VIU_OSD_BURST_LENGTH_32 (0x0 << 31 | 0x1 << 10) +#define VIU_OSD_BURST_LENGTH_48 (0x0 << 31 | 0x2 << 10) +#define VIU_OSD_BURST_LENGTH_64 (0x0 << 31 | 0x3 << 10) +#define VIU_OSD_BURST_LENGTH_96 (0x1 << 31 | 0x0 << 10) +#define VIU_OSD_BURST_LENGTH_128 (0x1 << 31 | 0x1 << 10) #define VD1_IF0_GEN_REG 0x1a50 #define VD1_IF0_CANVAS0 0x1a51 diff --git a/drivers/gpu/drm/meson/meson_viu.c b/drivers/gpu/drm/meson/meson_viu.c index 304f8ff1339c..aede0c67a57f 100644 --- a/drivers/gpu/drm/meson/meson_viu.c +++ b/drivers/gpu/drm/meson/meson_viu.c @@ -411,13 +411,6 @@ void meson_viu_gxm_disable_osd1_afbc(struct meson_drm *priv) priv->io_base + _REG(VIU_MISC_CTRL1)); } -static inline uint32_t meson_viu_osd_burst_length_reg(uint32_t length) -{ - uint32_t val = (((length & 0x80) % 24) / 12); - - return (((val & 0x3) << 10) | (((val & 0x4) >> 2) << 31)); -} - void meson_viu_init(struct meson_drm *priv) { uint32_t reg; @@ -444,9 +437,9 @@ void meson_viu_init(struct meson_drm *priv) VIU_OSD_FIFO_LIMITS(2); /* fifo_lim: 2*16=32 */ if (meson_vpu_is_compatible(priv, VPU_COMPATIBLE_G12A)) - reg |= meson_viu_osd_burst_length_reg(32); + reg |= VIU_OSD_BURST_LENGTH_32; else - reg |= meson_viu_osd_burst_length_reg(64); + reg |= VIU_OSD_BURST_LENGTH_64; writel_relaxed(reg, priv->io_base + _REG(VIU_OSD1_FIFO_CTRL_STAT)); writel_relaxed(reg, priv->io_base + _REG(VIU_OSD2_FIFO_CTRL_STAT)); From 1393b4aaf9e1e803d59726053d542cebd4e2b5b2 Mon Sep 17 00:00:00 2001 From: Paolo Bonzini Date: Thu, 2 Jul 2020 05:39:31 -0400 Subject: [PATCH 121/343] kvm: use more precise cast and do not drop __user Sparse complains on a call to get_compat_sigset, fix it. The "if" right above explains that sigmask_arg->sigset is basically a compat_sigset_t. Signed-off-by: Paolo Bonzini --- virt/kvm/kvm_main.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c index a852af5c3214..0a68c9d3d3ab 100644 --- a/virt/kvm/kvm_main.c +++ b/virt/kvm/kvm_main.c @@ -3350,7 +3350,8 @@ static long kvm_vcpu_compat_ioctl(struct file *filp, if (kvm_sigmask.len != sizeof(compat_sigset_t)) goto out; r = -EFAULT; - if (get_compat_sigset(&sigset, (void *)sigmask_arg->sigset)) + if (get_compat_sigset(&sigset, + (compat_sigset_t __user *)sigmask_arg->sigset)) goto out; r = kvm_vcpu_ioctl_set_sigmask(vcpu, &sigset); } else From e18321acfb9f14d01c03578e1c498e3f815d20a3 Mon Sep 17 00:00:00 2001 From: Nathan Chancellor Date: Mon, 22 Jun 2020 17:52:24 -0700 Subject: [PATCH 122/343] IB/hfi1: Add explicit cast OPA_MTU_8192 to 'enum ib_mtu' Clang warns: drivers/infiniband/hw/hfi1/qp.c:198:9: warning: implicit conversion from enumeration type 'enum opa_mtu' to different enumeration type 'enum ib_mtu' [-Wenum-conversion] mtu = OPA_MTU_8192; ~ ^~~~~~~~~~~~ enum opa_mtu extends enum ib_mtu. There are typically two ways to deal with this: * Remove the expected types and just use 'int' for all parameters and types. * Explicitly cast the enums between each other. This driver chooses to do the later so do the same thing here. Fixes: 6d72344cf6c4 ("IB/ipoib: Increase ipoib Datagram mode MTU's upper limit") Link: https://lore.kernel.org/r/20200623005224.492239-1-natechancellor@gmail.com Link: https://github.com/ClangBuiltLinux/linux/issues/1062 Link: https://lore.kernel.org/linux-rdma/20200527040350.GA3118979@ubuntu-s3-xlarge-x86/ Signed-off-by: Nathan Chancellor Acked-by: Dennis Dalessandro Signed-off-by: Jason Gunthorpe --- drivers/infiniband/hw/hfi1/qp.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/infiniband/hw/hfi1/qp.c b/drivers/infiniband/hw/hfi1/qp.c index 0c2ae9f7b3e8..2f3d9ce077d3 100644 --- a/drivers/infiniband/hw/hfi1/qp.c +++ b/drivers/infiniband/hw/hfi1/qp.c @@ -195,7 +195,7 @@ static inline int verbs_mtu_enum_to_int(struct ib_device *dev, enum ib_mtu mtu) { /* Constraining 10KB packets to 8KB packets */ if (mtu == (enum ib_mtu)OPA_MTU_10240) - mtu = OPA_MTU_8192; + mtu = (enum ib_mtu)OPA_MTU_8192; return opa_mtu_enum_to_int((enum opa_mtu)mtu); } From f81b4565c1108fb954f344e6b4a153c8189e57fe Mon Sep 17 00:00:00 2001 From: Leon Romanovsky Date: Tue, 30 Jun 2020 15:21:47 +0300 Subject: [PATCH 123/343] RDMA/mlx5: Fix legacy IPoIB QP initialization Legacy IPoIB sets IB_QP_CREATE_NETIF_QP QP create flag and because mlx5 doesn't use this flag, the process_create_flags() failed to create IPoIB QPs. Fixes: 2978975ce7f1 ("RDMA/mlx5: Process create QP flags in one place") Link: https://lore.kernel.org/r/20200630122147.445847-1-leon@kernel.org Signed-off-by: Leon Romanovsky Signed-off-by: Jason Gunthorpe --- drivers/infiniband/hw/mlx5/qp.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/drivers/infiniband/hw/mlx5/qp.c b/drivers/infiniband/hw/mlx5/qp.c index f939c9b769f0..b316c9cafbc5 100644 --- a/drivers/infiniband/hw/mlx5/qp.c +++ b/drivers/infiniband/hw/mlx5/qp.c @@ -2668,6 +2668,10 @@ static int process_create_flags(struct mlx5_ib_dev *dev, struct mlx5_ib_qp *qp, if (qp_type == IB_QPT_RAW_PACKET && attr->rwq_ind_tbl) return (create_flags) ? -EINVAL : 0; + process_create_flag(dev, &create_flags, IB_QP_CREATE_NETIF_QP, + mlx5_get_flow_namespace(dev->mdev, + MLX5_FLOW_NAMESPACE_BYPASS), + qp); process_create_flag(dev, &create_flags, IB_QP_CREATE_INTEGRITY_EN, MLX5_CAP_GEN(mdev, sho), qp); From 73f9941306d5ce030f3ffc7db425c7b2a798cf8e Mon Sep 17 00:00:00 2001 From: Max Filippov Date: Thu, 4 Jun 2020 13:37:10 -0700 Subject: [PATCH 124/343] xtensa: fix __sync_fetch_and_{and,or}_4 declarations MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Building xtensa kernel with gcc-10 produces the following warnings: arch/xtensa/kernel/xtensa_ksyms.c:90:15: warning: conflicting types for built-in function ‘__sync_fetch_and_and_4’; expected ‘unsigned int(volatile void *, unsigned int)’ [-Wbuiltin-declaration-mismatch] arch/xtensa/kernel/xtensa_ksyms.c:96:15: warning: conflicting types for built-in function ‘__sync_fetch_and_or_4’; expected ‘unsigned int(volatile void *, unsigned int)’ [-Wbuiltin-declaration-mismatch] Fix declarations of these functions to avoid the warning. Signed-off-by: Max Filippov --- arch/xtensa/kernel/xtensa_ksyms.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/arch/xtensa/kernel/xtensa_ksyms.c b/arch/xtensa/kernel/xtensa_ksyms.c index 4092555828b1..24cf6972eace 100644 --- a/arch/xtensa/kernel/xtensa_ksyms.c +++ b/arch/xtensa/kernel/xtensa_ksyms.c @@ -87,13 +87,13 @@ void __xtensa_libgcc_window_spill(void) } EXPORT_SYMBOL(__xtensa_libgcc_window_spill); -unsigned long __sync_fetch_and_and_4(unsigned long *p, unsigned long v) +unsigned int __sync_fetch_and_and_4(volatile void *p, unsigned int v) { BUG(); } EXPORT_SYMBOL(__sync_fetch_and_and_4); -unsigned long __sync_fetch_and_or_4(unsigned long *p, unsigned long v) +unsigned int __sync_fetch_and_or_4(volatile void *p, unsigned int v) { BUG(); } From 0d5ab144429e8bd80889b856a44d56ab4a5cd59b Mon Sep 17 00:00:00 2001 From: Max Filippov Date: Thu, 2 Jul 2020 08:32:25 -0700 Subject: [PATCH 125/343] xtensa: update *pos in cpuinfo_op.next Increment *pos in the cpuinfo_op.next to fix the following warning triggered by cat /proc/cpuinfo: seq_file: buggy .next function c_next did not update position index Signed-off-by: Max Filippov --- arch/xtensa/kernel/setup.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/arch/xtensa/kernel/setup.c b/arch/xtensa/kernel/setup.c index d9204dc2656e..be2c78f71695 100644 --- a/arch/xtensa/kernel/setup.c +++ b/arch/xtensa/kernel/setup.c @@ -724,7 +724,8 @@ c_start(struct seq_file *f, loff_t *pos) static void * c_next(struct seq_file *f, void *v, loff_t *pos) { - return NULL; + ++*pos; + return c_start(f, pos); } static void From 28b70cd9236563e1a88a6094673fef3c08db0d51 Mon Sep 17 00:00:00 2001 From: Kaike Wan Date: Tue, 23 Jun 2020 16:40:47 -0400 Subject: [PATCH 126/343] IB/hfi1: Do not destroy hfi1_wq when the device is shut down The workqueue hfi1_wq is destroyed in function shutdown_device(), which is called by either shutdown_one() or remove_one(). The function shutdown_one() is called when the kernel is rebooted while remove_one() is called when the hfi1 driver is unloaded. When the kernel is rebooted, hfi1_wq is destroyed while all qps are still active, leading to a kernel crash: BUG: unable to handle kernel NULL pointer dereference at 0000000000000102 IP: [] __queue_work+0x32/0x3e0 PGD 0 Oops: 0000 [#1] SMP Modules linked in: dm_round_robin nvme_rdma(OE) nvme_fabrics(OE) nvme_core(OE) ib_isert iscsi_target_mod target_core_mod ib_ucm mlx4_ib iTCO_wdt iTCO_vendor_support mxm_wmi sb_edac intel_powerclamp coretemp intel_rapl iosf_mbi kvm rpcrdma sunrpc irqbypass crc32_pclmul ghash_clmulni_intel rdma_ucm aesni_intel ib_uverbs lrw gf128mul opa_vnic glue_helper ablk_helper ib_iser cryptd ib_umad rdma_cm iw_cm ses enclosure libiscsi scsi_transport_sas pcspkr joydev ib_ipoib(OE) scsi_transport_iscsi ib_cm sg ipmi_ssif mei_me lpc_ich i2c_i801 mei ioatdma ipmi_si dm_multipath ipmi_devintf ipmi_msghandler wmi acpi_pad acpi_power_meter hangcheck_timer ip_tables ext4 mbcache jbd2 mlx4_en sd_mod crc_t10dif crct10dif_generic mgag200 drm_kms_helper syscopyarea sysfillrect sysimgblt fb_sys_fops ttm hfi1(OE) crct10dif_pclmul crct10dif_common crc32c_intel drm ahci mlx4_core libahci rdmavt(OE) igb megaraid_sas ib_core libata drm_panel_orientation_quirks ptp pps_core devlink dca i2c_algo_bit dm_mirror dm_region_hash dm_log dm_mod CPU: 19 PID: 0 Comm: swapper/19 Kdump: loaded Tainted: G OE ------------ 3.10.0-957.el7.x86_64 #1 Hardware name: Phegda X2226A/S2600CW, BIOS SE5C610.86B.01.01.0024.021320181901 02/13/2018 task: ffff8a799ba0d140 ti: ffff8a799bad8000 task.ti: ffff8a799bad8000 RIP: 0010:[] [] __queue_work+0x32/0x3e0 RSP: 0018:ffff8a90dde43d80 EFLAGS: 00010046 RAX: 0000000000000082 RBX: 0000000000000086 RCX: 0000000000000000 RDX: ffff8a90b924fcb8 RSI: 0000000000000000 RDI: 000000000000001b RBP: ffff8a90dde43db8 R08: ffff8a799ba0d6d8 R09: ffff8a90dde53900 R10: 0000000000000002 R11: ffff8a90dde43de8 R12: ffff8a90b924fcb8 R13: 000000000000001b R14: 0000000000000000 R15: ffff8a90d2890000 FS: 0000000000000000(0000) GS:ffff8a90dde40000(0000) knlGS:0000000000000000 CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 CR2: 0000000000000102 CR3: 0000001a70410000 CR4: 00000000001607e0 Call Trace: [] queue_work_on+0x45/0x50 [] _hfi1_schedule_send+0x6e/0xc0 [hfi1] [] hfi1_schedule_send+0x32/0x70 [hfi1] [] rvt_rc_timeout+0xe9/0x130 [rdmavt] [] ? trigger_load_balance+0x6a/0x280 [] ? rvt_free_qpn+0x40/0x40 [rdmavt] [] call_timer_fn+0x38/0x110 [] ? rvt_free_qpn+0x40/0x40 [rdmavt] [] run_timer_softirq+0x24d/0x300 [] __do_softirq+0xf5/0x280 [] call_softirq+0x1c/0x30 [] do_softirq+0x65/0xa0 [] irq_exit+0x105/0x110 [] smp_apic_timer_interrupt+0x48/0x60 [] apic_timer_interrupt+0x162/0x170 [] ? cpuidle_enter_state+0x57/0xd0 [] cpuidle_idle_call+0xde/0x230 [] arch_cpu_idle+0xe/0xc0 [] cpu_startup_entry+0x14a/0x1e0 [] start_secondary+0x1f7/0x270 [] start_cpu+0x5/0x14 The solution is to destroy the workqueue only when the hfi1 driver is unloaded, not when the device is shut down. In addition, when the device is shut down, no more work should be scheduled on the workqueues and the workqueues are flushed. Fixes: 8d3e71136a08 ("IB/{hfi1, qib}: Add handling of kernel restart") Link: https://lore.kernel.org/r/20200623204047.107638.77646.stgit@awfm-01.aw.intel.com Cc: Reviewed-by: Mike Marciniszyn Signed-off-by: Kaike Wan Signed-off-by: Dennis Dalessandro Signed-off-by: Jason Gunthorpe --- drivers/infiniband/hw/hfi1/init.c | 27 +++++++++++++++++++++++---- drivers/infiniband/hw/hfi1/qp.c | 5 ++++- drivers/infiniband/hw/hfi1/tid_rdma.c | 5 ++++- 3 files changed, 31 insertions(+), 6 deletions(-) diff --git a/drivers/infiniband/hw/hfi1/init.c b/drivers/infiniband/hw/hfi1/init.c index 5eed4360695f..16d6788075f3 100644 --- a/drivers/infiniband/hw/hfi1/init.c +++ b/drivers/infiniband/hw/hfi1/init.c @@ -830,6 +830,25 @@ static int create_workqueues(struct hfi1_devdata *dd) return -ENOMEM; } +/** + * destroy_workqueues - destroy per port workqueues + * @dd: the hfi1_ib device + */ +static void destroy_workqueues(struct hfi1_devdata *dd) +{ + int pidx; + struct hfi1_pportdata *ppd; + + for (pidx = 0; pidx < dd->num_pports; ++pidx) { + ppd = dd->pport + pidx; + + if (ppd->hfi1_wq) { + destroy_workqueue(ppd->hfi1_wq); + ppd->hfi1_wq = NULL; + } + } +} + /** * enable_general_intr() - Enable the IRQs that will be handled by the * general interrupt handler. @@ -1104,11 +1123,10 @@ static void shutdown_device(struct hfi1_devdata *dd) */ hfi1_quiet_serdes(ppd); - if (ppd->hfi1_wq) { - destroy_workqueue(ppd->hfi1_wq); - ppd->hfi1_wq = NULL; - } + if (ppd->hfi1_wq) + flush_workqueue(ppd->hfi1_wq); if (ppd->link_wq) { + flush_workqueue(ppd->link_wq); destroy_workqueue(ppd->link_wq); ppd->link_wq = NULL; } @@ -1756,6 +1774,7 @@ static void remove_one(struct pci_dev *pdev) * clear dma engines, etc. */ shutdown_device(dd); + destroy_workqueues(dd); stop_timers(dd); diff --git a/drivers/infiniband/hw/hfi1/qp.c b/drivers/infiniband/hw/hfi1/qp.c index 2f3d9ce077d3..be62284e42d9 100644 --- a/drivers/infiniband/hw/hfi1/qp.c +++ b/drivers/infiniband/hw/hfi1/qp.c @@ -367,7 +367,10 @@ bool _hfi1_schedule_send(struct rvt_qp *qp) struct hfi1_ibport *ibp = to_iport(qp->ibqp.device, qp->port_num); struct hfi1_pportdata *ppd = ppd_from_ibp(ibp); - struct hfi1_devdata *dd = dd_from_ibdev(qp->ibqp.device); + struct hfi1_devdata *dd = ppd->dd; + + if (dd->flags & HFI1_SHUTDOWN) + return true; return iowait_schedule(&priv->s_iowait, ppd->hfi1_wq, priv->s_sde ? diff --git a/drivers/infiniband/hw/hfi1/tid_rdma.c b/drivers/infiniband/hw/hfi1/tid_rdma.c index 243b4ba0b6f6..facff133139a 100644 --- a/drivers/infiniband/hw/hfi1/tid_rdma.c +++ b/drivers/infiniband/hw/hfi1/tid_rdma.c @@ -5406,7 +5406,10 @@ static bool _hfi1_schedule_tid_send(struct rvt_qp *qp) struct hfi1_ibport *ibp = to_iport(qp->ibqp.device, qp->port_num); struct hfi1_pportdata *ppd = ppd_from_ibp(ibp); - struct hfi1_devdata *dd = dd_from_ibdev(qp->ibqp.device); + struct hfi1_devdata *dd = ppd->dd; + + if ((dd->flags & HFI1_SHUTDOWN)) + return true; return iowait_tid_schedule(&priv->s_iowait, ppd->hfi1_wq, priv->s_sde ? From 2315ec12ee8e8257bb335654c62e0cae71dc278d Mon Sep 17 00:00:00 2001 From: Kaike Wan Date: Tue, 23 Jun 2020 16:40:53 -0400 Subject: [PATCH 127/343] IB/hfi1: Do not destroy link_wq when the device is shut down The workqueue link_wq should only be destroyed when the hfi1 driver is unloaded, not when the device is shut down. Fixes: 71d47008ca1b ("IB/hfi1: Create workqueue for link events") Link: https://lore.kernel.org/r/20200623204053.107638.70315.stgit@awfm-01.aw.intel.com Cc: Reviewed-by: Mike Marciniszyn Signed-off-by: Kaike Wan Signed-off-by: Dennis Dalessandro Signed-off-by: Jason Gunthorpe --- drivers/infiniband/hw/hfi1/init.c | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/drivers/infiniband/hw/hfi1/init.c b/drivers/infiniband/hw/hfi1/init.c index 16d6788075f3..cb7ad1288821 100644 --- a/drivers/infiniband/hw/hfi1/init.c +++ b/drivers/infiniband/hw/hfi1/init.c @@ -846,6 +846,10 @@ static void destroy_workqueues(struct hfi1_devdata *dd) destroy_workqueue(ppd->hfi1_wq); ppd->hfi1_wq = NULL; } + if (ppd->link_wq) { + destroy_workqueue(ppd->link_wq); + ppd->link_wq = NULL; + } } } @@ -1122,14 +1126,10 @@ static void shutdown_device(struct hfi1_devdata *dd) * We can't count on interrupts since we are stopping. */ hfi1_quiet_serdes(ppd); - if (ppd->hfi1_wq) flush_workqueue(ppd->hfi1_wq); - if (ppd->link_wq) { + if (ppd->link_wq) flush_workqueue(ppd->link_wq); - destroy_workqueue(ppd->link_wq); - ppd->link_wq = NULL; - } } sdma_exit(dd); } From f427f4d6214c183c474eeb46212d38e6c7223d6a Mon Sep 17 00:00:00 2001 From: Divya Indi Date: Tue, 23 Jun 2020 19:13:09 -0700 Subject: [PATCH 128/343] IB/sa: Resolv use-after-free in ib_nl_make_request() There is a race condition where ib_nl_make_request() inserts the request data into the linked list but the timer in ib_nl_request_timeout() can see it and destroy it before ib_nl_send_msg() is done touching it. This could happen, for instance, if there is a long delay allocating memory during nlmsg_new() This causes a use-after-free in the send_mad() thread: [] ? ib_pack+0x17b/0x240 [ib_core] [ ] ib_sa_path_rec_get+0x181/0x200 [ib_sa] [] rdma_resolve_route+0x3c0/0x8d0 [rdma_cm] [] ? cma_bind_port+0xa0/0xa0 [rdma_cm] [] ? rds_rdma_cm_event_handler_cmn+0x850/0x850 [rds_rdma] [] rds_rdma_cm_event_handler_cmn+0x22c/0x850 [rds_rdma] [] rds_rdma_cm_event_handler+0x10/0x20 [rds_rdma] [] addr_handler+0x9e/0x140 [rdma_cm] [] process_req+0x134/0x190 [ib_addr] [] process_one_work+0x169/0x4a0 [] worker_thread+0x5b/0x560 [] ? flush_delayed_work+0x50/0x50 [] kthread+0xcb/0xf0 [] ? __schedule+0x24a/0x810 [] ? __schedule+0x24a/0x810 [] ? kthread_create_on_node+0x180/0x180 [] ret_from_fork+0x47/0x90 [] ? kthread_create_on_node+0x180/0x180 The ownership rule is once the request is on the list, ownership transfers to the list and the local thread can't touch it any more, just like for the normal MAD case in send_mad(). Thus, instead of adding before send and then trying to delete after on errors, move the entire thing under the spinlock so that the send and update of the lists are atomic to the conurrent threads. Lightly reoganize things so spinlock safe memory allocations are done in the final NL send path and the rest of the setup work is done before and outside the lock. Fixes: 3ebd2fd0d011 ("IB/sa: Put netlink request into the request list before sending") Link: https://lore.kernel.org/r/1592964789-14533-1-git-send-email-divya.indi@oracle.com Signed-off-by: Divya Indi Signed-off-by: Jason Gunthorpe --- drivers/infiniband/core/sa_query.c | 40 ++++++++++++++---------------- 1 file changed, 18 insertions(+), 22 deletions(-) diff --git a/drivers/infiniband/core/sa_query.c b/drivers/infiniband/core/sa_query.c index a2ed09a3c714..8c930bf1df89 100644 --- a/drivers/infiniband/core/sa_query.c +++ b/drivers/infiniband/core/sa_query.c @@ -829,13 +829,20 @@ static int ib_nl_get_path_rec_attrs_len(ib_sa_comp_mask comp_mask) return len; } -static int ib_nl_send_msg(struct ib_sa_query *query, gfp_t gfp_mask) +static int ib_nl_make_request(struct ib_sa_query *query, gfp_t gfp_mask) { struct sk_buff *skb = NULL; struct nlmsghdr *nlh; void *data; struct ib_sa_mad *mad; int len; + unsigned long flags; + unsigned long delay; + gfp_t gfp_flag; + int ret; + + INIT_LIST_HEAD(&query->list); + query->seq = (u32)atomic_inc_return(&ib_nl_sa_request_seq); mad = query->mad_buf->mad; len = ib_nl_get_path_rec_attrs_len(mad->sa_hdr.comp_mask); @@ -860,36 +867,25 @@ static int ib_nl_send_msg(struct ib_sa_query *query, gfp_t gfp_mask) /* Repair the nlmsg header length */ nlmsg_end(skb, nlh); - return rdma_nl_multicast(&init_net, skb, RDMA_NL_GROUP_LS, gfp_mask); -} + gfp_flag = ((gfp_mask & GFP_ATOMIC) == GFP_ATOMIC) ? GFP_ATOMIC : + GFP_NOWAIT; -static int ib_nl_make_request(struct ib_sa_query *query, gfp_t gfp_mask) -{ - unsigned long flags; - unsigned long delay; - int ret; - - INIT_LIST_HEAD(&query->list); - query->seq = (u32)atomic_inc_return(&ib_nl_sa_request_seq); - - /* Put the request on the list first.*/ spin_lock_irqsave(&ib_nl_request_lock, flags); + ret = rdma_nl_multicast(&init_net, skb, RDMA_NL_GROUP_LS, gfp_flag); + + if (ret) + goto out; + + /* Put the request on the list.*/ delay = msecs_to_jiffies(sa_local_svc_timeout_ms); query->timeout = delay + jiffies; list_add_tail(&query->list, &ib_nl_request_list); /* Start the timeout if this is the only request */ if (ib_nl_request_list.next == &query->list) queue_delayed_work(ib_nl_wq, &ib_nl_timed_work, delay); - spin_unlock_irqrestore(&ib_nl_request_lock, flags); - ret = ib_nl_send_msg(query, gfp_mask); - if (ret) { - ret = -EIO; - /* Remove the request */ - spin_lock_irqsave(&ib_nl_request_lock, flags); - list_del(&query->list); - spin_unlock_irqrestore(&ib_nl_request_lock, flags); - } +out: + spin_unlock_irqrestore(&ib_nl_request_lock, flags); return ret; } From 1ca0fafd73c5268e8fc4b997094b8bb2bfe8deea Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Wed, 1 Jul 2020 18:39:33 -0700 Subject: [PATCH 129/343] tcp: md5: allow changing MD5 keys in all socket states This essentially reverts commit 721230326891 ("tcp: md5: reject TCP_MD5SIG or TCP_MD5SIG_EXT on established sockets") Mathieu reported that many vendors BGP implementations can actually switch TCP MD5 on established flows. Quoting Mathieu : Here is a list of a few network vendors along with their behavior with respect to TCP MD5: - Cisco: Allows for password to be changed, but within the hold-down timer (~180 seconds). - Juniper: When password is initially set on active connection it will reset, but after that any subsequent password changes no network resets. - Nokia: No notes on if they flap the tcp connection or not. - Ericsson/RedBack: Allows for 2 password (old/new) to co-exist until both sides are ok with new passwords. - Meta-Switch: Expects the password to be set before a connection is attempted, but no further info on whether they reset the TCP connection on a change. - Avaya: Disable the neighbor, then set password, then re-enable. - Zebos: Would normally allow the change when socket connected. We can revert my prior change because commit 9424e2e7ad93 ("tcp: md5: fix potential overestimation of TCP option space") removed the leak of 4 kernel bytes to the wire that was the main reason for my patch. While doing my investigations, I found a bug when a MD5 key is changed, leading to these commits that stable teams want to consider before backporting this revert : Commit 6a2febec338d ("tcp: md5: add missing memory barriers in tcp_md5_do_add()/tcp_md5_hash_key()") Commit e6ced831ef11 ("tcp: md5: refine tcp_md5_do_add()/tcp_md5_hash_key() barriers") Fixes: 721230326891 "tcp: md5: reject TCP_MD5SIG or TCP_MD5SIG_EXT on established sockets" Signed-off-by: Eric Dumazet Reported-by: Mathieu Desnoyers Signed-off-by: David S. Miller --- net/ipv4/tcp.c | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c index c33f7c6aff8e..861fbd84c9cf 100644 --- a/net/ipv4/tcp.c +++ b/net/ipv4/tcp.c @@ -3246,10 +3246,7 @@ static int do_tcp_setsockopt(struct sock *sk, int level, #ifdef CONFIG_TCP_MD5SIG case TCP_MD5SIG: case TCP_MD5SIG_EXT: - if ((1 << sk->sk_state) & (TCPF_CLOSE | TCPF_LISTEN)) - err = tp->af_specific->md5_parse(sk, optname, optval, optlen); - else - err = -EINVAL; + err = tp->af_specific->md5_parse(sk, optname, optval, optlen); break; #endif case TCP_USER_TIMEOUT: From af199a1a9cb02ec0194804bd46c174b6db262075 Mon Sep 17 00:00:00 2001 From: Codrin Ciubotariu Date: Thu, 2 Jul 2020 12:44:50 +0300 Subject: [PATCH 130/343] net: dsa: microchip: set the correct number of ports The number of ports is incorrectly set to the maximum available for a DSA switch. Even if the extra ports are not used, this causes some functions to be called later, like port_disable() and port_stp_state_set(). If the driver doesn't check the port index, it will end up modifying unknown registers. Fixes: b987e98e50ab ("dsa: add DSA switch driver for Microchip KSZ9477") Signed-off-by: Codrin Ciubotariu Reviewed-by: Andrew Lunn Signed-off-by: David S. Miller --- drivers/net/dsa/microchip/ksz8795.c | 3 +++ drivers/net/dsa/microchip/ksz9477.c | 3 +++ 2 files changed, 6 insertions(+) diff --git a/drivers/net/dsa/microchip/ksz8795.c b/drivers/net/dsa/microchip/ksz8795.c index 47d65b77caf7..7c17b0f705ec 100644 --- a/drivers/net/dsa/microchip/ksz8795.c +++ b/drivers/net/dsa/microchip/ksz8795.c @@ -1268,6 +1268,9 @@ static int ksz8795_switch_init(struct ksz_device *dev) return -ENOMEM; } + /* set the real number of ports */ + dev->ds->num_ports = dev->port_cnt; + return 0; } diff --git a/drivers/net/dsa/microchip/ksz9477.c b/drivers/net/dsa/microchip/ksz9477.c index 9a51b8a4de5d..8d15c3016024 100644 --- a/drivers/net/dsa/microchip/ksz9477.c +++ b/drivers/net/dsa/microchip/ksz9477.c @@ -1588,6 +1588,9 @@ static int ksz9477_switch_init(struct ksz_device *dev) return -ENOMEM; } + /* set the real number of ports */ + dev->ds->num_ports = dev->port_cnt; + return 0; } From ad4e2b64839710e3b6e17a11b2684ceaaeae795e Mon Sep 17 00:00:00 2001 From: Nicolas Ferre Date: Thu, 2 Jul 2020 15:00:21 +0200 Subject: [PATCH 131/343] MAINTAINERS: net: macb: add Claudiu as co-maintainer I would like that Claudiu becomes co-maintainer of the Cadence macb driver. He's already participating to lots of reviews and enhancements to this driver and knows the different versions of this controller. Signed-off-by: Nicolas Ferre Signed-off-by: David S. Miller --- MAINTAINERS | 1 + 1 file changed, 1 insertion(+) diff --git a/MAINTAINERS b/MAINTAINERS index 63c4cc4a04d6..5f14938a5985 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -2929,6 +2929,7 @@ F: include/uapi/linux/atm* ATMEL MACB ETHERNET DRIVER M: Nicolas Ferre +M: Claudiu Beznea S: Supported F: drivers/net/ethernet/cadence/ From 1a9826204109acd3f8e926394b16bd4cdc4c5dbb Mon Sep 17 00:00:00 2001 From: Bob Liu Date: Wed, 1 Jul 2020 11:07:44 +0800 Subject: [PATCH 132/343] scsi: iscsi: Change iSCSI workqueue max_active back to 1 Commit 3ce419662dd4 ("scsi: iscsi: Register sysfs for iscsi workqueue") enabled 'cpumask' support for iSCSI workqueues. However, it is unnecessary to set max_active = 2 since 'cpumask' can still be modified when max_active is 1. This patch sets max_active to 1 so as to keep the same behaviour as before. Link: https://lore.kernel.org/r/20200701030745.16897-1-bob.liu@oracle.com Reviewed-by: Mike Christie Signed-off-by: Bob Liu Signed-off-by: Martin K. Petersen --- drivers/scsi/libiscsi.c | 2 +- drivers/scsi/scsi_transport_iscsi.c | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/scsi/libiscsi.c b/drivers/scsi/libiscsi.c index e5a64d4f255c..49c8a1818baf 100644 --- a/drivers/scsi/libiscsi.c +++ b/drivers/scsi/libiscsi.c @@ -2629,7 +2629,7 @@ struct Scsi_Host *iscsi_host_alloc(struct scsi_host_template *sht, "iscsi_q_%d", shost->host_no); ihost->workq = alloc_workqueue("%s", WQ_SYSFS | __WQ_LEGACY | WQ_MEM_RECLAIM | WQ_UNBOUND, - 2, ihost->workq_name); + 1, ihost->workq_name); if (!ihost->workq) goto free_host; } diff --git a/drivers/scsi/scsi_transport_iscsi.c b/drivers/scsi/scsi_transport_iscsi.c index f4cc08eb47ba..7ae5024e7824 100644 --- a/drivers/scsi/scsi_transport_iscsi.c +++ b/drivers/scsi/scsi_transport_iscsi.c @@ -4760,7 +4760,7 @@ static __init int iscsi_transport_init(void) iscsi_eh_timer_workq = alloc_workqueue("%s", WQ_SYSFS | __WQ_LEGACY | WQ_MEM_RECLAIM | WQ_UNBOUND, - 2, "iscsi_eh"); + 1, "iscsi_eh"); if (!iscsi_eh_timer_workq) { err = -ENOMEM; goto release_nls; From cb551b8dc079d2ef189145782627c99cb68c0255 Mon Sep 17 00:00:00 2001 From: Damien Le Moal Date: Wed, 1 Jul 2020 17:52:54 +0900 Subject: [PATCH 133/343] scsi: mpt3sas: Fix unlock imbalance In BRM_status_show(), if the condition "!ioc->is_warpdrive" tested on entry to the function is true, a "goto out" is called. This results in unlocking ioc->pci_access_mutex without this mutex lock being taken. This generates the following splat: [ 1148.539883] mpt3sas_cm2: BRM_status_show: BRM attribute is only for warpdrive [ 1148.547184] [ 1148.548708] ===================================== [ 1148.553501] WARNING: bad unlock balance detected! [ 1148.558277] 5.8.0-rc3+ #827 Not tainted [ 1148.562183] ------------------------------------- [ 1148.566959] cat/5008 is trying to release lock (&ioc->pci_access_mutex) at: [ 1148.574035] [] BRM_status_show+0xd3/0x100 [mpt3sas] [ 1148.580574] but there are no more locks to release! [ 1148.585524] [ 1148.585524] other info that might help us debug this: [ 1148.599624] 3 locks held by cat/5008: [ 1148.607085] #0: ffff92aea3e392c0 (&p->lock){+.+.}-{3:3}, at: seq_read+0x34/0x480 [ 1148.618509] #1: ffff922ef14c4888 (&of->mutex){+.+.}-{3:3}, at: kernfs_seq_start+0x2a/0xb0 [ 1148.630729] #2: ffff92aedb5d7310 (kn->active#224){.+.+}-{0:0}, at: kernfs_seq_start+0x32/0xb0 [ 1148.643347] [ 1148.643347] stack backtrace: [ 1148.655259] CPU: 73 PID: 5008 Comm: cat Not tainted 5.8.0-rc3+ #827 [ 1148.665309] Hardware name: HGST H4060-S/S2600STB, BIOS SE5C620.86B.02.01.0008.031920191559 03/19/2019 [ 1148.678394] Call Trace: [ 1148.684750] dump_stack+0x78/0xa0 [ 1148.691802] lock_release.cold+0x45/0x4a [ 1148.699451] __mutex_unlock_slowpath+0x35/0x270 [ 1148.707675] BRM_status_show+0xd3/0x100 [mpt3sas] [ 1148.716092] dev_attr_show+0x19/0x40 [ 1148.723664] sysfs_kf_seq_show+0x87/0x100 [ 1148.731193] seq_read+0xbc/0x480 [ 1148.737882] vfs_read+0xa0/0x160 [ 1148.744514] ksys_read+0x58/0xd0 [ 1148.751129] do_syscall_64+0x4c/0xa0 [ 1148.757941] entry_SYSCALL_64_after_hwframe+0x44/0xa9 [ 1148.766240] RIP: 0033:0x7f1230566542 [ 1148.772957] Code: Bad RIP value. [ 1148.779206] RSP: 002b:00007ffeac1bcac8 EFLAGS: 00000246 ORIG_RAX: 0000000000000000 [ 1148.790063] RAX: ffffffffffffffda RBX: 0000000000020000 RCX: 00007f1230566542 [ 1148.800284] RDX: 0000000000020000 RSI: 00007f1223460000 RDI: 0000000000000003 [ 1148.810474] RBP: 00007f1223460000 R08: 00007f122345f010 R09: 0000000000000000 [ 1148.820641] R10: 0000000000000022 R11: 0000000000000246 R12: 0000000000000000 [ 1148.830728] R13: 0000000000000003 R14: 0000000000020000 R15: 0000000000020000 Fix this by returning immediately instead of jumping to the out label. Link: https://lore.kernel.org/r/20200701085254.51740-1-damien.lemoal@wdc.com Reviewed-by: Johannes Thumshirn Acked-by: Sreekanth Reddy Signed-off-by: Damien Le Moal Signed-off-by: Martin K. Petersen --- drivers/scsi/mpt3sas/mpt3sas_ctl.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/scsi/mpt3sas/mpt3sas_ctl.c b/drivers/scsi/mpt3sas/mpt3sas_ctl.c index 62e552838565..e94e72de2fc6 100644 --- a/drivers/scsi/mpt3sas/mpt3sas_ctl.c +++ b/drivers/scsi/mpt3sas/mpt3sas_ctl.c @@ -3145,7 +3145,7 @@ BRM_status_show(struct device *cdev, struct device_attribute *attr, if (!ioc->is_warpdrive) { ioc_err(ioc, "%s: BRM attribute is only for warpdrive\n", __func__); - goto out; + return 0; } /* pci_access_mutex lock acquired by sysfs show path */ mutex_lock(&ioc->pci_access_mutex); From d8ca55addb9315ecd9fc4397f4c94d3f1980161c Mon Sep 17 00:00:00 2001 From: Sean Christopherson Date: Thu, 2 Jul 2020 19:26:28 -0700 Subject: [PATCH 134/343] vfio-ccw: Fix a build error due to missing include of linux/slab.h Include linux/slab.h to fix a build error due to kfree() being undefined. Fixes: 3f02cb2fd9d2 ("vfio-ccw: Wire up the CRW irq and CRW region") Signed-off-by: Sean Christopherson Message-Id: <20200703022628.6036-1-sean.j.christopherson@intel.com> Signed-off-by: Cornelia Huck --- drivers/s390/cio/vfio_ccw_chp.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/s390/cio/vfio_ccw_chp.c b/drivers/s390/cio/vfio_ccw_chp.c index a646fc81c872..13b26a1c7988 100644 --- a/drivers/s390/cio/vfio_ccw_chp.c +++ b/drivers/s390/cio/vfio_ccw_chp.c @@ -8,6 +8,7 @@ * Eric Farman */ +#include #include #include "vfio_ccw_private.h" From eb25de276505c664c6ee838e60c6e5ad1837f55e Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Fri, 3 Jul 2020 08:11:16 -0300 Subject: [PATCH 135/343] tools arch: Update arch/x86/lib/memcpy_64.S copy used in 'perf bench mem memcpy' To bring in the change made in this cset: e3a9e681adb7 ("x86/entry: Fixup bad_iret vs noinstr") This doesn't cause any functional changes to tooling, just a rebuild. Addresses this perf build warning: Warning: Kernel ABI header at 'tools/arch/x86/lib/memcpy_64.S' differs from latest version at 'arch/x86/lib/memcpy_64.S' diff -u tools/arch/x86/lib/memcpy_64.S arch/x86/lib/memcpy_64.S Cc: Adrian Hunter Cc: Jiri Olsa Cc: Namhyung Kim Cc: Peter Zijlstra Signed-off-by: Arnaldo Carvalho de Melo --- tools/arch/x86/lib/memcpy_64.S | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/tools/arch/x86/lib/memcpy_64.S b/tools/arch/x86/lib/memcpy_64.S index df767afc690f..45f8e1b02241 100644 --- a/tools/arch/x86/lib/memcpy_64.S +++ b/tools/arch/x86/lib/memcpy_64.S @@ -8,6 +8,8 @@ #include #include +.pushsection .noinstr.text, "ax" + /* * We build a jump to memcpy_orig by default which gets NOPped out on * the majority of x86 CPUs which set REP_GOOD. In addition, CPUs which @@ -184,6 +186,8 @@ SYM_FUNC_START(memcpy_orig) retq SYM_FUNC_END(memcpy_orig) +.popsection + #ifndef CONFIG_UML MCSAFE_TEST_CTL From 640432e6bed08e9d5d2ba26856ba3f55008b07e3 Mon Sep 17 00:00:00 2001 From: Adrian Hunter Date: Mon, 29 Jun 2020 12:19:50 +0300 Subject: [PATCH 136/343] perf scripts python: export-to-postgresql.py: Fix struct.pack() int argument Python 3.8 is requiring that arguments being packed as integers are also integers. Add int() accordingly. Before: $ perf record -e intel_pt//u uname $ perf script --itrace=bep -s ~/libexec/perf-core/scripts/python/export-to-postgresql.py perf_data_db branches calls 2020-06-25 16:09:10.547256 Creating database... 2020-06-25 16:09:10.733185 Writing to intermediate files... Traceback (most recent call last): File "/home/ahunter/libexec/perf-core/scripts/python/export-to-postgresql.py", line 1106, in synth_data cbr(id, raw_buf) File "/home/ahunter/libexec/perf-core/scripts/python/export-to-postgresql.py", line 1058, in cbr value = struct.pack("!hiqiiiiii", 4, 8, id, 4, cbr, 4, MHz, 4, percent) struct.error: required argument is not an integer Fatal Python error: problem in Python trace event handler Python runtime state: initialized Current thread 0x00007f35d3695780 (most recent call first): Aborted (core dumped) After: $ dropdb perf_data_db $ rm -rf perf_data_db-perf-data $ perf script --itrace=bep -s ~/libexec/perf-core/scripts/python/export-to-postgresql.py perf_data_db branches calls 2020-06-25 16:09:40.990267 Creating database... 2020-06-25 16:09:41.207009 Writing to intermediate files... 2020-06-25 16:09:41.270915 Copying to database... 2020-06-25 16:09:41.382030 Removing intermediate files... 2020-06-25 16:09:41.384630 Adding primary keys 2020-06-25 16:09:41.541894 Adding foreign keys 2020-06-25 16:09:41.677044 Dropping unused tables 2020-06-25 16:09:41.703761 Done Fixes: aba44287a224 ("perf scripts python: export-to-postgresql.py: Export Intel PT power and ptwrite events") Signed-off-by: Adrian Hunter Cc: Jiri Olsa Cc: stable@vger.kernel.org Link: http://lore.kernel.org/lkml/20200629091955.17090-2-adrian.hunter@intel.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/scripts/python/export-to-postgresql.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tools/perf/scripts/python/export-to-postgresql.py b/tools/perf/scripts/python/export-to-postgresql.py index 7bd73a904b4e..d187e46c2683 100644 --- a/tools/perf/scripts/python/export-to-postgresql.py +++ b/tools/perf/scripts/python/export-to-postgresql.py @@ -1055,7 +1055,7 @@ def cbr(id, raw_buf): cbr = data[0] MHz = (data[4] + 500) / 1000 percent = ((cbr * 1000 / data[2]) + 5) / 10 - value = struct.pack("!hiqiiiiii", 4, 8, id, 4, cbr, 4, MHz, 4, percent) + value = struct.pack("!hiqiiiiii", 4, 8, id, 4, cbr, 4, int(MHz), 4, int(percent)) cbr_file.write(value) def mwait(id, raw_buf): From 442ad2254ac56b39870c0cfed96d500921fea5d5 Mon Sep 17 00:00:00 2001 From: Adrian Hunter Date: Mon, 29 Jun 2020 12:19:51 +0300 Subject: [PATCH 137/343] perf record: Fix duplicated sideband events with Intel PT system wide tracing Commit 0a892c1c9472 ("perf record: Add dummy event during system wide synthesis") reveals an issue with Intel PT system wide tracing. Specifically that Intel PT already adds a dummy tracking event, and it is not the first event. Adding another dummy tracking event causes duplicated sideband events. Fix by checking for an existing dummy tracking event first. Example showing duplicated switch events: Before: # perf record -a -e intel_pt//u uname Linux [ perf record: Woken up 1 times to write data ] [ perf record: Captured and wrote 0.895 MB perf.data ] # perf script --no-itrace --show-switch-events | head swapper 0 [007] 6390.516222: PERF_RECORD_SWITCH_CPU_WIDE OUT preempt next pid/tid: 11/11 swapper 0 [007] 6390.516222: PERF_RECORD_SWITCH_CPU_WIDE OUT preempt next pid/tid: 11/11 rcu_sched 11 [007] 6390.516223: PERF_RECORD_SWITCH_CPU_WIDE IN prev pid/tid: 0/0 rcu_sched 11 [007] 6390.516224: PERF_RECORD_SWITCH_CPU_WIDE IN prev pid/tid: 0/0 rcu_sched 11 [007] 6390.516227: PERF_RECORD_SWITCH_CPU_WIDE OUT next pid/tid: 0/0 rcu_sched 11 [007] 6390.516227: PERF_RECORD_SWITCH_CPU_WIDE OUT next pid/tid: 0/0 swapper 0 [007] 6390.516228: PERF_RECORD_SWITCH_CPU_WIDE IN prev pid/tid: 11/11 swapper 0 [007] 6390.516228: PERF_RECORD_SWITCH_CPU_WIDE IN prev pid/tid: 11/11 swapper 0 [002] 6390.516415: PERF_RECORD_SWITCH_CPU_WIDE OUT preempt next pid/tid: 5556/5559 swapper 0 [002] 6390.516416: PERF_RECORD_SWITCH_CPU_WIDE OUT preempt next pid/tid: 5556/5559 After: # perf record -a -e intel_pt//u uname Linux [ perf record: Woken up 1 times to write data ] [ perf record: Captured and wrote 0.868 MB perf.data ] # perf script --no-itrace --show-switch-events | head swapper 0 [005] 6450.567013: PERF_RECORD_SWITCH_CPU_WIDE OUT preempt next pid/tid: 7179/7181 perf 7181 [005] 6450.567014: PERF_RECORD_SWITCH_CPU_WIDE IN prev pid/tid: 0/0 perf 7181 [005] 6450.567028: PERF_RECORD_SWITCH_CPU_WIDE OUT next pid/tid: 0/0 swapper 0 [005] 6450.567029: PERF_RECORD_SWITCH_CPU_WIDE IN prev pid/tid: 7179/7181 swapper 0 [005] 6450.571699: PERF_RECORD_SWITCH_CPU_WIDE OUT preempt next pid/tid: 11/11 rcu_sched 11 [005] 6450.571700: PERF_RECORD_SWITCH_CPU_WIDE IN prev pid/tid: 0/0 rcu_sched 11 [005] 6450.571702: PERF_RECORD_SWITCH_CPU_WIDE OUT next pid/tid: 0/0 swapper 0 [005] 6450.571703: PERF_RECORD_SWITCH_CPU_WIDE IN prev pid/tid: 11/11 swapper 0 [005] 6450.579703: PERF_RECORD_SWITCH_CPU_WIDE OUT preempt next pid/tid: 11/11 rcu_sched 11 [005] 6450.579704: PERF_RECORD_SWITCH_CPU_WIDE IN prev pid/tid: 0/0 Signed-off-by: Adrian Hunter Tested-by: Arnaldo Carvalho de Melo Cc: Jiri Olsa Link: http://lore.kernel.org/lkml/20200629091955.17090-3-adrian.hunter@intel.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/builtin-record.c | 18 +++++++++--------- tools/perf/util/evlist.c | 12 ++++++++++++ tools/perf/util/evlist.h | 1 + tools/perf/util/evsel.c | 8 +------- tools/perf/util/evsel.h | 6 ++++++ 5 files changed, 29 insertions(+), 16 deletions(-) diff --git a/tools/perf/builtin-record.c b/tools/perf/builtin-record.c index e108d90ae2ed..a37e7910e9e9 100644 --- a/tools/perf/builtin-record.c +++ b/tools/perf/builtin-record.c @@ -852,20 +852,20 @@ static int record__open(struct record *rec) * event synthesis. */ if (opts->initial_delay || target__has_cpu(&opts->target)) { - if (perf_evlist__add_dummy(evlist)) - return -ENOMEM; + pos = perf_evlist__get_tracking_event(evlist); + if (!evsel__is_dummy_event(pos)) { + /* Set up dummy event. */ + if (perf_evlist__add_dummy(evlist)) + return -ENOMEM; + pos = evlist__last(evlist); + perf_evlist__set_tracking_event(evlist, pos); + } - /* Disable tracking of mmaps on lead event. */ - pos = evlist__first(evlist); - pos->tracking = 0; - /* Set up dummy event. */ - pos = evlist__last(evlist); - pos->tracking = 1; /* * Enable the dummy event when the process is forked for * initial_delay, immediately for system wide. */ - if (opts->initial_delay) + if (opts->initial_delay && !pos->immediate) pos->core.attr.enable_on_exec = 1; else pos->immediate = 1; diff --git a/tools/perf/util/evlist.c b/tools/perf/util/evlist.c index 173b4f0e0e6e..ab48be4cf258 100644 --- a/tools/perf/util/evlist.c +++ b/tools/perf/util/evlist.c @@ -1566,6 +1566,18 @@ void perf_evlist__to_front(struct evlist *evlist, list_splice(&move, &evlist->core.entries); } +struct evsel *perf_evlist__get_tracking_event(struct evlist *evlist) +{ + struct evsel *evsel; + + evlist__for_each_entry(evlist, evsel) { + if (evsel->tracking) + return evsel; + } + + return evlist__first(evlist); +} + void perf_evlist__set_tracking_event(struct evlist *evlist, struct evsel *tracking_evsel) { diff --git a/tools/perf/util/evlist.h b/tools/perf/util/evlist.h index b6f325dfb4d2..a8081dfc19cf 100644 --- a/tools/perf/util/evlist.h +++ b/tools/perf/util/evlist.h @@ -335,6 +335,7 @@ void perf_evlist__to_front(struct evlist *evlist, evlist__cpu_iter_start(evlist); \ perf_cpu_map__for_each_cpu (cpu, index, (evlist)->core.all_cpus) +struct evsel *perf_evlist__get_tracking_event(struct evlist *evlist); void perf_evlist__set_tracking_event(struct evlist *evlist, struct evsel *tracking_evsel); diff --git a/tools/perf/util/evsel.c b/tools/perf/util/evsel.c index 96e5171dce41..a68ac3632ae6 100644 --- a/tools/perf/util/evsel.c +++ b/tools/perf/util/evsel.c @@ -898,12 +898,6 @@ static void evsel__apply_config_terms(struct evsel *evsel, } } -static bool is_dummy_event(struct evsel *evsel) -{ - return (evsel->core.attr.type == PERF_TYPE_SOFTWARE) && - (evsel->core.attr.config == PERF_COUNT_SW_DUMMY); -} - struct evsel_config_term *__evsel__get_config_term(struct evsel *evsel, enum evsel_term_type type) { struct evsel_config_term *term, *found_term = NULL; @@ -1161,7 +1155,7 @@ void evsel__config(struct evsel *evsel, struct record_opts *opts, * The software event will trigger -EOPNOTSUPP error out, * if BRANCH_STACK bit is set. */ - if (is_dummy_event(evsel)) + if (evsel__is_dummy_event(evsel)) evsel__reset_sample_bit(evsel, BRANCH_STACK); } diff --git a/tools/perf/util/evsel.h b/tools/perf/util/evsel.h index 0f963c2a88a5..35e3f6d66085 100644 --- a/tools/perf/util/evsel.h +++ b/tools/perf/util/evsel.h @@ -399,6 +399,12 @@ static inline bool evsel__has_br_stack(const struct evsel *evsel) evsel->synth_sample_type & PERF_SAMPLE_BRANCH_STACK; } +static inline bool evsel__is_dummy_event(struct evsel *evsel) +{ + return (evsel->core.attr.type == PERF_TYPE_SOFTWARE) && + (evsel->core.attr.config == PERF_COUNT_SW_DUMMY); +} + struct perf_env *evsel__env(struct evsel *evsel); int evsel__store_ids(struct evsel *evsel, struct evlist *evlist); From 3a3cf7c570a486b07d9a6e68a77548aea6a8421f Mon Sep 17 00:00:00 2001 From: Adrian Hunter Date: Mon, 29 Jun 2020 12:19:52 +0300 Subject: [PATCH 138/343] perf scripts python: exported-sql-viewer.py: Fix unexpanded 'Find' result Using Python version 3.8.2 and PySide2 version 5.14.0, ctrl-F ('Find') would not expand the tree to the result. Fix by using setExpanded(). Example: $ perf record -e intel_pt//u uname Linux [ perf record: Woken up 1 times to write data ] [ perf record: Captured and wrote 0.034 MB perf.data ] $ perf script --itrace=bep -s ~/libexec/perf-core/scripts/python/export-to-sqlite.py perf.data.db branches calls 2020-06-26 15:32:14.928997 Creating database ... 2020-06-26 15:32:14.933971 Writing records... 2020-06-26 15:32:15.535251 Adding indexes 2020-06-26 15:32:15.542993 Dropping unused tables 2020-06-26 15:32:15.549716 Done $ python3 ~/libexec/perf-core/scripts/python/exported-sql-viewer.py perf.data.db Select: Reports -> Context-Sensitive Call Graph or Reports -> Call Tree Press: Ctrl-F Enter: main Press: Enter Before: line showing 'main' does not display After: tree is expanded to line showing 'main' Fixes: ebd70c7dc2f5f ("perf scripts python: exported-sql-viewer.py: Add ability to find symbols in the call-graph") Signed-off-by: Adrian Hunter Cc: Jiri Olsa Cc: stable@vger.kernel.org Link: http://lore.kernel.org/lkml/20200629091955.17090-4-adrian.hunter@intel.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/scripts/python/exported-sql-viewer.py | 1 + 1 file changed, 1 insertion(+) diff --git a/tools/perf/scripts/python/exported-sql-viewer.py b/tools/perf/scripts/python/exported-sql-viewer.py index 26d7be785288..4be7cb68c8bb 100755 --- a/tools/perf/scripts/python/exported-sql-viewer.py +++ b/tools/perf/scripts/python/exported-sql-viewer.py @@ -1050,6 +1050,7 @@ class TreeWindowBase(QMdiSubWindow): child = self.model.index(row, 0, parent) if child.internalPointer().dbid == dbid: found = True + self.view.setExpanded(parent, True) self.view.setCurrentIndex(child) parent = child break From 7ff520b0a71dd2db695b52ad117d81b7eaf6ff9d Mon Sep 17 00:00:00 2001 From: Adrian Hunter Date: Mon, 29 Jun 2020 12:19:53 +0300 Subject: [PATCH 139/343] perf scripts python: exported-sql-viewer.py: Fix zero id in call graph 'Find' result Using ctrl-F ('Find') would not find 'unknown' because it matches id zero. Fix by excluding id zero from selection. Example: $ perf record -e intel_pt//u uname Linux [ perf record: Woken up 1 times to write data ] [ perf record: Captured and wrote 0.034 MB perf.data ] $ perf script --itrace=bep -s ~/libexec/perf-core/scripts/python/export-to-sqlite.py perf.data.db branches calls 2020-06-26 15:32:14.928997 Creating database ... 2020-06-26 15:32:14.933971 Writing records... 2020-06-26 15:32:15.535251 Adding indexes 2020-06-26 15:32:15.542993 Dropping unused tables 2020-06-26 15:32:15.549716 Done $ python3 ~/libexec/perf-core/scripts/python/exported-sql-viewer.py perf.data.db Select: Reports -> Context-Sensitive Call Graph Press: Ctrl-F Enter: unknown Press: Enter Before: gets stuck After: tree is expanded to line showing 'unknown' Fixes: 254c0d820b86d ("perf scripts python: exported-sql-viewer.py: Factor out CallGraphModelBase") Signed-off-by: Adrian Hunter Cc: Jiri Olsa Cc: stable@vger.kernel.org Link: http://lore.kernel.org/lkml/20200629091955.17090-5-adrian.hunter@intel.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/scripts/python/exported-sql-viewer.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/tools/perf/scripts/python/exported-sql-viewer.py b/tools/perf/scripts/python/exported-sql-viewer.py index 4be7cb68c8bb..e0c90aeff15e 100755 --- a/tools/perf/scripts/python/exported-sql-viewer.py +++ b/tools/perf/scripts/python/exported-sql-viewer.py @@ -768,7 +768,8 @@ class CallGraphModel(CallGraphModelBase): " FROM calls" " INNER JOIN call_paths ON calls.call_path_id = call_paths.id" " INNER JOIN symbols ON call_paths.symbol_id = symbols.id" - " WHERE symbols.name" + match + + " WHERE calls.id <> 0" + " AND symbols.name" + match + " GROUP BY comm_id, thread_id, call_path_id" " ORDER BY comm_id, thread_id, call_path_id") From 031c8d5edb1ddeb6d398f7942ce2a01a1a51ada9 Mon Sep 17 00:00:00 2001 From: Adrian Hunter Date: Mon, 29 Jun 2020 12:19:54 +0300 Subject: [PATCH 140/343] perf scripts python: exported-sql-viewer.py: Fix zero id in call tree 'Find' result Using ctrl-F ('Find') would not find 'unknown' because it matches id zero. Fix by excluding id zero from selection. Example: $ perf record -e intel_pt//u uname Linux [ perf record: Woken up 1 times to write data ] [ perf record: Captured and wrote 0.034 MB perf.data ] $ perf script --itrace=bep -s ~/libexec/perf-core/scripts/python/export-to-sqlite.py perf.data.db branches calls 2020-06-26 15:32:14.928997 Creating database ... 2020-06-26 15:32:14.933971 Writing records... 2020-06-26 15:32:15.535251 Adding indexes 2020-06-26 15:32:15.542993 Dropping unused tables 2020-06-26 15:32:15.549716 Done $ python3 ~/libexec/perf-core/scripts/python/exported-sql-viewer.py perf.data.db Select: Reports -> Call Tree Press: Ctrl-F Enter: unknown Press: Enter Before: displays 'unknown' not found After: tree is expanded to line showing 'unknown' Fixes: ae8b887c00d3f ("perf scripts python: exported-sql-viewer.py: Add call tree") Signed-off-by: Adrian Hunter Cc: Jiri Olsa Cc: stable@vger.kernel.org Link: http://lore.kernel.org/lkml/20200629091955.17090-6-adrian.hunter@intel.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/scripts/python/exported-sql-viewer.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/tools/perf/scripts/python/exported-sql-viewer.py b/tools/perf/scripts/python/exported-sql-viewer.py index e0c90aeff15e..0f295055ac6b 100755 --- a/tools/perf/scripts/python/exported-sql-viewer.py +++ b/tools/perf/scripts/python/exported-sql-viewer.py @@ -964,7 +964,8 @@ class CallTreeModel(CallGraphModelBase): " FROM calls" " INNER JOIN call_paths ON calls.call_path_id = call_paths.id" " INNER JOIN symbols ON call_paths.symbol_id = symbols.id" - " WHERE symbols.name" + match + + " WHERE calls.id <> 0" + " AND symbols.name" + match + " ORDER BY comm_id, thread_id, call_time, calls.id") def FindPath(self, query): From f18d5cf86cdb58eb50cafb5a5e20943ec7a61b1f Mon Sep 17 00:00:00 2001 From: Adrian Hunter Date: Mon, 29 Jun 2020 12:19:55 +0300 Subject: [PATCH 141/343] perf scripts python: exported-sql-viewer.py: Fix time chart call tree Using Python version 3.8.2 and PySide2 version 5.14.0, time chart call tree would not expand the tree to the result. Fix by using setExpanded(). Example: $ perf record -e intel_pt//u uname Linux [ perf record: Woken up 1 times to write data ] [ perf record: Captured and wrote 0.034 MB perf.data ] $ perf script --itrace=bep -s ~/libexec/perf-core/scripts/python/export-to-sqlite.py perf.data.db branches calls 2020-06-26 15:32:14.928997 Creating database ... 2020-06-26 15:32:14.933971 Writing records... 2020-06-26 15:32:15.535251 Adding indexes 2020-06-26 15:32:15.542993 Dropping unused tables 2020-06-26 15:32:15.549716 Done $ python3 ~/libexec/perf-core/scripts/python/exported-sql-viewer.py perf.data.db Select: Charts -> Time chart by CPU Move mouse over middle of chart Right-click and select Show Call Tree Before: displays Call Tree but not expanded to selected time After: displays Call Tree expanded to selected time Fixes: e69d5df75d74d ("perf scripts python: exported-sql-viewer.py: Add ability for Call tree to open at a specified task and time") Signed-off-by: Adrian Hunter Cc: Jiri Olsa Cc: stable@vger.kernel.org Link: http://lore.kernel.org/lkml/20200629091955.17090-7-adrian.hunter@intel.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/scripts/python/exported-sql-viewer.py | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/tools/perf/scripts/python/exported-sql-viewer.py b/tools/perf/scripts/python/exported-sql-viewer.py index 0f295055ac6b..7daa8bb70a5a 100755 --- a/tools/perf/scripts/python/exported-sql-viewer.py +++ b/tools/perf/scripts/python/exported-sql-viewer.py @@ -1130,6 +1130,7 @@ class CallTreeWindow(TreeWindowBase): child = self.model.index(row, 0, parent) if child.internalPointer().dbid == dbid: found = True + self.view.setExpanded(parent, True) self.view.setCurrentIndex(child) parent = child break @@ -1142,6 +1143,7 @@ class CallTreeWindow(TreeWindowBase): return last_child = None for row in xrange(n): + self.view.setExpanded(parent, True) child = self.model.index(row, 0, parent) child_call_time = child.internalPointer().call_time if child_call_time < time: @@ -1154,9 +1156,11 @@ class CallTreeWindow(TreeWindowBase): if not last_child: if not found: child = self.model.index(0, 0, parent) + self.view.setExpanded(parent, True) self.view.setCurrentIndex(child) return found = True + self.view.setExpanded(parent, True) self.view.setCurrentIndex(last_child) parent = last_child From 374855c5e4a76f7cc7f9aeb613c54929510eff18 Mon Sep 17 00:00:00 2001 From: "Steven Rostedt (Red Hat)" Date: Thu, 2 Jul 2020 14:53:45 -0400 Subject: [PATCH 142/343] tools lib traceevent: Add API to read time information from kbuffer Add the functions kbuffer_subbuf_timestamp() and kbuffer_ptr_delta() to get the timing data stored in the ring buffer that is used to produced the time stamps of the records. This is useful for tools like trace-cmd to be able to display the content of the read data to understand why the records show the time stamps that they do. Link: http://lore.kernel.org/linux-trace-devel/20200625100516.365338-2-tz.stoyanov@gmail.com Signed-off-by: Steven Rostedt (VMware) [ Ported from trace-cmd.git ] Cc: Andrew Morton Cc: Jiri Olsa Cc: Namhyung Kim Cc: linux-trace-devel@vger.kernel.org Link: http://lore.kernel.org/lkml/20200702185703.619656282@goodmis.org Signed-off-by: Tzvetomir Stoyanov (VMware) Signed-off-by: Arnaldo Carvalho de Melo --- tools/lib/traceevent/kbuffer-parse.c | 28 ++++++++++++++++++++++++++++ tools/lib/traceevent/kbuffer.h | 2 ++ 2 files changed, 30 insertions(+) diff --git a/tools/lib/traceevent/kbuffer-parse.c b/tools/lib/traceevent/kbuffer-parse.c index 27f3b07fdae8..583db99aee94 100644 --- a/tools/lib/traceevent/kbuffer-parse.c +++ b/tools/lib/traceevent/kbuffer-parse.c @@ -546,6 +546,34 @@ int kbuffer_load_subbuffer(struct kbuffer *kbuf, void *subbuffer) return 0; } +/** + * kbuffer_subbuf_timestamp - read the timestamp from a sub buffer + * @kbuf: The kbuffer to load + * @subbuf: The subbuffer to read from. + * + * Return the timestamp from a subbuffer. + */ +unsigned long long kbuffer_subbuf_timestamp(struct kbuffer *kbuf, void *subbuf) +{ + return kbuf->read_8(subbuf); +} + +/** + * kbuffer_ptr_delta - read the delta field from a record + * @kbuf: The kbuffer to load + * @ptr: The record in the buffe. + * + * Return the timestamp delta from a record + */ +unsigned int kbuffer_ptr_delta(struct kbuffer *kbuf, void *ptr) +{ + unsigned int type_len_ts; + + type_len_ts = read_4(kbuf, ptr); + return ts4host(kbuf, type_len_ts); +} + + /** * kbuffer_read_event - read the next event in the kbuffer subbuffer * @kbuf: The kbuffer to read from diff --git a/tools/lib/traceevent/kbuffer.h b/tools/lib/traceevent/kbuffer.h index ed4d697fc137..5fa8292e341b 100644 --- a/tools/lib/traceevent/kbuffer.h +++ b/tools/lib/traceevent/kbuffer.h @@ -49,6 +49,8 @@ int kbuffer_load_subbuffer(struct kbuffer *kbuf, void *subbuffer); void *kbuffer_read_event(struct kbuffer *kbuf, unsigned long long *ts); void *kbuffer_next_event(struct kbuffer *kbuf, unsigned long long *ts); unsigned long long kbuffer_timestamp(struct kbuffer *kbuf); +unsigned long long kbuffer_subbuf_timestamp(struct kbuffer *kbuf, void *subbuf); +unsigned int kbuffer_ptr_delta(struct kbuffer *kbuf, void *ptr); void *kbuffer_translate_data(int swap, void *data, unsigned int *size); From 2160d6c8a13e685b7b2bacbe9cd1e9600506a05f Mon Sep 17 00:00:00 2001 From: Tom Zanussi Date: Thu, 2 Jul 2020 14:53:46 -0400 Subject: [PATCH 143/343] tools lib traceevent: Add proper KBUFFER_TYPE_TIME_STAMP handling Kernel commit dc4e2801d400 (ring-buffer: Redefine the unimplemented RINGBUF_TYPE_TIME_STAMP) changed the way the ring buffer timestamps work - after that commit the previously unimplemented RINGBUF_TYPE_TIME_STAMP type causes the time delta to be used as a timestamp rather than a delta to be added to the timestamp. The trace-cmd code didn't get updated to handle this, so misinterprets the event data for this case, which causes a cascade of errors, including trace-report not being able to identify synthetic (or any other) events generated by the histogram code (which uses TIME_STAMP mode). For example, the following triggers along with the trace-cmd shown cause an UNKNOWN_EVENT error and trace-cmd report crash: # echo 'wakeup_latency u64 lat pid_t pid char comm[16]' > /sys/kernel/debug/tracing/synthetic_events # echo 'hist:keys=pid:ts0=common_timestamp.usecs if comm=="ping"' > /sys/kernel/debug/tracing/events/sched/sched_wakeup/trigger # echo 'hist:keys=next_pid:wakeup_lat=common_timestamp.usecs-$ts0:onmatch(sched.sched_wakeup).trace(wakeup_latency,$wakeup_lat,next_pid,next_comm) if next_comm=="ping"' > /sys/kernel/debug/tracing/events/sched/sched_switch/trigger # echo 'hist:keys=comm,pid,lat:wakeup_lat=lat:sort=lat' > /sys/kernel/debug/tracing/events/synthetic/wakeup_latency/trigger # trace-cmd record -e wakeup_latency -e sched_wakeup -f comm==\"ping\" ping localhost -c 5 # trace-cmd report CPU 0 is empty CPU 1 is empty CPU 2 is empty CPU 3 is empty CPU 5 is empty CPU 6 is empty CPU 7 is empty cpus=8 ug! no event found for type 0 [UNKNOWN TYPE 0] ug! no event found for type 11520 Segmentation fault (core dumped) After this patch we get the correct interpretation and the events are shown properly: # trace-cmd report CPU 0 is empty CPU 1 is empty CPU 2 is empty CPU 3 is empty CPU 5 is empty CPU 6 is empty CPU 7 is empty cpus=8 -0 [004] 23284.341392: sched_wakeup: ping:12031 [120] success=1 CPU:004 -0 [004] 23284.341464: wakeup_latency: lat=58, pid=12031, comm=ping -0 [004] 23285.365303: sched_wakeup: ping:12031 [120] success=1 CPU:004 -0 [004] 23285.365382: wakeup_latency: lat=64, pid=12031, comm=ping -0 [004] 23286.389290: sched_wakeup: ping:12031 [120] success=1 CPU:004 -0 [004] 23286.389378: wakeup_latency: lat=72, pid=12031, comm=ping -0 [004] 23287.413213: sched_wakeup: ping:12031 [120] success=1 CPU:004 -0 [004] 23287.413291: wakeup_latency: lat=64, pid=12031, comm=ping Link: http://lkml.kernel.org/r/1567628224.13841.4.camel@kernel.org Link: http://lore.kernel.org/linux-trace-devel/20200625100516.365338-3-tz.stoyanov@gmail.com Signed-off-by: Tom Zanussi [ Ported from trace-cmd.git ] Cc: Andrew Morton Cc: Jiri Olsa Cc: Namhyung Kim Cc: linux-trace-devel@vger.kernel.org Link: http://lore.kernel.org/lkml/20200702185703.785094515@goodmis.org Signed-off-by: Tzvetomir Stoyanov (VMware) Signed-off-by: Steven Rostedt (VMware) Signed-off-by: Arnaldo Carvalho de Melo --- tools/lib/traceevent/kbuffer-parse.c | 15 +++++++++------ 1 file changed, 9 insertions(+), 6 deletions(-) diff --git a/tools/lib/traceevent/kbuffer-parse.c b/tools/lib/traceevent/kbuffer-parse.c index 583db99aee94..f1640d651c8a 100644 --- a/tools/lib/traceevent/kbuffer-parse.c +++ b/tools/lib/traceevent/kbuffer-parse.c @@ -361,6 +361,7 @@ translate_data(struct kbuffer *kbuf, void *data, void **rptr, break; case KBUFFER_TYPE_TIME_EXTEND: + case KBUFFER_TYPE_TIME_STAMP: extend = read_4(kbuf, data); data += 4; extend <<= TS_SHIFT; @@ -369,10 +370,6 @@ translate_data(struct kbuffer *kbuf, void *data, void **rptr, *length = 0; break; - case KBUFFER_TYPE_TIME_STAMP: - data += 12; - *length = 0; - break; case 0: *length = read_4(kbuf, data) - 4; *length = (*length + 3) & ~3; @@ -397,7 +394,11 @@ static unsigned int update_pointers(struct kbuffer *kbuf) type_len = translate_data(kbuf, ptr, &ptr, &delta, &length); - kbuf->timestamp += delta; + if (type_len == KBUFFER_TYPE_TIME_STAMP) + kbuf->timestamp = delta; + else + kbuf->timestamp += delta; + kbuf->index = calc_index(kbuf, ptr); kbuf->next = kbuf->index + length; @@ -454,7 +455,9 @@ static int __next_event(struct kbuffer *kbuf) if (kbuf->next >= kbuf->size) return -1; type = update_pointers(kbuf); - } while (type == KBUFFER_TYPE_TIME_EXTEND || type == KBUFFER_TYPE_PADDING); + } while (type == KBUFFER_TYPE_TIME_EXTEND || + type == KBUFFER_TYPE_TIME_STAMP || + type == KBUFFER_TYPE_PADDING); return 0; } From d005fbb855d3b5660d62ee5a6bd2d99c13ff8cf3 Mon Sep 17 00:00:00 2001 From: Pablo Neira Ayuso Date: Wed, 1 Jul 2020 13:17:40 +0200 Subject: [PATCH 144/343] netfilter: conntrack: refetch conntrack after nf_conntrack_update() __nf_conntrack_update() might refresh the conntrack object that is attached to the skbuff. Otherwise, this triggers UAF. [ 633.200434] ================================================================== [ 633.200472] BUG: KASAN: use-after-free in nf_conntrack_update+0x34e/0x770 [nf_conntrack] [ 633.200478] Read of size 1 at addr ffff888370804c00 by task nfqnl_test/6769 [ 633.200487] CPU: 1 PID: 6769 Comm: nfqnl_test Not tainted 5.8.0-rc2+ #388 [ 633.200490] Hardware name: LENOVO 23259H1/23259H1, BIOS G2ET32WW (1.12 ) 05/30/2012 [ 633.200491] Call Trace: [ 633.200499] dump_stack+0x7c/0xb0 [ 633.200526] ? nf_conntrack_update+0x34e/0x770 [nf_conntrack] [ 633.200532] print_address_description.constprop.6+0x1a/0x200 [ 633.200539] ? _raw_write_lock_irqsave+0xc0/0xc0 [ 633.200568] ? nf_conntrack_update+0x34e/0x770 [nf_conntrack] [ 633.200594] ? nf_conntrack_update+0x34e/0x770 [nf_conntrack] [ 633.200598] kasan_report.cold.9+0x1f/0x42 [ 633.200604] ? call_rcu+0x2c0/0x390 [ 633.200633] ? nf_conntrack_update+0x34e/0x770 [nf_conntrack] [ 633.200659] nf_conntrack_update+0x34e/0x770 [nf_conntrack] [ 633.200687] ? nf_conntrack_find_get+0x30/0x30 [nf_conntrack] Closes: https://bugzilla.netfilter.org/show_bug.cgi?id=1436 Fixes: ee04805ff54a ("netfilter: conntrack: make conntrack userspace helpers work again") Signed-off-by: Pablo Neira Ayuso --- net/netfilter/nf_conntrack_core.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/net/netfilter/nf_conntrack_core.c b/net/netfilter/nf_conntrack_core.c index 79cd9dde457b..f33d72c5b06e 100644 --- a/net/netfilter/nf_conntrack_core.c +++ b/net/netfilter/nf_conntrack_core.c @@ -2158,6 +2158,8 @@ static int nf_conntrack_update(struct net *net, struct sk_buff *skb) err = __nf_conntrack_update(net, skb, ct, ctinfo); if (err < 0) return err; + + ct = nf_ct_get(skb, &ctinfo); } return nf_confirm_cthelper(skb, ct, ctinfo); From d74fcfc1f0ff4b6c26ecef1f9e48d8089ab4eaac Mon Sep 17 00:00:00 2001 From: Sean Christopherson Date: Thu, 2 Jul 2020 19:17:14 -0700 Subject: [PATCH 145/343] KVM: x86: Inject #GP if guest attempts to toggle CR4.LA57 in 64-bit mode Inject a #GP on MOV CR4 if CR4.LA57 is toggled in 64-bit mode, which is illegal per Intel's SDM: CR4.LA57 57-bit linear addresses (bit 12 of CR4) ... blah blah blah ... This bit cannot be modified in IA-32e mode. Note, the pseudocode for MOV CR doesn't call out the fault condition, which is likely why the check was missed during initial development. This is arguably an SDM bug and will hopefully be fixed in future release of the SDM. Fixes: fd8cb433734ee ("KVM: MMU: Expose the LA57 feature to VM.") Cc: stable@vger.kernel.org Reported-by: Sebastien Boeuf Signed-off-by: Sean Christopherson Message-Id: <20200703021714.5549-1-sean.j.christopherson@intel.com> Signed-off-by: Paolo Bonzini --- arch/x86/kvm/x86.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index a026d926072c..88c593f83b28 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -975,6 +975,8 @@ int kvm_set_cr4(struct kvm_vcpu *vcpu, unsigned long cr4) if (is_long_mode(vcpu)) { if (!(cr4 & X86_CR4_PAE)) return 1; + if ((cr4 ^ old_cr4) & X86_CR4_LA57) + return 1; } else if (is_paging(vcpu) && (cr4 & X86_CR4_PAE) && ((cr4 ^ old_cr4) & pdptr_bits) && !load_pdptrs(vcpu, vcpu->arch.walk_mmu, From 7c83d096aed055a7763a03384f92115363448b71 Mon Sep 17 00:00:00 2001 From: Sean Christopherson Date: Thu, 2 Jul 2020 21:04:21 -0700 Subject: [PATCH 146/343] KVM: x86: Mark CR4.TSD as being possibly owned by the guest Mark CR4.TSD as being possibly owned by the guest as that is indeed the case on VMX. Without TSD being tagged as possibly owned by the guest, a targeted read of CR4 to get TSD could observe a stale value. This bug is benign in the current code base as the sole consumer of TSD is the emulator (for RDTSC) and the emulator always "reads" the entirety of CR4 when grabbing bits. Add a build-time assertion in to ensure VMX doesn't hand over more CR4 bits without also updating x86. Fixes: 52ce3c21aec3 ("x86,kvm,vmx: Don't trap writes to CR4.TSD") Cc: stable@vger.kernel.org Signed-off-by: Sean Christopherson Message-Id: <20200703040422.31536-2-sean.j.christopherson@intel.com> Signed-off-by: Paolo Bonzini --- arch/x86/kvm/kvm_cache_regs.h | 2 +- arch/x86/kvm/vmx/vmx.c | 2 ++ 2 files changed, 3 insertions(+), 1 deletion(-) diff --git a/arch/x86/kvm/kvm_cache_regs.h b/arch/x86/kvm/kvm_cache_regs.h index ff2d0e9ca3bc..cfe83d4ae625 100644 --- a/arch/x86/kvm/kvm_cache_regs.h +++ b/arch/x86/kvm/kvm_cache_regs.h @@ -7,7 +7,7 @@ #define KVM_POSSIBLE_CR0_GUEST_BITS X86_CR0_TS #define KVM_POSSIBLE_CR4_GUEST_BITS \ (X86_CR4_PVI | X86_CR4_DE | X86_CR4_PCE | X86_CR4_OSFXSR \ - | X86_CR4_OSXMMEXCPT | X86_CR4_LA57 | X86_CR4_PGE) + | X86_CR4_OSXMMEXCPT | X86_CR4_LA57 | X86_CR4_PGE | X86_CR4_TSD) #define BUILD_KVM_GPR_ACCESSORS(lname, uname) \ static __always_inline unsigned long kvm_##lname##_read(struct kvm_vcpu *vcpu)\ diff --git a/arch/x86/kvm/vmx/vmx.c b/arch/x86/kvm/vmx/vmx.c index cb22f33bf1d8..5c9bfc0b9ab9 100644 --- a/arch/x86/kvm/vmx/vmx.c +++ b/arch/x86/kvm/vmx/vmx.c @@ -4034,6 +4034,8 @@ void vmx_set_constant_host_state(struct vcpu_vmx *vmx) void set_cr4_guest_host_mask(struct vcpu_vmx *vmx) { + BUILD_BUG_ON(KVM_CR4_GUEST_OWNED_BITS & ~KVM_POSSIBLE_CR4_GUEST_BITS); + vmx->vcpu.arch.cr4_guest_owned_bits = KVM_CR4_GUEST_OWNED_BITS; if (enable_ept) vmx->vcpu.arch.cr4_guest_owned_bits |= X86_CR4_PGE; From fa71e9527f6a0153ae6a880031b902818af1bdaf Mon Sep 17 00:00:00 2001 From: Sean Christopherson Date: Thu, 2 Jul 2020 21:04:22 -0700 Subject: [PATCH 147/343] KVM: VMX: Use KVM_POSSIBLE_CR*_GUEST_BITS to initialize guest/host masks Use the "common" KVM_POSSIBLE_CR*_GUEST_BITS defines to initialize the CR0/CR4 guest host masks instead of duplicating most of the CR4 mask and open coding the CR0 mask. SVM doesn't utilize the masks, i.e. the masks are effectively VMX specific even if they're not named as such. This avoids duplicate code, better documents the guest owned CR0 bit, and eliminates the need for a build-time assertion to keep VMX and x86 synchronized. Signed-off-by: Sean Christopherson Message-Id: <20200703040422.31536-3-sean.j.christopherson@intel.com> Signed-off-by: Paolo Bonzini --- arch/x86/kvm/vmx/nested.c | 4 ++-- arch/x86/kvm/vmx/vmx.c | 15 +++++---------- 2 files changed, 7 insertions(+), 12 deletions(-) diff --git a/arch/x86/kvm/vmx/nested.c b/arch/x86/kvm/vmx/nested.c index d1af20b050a8..b26655104d4a 100644 --- a/arch/x86/kvm/vmx/nested.c +++ b/arch/x86/kvm/vmx/nested.c @@ -4109,7 +4109,7 @@ static void load_vmcs12_host_state(struct kvm_vcpu *vcpu, * CR0_GUEST_HOST_MASK is already set in the original vmcs01 * (KVM doesn't change it); */ - vcpu->arch.cr0_guest_owned_bits = X86_CR0_TS; + vcpu->arch.cr0_guest_owned_bits = KVM_POSSIBLE_CR0_GUEST_BITS; vmx_set_cr0(vcpu, vmcs12->host_cr0); /* Same as above - no reason to call set_cr4_guest_host_mask(). */ @@ -4259,7 +4259,7 @@ static void nested_vmx_restore_host_state(struct kvm_vcpu *vcpu) */ vmx_set_efer(vcpu, nested_vmx_get_vmcs01_guest_efer(vmx)); - vcpu->arch.cr0_guest_owned_bits = X86_CR0_TS; + vcpu->arch.cr0_guest_owned_bits = KVM_POSSIBLE_CR0_GUEST_BITS; vmx_set_cr0(vcpu, vmcs_readl(CR0_READ_SHADOW)); vcpu->arch.cr4_guest_owned_bits = ~vmcs_readl(CR4_GUEST_HOST_MASK); diff --git a/arch/x86/kvm/vmx/vmx.c b/arch/x86/kvm/vmx/vmx.c index 5c9bfc0b9ab9..13745f2a5ecd 100644 --- a/arch/x86/kvm/vmx/vmx.c +++ b/arch/x86/kvm/vmx/vmx.c @@ -133,9 +133,6 @@ module_param_named(preemption_timer, enable_preemption_timer, bool, S_IRUGO); #define KVM_VM_CR0_ALWAYS_ON \ (KVM_VM_CR0_ALWAYS_ON_UNRESTRICTED_GUEST | \ X86_CR0_WP | X86_CR0_PG | X86_CR0_PE) -#define KVM_CR4_GUEST_OWNED_BITS \ - (X86_CR4_PVI | X86_CR4_DE | X86_CR4_PCE | X86_CR4_OSFXSR \ - | X86_CR4_OSXMMEXCPT | X86_CR4_LA57 | X86_CR4_TSD) #define KVM_VM_CR4_ALWAYS_ON_UNRESTRICTED_GUEST X86_CR4_VMXE #define KVM_PMODE_VM_CR4_ALWAYS_ON (X86_CR4_PAE | X86_CR4_VMXE) @@ -4034,11 +4031,9 @@ void vmx_set_constant_host_state(struct vcpu_vmx *vmx) void set_cr4_guest_host_mask(struct vcpu_vmx *vmx) { - BUILD_BUG_ON(KVM_CR4_GUEST_OWNED_BITS & ~KVM_POSSIBLE_CR4_GUEST_BITS); - - vmx->vcpu.arch.cr4_guest_owned_bits = KVM_CR4_GUEST_OWNED_BITS; - if (enable_ept) - vmx->vcpu.arch.cr4_guest_owned_bits |= X86_CR4_PGE; + vmx->vcpu.arch.cr4_guest_owned_bits = KVM_POSSIBLE_CR4_GUEST_BITS; + if (!enable_ept) + vmx->vcpu.arch.cr4_guest_owned_bits &= ~X86_CR4_PGE; if (is_guest_mode(&vmx->vcpu)) vmx->vcpu.arch.cr4_guest_owned_bits &= ~get_vmcs12(&vmx->vcpu)->cr4_guest_host_mask; @@ -4335,8 +4330,8 @@ static void init_vmcs(struct vcpu_vmx *vmx) /* 22.2.1, 20.8.1 */ vm_entry_controls_set(vmx, vmx_vmentry_ctrl()); - vmx->vcpu.arch.cr0_guest_owned_bits = X86_CR0_TS; - vmcs_writel(CR0_GUEST_HOST_MASK, ~X86_CR0_TS); + vmx->vcpu.arch.cr0_guest_owned_bits = KVM_POSSIBLE_CR0_GUEST_BITS; + vmcs_writel(CR0_GUEST_HOST_MASK, ~vmx->vcpu.arch.cr0_guest_owned_bits); set_cr4_guest_host_mask(vmx); From d7bf2ebebc2bd61ab95e2a8e33541ef282f303d4 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Toke=20H=C3=B8iland-J=C3=B8rgensen?= Date: Fri, 3 Jul 2020 22:26:43 +0200 Subject: [PATCH 148/343] sched: consistently handle layer3 header accesses in the presence of VLANs MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit There are a couple of places in net/sched/ that check skb->protocol and act on the value there. However, in the presence of VLAN tags, the value stored in skb->protocol can be inconsistent based on whether VLAN acceleration is enabled. The commit quoted in the Fixes tag below fixed the users of skb->protocol to use a helper that will always see the VLAN ethertype. However, most of the callers don't actually handle the VLAN ethertype, but expect to find the IP header type in the protocol field. This means that things like changing the ECN field, or parsing diffserv values, stops working if there's a VLAN tag, or if there are multiple nested VLAN tags (QinQ). To fix this, change the helper to take an argument that indicates whether the caller wants to skip the VLAN tags or not. When skipping VLAN tags, we make sure to skip all of them, so behaviour is consistent even in QinQ mode. To make the helper usable from the ECN code, move it to if_vlan.h instead of pkt_sched.h. v3: - Remove empty lines - Move vlan variable definitions inside loop in skb_protocol() - Also use skb_protocol() helper in IP{,6}_ECN_decapsulate() and bpf_skb_ecn_set_ce() v2: - Use eth_type_vlan() helper in skb_protocol() - Also fix code that reads skb->protocol directly - Change a couple of 'if/else if' statements to switch constructs to avoid calling the helper twice Reported-by: Ilya Ponetayev Fixes: d8b9605d2697 ("net: sched: fix skb->protocol use in case of accelerated vlan path") Signed-off-by: Toke Høiland-Jørgensen Signed-off-by: David S. Miller --- include/linux/if_vlan.h | 28 ++++++++++++++++++++++++++++ include/net/inet_ecn.h | 25 +++++++++++++++++-------- include/net/pkt_sched.h | 11 ----------- net/core/filter.c | 10 +++++++--- net/sched/act_connmark.c | 9 ++++++--- net/sched/act_csum.c | 2 +- net/sched/act_ct.c | 9 ++++----- net/sched/act_ctinfo.c | 9 ++++++--- net/sched/act_mpls.c | 2 +- net/sched/act_skbedit.c | 2 +- net/sched/cls_api.c | 2 +- net/sched/cls_flow.c | 8 ++++---- net/sched/cls_flower.c | 2 +- net/sched/em_ipset.c | 2 +- net/sched/em_ipt.c | 2 +- net/sched/em_meta.c | 2 +- net/sched/sch_cake.c | 4 ++-- net/sched/sch_dsmark.c | 6 +++--- net/sched/sch_teql.c | 2 +- 19 files changed, 86 insertions(+), 51 deletions(-) diff --git a/include/linux/if_vlan.h b/include/linux/if_vlan.h index b05e855f1ddd..427a5b8597c2 100644 --- a/include/linux/if_vlan.h +++ b/include/linux/if_vlan.h @@ -308,6 +308,34 @@ static inline bool eth_type_vlan(__be16 ethertype) } } +/* A getter for the SKB protocol field which will handle VLAN tags consistently + * whether VLAN acceleration is enabled or not. + */ +static inline __be16 skb_protocol(const struct sk_buff *skb, bool skip_vlan) +{ + unsigned int offset = skb_mac_offset(skb) + sizeof(struct ethhdr); + __be16 proto = skb->protocol; + + if (!skip_vlan) + /* VLAN acceleration strips the VLAN header from the skb and + * moves it to skb->vlan_proto + */ + return skb_vlan_tag_present(skb) ? skb->vlan_proto : proto; + + while (eth_type_vlan(proto)) { + struct vlan_hdr vhdr, *vh; + + vh = skb_header_pointer(skb, offset, sizeof(vhdr), &vhdr); + if (!vh) + break; + + proto = vh->h_vlan_encapsulated_proto; + offset += sizeof(vhdr); + } + + return proto; +} + static inline bool vlan_hw_offload_capable(netdev_features_t features, __be16 proto) { diff --git a/include/net/inet_ecn.h b/include/net/inet_ecn.h index 0f0d1efe06dd..e1eaf1780288 100644 --- a/include/net/inet_ecn.h +++ b/include/net/inet_ecn.h @@ -4,6 +4,7 @@ #include #include +#include #include #include @@ -172,7 +173,7 @@ static inline void ipv6_copy_dscp(unsigned int dscp, struct ipv6hdr *inner) static inline int INET_ECN_set_ce(struct sk_buff *skb) { - switch (skb->protocol) { + switch (skb_protocol(skb, true)) { case cpu_to_be16(ETH_P_IP): if (skb_network_header(skb) + sizeof(struct iphdr) <= skb_tail_pointer(skb)) @@ -191,7 +192,7 @@ static inline int INET_ECN_set_ce(struct sk_buff *skb) static inline int INET_ECN_set_ect1(struct sk_buff *skb) { - switch (skb->protocol) { + switch (skb_protocol(skb, true)) { case cpu_to_be16(ETH_P_IP): if (skb_network_header(skb) + sizeof(struct iphdr) <= skb_tail_pointer(skb)) @@ -272,12 +273,16 @@ static inline int IP_ECN_decapsulate(const struct iphdr *oiph, { __u8 inner; - if (skb->protocol == htons(ETH_P_IP)) + switch (skb_protocol(skb, true)) { + case htons(ETH_P_IP): inner = ip_hdr(skb)->tos; - else if (skb->protocol == htons(ETH_P_IPV6)) + break; + case htons(ETH_P_IPV6): inner = ipv6_get_dsfield(ipv6_hdr(skb)); - else + break; + default: return 0; + } return INET_ECN_decapsulate(skb, oiph->tos, inner); } @@ -287,12 +292,16 @@ static inline int IP6_ECN_decapsulate(const struct ipv6hdr *oipv6h, { __u8 inner; - if (skb->protocol == htons(ETH_P_IP)) + switch (skb_protocol(skb, true)) { + case htons(ETH_P_IP): inner = ip_hdr(skb)->tos; - else if (skb->protocol == htons(ETH_P_IPV6)) + break; + case htons(ETH_P_IPV6): inner = ipv6_get_dsfield(ipv6_hdr(skb)); - else + break; + default: return 0; + } return INET_ECN_decapsulate(skb, ipv6_get_dsfield(oipv6h), inner); } diff --git a/include/net/pkt_sched.h b/include/net/pkt_sched.h index 9092e697059e..ac8c890a2657 100644 --- a/include/net/pkt_sched.h +++ b/include/net/pkt_sched.h @@ -136,17 +136,6 @@ static inline void qdisc_run(struct Qdisc *q) } } -static inline __be16 tc_skb_protocol(const struct sk_buff *skb) -{ - /* We need to take extra care in case the skb came via - * vlan accelerated path. In that case, use skb->vlan_proto - * as the original vlan header was already stripped. - */ - if (skb_vlan_tag_present(skb)) - return skb->vlan_proto; - return skb->protocol; -} - /* Calculate maximal size of packet seen by hard_start_xmit routine of this device. */ diff --git a/net/core/filter.c b/net/core/filter.c index 73395384afe2..82e1b5b06167 100644 --- a/net/core/filter.c +++ b/net/core/filter.c @@ -5853,12 +5853,16 @@ BPF_CALL_1(bpf_skb_ecn_set_ce, struct sk_buff *, skb) { unsigned int iphdr_len; - if (skb->protocol == cpu_to_be16(ETH_P_IP)) + switch (skb_protocol(skb, true)) { + case cpu_to_be16(ETH_P_IP): iphdr_len = sizeof(struct iphdr); - else if (skb->protocol == cpu_to_be16(ETH_P_IPV6)) + break; + case cpu_to_be16(ETH_P_IPV6): iphdr_len = sizeof(struct ipv6hdr); - else + break; + default: return 0; + } if (skb_headlen(skb) < iphdr_len) return 0; diff --git a/net/sched/act_connmark.c b/net/sched/act_connmark.c index 43a243081e7d..f901421b0634 100644 --- a/net/sched/act_connmark.c +++ b/net/sched/act_connmark.c @@ -43,17 +43,20 @@ static int tcf_connmark_act(struct sk_buff *skb, const struct tc_action *a, tcf_lastuse_update(&ca->tcf_tm); bstats_update(&ca->tcf_bstats, skb); - if (skb->protocol == htons(ETH_P_IP)) { + switch (skb_protocol(skb, true)) { + case htons(ETH_P_IP): if (skb->len < sizeof(struct iphdr)) goto out; proto = NFPROTO_IPV4; - } else if (skb->protocol == htons(ETH_P_IPV6)) { + break; + case htons(ETH_P_IPV6): if (skb->len < sizeof(struct ipv6hdr)) goto out; proto = NFPROTO_IPV6; - } else { + break; + default: goto out; } diff --git a/net/sched/act_csum.c b/net/sched/act_csum.c index cb8608f0a77a..c60674cf25c4 100644 --- a/net/sched/act_csum.c +++ b/net/sched/act_csum.c @@ -587,7 +587,7 @@ static int tcf_csum_act(struct sk_buff *skb, const struct tc_action *a, goto drop; update_flags = params->update_flags; - protocol = tc_skb_protocol(skb); + protocol = skb_protocol(skb, false); again: switch (protocol) { case cpu_to_be16(ETH_P_IP): diff --git a/net/sched/act_ct.c b/net/sched/act_ct.c index e9f3576cbf71..86ed02487467 100644 --- a/net/sched/act_ct.c +++ b/net/sched/act_ct.c @@ -624,7 +624,7 @@ static u8 tcf_ct_skb_nf_family(struct sk_buff *skb) { u8 family = NFPROTO_UNSPEC; - switch (skb->protocol) { + switch (skb_protocol(skb, true)) { case htons(ETH_P_IP): family = NFPROTO_IPV4; break; @@ -748,6 +748,7 @@ static int ct_nat_execute(struct sk_buff *skb, struct nf_conn *ct, const struct nf_nat_range2 *range, enum nf_nat_manip_type maniptype) { + __be16 proto = skb_protocol(skb, true); int hooknum, err = NF_ACCEPT; /* See HOOK2MANIP(). */ @@ -759,14 +760,13 @@ static int ct_nat_execute(struct sk_buff *skb, struct nf_conn *ct, switch (ctinfo) { case IP_CT_RELATED: case IP_CT_RELATED_REPLY: - if (skb->protocol == htons(ETH_P_IP) && + if (proto == htons(ETH_P_IP) && ip_hdr(skb)->protocol == IPPROTO_ICMP) { if (!nf_nat_icmp_reply_translation(skb, ct, ctinfo, hooknum)) err = NF_DROP; goto out; - } else if (IS_ENABLED(CONFIG_IPV6) && - skb->protocol == htons(ETH_P_IPV6)) { + } else if (IS_ENABLED(CONFIG_IPV6) && proto == htons(ETH_P_IPV6)) { __be16 frag_off; u8 nexthdr = ipv6_hdr(skb)->nexthdr; int hdrlen = ipv6_skip_exthdr(skb, @@ -1550,4 +1550,3 @@ MODULE_AUTHOR("Yossi Kuperman "); MODULE_AUTHOR("Marcelo Ricardo Leitner "); MODULE_DESCRIPTION("Connection tracking action"); MODULE_LICENSE("GPL v2"); - diff --git a/net/sched/act_ctinfo.c b/net/sched/act_ctinfo.c index 19649623493b..b5042f3ea079 100644 --- a/net/sched/act_ctinfo.c +++ b/net/sched/act_ctinfo.c @@ -96,19 +96,22 @@ static int tcf_ctinfo_act(struct sk_buff *skb, const struct tc_action *a, action = READ_ONCE(ca->tcf_action); wlen = skb_network_offset(skb); - if (tc_skb_protocol(skb) == htons(ETH_P_IP)) { + switch (skb_protocol(skb, true)) { + case htons(ETH_P_IP): wlen += sizeof(struct iphdr); if (!pskb_may_pull(skb, wlen)) goto out; proto = NFPROTO_IPV4; - } else if (tc_skb_protocol(skb) == htons(ETH_P_IPV6)) { + break; + case htons(ETH_P_IPV6): wlen += sizeof(struct ipv6hdr); if (!pskb_may_pull(skb, wlen)) goto out; proto = NFPROTO_IPV6; - } else { + break; + default: goto out; } diff --git a/net/sched/act_mpls.c b/net/sched/act_mpls.c index be3f215cd027..8118e2640979 100644 --- a/net/sched/act_mpls.c +++ b/net/sched/act_mpls.c @@ -82,7 +82,7 @@ static int tcf_mpls_act(struct sk_buff *skb, const struct tc_action *a, goto drop; break; case TCA_MPLS_ACT_PUSH: - new_lse = tcf_mpls_get_lse(NULL, p, !eth_p_mpls(skb->protocol)); + new_lse = tcf_mpls_get_lse(NULL, p, !eth_p_mpls(skb_protocol(skb, true))); if (skb_mpls_push(skb, new_lse, p->tcfm_proto, mac_len, skb->dev && skb->dev->type == ARPHRD_ETHER)) goto drop; diff --git a/net/sched/act_skbedit.c b/net/sched/act_skbedit.c index b125b2be4467..b2b3faa57294 100644 --- a/net/sched/act_skbedit.c +++ b/net/sched/act_skbedit.c @@ -41,7 +41,7 @@ static int tcf_skbedit_act(struct sk_buff *skb, const struct tc_action *a, if (params->flags & SKBEDIT_F_INHERITDSFIELD) { int wlen = skb_network_offset(skb); - switch (tc_skb_protocol(skb)) { + switch (skb_protocol(skb, true)) { case htons(ETH_P_IP): wlen += sizeof(struct iphdr); if (!pskb_may_pull(skb, wlen)) diff --git a/net/sched/cls_api.c b/net/sched/cls_api.c index faa78b7dd962..e62beec0d844 100644 --- a/net/sched/cls_api.c +++ b/net/sched/cls_api.c @@ -1538,7 +1538,7 @@ static inline int __tcf_classify(struct sk_buff *skb, reclassify: #endif for (; tp; tp = rcu_dereference_bh(tp->next)) { - __be16 protocol = tc_skb_protocol(skb); + __be16 protocol = skb_protocol(skb, false); int err; if (tp->protocol != protocol && diff --git a/net/sched/cls_flow.c b/net/sched/cls_flow.c index 80ae7b9fa90a..ab53a93b2f2b 100644 --- a/net/sched/cls_flow.c +++ b/net/sched/cls_flow.c @@ -80,7 +80,7 @@ static u32 flow_get_dst(const struct sk_buff *skb, const struct flow_keys *flow) if (dst) return ntohl(dst); - return addr_fold(skb_dst(skb)) ^ (__force u16) tc_skb_protocol(skb); + return addr_fold(skb_dst(skb)) ^ (__force u16)skb_protocol(skb, true); } static u32 flow_get_proto(const struct sk_buff *skb, @@ -104,7 +104,7 @@ static u32 flow_get_proto_dst(const struct sk_buff *skb, if (flow->ports.ports) return ntohs(flow->ports.dst); - return addr_fold(skb_dst(skb)) ^ (__force u16) tc_skb_protocol(skb); + return addr_fold(skb_dst(skb)) ^ (__force u16)skb_protocol(skb, true); } static u32 flow_get_iif(const struct sk_buff *skb) @@ -151,7 +151,7 @@ static u32 flow_get_nfct(const struct sk_buff *skb) static u32 flow_get_nfct_src(const struct sk_buff *skb, const struct flow_keys *flow) { - switch (tc_skb_protocol(skb)) { + switch (skb_protocol(skb, true)) { case htons(ETH_P_IP): return ntohl(CTTUPLE(skb, src.u3.ip)); case htons(ETH_P_IPV6): @@ -164,7 +164,7 @@ static u32 flow_get_nfct_src(const struct sk_buff *skb, static u32 flow_get_nfct_dst(const struct sk_buff *skb, const struct flow_keys *flow) { - switch (tc_skb_protocol(skb)) { + switch (skb_protocol(skb, true)) { case htons(ETH_P_IP): return ntohl(CTTUPLE(skb, dst.u3.ip)); case htons(ETH_P_IPV6): diff --git a/net/sched/cls_flower.c b/net/sched/cls_flower.c index b2da37286082..e30bd969fc48 100644 --- a/net/sched/cls_flower.c +++ b/net/sched/cls_flower.c @@ -313,7 +313,7 @@ static int fl_classify(struct sk_buff *skb, const struct tcf_proto *tp, /* skb_flow_dissect() does not set n_proto in case an unknown * protocol, so do it rather here. */ - skb_key.basic.n_proto = skb->protocol; + skb_key.basic.n_proto = skb_protocol(skb, false); skb_flow_dissect_tunnel_info(skb, &mask->dissector, &skb_key); skb_flow_dissect_ct(skb, &mask->dissector, &skb_key, fl_ct_info_to_flower_map, diff --git a/net/sched/em_ipset.c b/net/sched/em_ipset.c index df00566d327d..c95cf86fb431 100644 --- a/net/sched/em_ipset.c +++ b/net/sched/em_ipset.c @@ -59,7 +59,7 @@ static int em_ipset_match(struct sk_buff *skb, struct tcf_ematch *em, }; int ret, network_offset; - switch (tc_skb_protocol(skb)) { + switch (skb_protocol(skb, true)) { case htons(ETH_P_IP): state.pf = NFPROTO_IPV4; if (!pskb_network_may_pull(skb, sizeof(struct iphdr))) diff --git a/net/sched/em_ipt.c b/net/sched/em_ipt.c index 18755d29fd15..3650117da47f 100644 --- a/net/sched/em_ipt.c +++ b/net/sched/em_ipt.c @@ -212,7 +212,7 @@ static int em_ipt_match(struct sk_buff *skb, struct tcf_ematch *em, struct nf_hook_state state; int ret; - switch (tc_skb_protocol(skb)) { + switch (skb_protocol(skb, true)) { case htons(ETH_P_IP): if (!pskb_network_may_pull(skb, sizeof(struct iphdr))) return 0; diff --git a/net/sched/em_meta.c b/net/sched/em_meta.c index d99966a55c84..46254968d390 100644 --- a/net/sched/em_meta.c +++ b/net/sched/em_meta.c @@ -195,7 +195,7 @@ META_COLLECTOR(int_priority) META_COLLECTOR(int_protocol) { /* Let userspace take care of the byte ordering */ - dst->value = tc_skb_protocol(skb); + dst->value = skb_protocol(skb, false); } META_COLLECTOR(int_pkttype) diff --git a/net/sched/sch_cake.c b/net/sched/sch_cake.c index ca813697728e..ebaeec1e5c82 100644 --- a/net/sched/sch_cake.c +++ b/net/sched/sch_cake.c @@ -592,7 +592,7 @@ static bool cake_update_flowkeys(struct flow_keys *keys, bool rev = !skb->_nfct, upd = false; __be32 ip; - if (tc_skb_protocol(skb) != htons(ETH_P_IP)) + if (skb_protocol(skb, true) != htons(ETH_P_IP)) return false; if (!nf_ct_get_tuple_skb(&tuple, skb)) @@ -1557,7 +1557,7 @@ static u8 cake_handle_diffserv(struct sk_buff *skb, bool wash) u16 *buf, buf_; u8 dscp; - switch (tc_skb_protocol(skb)) { + switch (skb_protocol(skb, true)) { case htons(ETH_P_IP): buf = skb_header_pointer(skb, offset, sizeof(buf_), &buf_); if (unlikely(!buf)) diff --git a/net/sched/sch_dsmark.c b/net/sched/sch_dsmark.c index 05605b30bef3..2b88710994d7 100644 --- a/net/sched/sch_dsmark.c +++ b/net/sched/sch_dsmark.c @@ -210,7 +210,7 @@ static int dsmark_enqueue(struct sk_buff *skb, struct Qdisc *sch, if (p->set_tc_index) { int wlen = skb_network_offset(skb); - switch (tc_skb_protocol(skb)) { + switch (skb_protocol(skb, true)) { case htons(ETH_P_IP): wlen += sizeof(struct iphdr); if (!pskb_may_pull(skb, wlen) || @@ -303,7 +303,7 @@ static struct sk_buff *dsmark_dequeue(struct Qdisc *sch) index = skb->tc_index & (p->indices - 1); pr_debug("index %d->%d\n", skb->tc_index, index); - switch (tc_skb_protocol(skb)) { + switch (skb_protocol(skb, true)) { case htons(ETH_P_IP): ipv4_change_dsfield(ip_hdr(skb), p->mv[index].mask, p->mv[index].value); @@ -320,7 +320,7 @@ static struct sk_buff *dsmark_dequeue(struct Qdisc *sch) */ if (p->mv[index].mask != 0xff || p->mv[index].value) pr_warn("%s: unsupported protocol %d\n", - __func__, ntohs(tc_skb_protocol(skb))); + __func__, ntohs(skb_protocol(skb, true))); break; } diff --git a/net/sched/sch_teql.c b/net/sched/sch_teql.c index 689ef6f3ded8..2f1f0a378408 100644 --- a/net/sched/sch_teql.c +++ b/net/sched/sch_teql.c @@ -239,7 +239,7 @@ __teql_resolve(struct sk_buff *skb, struct sk_buff *skb_res, char haddr[MAX_ADDR_LEN]; neigh_ha_snapshot(haddr, n, dev); - err = dev_hard_header(skb, dev, ntohs(tc_skb_protocol(skb)), + err = dev_hard_header(skb, dev, ntohs(skb_protocol(skb, false)), haddr, NULL, skb->len); if (err < 0) From a5f526ecb075a08c4a082355020166c7fe13ae27 Mon Sep 17 00:00:00 2001 From: Dan Williams Date: Fri, 3 Jul 2020 23:54:35 -0700 Subject: [PATCH 149/343] CodingStyle: Inclusive Terminology Linux maintains a coding-style and its own idiomatic set of terminology. Update the style guidelines to recommend replacements for the terms master/slave and blacklist/whitelist. Link: http://lore.kernel.org/r/159389297140.2210796.13590142254668787525.stgit@dwillia2-desk3.amr.corp.intel.com Acked-by: Randy Dunlap Acked-by: Dave Airlie Acked-by: SeongJae Park Acked-by: Christian Brauner Acked-by: James Bottomley Acked-by: Daniel Vetter Acked-by: Andy Lutomirski Acked-by: Laura Abbott Acked-by: Gustavo A. R. Silva Reviewed-by: Matthias Brugger Reviewed-by: Mark Brown Signed-off-by: Stephen Hemminger Signed-off-by: Theodore Ts'o Signed-off-by: Shuah Khan Signed-off-by: Dan Carpenter Signed-off-by: Kees Cook Signed-off-by: Olof Johansson Signed-off-by: Jonathan Corbet Signed-off-by: Chris Mason Signed-off-by: Greg Kroah-Hartman Signed-off-by: Dan Williams --- Documentation/process/coding-style.rst | 20 ++++++++++++++++++++ 1 file changed, 20 insertions(+) diff --git a/Documentation/process/coding-style.rst b/Documentation/process/coding-style.rst index 2657a55c6f12..1bee6f8affdb 100644 --- a/Documentation/process/coding-style.rst +++ b/Documentation/process/coding-style.rst @@ -319,6 +319,26 @@ If you are afraid to mix up your local variable names, you have another problem, which is called the function-growth-hormone-imbalance syndrome. See chapter 6 (Functions). +For symbol names and documentation, avoid introducing new usage of +'master / slave' (or 'slave' independent of 'master') and 'blacklist / +whitelist'. + +Recommended replacements for 'master / slave' are: + '{primary,main} / {secondary,replica,subordinate}' + '{initiator,requester} / {target,responder}' + '{controller,host} / {device,worker,proxy}' + 'leader / follower' + 'director / performer' + +Recommended replacements for 'blacklist/whitelist' are: + 'denylist / allowlist' + 'blocklist / passlist' + +Exceptions for introducing new usage is to maintain a userspace ABI/API, +or when updating code for an existing (as of 2020) hardware or protocol +specification that mandates those terms. For new specifications +translate specification usage of the terminology to the kernel coding +standard where possible. 5) Typedefs ----------- From e9052927941d393b4bed6f295da763172b7829f0 Mon Sep 17 00:00:00 2001 From: Bernard Zhao Date: Tue, 16 Jun 2020 14:51:01 +0800 Subject: [PATCH 150/343] drm/mediatek: Remove unnecessary conversion to bool In function mtk_dsi_clk_hs_state, remove unnecessary conversion to bool return, this change is to make the code a bit readable. Signed-off-by: Bernard Zhao Signed-off-by: Chun-Kuang Hu --- drivers/gpu/drm/mediatek/mtk_dsi.c | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) diff --git a/drivers/gpu/drm/mediatek/mtk_dsi.c b/drivers/gpu/drm/mediatek/mtk_dsi.c index 270bf22c98fe..02ac55c13a80 100644 --- a/drivers/gpu/drm/mediatek/mtk_dsi.c +++ b/drivers/gpu/drm/mediatek/mtk_dsi.c @@ -316,10 +316,7 @@ static void mtk_dsi_lane0_ulp_mode_leave(struct mtk_dsi *dsi) static bool mtk_dsi_clk_hs_state(struct mtk_dsi *dsi) { - u32 tmp_reg1; - - tmp_reg1 = readl(dsi->regs + DSI_PHY_LCCON); - return ((tmp_reg1 & LC_HS_TX_EN) == 1) ? true : false; + return readl(dsi->regs + DSI_PHY_LCCON) & LC_HS_TX_EN; } static void mtk_dsi_clk_hs_mode(struct mtk_dsi *dsi, bool enter) From deb0f88b2208e9e6dbe39a0cbb3cdc458580d999 Mon Sep 17 00:00:00 2001 From: Matthias Brugger Date: Mon, 18 May 2020 13:22:54 +0200 Subject: [PATCH 151/343] drm/mediatek: Delete not used of_device_get_match_data The driver will be loaded by via a platform device. So we will need to get the device_node from the parent device. Depending on this we will set the driver data. As all this is done later already, just delete the call to of_device_get_match_data. Signed-off-by: Matthias Brugger Reviewed-by: Enric Balletbo i Serra Signed-off-by: Chun-Kuang Hu --- drivers/gpu/drm/mediatek/mtk_drm_drv.c | 1 - 1 file changed, 1 deletion(-) diff --git a/drivers/gpu/drm/mediatek/mtk_drm_drv.c b/drivers/gpu/drm/mediatek/mtk_drm_drv.c index c043ec6c8166..040a8f393fe2 100644 --- a/drivers/gpu/drm/mediatek/mtk_drm_drv.c +++ b/drivers/gpu/drm/mediatek/mtk_drm_drv.c @@ -444,7 +444,6 @@ static int mtk_drm_probe(struct platform_device *pdev) if (!private) return -ENOMEM; - private->data = of_device_get_match_data(dev); private->mmsys_dev = dev->parent; if (!private->mmsys_dev) { dev_err(dev, "Failed to get MMSYS device\n"); From 3a7826cc24e56ba8d3d18a202968553bdf687d98 Mon Sep 17 00:00:00 2001 From: Enric Balletbo i Serra Date: Tue, 19 May 2020 11:40:45 +0200 Subject: [PATCH 152/343] drm/mediatek: mtk_mt8173_hdmi_phy: Remove unnused const variables There are some `static const u8` variables that are not used, this triggers a warning building with `make W=1`, it is safe to remove them, so do it and make the compiler more happy. Signed-off-by: Enric Balletbo i Serra Signed-off-by: Chun-Kuang Hu --- .../gpu/drm/mediatek/mtk_mt8173_hdmi_phy.c | 48 ------------------- 1 file changed, 48 deletions(-) diff --git a/drivers/gpu/drm/mediatek/mtk_mt8173_hdmi_phy.c b/drivers/gpu/drm/mediatek/mtk_mt8173_hdmi_phy.c index b55f51675205..9478490d55aa 100644 --- a/drivers/gpu/drm/mediatek/mtk_mt8173_hdmi_phy.c +++ b/drivers/gpu/drm/mediatek/mtk_mt8173_hdmi_phy.c @@ -107,54 +107,6 @@ #define RGS_HDMITX_5T1_EDG (0xf << 4) #define RGS_HDMITX_PLUG_TST BIT(0) -static const u8 PREDIV[3][4] = { - {0x0, 0x0, 0x0, 0x0}, /* 27Mhz */ - {0x1, 0x1, 0x1, 0x1}, /* 74Mhz */ - {0x1, 0x1, 0x1, 0x1} /* 148Mhz */ -}; - -static const u8 TXDIV[3][4] = { - {0x3, 0x3, 0x3, 0x2}, /* 27Mhz */ - {0x2, 0x1, 0x1, 0x1}, /* 74Mhz */ - {0x1, 0x0, 0x0, 0x0} /* 148Mhz */ -}; - -static const u8 FBKSEL[3][4] = { - {0x1, 0x1, 0x1, 0x1}, /* 27Mhz */ - {0x1, 0x0, 0x1, 0x1}, /* 74Mhz */ - {0x1, 0x0, 0x1, 0x1} /* 148Mhz */ -}; - -static const u8 FBKDIV[3][4] = { - {19, 24, 29, 19}, /* 27Mhz */ - {19, 24, 14, 19}, /* 74Mhz */ - {19, 24, 14, 19} /* 148Mhz */ -}; - -static const u8 DIVEN[3][4] = { - {0x2, 0x1, 0x1, 0x2}, /* 27Mhz */ - {0x2, 0x2, 0x2, 0x2}, /* 74Mhz */ - {0x2, 0x2, 0x2, 0x2} /* 148Mhz */ -}; - -static const u8 HTPLLBP[3][4] = { - {0xc, 0xc, 0x8, 0xc}, /* 27Mhz */ - {0xc, 0xf, 0xf, 0xc}, /* 74Mhz */ - {0xc, 0xf, 0xf, 0xc} /* 148Mhz */ -}; - -static const u8 HTPLLBC[3][4] = { - {0x2, 0x3, 0x3, 0x2}, /* 27Mhz */ - {0x2, 0x3, 0x3, 0x2}, /* 74Mhz */ - {0x2, 0x3, 0x3, 0x2} /* 148Mhz */ -}; - -static const u8 HTPLLBR[3][4] = { - {0x1, 0x1, 0x0, 0x1}, /* 27Mhz */ - {0x1, 0x2, 0x2, 0x1}, /* 74Mhz */ - {0x1, 0x2, 0x2, 0x1} /* 148Mhz */ -}; - static int mtk_hdmi_pll_prepare(struct clk_hw *hw) { struct mtk_hdmi_phy *hdmi_phy = to_mtk_hdmi_phy(hw); From 5ab546f5e6309373aef01a8d398e163ab7a78431 Mon Sep 17 00:00:00 2001 From: Enric Balletbo i Serra Date: Tue, 19 May 2020 11:41:15 +0200 Subject: [PATCH 153/343] drm/mediatek: mtk_hdmi: Remove debug messages for function calls Equivalent information can be nowadays obtained using function tracer Signed-off-by: Enric Balletbo i Serra Signed-off-by: Chun-Kuang Hu --- drivers/gpu/drm/mediatek/mtk_hdmi.c | 12 +----------- drivers/gpu/drm/mediatek/mtk_mt8173_hdmi_phy.c | 4 ---- 2 files changed, 1 insertion(+), 15 deletions(-) diff --git a/drivers/gpu/drm/mediatek/mtk_hdmi.c b/drivers/gpu/drm/mediatek/mtk_hdmi.c index 5feb760617cb..1eebe310470a 100644 --- a/drivers/gpu/drm/mediatek/mtk_hdmi.c +++ b/drivers/gpu/drm/mediatek/mtk_hdmi.c @@ -1630,8 +1630,6 @@ static int mtk_hdmi_audio_startup(struct device *dev, void *data) { struct mtk_hdmi *hdmi = dev_get_drvdata(dev); - dev_dbg(dev, "%s\n", __func__); - mtk_hdmi_audio_enable(hdmi); return 0; @@ -1641,8 +1639,6 @@ static void mtk_hdmi_audio_shutdown(struct device *dev, void *data) { struct mtk_hdmi *hdmi = dev_get_drvdata(dev); - dev_dbg(dev, "%s\n", __func__); - mtk_hdmi_audio_disable(hdmi); } @@ -1651,8 +1647,6 @@ mtk_hdmi_audio_digital_mute(struct device *dev, void *data, bool enable) { struct mtk_hdmi *hdmi = dev_get_drvdata(dev); - dev_dbg(dev, "%s(%d)\n", __func__, enable); - if (enable) mtk_hdmi_hw_aud_mute(hdmi); else @@ -1665,8 +1659,6 @@ static int mtk_hdmi_audio_get_eld(struct device *dev, void *data, uint8_t *buf, { struct mtk_hdmi *hdmi = dev_get_drvdata(dev); - dev_dbg(dev, "%s\n", __func__); - memcpy(buf, hdmi->conn.eld, min(sizeof(hdmi->conn.eld), len)); return 0; @@ -1766,7 +1758,6 @@ static int mtk_drm_hdmi_probe(struct platform_device *pdev) goto err_bridge_remove; } - dev_dbg(dev, "mediatek hdmi probe success\n"); return 0; err_bridge_remove: @@ -1789,7 +1780,7 @@ static int mtk_hdmi_suspend(struct device *dev) struct mtk_hdmi *hdmi = dev_get_drvdata(dev); mtk_hdmi_clk_disable_audio(hdmi); - dev_dbg(dev, "hdmi suspend success!\n"); + return 0; } @@ -1804,7 +1795,6 @@ static int mtk_hdmi_resume(struct device *dev) return ret; } - dev_dbg(dev, "hdmi resume success!\n"); return 0; } #endif diff --git a/drivers/gpu/drm/mediatek/mtk_mt8173_hdmi_phy.c b/drivers/gpu/drm/mediatek/mtk_mt8173_hdmi_phy.c index 9478490d55aa..827b93786fac 100644 --- a/drivers/gpu/drm/mediatek/mtk_mt8173_hdmi_phy.c +++ b/drivers/gpu/drm/mediatek/mtk_mt8173_hdmi_phy.c @@ -111,8 +111,6 @@ static int mtk_hdmi_pll_prepare(struct clk_hw *hw) { struct mtk_hdmi_phy *hdmi_phy = to_mtk_hdmi_phy(hw); - dev_dbg(hdmi_phy->dev, "%s\n", __func__); - mtk_hdmi_phy_set_bits(hdmi_phy, HDMI_CON1, RG_HDMITX_PLL_AUTOK_EN); mtk_hdmi_phy_set_bits(hdmi_phy, HDMI_CON0, RG_HDMITX_PLL_POSDIV); mtk_hdmi_phy_clear_bits(hdmi_phy, HDMI_CON3, RG_HDMITX_MHLCK_EN); @@ -130,8 +128,6 @@ static void mtk_hdmi_pll_unprepare(struct clk_hw *hw) { struct mtk_hdmi_phy *hdmi_phy = to_mtk_hdmi_phy(hw); - dev_dbg(hdmi_phy->dev, "%s\n", __func__); - mtk_hdmi_phy_clear_bits(hdmi_phy, HDMI_CON1, RG_HDMITX_PLL_TXDIV_EN); mtk_hdmi_phy_clear_bits(hdmi_phy, HDMI_CON1, RG_HDMITX_PLL_BIAS_LPF_EN); usleep_range(100, 150); From caebecb0326907e978b064926836b48f94fec62f Mon Sep 17 00:00:00 2001 From: Randy Dunlap Date: Fri, 3 Jul 2020 15:41:09 -0700 Subject: [PATCH 154/343] Documentation: networking: arcnet: drop doubled word Drop the doubled word "to". Signed-off-by: Randy Dunlap Cc: Jonathan Corbet Cc: linux-doc@vger.kernel.org Cc: "David S. Miller" Cc: Jakub Kicinski Cc: netdev@vger.kernel.org Signed-off-by: David S. Miller --- Documentation/networking/arcnet.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Documentation/networking/arcnet.rst b/Documentation/networking/arcnet.rst index e93d9820f0f1..82fce606c0f0 100644 --- a/Documentation/networking/arcnet.rst +++ b/Documentation/networking/arcnet.rst @@ -434,7 +434,7 @@ can set up your network then: ifconfig arc0 insight route add insight arc0 route add freedom arc0 /* I would use the subnet here (like I said - to to in "single protocol" above), + to in "single protocol" above), but the rest of the subnet unfortunately lies across the PPP link on freedom, which confuses From e99094856d0ab192501c3e6e2ff77b3ae44f6445 Mon Sep 17 00:00:00 2001 From: Randy Dunlap Date: Fri, 3 Jul 2020 15:41:10 -0700 Subject: [PATCH 155/343] Documentation: networking: ax25: drop doubled word Drop the doubled word "and". Signed-off-by: Randy Dunlap Cc: Jonathan Corbet Cc: linux-doc@vger.kernel.org Cc: "David S. Miller" Cc: Jakub Kicinski Cc: netdev@vger.kernel.org Cc: Ralf Baechle Cc: linux-hams@vger.kernel.org Signed-off-by: David S. Miller --- Documentation/networking/ax25.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Documentation/networking/ax25.rst b/Documentation/networking/ax25.rst index 824afd7002db..f060cfb1445a 100644 --- a/Documentation/networking/ax25.rst +++ b/Documentation/networking/ax25.rst @@ -6,7 +6,7 @@ AX.25 To use the amateur radio protocols within Linux you will need to get a suitable copy of the AX.25 Utilities. More detailed information about -AX.25, NET/ROM and ROSE, associated programs and and utilities can be +AX.25, NET/ROM and ROSE, associated programs and utilities can be found on http://www.linux-ax25.org. There is an active mailing list for discussing Linux amateur radio matters From 6d0fe3aea4a815929118a002a342849e6b9f0cfd Mon Sep 17 00:00:00 2001 From: Randy Dunlap Date: Fri, 3 Jul 2020 15:41:11 -0700 Subject: [PATCH 156/343] Documentation: networking: can_ucan_protocol: drop doubled words Drop the doubled words "the" and "of". Signed-off-by: Randy Dunlap Cc: Jonathan Corbet Cc: linux-doc@vger.kernel.org Cc: "David S. Miller" Cc: Jakub Kicinski Cc: netdev@vger.kernel.org Cc: Wolfgang Grandegger Cc: Marc Kleine-Budde Cc: linux-can@vger.kernel.org Acked-by: Marc Kleine-Budde Signed-off-by: David S. Miller --- Documentation/networking/can_ucan_protocol.rst | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/Documentation/networking/can_ucan_protocol.rst b/Documentation/networking/can_ucan_protocol.rst index 4cef88d24fc7..638ac1ee7914 100644 --- a/Documentation/networking/can_ucan_protocol.rst +++ b/Documentation/networking/can_ucan_protocol.rst @@ -144,7 +144,7 @@ UCAN_COMMAND_SET_BITTIMING *Host2Dev; mandatory* -Setup bittiming by sending the the structure +Setup bittiming by sending the structure ``ucan_ctl_payload_t.cmd_set_bittiming`` (see ``struct bittiming`` for details) @@ -232,7 +232,7 @@ UCAN_IN_TX_COMPLETE zero The CAN device has sent a message to the CAN bus. It answers with a -list of of tuples . +list of tuples . The echo-id identifies the frame from (echos the id from a previous UCAN_OUT_TX message). The flag indicates the result of the From 4f6a009c8bdd69b611cf3a807128314992d07bf9 Mon Sep 17 00:00:00 2001 From: Randy Dunlap Date: Fri, 3 Jul 2020 15:41:12 -0700 Subject: [PATCH 157/343] Documentation: networking: dsa: drop doubled word Drop the doubled word "in". Signed-off-by: Randy Dunlap Cc: Jonathan Corbet Cc: linux-doc@vger.kernel.org Cc: "David S. Miller" Cc: Jakub Kicinski Cc: netdev@vger.kernel.org Cc: Andrew Lunn Cc: Vivien Didelot Cc: Florian Fainelli Reviewed-by: Andrew Lunn Signed-off-by: David S. Miller --- Documentation/networking/dsa/dsa.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Documentation/networking/dsa/dsa.rst b/Documentation/networking/dsa/dsa.rst index 563d56c6a25c..a8d15dd2b42b 100644 --- a/Documentation/networking/dsa/dsa.rst +++ b/Documentation/networking/dsa/dsa.rst @@ -95,7 +95,7 @@ Ethernet switch. Networking stack hooks ---------------------- -When a master netdev is used with DSA, a small hook is placed in in the +When a master netdev is used with DSA, a small hook is placed in the networking stack is in order to have the DSA subsystem process the Ethernet switch specific tagging protocol. DSA accomplishes this by registering a specific (and fake) Ethernet type (later becoming ``skb->protocol``) with the From a7db3c766916e8a9a054925d036a2746d3e93596 Mon Sep 17 00:00:00 2001 From: Randy Dunlap Date: Fri, 3 Jul 2020 15:41:13 -0700 Subject: [PATCH 158/343] Documentation: networking: ip-sysctl: drop doubled word Drop the doubled word "that". Signed-off-by: Randy Dunlap Cc: Jonathan Corbet Cc: linux-doc@vger.kernel.org Cc: "David S. Miller" Cc: Jakub Kicinski Cc: netdev@vger.kernel.org Signed-off-by: David S. Miller --- Documentation/networking/ip-sysctl.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Documentation/networking/ip-sysctl.rst b/Documentation/networking/ip-sysctl.rst index b72f89d5694c..837d51f9e1fa 100644 --- a/Documentation/networking/ip-sysctl.rst +++ b/Documentation/networking/ip-sysctl.rst @@ -741,7 +741,7 @@ tcp_fastopen - INTEGER Default: 0x1 - Note that that additional client or server features are only + Note that additional client or server features are only effective if the basic support (0x1 and 0x2) are enabled respectively. tcp_fastopen_blackhole_timeout_sec - INTEGER From 474112d57c70520ebd81a5ca578fee1d93fafd07 Mon Sep 17 00:00:00 2001 From: Randy Dunlap Date: Fri, 3 Jul 2020 15:41:14 -0700 Subject: [PATCH 159/343] Documentation: networking: ipvs-sysctl: drop doubled word Drop the doubled word "that". Signed-off-by: Randy Dunlap Cc: Jonathan Corbet Cc: linux-doc@vger.kernel.org Cc: "David S. Miller" Cc: Jakub Kicinski Cc: netdev@vger.kernel.org Signed-off-by: David S. Miller --- Documentation/networking/ipvs-sysctl.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Documentation/networking/ipvs-sysctl.rst b/Documentation/networking/ipvs-sysctl.rst index be36c4600e8f..2afccc63856e 100644 --- a/Documentation/networking/ipvs-sysctl.rst +++ b/Documentation/networking/ipvs-sysctl.rst @@ -114,7 +114,7 @@ drop_entry - INTEGER modes (when there is no enough available memory, the strategy is enabled and the variable is automatically set to 2, otherwise the strategy is disabled and the variable is set to - 1), and 3 means that that the strategy is always enabled. + 1), and 3 means that the strategy is always enabled. drop_packet - INTEGER - 0 - disabled (default) From e54ac95afb2fe9dabcbb139b1705d5a8fe96345a Mon Sep 17 00:00:00 2001 From: Randy Dunlap Date: Fri, 3 Jul 2020 15:41:15 -0700 Subject: [PATCH 160/343] Documentation: networking: rxrpc: drop doubled word Drop the doubled word "have". Signed-off-by: Randy Dunlap Cc: Jonathan Corbet Cc: linux-doc@vger.kernel.org Cc: "David S. Miller" Cc: Jakub Kicinski Cc: netdev@vger.kernel.org Cc: David Howells Cc: linux-afs@lists.infradead.org Signed-off-by: David S. Miller --- Documentation/networking/rxrpc.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Documentation/networking/rxrpc.rst b/Documentation/networking/rxrpc.rst index 68552b92dc44..39c2249c7aa7 100644 --- a/Documentation/networking/rxrpc.rst +++ b/Documentation/networking/rxrpc.rst @@ -186,7 +186,7 @@ About the AF_RXRPC driver: time [tunable] after the last connection using it discarded, in case a new connection is made that could use it. - (#) A client-side connection is only shared between calls if they have have + (#) A client-side connection is only shared between calls if they have the same key struct describing their security (and assuming the calls would otherwise share the connection). Non-secured calls would also be able to share connections with each other. From 6dbb89014dc303facc54d33ae64419d2f9c8ff32 Mon Sep 17 00:00:00 2001 From: Luo bin Date: Sat, 4 Jul 2020 15:32:43 +0800 Subject: [PATCH 161/343] hinic: fix sending mailbox timeout in aeq event work When sending mailbox in the work of aeq event, another aeq event will be triggered. because the last aeq work is not exited and only one work can be excuted simultaneously in the same workqueue, mailbox sending function will return failure of timeout. We create and use another workqueue to fix this. Signed-off-by: Luo bin Signed-off-by: David S. Miller --- .../net/ethernet/huawei/hinic/hinic_hw_mgmt.c | 109 +++++++++++++----- .../net/ethernet/huawei/hinic/hinic_hw_mgmt.h | 16 +++ 2 files changed, 97 insertions(+), 28 deletions(-) diff --git a/drivers/net/ethernet/huawei/hinic/hinic_hw_mgmt.c b/drivers/net/ethernet/huawei/hinic/hinic_hw_mgmt.c index c33eb1147055..e0f5a81d8620 100644 --- a/drivers/net/ethernet/huawei/hinic/hinic_hw_mgmt.c +++ b/drivers/net/ethernet/huawei/hinic/hinic_hw_mgmt.c @@ -370,6 +370,53 @@ int hinic_msg_to_mgmt(struct hinic_pf_to_mgmt *pf_to_mgmt, MSG_NOT_RESP, timeout); } +static void recv_mgmt_msg_work_handler(struct work_struct *work) +{ + struct hinic_mgmt_msg_handle_work *mgmt_work = + container_of(work, struct hinic_mgmt_msg_handle_work, work); + struct hinic_pf_to_mgmt *pf_to_mgmt = mgmt_work->pf_to_mgmt; + struct pci_dev *pdev = pf_to_mgmt->hwif->pdev; + u8 *buf_out = pf_to_mgmt->mgmt_ack_buf; + struct hinic_mgmt_cb *mgmt_cb; + unsigned long cb_state; + u16 out_size = 0; + + memset(buf_out, 0, MAX_PF_MGMT_BUF_SIZE); + + if (mgmt_work->mod >= HINIC_MOD_MAX) { + dev_err(&pdev->dev, "Unknown MGMT MSG module = %d\n", + mgmt_work->mod); + kfree(mgmt_work->msg); + kfree(mgmt_work); + return; + } + + mgmt_cb = &pf_to_mgmt->mgmt_cb[mgmt_work->mod]; + + cb_state = cmpxchg(&mgmt_cb->state, + HINIC_MGMT_CB_ENABLED, + HINIC_MGMT_CB_ENABLED | HINIC_MGMT_CB_RUNNING); + + if ((cb_state == HINIC_MGMT_CB_ENABLED) && (mgmt_cb->cb)) + mgmt_cb->cb(mgmt_cb->handle, mgmt_work->cmd, + mgmt_work->msg, mgmt_work->msg_len, + buf_out, &out_size); + else + dev_err(&pdev->dev, "No MGMT msg handler, mod: %d, cmd: %d\n", + mgmt_work->mod, mgmt_work->cmd); + + mgmt_cb->state &= ~HINIC_MGMT_CB_RUNNING; + + if (!mgmt_work->async_mgmt_to_pf) + /* MGMT sent sync msg, send the response */ + msg_to_mgmt_async(pf_to_mgmt, mgmt_work->mod, mgmt_work->cmd, + buf_out, out_size, MGMT_RESP, + mgmt_work->msg_id); + + kfree(mgmt_work->msg); + kfree(mgmt_work); +} + /** * mgmt_recv_msg_handler - handler for message from mgmt cpu * @pf_to_mgmt: PF to MGMT channel @@ -378,40 +425,34 @@ int hinic_msg_to_mgmt(struct hinic_pf_to_mgmt *pf_to_mgmt, static void mgmt_recv_msg_handler(struct hinic_pf_to_mgmt *pf_to_mgmt, struct hinic_recv_msg *recv_msg) { - struct hinic_hwif *hwif = pf_to_mgmt->hwif; - struct pci_dev *pdev = hwif->pdev; - u8 *buf_out = recv_msg->buf_out; - struct hinic_mgmt_cb *mgmt_cb; - unsigned long cb_state; - u16 out_size = 0; + struct hinic_mgmt_msg_handle_work *mgmt_work = NULL; + struct pci_dev *pdev = pf_to_mgmt->hwif->pdev; - if (recv_msg->mod >= HINIC_MOD_MAX) { - dev_err(&pdev->dev, "Unknown MGMT MSG module = %d\n", - recv_msg->mod); + mgmt_work = kzalloc(sizeof(*mgmt_work), GFP_KERNEL); + if (!mgmt_work) { + dev_err(&pdev->dev, "Allocate mgmt work memory failed\n"); return; } - mgmt_cb = &pf_to_mgmt->mgmt_cb[recv_msg->mod]; + if (recv_msg->msg_len) { + mgmt_work->msg = kzalloc(recv_msg->msg_len, GFP_KERNEL); + if (!mgmt_work->msg) { + dev_err(&pdev->dev, "Allocate mgmt msg memory failed\n"); + kfree(mgmt_work); + return; + } + } - cb_state = cmpxchg(&mgmt_cb->state, - HINIC_MGMT_CB_ENABLED, - HINIC_MGMT_CB_ENABLED | HINIC_MGMT_CB_RUNNING); + mgmt_work->pf_to_mgmt = pf_to_mgmt; + mgmt_work->msg_len = recv_msg->msg_len; + memcpy(mgmt_work->msg, recv_msg->msg, recv_msg->msg_len); + mgmt_work->msg_id = recv_msg->msg_id; + mgmt_work->mod = recv_msg->mod; + mgmt_work->cmd = recv_msg->cmd; + mgmt_work->async_mgmt_to_pf = recv_msg->async_mgmt_to_pf; - if ((cb_state == HINIC_MGMT_CB_ENABLED) && (mgmt_cb->cb)) - mgmt_cb->cb(mgmt_cb->handle, recv_msg->cmd, - recv_msg->msg, recv_msg->msg_len, - buf_out, &out_size); - else - dev_err(&pdev->dev, "No MGMT msg handler, mod: %d, cmd: %d\n", - recv_msg->mod, recv_msg->cmd); - - mgmt_cb->state &= ~HINIC_MGMT_CB_RUNNING; - - if (!recv_msg->async_mgmt_to_pf) - /* MGMT sent sync msg, send the response */ - msg_to_mgmt_async(pf_to_mgmt, recv_msg->mod, recv_msg->cmd, - buf_out, out_size, MGMT_RESP, - recv_msg->msg_id); + INIT_WORK(&mgmt_work->work, recv_mgmt_msg_work_handler); + queue_work(pf_to_mgmt->workq, &mgmt_work->work); } /** @@ -546,6 +587,12 @@ static int alloc_msg_buf(struct hinic_pf_to_mgmt *pf_to_mgmt) if (!pf_to_mgmt->sync_msg_buf) return -ENOMEM; + pf_to_mgmt->mgmt_ack_buf = devm_kzalloc(&pdev->dev, + MAX_PF_MGMT_BUF_SIZE, + GFP_KERNEL); + if (!pf_to_mgmt->mgmt_ack_buf) + return -ENOMEM; + return 0; } @@ -571,6 +618,11 @@ int hinic_pf_to_mgmt_init(struct hinic_pf_to_mgmt *pf_to_mgmt, return 0; sema_init(&pf_to_mgmt->sync_msg_lock, 1); + pf_to_mgmt->workq = create_singlethread_workqueue("hinic_mgmt"); + if (!pf_to_mgmt->workq) { + dev_err(&pdev->dev, "Failed to initialize MGMT workqueue\n"); + return -ENOMEM; + } pf_to_mgmt->sync_msg_id = 0; err = alloc_msg_buf(pf_to_mgmt); @@ -605,4 +657,5 @@ void hinic_pf_to_mgmt_free(struct hinic_pf_to_mgmt *pf_to_mgmt) hinic_aeq_unregister_hw_cb(&hwdev->aeqs, HINIC_MSG_FROM_MGMT_CPU); hinic_api_cmd_free(pf_to_mgmt->cmd_chain); + destroy_workqueue(pf_to_mgmt->workq); } diff --git a/drivers/net/ethernet/huawei/hinic/hinic_hw_mgmt.h b/drivers/net/ethernet/huawei/hinic/hinic_hw_mgmt.h index c2b142c08b0e..a824fbda59db 100644 --- a/drivers/net/ethernet/huawei/hinic/hinic_hw_mgmt.h +++ b/drivers/net/ethernet/huawei/hinic/hinic_hw_mgmt.h @@ -119,6 +119,7 @@ struct hinic_pf_to_mgmt { struct semaphore sync_msg_lock; u16 sync_msg_id; u8 *sync_msg_buf; + void *mgmt_ack_buf; struct hinic_recv_msg recv_resp_msg_from_mgmt; struct hinic_recv_msg recv_msg_from_mgmt; @@ -126,6 +127,21 @@ struct hinic_pf_to_mgmt { struct hinic_api_cmd_chain *cmd_chain[HINIC_API_CMD_MAX]; struct hinic_mgmt_cb mgmt_cb[HINIC_MOD_MAX]; + + struct workqueue_struct *workq; +}; + +struct hinic_mgmt_msg_handle_work { + struct work_struct work; + struct hinic_pf_to_mgmt *pf_to_mgmt; + + void *msg; + u16 msg_len; + + enum hinic_mod_type mod; + u8 cmd; + u16 msg_id; + int async_mgmt_to_pf; }; void hinic_register_mgmt_msg_cb(struct hinic_pf_to_mgmt *pf_to_mgmt, From ccfc9df1352be5b2f391091e18c4b2395d30ce78 Mon Sep 17 00:00:00 2001 From: Taehee Yoo Date: Thu, 2 Jul 2020 17:06:19 +0000 Subject: [PATCH 162/343] hsr: fix interface leak in error path of hsr_dev_finalize() To release hsr(upper) interface, it should release its own lower interfaces first. Then, hsr(upper) interface can be released safely. In the current code of error path of hsr_dev_finalize(), it releases hsr interface before releasing a lower interface. So, a warning occurs, which warns about the leak of lower interfaces. In order to fix this problem, changing the ordering of the error path of hsr_dev_finalize() is needed. Test commands: ip link add dummy0 type dummy ip link add dummy1 type dummy ip link add dummy2 type dummy ip link add hsr0 type hsr slave1 dummy0 slave2 dummy1 ip link add hsr1 type hsr slave1 dummy2 slave2 dummy0 Splat looks like: [ 214.923127][ C2] WARNING: CPU: 2 PID: 1093 at net/core/dev.c:8992 rollback_registered_many+0x986/0xcf0 [ 214.923129][ C2] Modules linked in: hsr dummy openvswitch nsh nf_conncount nf_nat nf_conntrack nf_defrag_ipx [ 214.923154][ C2] CPU: 2 PID: 1093 Comm: ip Not tainted 5.8.0-rc2+ #623 [ 214.923156][ C2] Hardware name: innotek GmbH VirtualBox/VirtualBox, BIOS VirtualBox 12/01/2006 [ 214.923157][ C2] RIP: 0010:rollback_registered_many+0x986/0xcf0 [ 214.923160][ C2] Code: 41 8b 4e cc 45 31 c0 31 d2 4c 89 ee 48 89 df e8 e0 47 ff ff 85 c0 0f 84 cd fc ff ff 5 [ 214.923162][ C2] RSP: 0018:ffff8880c5156f28 EFLAGS: 00010287 [ 214.923165][ C2] RAX: ffff8880d1dad458 RBX: ffff8880bd1b9000 RCX: ffffffffb929d243 [ 214.923167][ C2] RDX: 1ffffffff77e63f0 RSI: 0000000000000008 RDI: ffffffffbbf31f80 [ 214.923168][ C2] RBP: dffffc0000000000 R08: fffffbfff77e63f1 R09: fffffbfff77e63f1 [ 214.923170][ C2] R10: ffffffffbbf31f87 R11: 0000000000000001 R12: ffff8880c51570a0 [ 214.923172][ C2] R13: ffff8880bd1b90b8 R14: ffff8880c5157048 R15: ffff8880d1dacc40 [ 214.923174][ C2] FS: 00007fdd257a20c0(0000) GS:ffff8880da200000(0000) knlGS:0000000000000000 [ 214.923175][ C2] CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 [ 214.923177][ C2] CR2: 00007ffd78beb038 CR3: 00000000be544005 CR4: 00000000000606e0 [ 214.923179][ C2] Call Trace: [ 214.923180][ C2] ? netif_set_real_num_tx_queues+0x780/0x780 [ 214.923182][ C2] ? dev_validate_mtu+0x140/0x140 [ 214.923183][ C2] ? synchronize_rcu.part.79+0x85/0xd0 [ 214.923185][ C2] ? synchronize_rcu_expedited+0xbb0/0xbb0 [ 214.923187][ C2] rollback_registered+0xc8/0x170 [ 214.923188][ C2] ? rollback_registered_many+0xcf0/0xcf0 [ 214.923190][ C2] unregister_netdevice_queue+0x18b/0x240 [ 214.923191][ C2] hsr_dev_finalize+0x56e/0x6e0 [hsr] [ 214.923192][ C2] hsr_newlink+0x36b/0x450 [hsr] [ 214.923194][ C2] ? hsr_dellink+0x70/0x70 [hsr] [ 214.923195][ C2] ? rtnl_create_link+0x2e4/0xb00 [ 214.923197][ C2] ? __netlink_ns_capable+0xc3/0xf0 [ 214.923198][ C2] __rtnl_newlink+0xbdb/0x1270 [ ... ] Fixes: e0a4b99773d3 ("hsr: use upper/lower device infrastructure") Reported-by: syzbot+7f1c020f68dab95aab59@syzkaller.appspotmail.com Signed-off-by: Taehee Yoo Signed-off-by: David S. Miller --- net/hsr/hsr_device.c | 11 +++++++---- 1 file changed, 7 insertions(+), 4 deletions(-) diff --git a/net/hsr/hsr_device.c b/net/hsr/hsr_device.c index 478852ef98ef..a6f4e9f65b14 100644 --- a/net/hsr/hsr_device.c +++ b/net/hsr/hsr_device.c @@ -415,6 +415,7 @@ int hsr_dev_finalize(struct net_device *hsr_dev, struct net_device *slave[2], unsigned char multicast_spec, u8 protocol_version, struct netlink_ext_ack *extack) { + bool unregister = false; struct hsr_priv *hsr; int res; @@ -466,25 +467,27 @@ int hsr_dev_finalize(struct net_device *hsr_dev, struct net_device *slave[2], if (res) goto err_unregister; + unregister = true; + res = hsr_add_port(hsr, slave[0], HSR_PT_SLAVE_A, extack); if (res) - goto err_add_slaves; + goto err_unregister; res = hsr_add_port(hsr, slave[1], HSR_PT_SLAVE_B, extack); if (res) - goto err_add_slaves; + goto err_unregister; hsr_debugfs_init(hsr, hsr_dev); mod_timer(&hsr->prune_timer, jiffies + msecs_to_jiffies(PRUNE_PERIOD)); return 0; -err_add_slaves: - unregister_netdevice(hsr_dev); err_unregister: hsr_del_ports(hsr); err_add_master: hsr_del_self_node(hsr); + if (unregister) + unregister_netdevice(hsr_dev); return res; } From 2a762e9e8cd1cf1242e4269a2244666ed02eecd1 Mon Sep 17 00:00:00 2001 From: Taehee Yoo Date: Thu, 2 Jul 2020 17:08:18 +0000 Subject: [PATCH 163/343] net: rmnet: fix lower interface leak There are two types of the lower interface of rmnet that are VND and BRIDGE. Each lower interface can have only one type either VND or BRIDGE. But, there is a case, which uses both lower interface types. Due to this unexpected behavior, lower interface leak occurs. Test commands: ip link add dummy0 type dummy ip link add dummy1 type dummy ip link add rmnet0 link dummy0 type rmnet mux_id 1 ip link set dummy1 master rmnet0 ip link add rmnet1 link dummy1 type rmnet mux_id 2 ip link del rmnet0 The dummy1 was attached as BRIDGE interface of rmnet0. Then, it also was attached as VND interface of rmnet1. This is unexpected behavior and there is no code for handling this case. So that below splat occurs when the rmnet0 interface is deleted. Splat looks like: [ 53.254112][ C1] WARNING: CPU: 1 PID: 1192 at net/core/dev.c:8992 rollback_registered_many+0x986/0xcf0 [ 53.254117][ C1] Modules linked in: rmnet dummy openvswitch nsh nf_conncount nf_nat nf_conntrack nf_defrag_ipv6 nfx [ 53.254182][ C1] CPU: 1 PID: 1192 Comm: ip Not tainted 5.8.0-rc1+ #620 [ 53.254188][ C1] Hardware name: innotek GmbH VirtualBox/VirtualBox, BIOS VirtualBox 12/01/2006 [ 53.254192][ C1] RIP: 0010:rollback_registered_many+0x986/0xcf0 [ 53.254200][ C1] Code: 41 8b 4e cc 45 31 c0 31 d2 4c 89 ee 48 89 df e8 e0 47 ff ff 85 c0 0f 84 cd fc ff ff 0f 0b e5 [ 53.254205][ C1] RSP: 0018:ffff888050a5f2e0 EFLAGS: 00010287 [ 53.254214][ C1] RAX: ffff88805756d658 RBX: ffff88804d99c000 RCX: ffffffff8329d323 [ 53.254219][ C1] RDX: 1ffffffff0be6410 RSI: 0000000000000008 RDI: ffffffff85f32080 [ 53.254223][ C1] RBP: dffffc0000000000 R08: fffffbfff0be6411 R09: fffffbfff0be6411 [ 53.254228][ C1] R10: ffffffff85f32087 R11: 0000000000000001 R12: ffff888050a5f480 [ 53.254233][ C1] R13: ffff88804d99c0b8 R14: ffff888050a5f400 R15: ffff8880548ebe40 [ 53.254238][ C1] FS: 00007f6b86b370c0(0000) GS:ffff88806c200000(0000) knlGS:0000000000000000 [ 53.254243][ C1] CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 [ 53.254248][ C1] CR2: 0000562c62438758 CR3: 000000003f600005 CR4: 00000000000606e0 [ 53.254253][ C1] DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000 [ 53.254257][ C1] DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400 [ 53.254261][ C1] Call Trace: [ 53.254266][ C1] ? lockdep_hardirqs_on_prepare+0x379/0x540 [ 53.254270][ C1] ? netif_set_real_num_tx_queues+0x780/0x780 [ 53.254275][ C1] ? rmnet_unregister_real_device+0x56/0x90 [rmnet] [ 53.254279][ C1] ? __kasan_slab_free+0x126/0x150 [ 53.254283][ C1] ? kfree+0xdc/0x320 [ 53.254288][ C1] ? rmnet_unregister_real_device+0x56/0x90 [rmnet] [ 53.254293][ C1] unregister_netdevice_many.part.135+0x13/0x1b0 [ 53.254297][ C1] rtnl_delete_link+0xbc/0x100 [ 53.254301][ C1] ? rtnl_af_register+0xc0/0xc0 [ 53.254305][ C1] rtnl_dellink+0x2dc/0x840 [ 53.254309][ C1] ? find_held_lock+0x39/0x1d0 [ 53.254314][ C1] ? valid_fdb_dump_strict+0x620/0x620 [ 53.254318][ C1] ? rtnetlink_rcv_msg+0x457/0x890 [ 53.254322][ C1] ? lock_contended+0xd20/0xd20 [ 53.254326][ C1] rtnetlink_rcv_msg+0x4a8/0x890 [ ... ] [ 73.813696][ T1192] unregister_netdevice: waiting for rmnet0 to become free. Usage count = 1 Fixes: 037f9cdf72fb ("net: rmnet: use upper/lower device infrastructure") Signed-off-by: Taehee Yoo Signed-off-by: David S. Miller --- .../ethernet/qualcomm/rmnet/rmnet_config.c | 21 +++++++++++-------- 1 file changed, 12 insertions(+), 9 deletions(-) diff --git a/drivers/net/ethernet/qualcomm/rmnet/rmnet_config.c b/drivers/net/ethernet/qualcomm/rmnet/rmnet_config.c index 40efe60eff8d..2c8c252b7b97 100644 --- a/drivers/net/ethernet/qualcomm/rmnet/rmnet_config.c +++ b/drivers/net/ethernet/qualcomm/rmnet/rmnet_config.c @@ -47,15 +47,23 @@ static int rmnet_unregister_real_device(struct net_device *real_dev) return 0; } -static int rmnet_register_real_device(struct net_device *real_dev) +static int rmnet_register_real_device(struct net_device *real_dev, + struct netlink_ext_ack *extack) { struct rmnet_port *port; int rc, entry; ASSERT_RTNL(); - if (rmnet_is_real_dev_registered(real_dev)) + if (rmnet_is_real_dev_registered(real_dev)) { + port = rmnet_get_port_rtnl(real_dev); + if (port->rmnet_mode != RMNET_EPMODE_VND) { + NL_SET_ERR_MSG_MOD(extack, "bridge device already exists"); + return -EINVAL; + } + return 0; + } port = kzalloc(sizeof(*port), GFP_KERNEL); if (!port) @@ -133,7 +141,7 @@ static int rmnet_newlink(struct net *src_net, struct net_device *dev, mux_id = nla_get_u16(data[IFLA_RMNET_MUX_ID]); - err = rmnet_register_real_device(real_dev); + err = rmnet_register_real_device(real_dev, extack); if (err) goto err0; @@ -421,11 +429,6 @@ int rmnet_add_bridge(struct net_device *rmnet_dev, return -EINVAL; } - if (port->rmnet_mode != RMNET_EPMODE_VND) { - NL_SET_ERR_MSG_MOD(extack, "bridge device already exists"); - return -EINVAL; - } - if (rmnet_is_real_dev_registered(slave_dev)) { NL_SET_ERR_MSG_MOD(extack, "slave cannot be another rmnet dev"); @@ -433,7 +436,7 @@ int rmnet_add_bridge(struct net_device *rmnet_dev, return -EBUSY; } - err = rmnet_register_real_device(slave_dev); + err = rmnet_register_real_device(slave_dev, extack); if (err) return -EBUSY; From 2fb2799a2abb39d7dbb48abb3baa1133bf5e921a Mon Sep 17 00:00:00 2001 From: Taehee Yoo Date: Thu, 2 Jul 2020 17:08:55 +0000 Subject: [PATCH 164/343] net: rmnet: do not allow to add multiple bridge interfaces rmnet can have only two bridge interface. One of them is a link interface and another one is added by the master operation. rmnet interface shouldn't allow adding additional bridge interfaces by mater operation. But, there is no code to deny additional interfaces. So, interface leak occurs. Test commands: ip link add dummy0 type dummy ip link add dummy1 type dummy ip link add dummy2 type dummy ip link add rmnet0 link dummy0 type rmnet mux_id 1 ip link set dummy1 master rmnet0 ip link set dummy2 master rmnet0 ip link del rmnet0 In the above test command, the dummy0 was attached to rmnet as VND mode. Then, dummy1 was attached to rmnet0 as BRIDGE mode. At this point, dummy0 mode is switched from VND to BRIDGE automatically. Then, dummy2 is attached to rmnet as BRIDGE mode. At this point, rmnet0 should deny this operation. But, rmnet0 doesn't deny this. So that below splat occurs when the rmnet0 interface is deleted. Splat looks like: [ 186.684787][ C2] WARNING: CPU: 2 PID: 1009 at net/core/dev.c:8992 rollback_registered_many+0x986/0xcf0 [ 186.684788][ C2] Modules linked in: rmnet dummy openvswitch nsh nf_conncount nf_nat nf_conntrack nf_defrag_x [ 186.684805][ C2] CPU: 2 PID: 1009 Comm: ip Not tainted 5.8.0-rc1+ #621 [ 186.684807][ C2] Hardware name: innotek GmbH VirtualBox/VirtualBox, BIOS VirtualBox 12/01/2006 [ 186.684808][ C2] RIP: 0010:rollback_registered_many+0x986/0xcf0 [ 186.684811][ C2] Code: 41 8b 4e cc 45 31 c0 31 d2 4c 89 ee 48 89 df e8 e0 47 ff ff 85 c0 0f 84 cd fc ff ff 5 [ 186.684812][ C2] RSP: 0018:ffff8880cd9472e0 EFLAGS: 00010287 [ 186.684815][ C2] RAX: ffff8880cc56da58 RBX: ffff8880ab21c000 RCX: ffffffff9329d323 [ 186.684816][ C2] RDX: 1ffffffff2be6410 RSI: 0000000000000008 RDI: ffffffff95f32080 [ 186.684818][ C2] RBP: dffffc0000000000 R08: fffffbfff2be6411 R09: fffffbfff2be6411 [ 186.684819][ C2] R10: ffffffff95f32087 R11: 0000000000000001 R12: ffff8880cd947480 [ 186.684820][ C2] R13: ffff8880ab21c0b8 R14: ffff8880cd947400 R15: ffff8880cdf10640 [ 186.684822][ C2] FS: 00007f00843890c0(0000) GS:ffff8880d4e00000(0000) knlGS:0000000000000000 [ 186.684823][ C2] CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 [ 186.684825][ C2] CR2: 000055b8ab1077b8 CR3: 00000000ab612006 CR4: 00000000000606e0 [ 186.684826][ C2] Call Trace: [ 186.684827][ C2] ? lockdep_hardirqs_on_prepare+0x379/0x540 [ 186.684829][ C2] ? netif_set_real_num_tx_queues+0x780/0x780 [ 186.684830][ C2] ? rmnet_unregister_real_device+0x56/0x90 [rmnet] [ 186.684831][ C2] ? __kasan_slab_free+0x126/0x150 [ 186.684832][ C2] ? kfree+0xdc/0x320 [ 186.684834][ C2] ? rmnet_unregister_real_device+0x56/0x90 [rmnet] [ 186.684835][ C2] unregister_netdevice_many.part.135+0x13/0x1b0 [ 186.684836][ C2] rtnl_delete_link+0xbc/0x100 [ ... ] [ 238.440071][ T1009] unregister_netdevice: waiting for rmnet0 to become free. Usage count = 1 Fixes: 037f9cdf72fb ("net: rmnet: use upper/lower device infrastructure") Signed-off-by: Taehee Yoo Signed-off-by: David S. Miller --- drivers/net/ethernet/qualcomm/rmnet/rmnet_config.c | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/drivers/net/ethernet/qualcomm/rmnet/rmnet_config.c b/drivers/net/ethernet/qualcomm/rmnet/rmnet_config.c index 2c8c252b7b97..fcdecddb2812 100644 --- a/drivers/net/ethernet/qualcomm/rmnet/rmnet_config.c +++ b/drivers/net/ethernet/qualcomm/rmnet/rmnet_config.c @@ -429,6 +429,11 @@ int rmnet_add_bridge(struct net_device *rmnet_dev, return -EINVAL; } + if (port->rmnet_mode != RMNET_EPMODE_VND) { + NL_SET_ERR_MSG_MOD(extack, "more than one bridge dev attached"); + return -EINVAL; + } + if (rmnet_is_real_dev_registered(slave_dev)) { NL_SET_ERR_MSG_MOD(extack, "slave cannot be another rmnet dev"); From 146f76cc84b787c4eec6ed73ebeec708a06e4ae4 Mon Sep 17 00:00:00 2001 From: Marc Zyngier Date: Sat, 4 Jul 2020 13:30:55 +0100 Subject: [PATCH 165/343] KVM: arm64: PMU: Fix per-CPU access in preemptible context Commit 07da1ffaa137 ("KVM: arm64: Remove host_cpu_context member from vcpu structure") has, by removing the host CPU context pointer, exposed that kvm_vcpu_pmu_restore_guest is called in preemptible contexts: [ 266.932442] BUG: using smp_processor_id() in preemptible [00000000] code: qemu-system-aar/779 [ 266.939721] caller is debug_smp_processor_id+0x20/0x30 [ 266.944157] CPU: 2 PID: 779 Comm: qemu-system-aar Tainted: G E 5.8.0-rc3-00015-g8d4aa58b2fe3 #1374 [ 266.954268] Hardware name: amlogic w400/w400, BIOS 2020.04 05/22/2020 [ 266.960640] Call trace: [ 266.963064] dump_backtrace+0x0/0x1e0 [ 266.966679] show_stack+0x20/0x30 [ 266.969959] dump_stack+0xe4/0x154 [ 266.973338] check_preemption_disabled+0xf8/0x108 [ 266.977978] debug_smp_processor_id+0x20/0x30 [ 266.982307] kvm_vcpu_pmu_restore_guest+0x2c/0x68 [ 266.986949] access_pmcr+0xf8/0x128 [ 266.990399] perform_access+0x8c/0x250 [ 266.994108] kvm_handle_sys_reg+0x10c/0x2f8 [ 266.998247] handle_exit+0x78/0x200 [ 267.001697] kvm_arch_vcpu_ioctl_run+0x2ac/0xab8 Note that the bug was always there, it is only the switch to using percpu accessors that made it obvious. The fix is to wrap these accesses in a preempt-disabled section, so that we sample a coherent context on trap from the guest. Fixes: 435e53fb5e21 ("arm64: KVM: Enable VHE support for :G/:H perf event modifiers") Cc:: Andrew Murray Signed-off-by: Marc Zyngier --- arch/arm64/kvm/pmu.c | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/arch/arm64/kvm/pmu.c b/arch/arm64/kvm/pmu.c index b5ae3a5d509e..3c224162b3dd 100644 --- a/arch/arm64/kvm/pmu.c +++ b/arch/arm64/kvm/pmu.c @@ -159,7 +159,10 @@ static void kvm_vcpu_pmu_disable_el0(unsigned long events) } /* - * On VHE ensure that only guest events have EL0 counting enabled + * On VHE ensure that only guest events have EL0 counting enabled. + * This is called from both vcpu_{load,put} and the sysreg handling. + * Since the latter is preemptible, special care must be taken to + * disable preemption. */ void kvm_vcpu_pmu_restore_guest(struct kvm_vcpu *vcpu) { @@ -169,12 +172,14 @@ void kvm_vcpu_pmu_restore_guest(struct kvm_vcpu *vcpu) if (!has_vhe()) return; + preempt_disable(); host = this_cpu_ptr(&kvm_host_data); events_guest = host->pmu_events.events_guest; events_host = host->pmu_events.events_host; kvm_vcpu_pmu_enable_el0(events_guest); kvm_vcpu_pmu_disable_el0(events_host); + preempt_enable(); } /* From b9e10d4a6c9f5cbe6369ce2c17ebc67d2e5a4be5 Mon Sep 17 00:00:00 2001 From: Andrew Scull Date: Mon, 6 Jul 2020 10:52:59 +0100 Subject: [PATCH 166/343] KVM: arm64: Stop clobbering x0 for HVC_SOFT_RESTART HVC_SOFT_RESTART is given values for x0-2 that it should installed before exiting to the new address so should not set x0 to stub HVC success or failure code. Fixes: af42f20480bf1 ("arm64: hyp-stub: Zero x0 on successful stub handling") Cc: stable@vger.kernel.org Signed-off-by: Andrew Scull Signed-off-by: Marc Zyngier Link: https://lore.kernel.org/r/20200706095259.1338221-1-ascull@google.com --- arch/arm64/kvm/hyp-init.S | 11 +++++++---- 1 file changed, 7 insertions(+), 4 deletions(-) diff --git a/arch/arm64/kvm/hyp-init.S b/arch/arm64/kvm/hyp-init.S index 6e6ed5581eed..e76c0e89d48e 100644 --- a/arch/arm64/kvm/hyp-init.S +++ b/arch/arm64/kvm/hyp-init.S @@ -136,11 +136,15 @@ SYM_CODE_START(__kvm_handle_stub_hvc) 1: cmp x0, #HVC_RESET_VECTORS b.ne 1f -reset: + /* - * Reset kvm back to the hyp stub. Do not clobber x0-x4 in - * case we coming via HVC_SOFT_RESTART. + * Set the HVC_RESET_VECTORS return code before entering the common + * path so that we do not clobber x0-x2 in case we are coming via + * HVC_SOFT_RESTART. */ + mov x0, xzr +reset: + /* Reset kvm back to the hyp stub. */ mrs x5, sctlr_el2 mov_q x6, SCTLR_ELx_FLAGS bic x5, x5, x6 // Clear SCTL_M and etc @@ -151,7 +155,6 @@ reset: /* Install stub vectors */ adr_l x5, __hyp_stub_vectors msr vbar_el2, x5 - mov x0, xzr eret 1: /* Bad stub call */ From d61cbb859b45fdb6b4997f2d51834fae41af0e94 Mon Sep 17 00:00:00 2001 From: Wei Li Date: Fri, 12 Jun 2020 17:43:22 +0800 Subject: [PATCH 167/343] perf report TUI: Fix segmentation fault in perf_evsel__hists_browse() The segmentation fault can be reproduced as following steps: 1) Executing perf report in tui. 2) Typing '/xxxxx' to filter the symbol to get nothing matched. 3) Pressing enter with no entry selected. Then it will report a segmentation fault. It is caused by the lack of check of browser->he_selection when accessing it's member res_samples in perf_evsel__hists_browse(). These processes are meaningful for specified samples, so we can skip these when nothing is selected. Fixes: 4968ac8fb7c3 ("perf report: Implement browsing of individual samples") Signed-off-by: Wei Li Acked-by: Jiri Olsa Acked-by: Namhyung Kim Tested-by: Arnaldo Carvalho de Melo Cc: Alexander Shishkin Cc: Andi Kleen Cc: Hanjun Guo Cc: Jin Yao Cc: Mark Rutland Link: http://lore.kernel.org/lkml/20200612094322.39565-1-liwei391@huawei.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/ui/browsers/hists.c | 17 +++++++++++------ 1 file changed, 11 insertions(+), 6 deletions(-) diff --git a/tools/perf/ui/browsers/hists.c b/tools/perf/ui/browsers/hists.c index f98a118dfc49..4cd556c1276f 100644 --- a/tools/perf/ui/browsers/hists.c +++ b/tools/perf/ui/browsers/hists.c @@ -2288,6 +2288,11 @@ static struct thread *hist_browser__selected_thread(struct hist_browser *browser return browser->he_selection->thread; } +static struct res_sample *hist_browser__selected_res_sample(struct hist_browser *browser) +{ + return browser->he_selection ? browser->he_selection->res_samples : NULL; +} + /* Check whether the browser is for 'top' or 'report' */ static inline bool is_report_browser(void *timer) { @@ -3357,16 +3362,16 @@ static int perf_evsel__hists_browse(struct evsel *evsel, int nr_events, &options[nr_options], NULL, NULL, evsel); nr_options += add_res_sample_opt(browser, &actions[nr_options], &options[nr_options], - hist_browser__selected_entry(browser)->res_samples, - evsel, A_NORMAL); + hist_browser__selected_res_sample(browser), + evsel, A_NORMAL); nr_options += add_res_sample_opt(browser, &actions[nr_options], &options[nr_options], - hist_browser__selected_entry(browser)->res_samples, - evsel, A_ASM); + hist_browser__selected_res_sample(browser), + evsel, A_ASM); nr_options += add_res_sample_opt(browser, &actions[nr_options], &options[nr_options], - hist_browser__selected_entry(browser)->res_samples, - evsel, A_SOURCE); + hist_browser__selected_res_sample(browser), + evsel, A_SOURCE); nr_options += add_switch_opt(browser, &actions[nr_options], &options[nr_options]); skip_scripting: From 75bcb8776dc987538f267ba4ba05ca43fc2b1676 Mon Sep 17 00:00:00 2001 From: Adrian Hunter Date: Tue, 30 Jun 2020 16:39:33 +0300 Subject: [PATCH 168/343] perf intel-pt: Fix recording PEBS-via-PT with registers When recording PEBS-via-PT, the kernel will not accept the intel_pt event with register sampling e.g. # perf record --kcore -c 10000 -e '{intel_pt/branch=0/,branch-loads/aux-output/ppp}' -I -- ls -l Error: intel_pt/branch=0/: PMU Hardware doesn't support sampling/overflow-interrupts. Try 'perf stat' Fix by suppressing register sampling on the intel_pt evsel. Committer notes: Adrian informed that this is only available from Tremont onwards, so on older processors the error continues the same as before. Fixes: 9e64cefe4335b ("perf intel-pt: Process options for PEBS event synthesis") Signed-off-by: Adrian Hunter Cc: Jiri Olsa Cc: Luwei Kang Link: http://lore.kernel.org/lkml/20200630133935.11150-2-adrian.hunter@intel.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/arch/x86/util/intel-pt.c | 1 + tools/perf/util/evsel.c | 4 ++-- 2 files changed, 3 insertions(+), 2 deletions(-) diff --git a/tools/perf/arch/x86/util/intel-pt.c b/tools/perf/arch/x86/util/intel-pt.c index 839ef52c1ac2..6ce451293634 100644 --- a/tools/perf/arch/x86/util/intel-pt.c +++ b/tools/perf/arch/x86/util/intel-pt.c @@ -641,6 +641,7 @@ static int intel_pt_recording_options(struct auxtrace_record *itr, } evsel->core.attr.freq = 0; evsel->core.attr.sample_period = 1; + evsel->no_aux_samples = true; intel_pt_evsel = evsel; opts->full_auxtrace = true; } diff --git a/tools/perf/util/evsel.c b/tools/perf/util/evsel.c index a68ac3632ae6..ef802f6d40c1 100644 --- a/tools/perf/util/evsel.c +++ b/tools/perf/util/evsel.c @@ -1014,12 +1014,12 @@ void evsel__config(struct evsel *evsel, struct record_opts *opts, if (callchain && callchain->enabled && !evsel->no_aux_samples) evsel__config_callchain(evsel, opts, callchain); - if (opts->sample_intr_regs) { + if (opts->sample_intr_regs && !evsel->no_aux_samples) { attr->sample_regs_intr = opts->sample_intr_regs; evsel__set_sample_bit(evsel, REGS_INTR); } - if (opts->sample_user_regs) { + if (opts->sample_user_regs && !evsel->no_aux_samples) { attr->sample_regs_user |= opts->sample_user_regs; evsel__set_sample_bit(evsel, REGS_USER); } From add07ccd9222ba0944df2a6d39c6e38de90cd0c5 Mon Sep 17 00:00:00 2001 From: Adrian Hunter Date: Tue, 30 Jun 2020 16:39:34 +0300 Subject: [PATCH 169/343] perf intel-pt: Fix displaying PEBS-via-PT with registers After recording PEBS-via-PT, perf script will not accept 'iregs' field e.g. # perf record -c 10000 -e '{intel_pt/branch=0/,branch-loads/aux-output/ppp}' -I -- ls -l ... [ perf record: Woken up 1 times to write data ] [ perf record: Captured and wrote 0.062 MB perf.data ] # ./perf script --itrace=eop -F+iregs Samples for 'dummy:u' event do not have IREGS attribute set. Cannot print 'iregs' field. Fix by using allow_user_set, which is true when recording AUX area data. Fixes: 9e64cefe4335b ("perf intel-pt: Process options for PEBS event synthesis") Signed-off-by: Adrian Hunter Cc: Jiri Olsa Cc: Luwei Kang Link: http://lore.kernel.org/lkml/20200630133935.11150-3-adrian.hunter@intel.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/builtin-script.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tools/perf/builtin-script.c b/tools/perf/builtin-script.c index 181d65e5a450..447457786362 100644 --- a/tools/perf/builtin-script.c +++ b/tools/perf/builtin-script.c @@ -462,7 +462,7 @@ static int perf_evsel__check_attr(struct evsel *evsel, struct perf_session *sess return -EINVAL; if (PRINT_FIELD(IREGS) && - evsel__check_stype(evsel, PERF_SAMPLE_REGS_INTR, "IREGS", PERF_OUTPUT_IREGS)) + evsel__do_check_stype(evsel, PERF_SAMPLE_REGS_INTR, "IREGS", PERF_OUTPUT_IREGS, allow_user_set)) return -EINVAL; if (PRINT_FIELD(UREGS) && From 4c95ad261cfac120dd66238fcae222766754c219 Mon Sep 17 00:00:00 2001 From: Adrian Hunter Date: Tue, 30 Jun 2020 16:39:35 +0300 Subject: [PATCH 170/343] perf intel-pt: Fix PEBS sample for XMM registers The condition to add XMM registers was missing, the regs array needed to be in the outer scope, and the size of the regs array was too small. Fixes: 143d34a6b387b ("perf intel-pt: Add XMM registers to synthesized PEBS sample") Signed-off-by: Adrian Hunter Cc: Jiri Olsa Cc: Luwei Kang Link: http://lore.kernel.org/lkml/20200630133935.11150-4-adrian.hunter@intel.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/intel-pt.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/tools/perf/util/intel-pt.c b/tools/perf/util/intel-pt.c index e4dd8bf610ce..cb3c1e569a2d 100644 --- a/tools/perf/util/intel-pt.c +++ b/tools/perf/util/intel-pt.c @@ -1735,6 +1735,7 @@ static int intel_pt_synth_pebs_sample(struct intel_pt_queue *ptq) u64 sample_type = evsel->core.attr.sample_type; u64 id = evsel->core.id[0]; u8 cpumode; + u64 regs[8 * sizeof(sample.intr_regs.mask)]; if (intel_pt_skip_event(pt)) return 0; @@ -1784,8 +1785,8 @@ static int intel_pt_synth_pebs_sample(struct intel_pt_queue *ptq) } if (sample_type & PERF_SAMPLE_REGS_INTR && - items->mask[INTEL_PT_GP_REGS_POS]) { - u64 regs[sizeof(sample.intr_regs.mask)]; + (items->mask[INTEL_PT_GP_REGS_POS] || + items->mask[INTEL_PT_XMM_POS])) { u64 regs_mask = evsel->core.attr.sample_regs_intr; u64 *pos; From bee9ca1c8a237ca178f281062bf162637071ab04 Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Fri, 3 Jul 2020 09:29:57 -0300 Subject: [PATCH 171/343] perf report TUI: Remove needless 'dummy' event from menu Fixing the common case of: perf record perf report And getting just the cycles events. We now have a 'dummy' event to get perf metadata events that take place while we synthesize metadata records for pre-existing processes by traversing procfs, so we always have this extra 'dummy' evsel, but we don't have to offer it as there will be no samples on it, remove this distraction. Cc: Adrian Hunter Cc: Ian Rogers Cc: Jiri Olsa Cc: Namhyung Kim Link: https://lore.kernel.org/lkml/20200706115452.GA2772@redhat.com/ Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/ui/browsers/hists.c | 19 ++++++++++++++++++- 1 file changed, 18 insertions(+), 1 deletion(-) diff --git a/tools/perf/ui/browsers/hists.c b/tools/perf/ui/browsers/hists.c index 4cd556c1276f..be9c4c0549bc 100644 --- a/tools/perf/ui/browsers/hists.c +++ b/tools/perf/ui/browsers/hists.c @@ -3603,6 +3603,23 @@ static int __perf_evlist__tui_browse_hists(struct evlist *evlist, hbt, warn_lost_event); } +static bool perf_evlist__single_entry(struct evlist *evlist) +{ + int nr_entries = evlist->core.nr_entries; + + if (nr_entries == 1) + return true; + + if (nr_entries == 2) { + struct evsel *last = evlist__last(evlist); + + if (evsel__is_dummy_event(last)) + return true; + } + + return false; +} + int perf_evlist__tui_browse_hists(struct evlist *evlist, const char *help, struct hist_browser_timer *hbt, float min_pcnt, @@ -3613,7 +3630,7 @@ int perf_evlist__tui_browse_hists(struct evlist *evlist, const char *help, int nr_entries = evlist->core.nr_entries; single_entry: - if (nr_entries == 1) { + if (perf_evlist__single_entry(evlist)) { struct evsel *first = evlist__first(evlist); return perf_evsel__hists_browse(first, nr_entries, help, From f8884711f78fa946041cf04492e218c377479a9c Mon Sep 17 00:00:00 2001 From: Manivannan Sadhasivam Date: Sun, 21 Jun 2020 08:23:30 +0530 Subject: [PATCH 172/343] mmc: owl-mmc: Get rid of of_match_ptr() macro Remove the 'of_match_ptr()' macro to fix the warning when CONFIG_OF is not selected. drivers/mmc/host/owl-mmc.c:677:34: warning: unused variable 'owl_mmc_of_match' [-Wunused-const-variable] Reported-by: kernel test robot Signed-off-by: Manivannan Sadhasivam Link: https://lore.kernel.org/r/20200621025330.10561-1-mani@kernel.org Fixes: ff65ffe46d28 ("mmc: Add Actions Semi Owl SoCs SD/MMC driver") Cc: stable@vger.kernel.org Signed-off-by: Ulf Hansson --- drivers/mmc/host/owl-mmc.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/mmc/host/owl-mmc.c b/drivers/mmc/host/owl-mmc.c index 5e20c099fe03..df43f42855e2 100644 --- a/drivers/mmc/host/owl-mmc.c +++ b/drivers/mmc/host/owl-mmc.c @@ -689,7 +689,7 @@ MODULE_DEVICE_TABLE(of, owl_mmc_of_match); static struct platform_driver owl_mmc_driver = { .driver = { .name = "owl_mmc", - .of_match_table = of_match_ptr(owl_mmc_of_match), + .of_match_table = owl_mmc_of_match, }, .probe = owl_mmc_probe, .remove = owl_mmc_remove, From 65752aef0a407e1ef17ec78a7fc31ba4e0b360f9 Mon Sep 17 00:00:00 2001 From: Yufen Yu Date: Fri, 3 Jul 2020 02:13:23 -0400 Subject: [PATCH 173/343] docs: block: update and fix tiny error for bfq The max value of blkio.bfq.weight is 1000, rather than 10000. And 'weights' have been remove from /sys/block/XXX/queue/iosched. Signed-off-by: Yufen Yu Acked-by: Paolo Valente Signed-off-by: Jens Axboe --- Documentation/block/bfq-iosched.rst | 9 +-------- 1 file changed, 1 insertion(+), 8 deletions(-) diff --git a/Documentation/block/bfq-iosched.rst b/Documentation/block/bfq-iosched.rst index 0d237d402860..19d4d1570cee 100644 --- a/Documentation/block/bfq-iosched.rst +++ b/Documentation/block/bfq-iosched.rst @@ -492,13 +492,6 @@ set max_budget to higher values than those to which BFQ would have set it with auto-tuning. An alternative way to achieve this goal is to just increase the value of timeout_sync, leaving max_budget equal to 0. -weights -------- - -Read-only parameter, used to show the weights of the currently active -BFQ queues. - - 4. Group scheduling with BFQ ============================ @@ -566,7 +559,7 @@ Parameters to set For each group, there is only the following parameter to set. weight (namely blkio.bfq.weight or io.bfq-weight): the weight of the -group inside its parent. Available values: 1..10000 (default 100). The +group inside its parent. Available values: 1..1000 (default 100). The linear mapping between ioprio and weights, described at the beginning of the tunable section, is still valid, but all weights higher than IOPRIO_BE_NR*10 are mapped to ioprio 0. From 68d237056e007c88031d80900cdba0945121a287 Mon Sep 17 00:00:00 2001 From: Marek Szyprowski Date: Tue, 30 Jun 2020 10:16:02 +0200 Subject: [PATCH 174/343] scatterlist: protect parameters of the sg_table related macros Add brackets to protect parameters of the recently added sg_table related macros from side-effects. Fixes: 709d6d73c756 ("scatterlist: add generic wrappers for iterating over sgtable objects") Signed-off-by: Marek Szyprowski Signed-off-by: Christoph Hellwig --- include/linux/scatterlist.h | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/include/linux/scatterlist.h b/include/linux/scatterlist.h index 4f922afb607a..45cf7b69d852 100644 --- a/include/linux/scatterlist.h +++ b/include/linux/scatterlist.h @@ -155,7 +155,7 @@ static inline void sg_set_buf(struct scatterlist *sg, const void *buf, * Loop over each sg element in the given sg_table object. */ #define for_each_sgtable_sg(sgt, sg, i) \ - for_each_sg(sgt->sgl, sg, sgt->orig_nents, i) + for_each_sg((sgt)->sgl, sg, (sgt)->orig_nents, i) /* * Loop over each sg element in the given *DMA mapped* sg_table object. @@ -163,7 +163,7 @@ static inline void sg_set_buf(struct scatterlist *sg, const void *buf, * of the each element. */ #define for_each_sgtable_dma_sg(sgt, sg, i) \ - for_each_sg(sgt->sgl, sg, sgt->nents, i) + for_each_sg((sgt)->sgl, sg, (sgt)->nents, i) /** * sg_chain - Chain two sglists together @@ -451,7 +451,7 @@ sg_page_iter_dma_address(struct sg_dma_page_iter *dma_iter) * See also for_each_sg_page(). In each loop it operates on PAGE_SIZE unit. */ #define for_each_sgtable_page(sgt, piter, pgoffset) \ - for_each_sg_page(sgt->sgl, piter, sgt->orig_nents, pgoffset) + for_each_sg_page((sgt)->sgl, piter, (sgt)->orig_nents, pgoffset) /** * for_each_sgtable_dma_page - iterate over the DMA mapped sg_table object @@ -465,7 +465,7 @@ sg_page_iter_dma_address(struct sg_dma_page_iter *dma_iter) * unit. */ #define for_each_sgtable_dma_page(sgt, dma_iter, pgoffset) \ - for_each_sg_dma_page(sgt->sgl, dma_iter, sgt->nents, pgoffset) + for_each_sg_dma_page((sgt)->sgl, dma_iter, (sgt)->nents, pgoffset) /* From 9dc829a135fb5927f1519de11286e2bbb79f5b66 Mon Sep 17 00:00:00 2001 From: Xie He Date: Sun, 5 Jul 2020 17:45:21 -0700 Subject: [PATCH 175/343] drivers/net/wan/lapbether: Fixed the value of hard_header_len When this driver transmits data, first this driver will remove a pseudo header of 1 byte, then the lapb module will prepend the LAPB header of 2 or 3 bytes, then this driver will prepend a length field of 2 bytes, then the underlying Ethernet device will prepend its own header. So, the header length required should be: -1 + 3 + 2 + "the header length needed by the underlying device". This patch fixes kernel panic when this driver is used with AF_PACKET SOCK_DGRAM sockets. Signed-off-by: Xie He Signed-off-by: David S. Miller --- drivers/net/wan/lapbether.c | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) diff --git a/drivers/net/wan/lapbether.c b/drivers/net/wan/lapbether.c index e30d91a38cfb..284832314f31 100644 --- a/drivers/net/wan/lapbether.c +++ b/drivers/net/wan/lapbether.c @@ -303,7 +303,6 @@ static void lapbeth_setup(struct net_device *dev) dev->netdev_ops = &lapbeth_netdev_ops; dev->needs_free_netdev = true; dev->type = ARPHRD_X25; - dev->hard_header_len = 3; dev->mtu = 1000; dev->addr_len = 0; } @@ -324,6 +323,14 @@ static int lapbeth_new_device(struct net_device *dev) if (!ndev) goto out; + /* When transmitting data: + * first this driver removes a pseudo header of 1 byte, + * then the lapb module prepends an LAPB header of at most 3 bytes, + * then this driver prepends a length field of 2 bytes, + * then the underlying Ethernet device prepends its own header. + */ + ndev->hard_header_len = -1 + 3 + 2 + dev->hard_header_len; + lapbeth = netdev_priv(ndev); lapbeth->axdev = ndev; From 7c8b1e855f94f88a0c569be6309fc8d5c8844cd1 Mon Sep 17 00:00:00 2001 From: Andre Edich Date: Mon, 6 Jul 2020 10:39:34 +0200 Subject: [PATCH 176/343] smsc95xx: check return value of smsc95xx_reset The return value of the function smsc95xx_reset() must be checked to avoid returning false success from the function smsc95xx_bind(). Fixes: 2f7ca802bdae2 ("net: Add SMSC LAN9500 USB2.0 10/100 ethernet adapter driver") Signed-off-by: Andre Edich Signed-off-by: Parthiban Veerasooran Signed-off-by: David S. Miller --- drivers/net/usb/smsc95xx.c | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/drivers/net/usb/smsc95xx.c b/drivers/net/usb/smsc95xx.c index 3cf4dc3433f9..eb404bb74e18 100644 --- a/drivers/net/usb/smsc95xx.c +++ b/drivers/net/usb/smsc95xx.c @@ -1287,6 +1287,8 @@ static int smsc95xx_bind(struct usbnet *dev, struct usb_interface *intf) /* Init all registers */ ret = smsc95xx_reset(dev); + if (ret) + goto free_pdata; /* detect device revision as different features may be available */ ret = smsc95xx_read_reg(dev, ID_REV, &val); @@ -1317,6 +1319,10 @@ static int smsc95xx_bind(struct usbnet *dev, struct usb_interface *intf) schedule_delayed_work(&pdata->carrier_check, CARRIER_CHECK_DELAY); return 0; + +free_pdata: + kfree(pdata); + return ret; } static void smsc95xx_unbind(struct usbnet *dev, struct usb_interface *intf) From 3ed58f96a70b85ef646d5427258f677f1395b62f Mon Sep 17 00:00:00 2001 From: Andre Edich Date: Mon, 6 Jul 2020 10:39:35 +0200 Subject: [PATCH 177/343] smsc95xx: avoid memory leak in smsc95xx_bind In a case where the ID_REV register read is failed, the memory for a private data structure has to be freed before returning error from the function smsc95xx_bind. Fixes: bbd9f9ee69242 ("smsc95xx: add wol support for more frame types") Signed-off-by: Andre Edich Signed-off-by: Parthiban Veerasooran Signed-off-by: David S. Miller --- drivers/net/usb/smsc95xx.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/net/usb/smsc95xx.c b/drivers/net/usb/smsc95xx.c index eb404bb74e18..bb4ccbda031a 100644 --- a/drivers/net/usb/smsc95xx.c +++ b/drivers/net/usb/smsc95xx.c @@ -1293,7 +1293,8 @@ static int smsc95xx_bind(struct usbnet *dev, struct usb_interface *intf) /* detect device revision as different features may be available */ ret = smsc95xx_read_reg(dev, ID_REV, &val); if (ret < 0) - return ret; + goto free_pdata; + val >>= 16; pdata->chip_id = val; pdata->mdix_ctrl = get_mdix_status(dev->net); From bb3d866882c280a85e8950d4d72af1e294d2e69c Mon Sep 17 00:00:00 2001 From: Huazhong Tan Date: Mon, 6 Jul 2020 19:25:59 +0800 Subject: [PATCH 178/343] net: hns3: check reset pending after FLR prepare If there is a PF reset pending before FLR prepare, FLR's preparatory work will not fail, but the FLR rebuild procedure will fail for this pending. So this PF reset pending should be handled in the FLR preparatory. Fixes: 8627bdedc435 ("net: hns3: refactor the precedure of PF FLR") Signed-off-by: Huazhong Tan Signed-off-by: David S. Miller --- drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c index 96bfad52630d..d6bfdc6520df 100644 --- a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c +++ b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c @@ -9859,7 +9859,7 @@ static void hclge_flr_prepare(struct hnae3_ae_dev *ae_dev) set_bit(HCLGE_STATE_RST_HANDLING, &hdev->state); hdev->reset_type = HNAE3_FLR_RESET; ret = hclge_reset_prepare(hdev); - if (ret) { + if (ret || hdev->reset_pending) { dev_err(&hdev->pdev->dev, "fail to prepare FLR, ret=%d\n", ret); if (hdev->reset_pending || From cddd5648926d7a6e84526dadd8bfb21609a14fb7 Mon Sep 17 00:00:00 2001 From: Huazhong Tan Date: Mon, 6 Jul 2020 19:26:00 +0800 Subject: [PATCH 179/343] net: hns3: fix for mishandle of asserting VF reset fail When asserts VF reset fail, flag HCLGEVF_STATE_CMD_DISABLE and handshake status should not set, otherwise the retry will fail. So adds a check for asserting VF reset and returns directly when fails. Fixes: ef5f8e507ec9 ("net: hns3: stop handling command queue while resetting VF") Signed-off-by: Huazhong Tan Signed-off-by: David S. Miller --- drivers/net/ethernet/hisilicon/hns3/hns3vf/hclgevf_main.c | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3vf/hclgevf_main.c b/drivers/net/ethernet/hisilicon/hns3/hns3vf/hclgevf_main.c index 1b9578d0bd80..a10b022d1951 100644 --- a/drivers/net/ethernet/hisilicon/hns3/hns3vf/hclgevf_main.c +++ b/drivers/net/ethernet/hisilicon/hns3/hns3vf/hclgevf_main.c @@ -1793,6 +1793,11 @@ static int hclgevf_reset_prepare_wait(struct hclgevf_dev *hdev) if (hdev->reset_type == HNAE3_VF_FUNC_RESET) { hclgevf_build_send_msg(&send_msg, HCLGE_MBX_RESET, 0); ret = hclgevf_send_mbx_msg(hdev, &send_msg, true, NULL, 0); + if (ret) { + dev_err(&hdev->pdev->dev, + "failed to assert VF reset, ret = %d\n", ret); + return ret; + } hdev->rst_stats.vf_func_rst_cnt++; } From e22b5e728bbb179b912d3a3cd5c25894a89a26a2 Mon Sep 17 00:00:00 2001 From: Huazhong Tan Date: Mon, 6 Jul 2020 19:26:01 +0800 Subject: [PATCH 180/343] net: hns3: add a missing uninit debugfs when unload driver When unloading driver, if flag HNS3_NIC_STATE_INITED has been already cleared, the debugfs will not be uninitialized, so fix it. Fixes: b2292360bb2a ("net: hns3: Add debugfs framework registration") Signed-off-by: Huazhong Tan Signed-off-by: David S. Miller --- drivers/net/ethernet/hisilicon/hns3/hns3_enet.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3_enet.c b/drivers/net/ethernet/hisilicon/hns3/hns3_enet.c index b14f2abc2425..c38f3bbe7d97 100644 --- a/drivers/net/ethernet/hisilicon/hns3/hns3_enet.c +++ b/drivers/net/ethernet/hisilicon/hns3/hns3_enet.c @@ -4127,9 +4127,8 @@ static void hns3_client_uninit(struct hnae3_handle *handle, bool reset) hns3_put_ring_config(priv); - hns3_dbg_uninit(handle); - out_netdev_free: + hns3_dbg_uninit(handle); free_netdev(netdev); } From a06656211304fec653c1931c2ca6d644013b5bbb Mon Sep 17 00:00:00 2001 From: Yonglong Liu Date: Mon, 6 Jul 2020 19:26:02 +0800 Subject: [PATCH 181/343] net: hns3: fix use-after-free when doing self test Enable promisc mode of PF, set VF link state to enable, and run iperf of the VF, then do self test of the PF. The self test will fail with a low frequency, and may cause a use-after-free problem. [ 87.142126] selftest:000004a0: 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 ................ [ 87.159722] ================================================================== [ 87.174187] BUG: KASAN: use-after-free in hex_dump_to_buffer+0x140/0x608 [ 87.187600] Read of size 1 at addr ffff003b22828000 by task ethtool/1186 [ 87.201012] [ 87.203978] CPU: 7 PID: 1186 Comm: ethtool Not tainted 5.5.0-rc4-gfd51c473-dirty #4 [ 87.219306] Hardware name: Huawei TaiShan 2280 V2/BC82AMDA, BIOS TA BIOS 2280-A CS V2.B160.01 01/15/2020 [ 87.238292] Call trace: [ 87.243173] dump_backtrace+0x0/0x280 [ 87.250491] show_stack+0x24/0x30 [ 87.257114] dump_stack+0xe8/0x140 [ 87.263911] print_address_description.isra.8+0x70/0x380 [ 87.274538] __kasan_report+0x12c/0x230 [ 87.282203] kasan_report+0xc/0x18 [ 87.288999] __asan_load1+0x60/0x68 [ 87.295969] hex_dump_to_buffer+0x140/0x608 [ 87.304332] print_hex_dump+0x140/0x1e0 [ 87.312000] hns3_lb_check_skb_data+0x168/0x170 [ 87.321060] hns3_clean_rx_ring+0xa94/0xfe0 [ 87.329422] hns3_self_test+0x708/0x8c0 The length of packet sent by the selftest process is only 128 + 14 bytes, and the min buffer size of a BD is 256 bytes, and the receive process will make sure the packet sent by the selftest process is in the linear part, so only check the linear part in hns3_lb_check_skb_data(). So fix this use-after-free by using skb_headlen() to dump skb->data instead of skb->len. Fixes: c39c4d98dc65 ("net: hns3: Add mac loopback selftest support in hns3 driver") Signed-off-by: Yonglong Liu Signed-off-by: Huazhong Tan Signed-off-by: David S. Miller --- drivers/net/ethernet/hisilicon/hns3/hns3_ethtool.c | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3_ethtool.c b/drivers/net/ethernet/hisilicon/hns3/hns3_ethtool.c index 6b1545f982aa..2622e04e8eed 100644 --- a/drivers/net/ethernet/hisilicon/hns3/hns3_ethtool.c +++ b/drivers/net/ethernet/hisilicon/hns3/hns3_ethtool.c @@ -180,18 +180,21 @@ static void hns3_lb_check_skb_data(struct hns3_enet_ring *ring, { struct hns3_enet_tqp_vector *tqp_vector = ring->tqp_vector; unsigned char *packet = skb->data; + u32 len = skb_headlen(skb); u32 i; - for (i = 0; i < skb->len; i++) + len = min_t(u32, len, HNS3_NIC_LB_TEST_PACKET_SIZE); + + for (i = 0; i < len; i++) if (packet[i] != (unsigned char)(i & 0xff)) break; /* The packet is correctly received */ - if (i == skb->len) + if (i == HNS3_NIC_LB_TEST_PACKET_SIZE) tqp_vector->rx_group.total_packets++; else print_hex_dump(KERN_ERR, "selftest:", DUMP_PREFIX_OFFSET, 16, 1, - skb->data, skb->len, true); + skb->data, len, true); dev_kfree_skb_any(skb); } From 0b78c9e8c19b57bf2081cd432841c7fe3f800633 Mon Sep 17 00:00:00 2001 From: Pengfei Xu Date: Fri, 26 Jun 2020 11:40:52 +0800 Subject: [PATCH 182/343] selftests: tpm: upgrade TPM2 tests from Python 2 to Python 3 Python 2 is no longer supported by the Python upstream project, so upgrade TPM2 tests to Python 3. Fixed minor merge conflicts Shuah Khan Signed-off-by: Pengfei Xu Reviewed-by: Jarkko Sakkinen Tested-by: Jarkko Sakkinen Signed-off-by: Shuah Khan --- tools/testing/selftests/tpm2/test_smoke.sh | 4 +- tools/testing/selftests/tpm2/test_space.sh | 2 +- tools/testing/selftests/tpm2/tpm2.py | 56 +++++++++++----------- tools/testing/selftests/tpm2/tpm2_tests.py | 39 +++++++-------- 4 files changed, 52 insertions(+), 49 deletions(-) diff --git a/tools/testing/selftests/tpm2/test_smoke.sh b/tools/testing/selftests/tpm2/test_smoke.sh index 1334e301d2a0..3e5ff29ee1dd 100755 --- a/tools/testing/selftests/tpm2/test_smoke.sh +++ b/tools/testing/selftests/tpm2/test_smoke.sh @@ -6,5 +6,5 @@ ksft_skip=4 [ -e /dev/tpm0 ] || exit $ksft_skip -python -m unittest -v tpm2_tests.SmokeTest -python -m unittest -v tpm2_tests.AsyncTest +python3 -m unittest -v tpm2_tests.SmokeTest +python3 -m unittest -v tpm2_tests.AsyncTest diff --git a/tools/testing/selftests/tpm2/test_space.sh b/tools/testing/selftests/tpm2/test_space.sh index 00259cb746cf..04c47b13fe8a 100755 --- a/tools/testing/selftests/tpm2/test_space.sh +++ b/tools/testing/selftests/tpm2/test_space.sh @@ -6,4 +6,4 @@ ksft_skip=4 [ -e /dev/tpmrm0 ] || exit $ksft_skip -python -m unittest -v tpm2_tests.SpaceTest +python3 -m unittest -v tpm2_tests.SpaceTest diff --git a/tools/testing/selftests/tpm2/tpm2.py b/tools/testing/selftests/tpm2/tpm2.py index d0fcb66a88a6..f34486cd7342 100644 --- a/tools/testing/selftests/tpm2/tpm2.py +++ b/tools/testing/selftests/tpm2/tpm2.py @@ -247,14 +247,14 @@ class ProtocolError(Exception): class AuthCommand(object): """TPMS_AUTH_COMMAND""" - def __init__(self, session_handle=TPM2_RS_PW, nonce='', session_attributes=0, - hmac=''): + def __init__(self, session_handle=TPM2_RS_PW, nonce=bytes(), + session_attributes=0, hmac=bytes()): self.session_handle = session_handle self.nonce = nonce self.session_attributes = session_attributes self.hmac = hmac - def __str__(self): + def __bytes__(self): fmt = '>I H%us B H%us' % (len(self.nonce), len(self.hmac)) return struct.pack(fmt, self.session_handle, len(self.nonce), self.nonce, self.session_attributes, len(self.hmac), @@ -268,11 +268,11 @@ class AuthCommand(object): class SensitiveCreate(object): """TPMS_SENSITIVE_CREATE""" - def __init__(self, user_auth='', data=''): + def __init__(self, user_auth=bytes(), data=bytes()): self.user_auth = user_auth self.data = data - def __str__(self): + def __bytes__(self): fmt = '>H%us H%us' % (len(self.user_auth), len(self.data)) return struct.pack(fmt, len(self.user_auth), self.user_auth, len(self.data), self.data) @@ -296,8 +296,9 @@ class Public(object): return '>HHIH%us%usH%us' % \ (len(self.auth_policy), len(self.parameters), len(self.unique)) - def __init__(self, object_type, name_alg, object_attributes, auth_policy='', - parameters='', unique=''): + def __init__(self, object_type, name_alg, object_attributes, + auth_policy=bytes(), parameters=bytes(), + unique=bytes()): self.object_type = object_type self.name_alg = name_alg self.object_attributes = object_attributes @@ -305,7 +306,7 @@ class Public(object): self.parameters = parameters self.unique = unique - def __str__(self): + def __bytes__(self): return struct.pack(self.__fmt(), self.object_type, self.name_alg, @@ -343,7 +344,7 @@ def get_algorithm(name): def hex_dump(d): d = [format(ord(x), '02x') for x in d] - d = [d[i: i + 16] for i in xrange(0, len(d), 16)] + d = [d[i: i + 16] for i in range(0, len(d), 16)] d = [' '.join(x) for x in d] d = os.linesep.join(d) @@ -401,7 +402,7 @@ class Client: pcrsel_len = max((i >> 3) + 1, 3) pcrsel = [0] * pcrsel_len pcrsel[i >> 3] = 1 << (i & 7) - pcrsel = ''.join(map(chr, pcrsel)) + pcrsel = ''.join(map(chr, pcrsel)).encode() fmt = '>HII IHB%us' % (pcrsel_len) cmd = struct.pack(fmt, @@ -443,7 +444,7 @@ class Client: TPM2_CC_PCR_EXTEND, i, len(auth_cmd), - str(auth_cmd), + bytes(auth_cmd), 1, bank_alg, dig) self.send_cmd(cmd) @@ -457,7 +458,7 @@ class Client: TPM2_RH_NULL, TPM2_RH_NULL, 16, - '\0' * 16, + ('\0' * 16).encode(), 0, session_type, TPM2_ALG_NULL, @@ -472,7 +473,7 @@ class Client: for i in pcrs: pcr = self.read_pcr(i, bank_alg) - if pcr == None: + if pcr is None: return None x += pcr @@ -489,7 +490,7 @@ class Client: pcrsel = [0] * pcrsel_len for i in pcrs: pcrsel[i >> 3] |= 1 << (i & 7) - pcrsel = ''.join(map(chr, pcrsel)) + pcrsel = ''.join(map(chr, pcrsel)).encode() fmt = '>HII IH%usIHB3s' % ds cmd = struct.pack(fmt, @@ -497,7 +498,8 @@ class Client: struct.calcsize(fmt), TPM2_CC_POLICY_PCR, handle, - len(dig), str(dig), + len(dig), + bytes(dig), 1, bank_alg, pcrsel_len, pcrsel) @@ -534,7 +536,7 @@ class Client: self.send_cmd(cmd) - def create_root_key(self, auth_value = ''): + def create_root_key(self, auth_value = bytes()): attributes = \ Public.FIXED_TPM | \ Public.FIXED_PARENT | \ @@ -570,11 +572,11 @@ class Client: TPM2_CC_CREATE_PRIMARY, TPM2_RH_OWNER, len(auth_cmd), - str(auth_cmd), + bytes(auth_cmd), len(sensitive), - str(sensitive), + bytes(sensitive), len(public), - str(public), + bytes(public), 0, 0) return struct.unpack('>I', self.send_cmd(cmd)[10:14])[0] @@ -587,7 +589,7 @@ class Client: attributes = 0 if not policy_dig: attributes |= Public.USER_WITH_AUTH - policy_dig = '' + policy_dig = bytes() auth_cmd = AuthCommand() sensitive = SensitiveCreate(user_auth=auth_value, data=data) @@ -608,11 +610,11 @@ class Client: TPM2_CC_CREATE, parent_key, len(auth_cmd), - str(auth_cmd), + bytes(auth_cmd), len(sensitive), - str(sensitive), + bytes(sensitive), len(public), - str(public), + bytes(public), 0, 0) rsp = self.send_cmd(cmd) @@ -635,7 +637,7 @@ class Client: TPM2_CC_LOAD, parent_key, len(auth_cmd), - str(auth_cmd), + bytes(auth_cmd), blob) data_handle = struct.unpack('>I', self.send_cmd(cmd)[10:14])[0] @@ -653,7 +655,7 @@ class Client: TPM2_CC_UNSEAL, data_handle, len(auth_cmd), - str(auth_cmd)) + bytes(auth_cmd)) try: rsp = self.send_cmd(cmd) @@ -675,7 +677,7 @@ class Client: TPM2_CC_DICTIONARY_ATTACK_LOCK_RESET, TPM2_RH_LOCKOUT, len(auth_cmd), - str(auth_cmd)) + bytes(auth_cmd)) self.send_cmd(cmd) @@ -693,7 +695,7 @@ class Client: more_data, cap, cnt = struct.unpack('>BII', rsp[:9]) rsp = rsp[9:] - for i in xrange(0, cnt): + for i in range(0, cnt): handle = struct.unpack('>I', rsp[:4])[0] handles.append(handle) rsp = rsp[4:] diff --git a/tools/testing/selftests/tpm2/tpm2_tests.py b/tools/testing/selftests/tpm2/tpm2_tests.py index 728be7c69b76..9d764306887b 100644 --- a/tools/testing/selftests/tpm2/tpm2_tests.py +++ b/tools/testing/selftests/tpm2/tpm2_tests.py @@ -20,8 +20,8 @@ class SmokeTest(unittest.TestCase): self.client.close() def test_seal_with_auth(self): - data = 'X' * 64 - auth = 'A' * 15 + data = ('X' * 64).encode() + auth = ('A' * 15).encode() blob = self.client.seal(self.root_key, data, auth, None) result = self.client.unseal(self.root_key, blob, auth, None) @@ -30,8 +30,8 @@ class SmokeTest(unittest.TestCase): def test_seal_with_policy(self): handle = self.client.start_auth_session(tpm2.TPM2_SE_TRIAL) - data = 'X' * 64 - auth = 'A' * 15 + data = ('X' * 64).encode() + auth = ('A' * 15).encode() pcrs = [16] try: @@ -58,14 +58,15 @@ class SmokeTest(unittest.TestCase): self.assertEqual(data, result) def test_unseal_with_wrong_auth(self): - data = 'X' * 64 - auth = 'A' * 20 + data = ('X' * 64).encode() + auth = ('A' * 20).encode() rc = 0 blob = self.client.seal(self.root_key, data, auth, None) try: - result = self.client.unseal(self.root_key, blob, auth[:-1] + 'B', None) - except ProtocolError, e: + result = self.client.unseal(self.root_key, blob, + auth[:-1] + 'B'.encode(), None) + except ProtocolError as e: rc = e.rc self.assertEqual(rc, tpm2.TPM2_RC_AUTH_FAIL) @@ -73,8 +74,8 @@ class SmokeTest(unittest.TestCase): def test_unseal_with_wrong_policy(self): handle = self.client.start_auth_session(tpm2.TPM2_SE_TRIAL) - data = 'X' * 64 - auth = 'A' * 17 + data = ('X' * 64).encode() + auth = ('A' * 17).encode() pcrs = [16] try: @@ -91,7 +92,7 @@ class SmokeTest(unittest.TestCase): # This should succeed. ds = tpm2.get_digest_size(tpm2.TPM2_ALG_SHA1) - self.client.extend_pcr(1, 'X' * ds) + self.client.extend_pcr(1, ('X' * ds).encode()) handle = self.client.start_auth_session(tpm2.TPM2_SE_POLICY) @@ -108,7 +109,7 @@ class SmokeTest(unittest.TestCase): # Then, extend a PCR that is part of the policy and try to unseal. # This should fail. - self.client.extend_pcr(16, 'X' * ds) + self.client.extend_pcr(16, ('X' * ds).encode()) handle = self.client.start_auth_session(tpm2.TPM2_SE_POLICY) @@ -119,7 +120,7 @@ class SmokeTest(unittest.TestCase): self.client.policy_password(handle) result = self.client.unseal(self.root_key, blob, auth, handle) - except ProtocolError, e: + except ProtocolError as e: rc = e.rc self.client.flush_context(handle) except: @@ -130,13 +131,13 @@ class SmokeTest(unittest.TestCase): def test_seal_with_too_long_auth(self): ds = tpm2.get_digest_size(tpm2.TPM2_ALG_SHA1) - data = 'X' * 64 - auth = 'A' * (ds + 1) + data = ('X' * 64).encode() + auth = ('A' * (ds + 1)).encode() rc = 0 try: blob = self.client.seal(self.root_key, data, auth, None) - except ProtocolError, e: + except ProtocolError as e: rc = e.rc self.assertEqual(rc, tpm2.TPM2_RC_SIZE) @@ -152,7 +153,7 @@ class SmokeTest(unittest.TestCase): 0xDEADBEEF) self.client.send_cmd(cmd) - except IOError, e: + except IOError as e: rejected = True except: pass @@ -212,7 +213,7 @@ class SmokeTest(unittest.TestCase): self.client.tpm.write(cmd) rsp = self.client.tpm.read() - except IOError, e: + except IOError as e: # read the response rsp = self.client.tpm.read() rejected = True @@ -283,7 +284,7 @@ class SpaceTest(unittest.TestCase): rc = 0 try: space1.send_cmd(cmd) - except ProtocolError, e: + except ProtocolError as e: rc = e.rc self.assertEqual(rc, tpm2.TPM2_RC_COMMAND_CODE | From 34fe5a1cf95c3f114068fc16d919c9cf4b00e428 Mon Sep 17 00:00:00 2001 From: David Ahern Date: Mon, 6 Jul 2020 11:45:07 -0600 Subject: [PATCH 183/343] ipv6: fib6_select_path can not use out path for nexthop objects Brian reported a crash in IPv6 code when using rpfilter with a setup running FRR and external nexthop objects. The root cause of the crash is fib6_select_path setting fib6_nh in the result to NULL because of an improper check for nexthop objects. More specifically, rpfilter invokes ip6_route_lookup with flowi6_oif set causing fib6_select_path to be called with have_oif_match set. fib6_select_path has early check on have_oif_match and jumps to the out label which presumes a builtin fib6_nh. This path is invalid for nexthop objects; for external nexthops fib6_select_path needs to just return if the fib6_nh has already been set in the result otherwise it returns after the call to nexthop_path_fib6_result. Update the check on have_oif_match to not bail on external nexthops. Update selftests for this problem. Fixes: f88d8ea67fbd ("ipv6: Plumb support for nexthop object in a fib6_info") Reported-by: Brian Rak Signed-off-by: David Ahern Signed-off-by: David S. Miller --- net/ipv6/route.c | 5 ++++- tools/testing/selftests/net/fib_nexthops.sh | 13 +++++++++++++ 2 files changed, 17 insertions(+), 1 deletion(-) diff --git a/net/ipv6/route.c b/net/ipv6/route.c index 82cbb46a2a4f..ea0be7cf3d93 100644 --- a/net/ipv6/route.c +++ b/net/ipv6/route.c @@ -431,9 +431,12 @@ void fib6_select_path(const struct net *net, struct fib6_result *res, struct fib6_info *sibling, *next_sibling; struct fib6_info *match = res->f6i; - if ((!match->fib6_nsiblings && !match->nh) || have_oif_match) + if (!match->nh && (!match->fib6_nsiblings || have_oif_match)) goto out; + if (match->nh && have_oif_match && res->nh) + return; + /* We might have already computed the hash for ICMPv6 errors. In such * case it will always be non-zero. Otherwise now is the time to do it. */ diff --git a/tools/testing/selftests/net/fib_nexthops.sh b/tools/testing/selftests/net/fib_nexthops.sh index dee567f7576a..22dc2f3d428b 100755 --- a/tools/testing/selftests/net/fib_nexthops.sh +++ b/tools/testing/selftests/net/fib_nexthops.sh @@ -747,6 +747,19 @@ ipv6_fcnal_runtime() run_cmd "$IP nexthop add id 86 via 2001:db8:91::2 dev veth1" run_cmd "$IP ro add 2001:db8:101::1/128 nhid 81" + # rpfilter and default route + $IP nexthop flush >/dev/null 2>&1 + run_cmd "ip netns exec me ip6tables -t mangle -I PREROUTING 1 -m rpfilter --invert -j DROP" + run_cmd "$IP nexthop add id 91 via 2001:db8:91::2 dev veth1" + run_cmd "$IP nexthop add id 92 via 2001:db8:92::2 dev veth3" + run_cmd "$IP nexthop add id 93 group 91/92" + run_cmd "$IP -6 ro add default nhid 91" + run_cmd "ip netns exec me ping -c1 -w1 2001:db8:101::1" + log_test $? 0 "Nexthop with default route and rpfilter" + run_cmd "$IP -6 ro replace default nhid 93" + run_cmd "ip netns exec me ping -c1 -w1 2001:db8:101::1" + log_test $? 0 "Nexthop with multipath default route and rpfilter" + # TO-DO: # existing route with old nexthop; append route with new nexthop # existing route with old nexthop; replace route with new From 3c01655ac82eb6d1cc2cfe9507031f1b5e0a6df1 Mon Sep 17 00:00:00 2001 From: Paolo Bonzini Date: Mon, 22 Jun 2020 20:07:37 -0400 Subject: [PATCH 184/343] kselftest: ksft_test_num return type should be unsigned MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Fixes a compiler warning: In file included from sync_test.c:37: ../kselftest.h: In function ‘ksft_print_cnts’: ../kselftest.h:78:16: warning: comparison of integer expressions of different signedness: ‘unsigned int’ and ‘int’ [-Wsign-compare] if (ksft_plan != ksft_test_num()) ^~ Signed-off-by: Paolo Bonzini Signed-off-by: Shuah Khan --- tools/testing/selftests/kselftest.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tools/testing/selftests/kselftest.h b/tools/testing/selftests/kselftest.h index 0ac49d91a260..862eee734553 100644 --- a/tools/testing/selftests/kselftest.h +++ b/tools/testing/selftests/kselftest.h @@ -36,7 +36,7 @@ struct ksft_count { static struct ksft_count ksft_cnt; static unsigned int ksft_plan; -static inline int ksft_test_num(void) +static inline unsigned int ksft_test_num(void) { return ksft_cnt.ksft_pass + ksft_cnt.ksft_fail + ksft_cnt.ksft_xfail + ksft_cnt.ksft_xpass + From 7dfbf8a07cf8c936b0d6cc810df6ae7923954d5b Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Tue, 30 Jun 2020 16:27:24 +0100 Subject: [PATCH 185/343] drm/i915: Skip stale object handle for debugfs per-file-stats As we close a handle GEM object, we update the drm_file's idr with an error^W NULL pointer to indicate the in-progress closure, and finally removing it. If we read the idr directly, we may then see an invalid object pointer, and in our debugfs per_file_stats() we therefore need to protect against the entry being invalid. [ 1016.651637] RIP: 0010:per_file_stats+0xe/0x16e [ 1016.651646] Code: d2 41 0f b6 8e 69 8c 00 00 48 89 df 48 c7 c6 7b 74 8c be 31 c0 e8 0c 89 cf ff eb d2 0f 1f 44 00 00 55 48 89 e5 41 57 41 56 53 <8b> 06 85 c0 0f 84 4d 01 00 00 49 89 d6 48 89 f3 3d ff ff ff 7f 73 [ 1016.651651] RSP: 0018:ffffad3a01337ba0 EFLAGS: 00010293 [ 1016.651656] RAX: 0000000000000018 RBX: ffff96fe040d65e0 RCX: 0000000000000002 [ 1016.651660] RDX: ffffad3a01337c50 RSI: 0000000000000000 RDI: 00000000000001e8 [ 1016.651663] RBP: ffffad3a01337bb8 R08: 0000000000000000 R09: 00000000000001c0 [ 1016.651667] R10: 0000000000000000 R11: ffffffffbdbe5fce R12: 0000000000000000 [ 1016.651671] R13: ffffffffbdbe5fce R14: ffffad3a01337c50 R15: 0000000000000001 [ 1016.651676] FS: 00007a597e2d7480(0000) GS:ffff96ff3bb00000(0000) knlGS:0000000000000000 [ 1016.651680] CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 [ 1016.651683] CR2: 0000000000000000 CR3: 0000000171fc2001 CR4: 00000000003606e0 [ 1016.651687] DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000 [ 1016.651690] DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400 [ 1016.651693] Call Trace: [ 1016.651693] Call Trace: [ 1016.651703] idr_for_each+0x8a/0xe8 [ 1016.651711] i915_gem_object_info+0x2a3/0x3eb [ 1016.651720] seq_read+0x162/0x3ca [ 1016.651727] full_proxy_read+0x5b/0x8d [ 1016.651733] __vfs_read+0x45/0x1bb [ 1016.651741] vfs_read+0xc9/0x15e [ 1016.651746] ksys_read+0x7e/0xde [ 1016.651752] do_syscall_64+0x54/0x68 [ 1016.651758] entry_SYSCALL_64_after_hwframe+0x44/0xa9 Reported-by: Guenter Roeck Fixes: a8c15954d64a ("drm/i915: Protect debugfs per_file_stats with RCU lock") Signed-off-by: Chris Wilson Cc: Mika Kuoppala Cc: Guenter Roeck Cc: stable@vger.kernel.org Reviewed-by: Mika Kuoppala Link: https://patchwork.freedesktop.org/patch/msgid/20200630152724.3734-1-chris@chris-wilson.co.uk (cherry picked from commit c1b9fd3d310177b31621d5e661f06885869cae12) Signed-off-by: Rodrigo Vivi --- drivers/gpu/drm/i915/i915_debugfs.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/i915/i915_debugfs.c b/drivers/gpu/drm/i915/i915_debugfs.c index bca036ac6621..e7532e7d74e9 100644 --- a/drivers/gpu/drm/i915/i915_debugfs.c +++ b/drivers/gpu/drm/i915/i915_debugfs.c @@ -230,7 +230,7 @@ static int per_file_stats(int id, void *ptr, void *data) struct file_stats *stats = data; struct i915_vma *vma; - if (!kref_get_unless_zero(&obj->base.refcount)) + if (IS_ERR_OR_NULL(obj) || !kref_get_unless_zero(&obj->base.refcount)) return 0; stats->count++; From 9eb0463cfe65d826c97fa26b904a64f52c94300d Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ville=20Syrj=C3=A4l=C3=A4?= Date: Wed, 29 Apr 2020 13:10:25 +0300 Subject: [PATCH 186/343] drm/i915/fbc: Fix fence_y_offset handling MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The current fence_y_offset calculation is broken. I think it more or less used to do the right thing, but then I changed the plane code to put the final x/y source offsets back into the src rectangle so now it's just subtraacting the same value from itself. The code would never have worked if we allowed the framebuffer to have a non-zero offset. Let's do this in a better way by just calculating the fence_y_offset from the final plane surface offset. Note that we don't align the plane surface address to fence rows so with horizontal panning there's often a horizontal offset from the fence start to the surface address as well. We have no way to tell the hardware about that so we just ignore it. Based on some quick tests the invlidation still happens correctly. I presume due to the invalidation nuking at least the full line (or a segment of multiple lines). Fixes: 54d4d719fa11 ("drm/i915: Overcome display engine stride limits via GTT remapping") Signed-off-by: Ville Syrjälä Link: https://patchwork.freedesktop.org/patch/msgid/20200429101034.8208-4-ville.syrjala@linux.intel.com Reviewed-by: Matt Roper (cherry picked from commit 5331889b5ffb11d6257953e418291a9f04c02bed) Signed-off-by: Rodrigo Vivi --- drivers/gpu/drm/i915/display/intel_display.c | 11 +++++++ drivers/gpu/drm/i915/display/intel_display.h | 1 + drivers/gpu/drm/i915/display/intel_fbc.c | 32 ++++++-------------- drivers/gpu/drm/i915/i915_drv.h | 6 ++-- 4 files changed, 25 insertions(+), 25 deletions(-) diff --git a/drivers/gpu/drm/i915/display/intel_display.c b/drivers/gpu/drm/i915/display/intel_display.c index 9ea1a397d1b5..26996e1839e2 100644 --- a/drivers/gpu/drm/i915/display/intel_display.c +++ b/drivers/gpu/drm/i915/display/intel_display.c @@ -3822,6 +3822,17 @@ skl_check_main_ccs_coordinates(struct intel_plane_state *plane_state, return true; } +unsigned int +intel_plane_fence_y_offset(const struct intel_plane_state *plane_state) +{ + int x = 0, y = 0; + + intel_plane_adjust_aligned_offset(&x, &y, plane_state, 0, + plane_state->color_plane[0].offset, 0); + + return y; +} + static int skl_check_main_surface(struct intel_plane_state *plane_state) { struct drm_i915_private *dev_priv = to_i915(plane_state->uapi.plane->dev); diff --git a/drivers/gpu/drm/i915/display/intel_display.h b/drivers/gpu/drm/i915/display/intel_display.h index efb4da205ea2..3a06f72c9859 100644 --- a/drivers/gpu/drm/i915/display/intel_display.h +++ b/drivers/gpu/drm/i915/display/intel_display.h @@ -608,6 +608,7 @@ unsigned int i9xx_plane_max_stride(struct intel_plane *plane, u32 pixel_format, u64 modifier, unsigned int rotation); int bdw_get_pipemisc_bpp(struct intel_crtc *crtc); +unsigned int intel_plane_fence_y_offset(const struct intel_plane_state *plane_state); struct intel_display_error_state * intel_display_capture_error_state(struct drm_i915_private *dev_priv); diff --git a/drivers/gpu/drm/i915/display/intel_fbc.c b/drivers/gpu/drm/i915/display/intel_fbc.c index 1c26673acb2d..a65d9d8b79a7 100644 --- a/drivers/gpu/drm/i915/display/intel_fbc.c +++ b/drivers/gpu/drm/i915/display/intel_fbc.c @@ -47,19 +47,6 @@ #include "intel_fbc.h" #include "intel_frontbuffer.h" -/* - * In some platforms where the CRTC's x:0/y:0 coordinates doesn't match the - * frontbuffer's x:0/y:0 coordinates we lie to the hardware about the plane's - * origin so the x and y offsets can actually fit the registers. As a - * consequence, the fence doesn't really start exactly at the display plane - * address we program because it starts at the real start of the buffer, so we - * have to take this into consideration here. - */ -static unsigned int get_crtc_fence_y_offset(struct intel_fbc *fbc) -{ - return fbc->state_cache.plane.y - fbc->state_cache.plane.adjusted_y; -} - /* * For SKL+, the plane source size used by the hardware is based on the value we * write to the PLANE_SIZE register. For BDW-, the hardware looks at the value @@ -141,7 +128,7 @@ static void i8xx_fbc_activate(struct drm_i915_private *dev_priv) fbc_ctl2 |= FBC_CTL_CPU_FENCE; intel_de_write(dev_priv, FBC_CONTROL2, fbc_ctl2); intel_de_write(dev_priv, FBC_FENCE_OFF, - params->crtc.fence_y_offset); + params->fence_y_offset); } /* enable it... */ @@ -175,7 +162,7 @@ static void g4x_fbc_activate(struct drm_i915_private *dev_priv) if (params->fence_id >= 0) { dpfc_ctl |= DPFC_CTL_FENCE_EN | params->fence_id; intel_de_write(dev_priv, DPFC_FENCE_YOFF, - params->crtc.fence_y_offset); + params->fence_y_offset); } else { intel_de_write(dev_priv, DPFC_FENCE_YOFF, 0); } @@ -243,7 +230,7 @@ static void ilk_fbc_activate(struct drm_i915_private *dev_priv) intel_de_write(dev_priv, SNB_DPFC_CTL_SA, SNB_CPU_FENCE_ENABLE | params->fence_id); intel_de_write(dev_priv, DPFC_CPU_FENCE_OFFSET, - params->crtc.fence_y_offset); + params->fence_y_offset); } } else { if (IS_GEN(dev_priv, 6)) { @@ -253,7 +240,7 @@ static void ilk_fbc_activate(struct drm_i915_private *dev_priv) } intel_de_write(dev_priv, ILK_DPFC_FENCE_YOFF, - params->crtc.fence_y_offset); + params->fence_y_offset); /* enable it... */ intel_de_write(dev_priv, ILK_DPFC_CONTROL, dpfc_ctl | DPFC_CTL_EN); @@ -320,7 +307,7 @@ static void gen7_fbc_activate(struct drm_i915_private *dev_priv) intel_de_write(dev_priv, SNB_DPFC_CTL_SA, SNB_CPU_FENCE_ENABLE | params->fence_id); intel_de_write(dev_priv, DPFC_CPU_FENCE_OFFSET, - params->crtc.fence_y_offset); + params->fence_y_offset); } else if (dev_priv->ggtt.num_fences) { intel_de_write(dev_priv, SNB_DPFC_CTL_SA, 0); intel_de_write(dev_priv, DPFC_CPU_FENCE_OFFSET, 0); @@ -631,8 +618,8 @@ static bool rotation_is_valid(struct drm_i915_private *dev_priv, /* * For some reason, the hardware tracking starts looking at whatever we * programmed as the display plane base address register. It does not look at - * the X and Y offset registers. That's why we look at the crtc->adjusted{x,y} - * variables instead of just looking at the pipe/plane size. + * the X and Y offset registers. That's why we include the src x/y offsets + * instead of just looking at the plane size. */ static bool intel_fbc_hw_tracking_covers_screen(struct intel_crtc *crtc) { @@ -705,7 +692,6 @@ static void intel_fbc_update_state_cache(struct intel_crtc *crtc, cache->plane.src_h = drm_rect_height(&plane_state->uapi.src) >> 16; cache->plane.adjusted_x = plane_state->color_plane[0].x; cache->plane.adjusted_y = plane_state->color_plane[0].y; - cache->plane.y = plane_state->uapi.src.y1 >> 16; cache->plane.pixel_blend_mode = plane_state->hw.pixel_blend_mode; @@ -713,6 +699,8 @@ static void intel_fbc_update_state_cache(struct intel_crtc *crtc, cache->fb.stride = fb->pitches[0]; cache->fb.modifier = fb->modifier; + cache->fence_y_offset = intel_plane_fence_y_offset(plane_state); + drm_WARN_ON(&dev_priv->drm, plane_state->flags & PLANE_HAS_FENCE && !plane_state->vma->fence); @@ -883,10 +871,10 @@ static void intel_fbc_get_reg_params(struct intel_crtc *crtc, memset(params, 0, sizeof(*params)); params->fence_id = cache->fence_id; + params->fence_y_offset = cache->fence_y_offset; params->crtc.pipe = crtc->pipe; params->crtc.i9xx_plane = to_intel_plane(crtc->base.primary)->i9xx_plane; - params->crtc.fence_y_offset = get_crtc_fence_y_offset(fbc); params->fb.format = cache->fb.format; params->fb.stride = cache->fb.stride; diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h index adb9bf34cf97..f79f118bf192 100644 --- a/drivers/gpu/drm/i915/i915_drv.h +++ b/drivers/gpu/drm/i915/i915_drv.h @@ -410,8 +410,6 @@ struct intel_fbc { int adjusted_x; int adjusted_y; - int y; - u16 pixel_blend_mode; } plane; @@ -420,6 +418,8 @@ struct intel_fbc { unsigned int stride; u64 modifier; } fb; + + unsigned int fence_y_offset; u16 gen9_wa_cfb_stride; s8 fence_id; } state_cache; @@ -435,7 +435,6 @@ struct intel_fbc { struct { enum pipe pipe; enum i9xx_plane_id i9xx_plane; - unsigned int fence_y_offset; } crtc; struct { @@ -444,6 +443,7 @@ struct intel_fbc { } fb; int cfb_size; + unsigned int fence_y_offset; u16 gen9_wa_cfb_stride; s8 fence_id; bool plane_visible; From 42723673a193d5f8e30dba6ea9826d42262a502b Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Thu, 2 Jul 2020 09:32:03 +0100 Subject: [PATCH 187/343] drm/i915: Drop vm.ref for duplicate vma on construction As we allow for parallel threads to create the same vma instance concurrently, and we only filter out the duplicates upon reacquiring the spinlock for the rbtree, we have to free the loser of the constructors' race. When freeing, we should also drop any resource references acquired for the redundant vma. Fixes: 2850748ef876 ("drm/i915: Pull i915_vma_pin under the vm->mutex") Signed-off-by: Chris Wilson Cc: Tvrtko Ursulin Cc: # v5.5+ Reviewed-by: Tvrtko Ursulin Link: https://patchwork.freedesktop.org/patch/msgid/20200702083225.20044-1-chris@chris-wilson.co.uk (cherry picked from commit 2377427cdd2b7514eb4c40241cf5c4dec63c1bec) Signed-off-by: Rodrigo Vivi --- drivers/gpu/drm/i915/i915_vma.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/gpu/drm/i915/i915_vma.c b/drivers/gpu/drm/i915/i915_vma.c index fc14ebf9a0b7..2d60fd1f3637 100644 --- a/drivers/gpu/drm/i915/i915_vma.c +++ b/drivers/gpu/drm/i915/i915_vma.c @@ -198,6 +198,7 @@ vma_create(struct drm_i915_gem_object *obj, cmp = i915_vma_compare(pos, vm, view); if (cmp == 0) { spin_unlock(&obj->vma.lock); + i915_vm_put(vm); i915_vma_free(vma); return pos; } From cf1976b11372cac3b57fbae1831f66a4486355d3 Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Thu, 2 Jul 2020 22:10:15 +0100 Subject: [PATCH 188/343] drm/i915: Also drop vm.ref along error paths for vma construction Not only do we need to release the vm.ref we acquired for the vma on the duplicate insert branch, but also for the normal error paths, so roll them all into one. Reported-by: Andi Shyti Suggested-by: Andi Shyti Fixes: 2850748ef876 ("drm/i915: Pull i915_vma_pin under the vm->mutex") Signed-off-by: Chris Wilson Cc: Andi Shyti Cc: # v5.5+ Reviewed-by: Andi Shyti Link: https://patchwork.freedesktop.org/patch/msgid/20200702211015.29604-1-chris@chris-wilson.co.uk (cherry picked from commit 03fca66b7a36b52da8915341eee388267f6d5b73) Signed-off-by: Rodrigo Vivi --- drivers/gpu/drm/i915/i915_vma.c | 16 ++++++---------- 1 file changed, 6 insertions(+), 10 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_vma.c b/drivers/gpu/drm/i915/i915_vma.c index 2d60fd1f3637..1f9cd33b35cb 100644 --- a/drivers/gpu/drm/i915/i915_vma.c +++ b/drivers/gpu/drm/i915/i915_vma.c @@ -104,6 +104,7 @@ vma_create(struct drm_i915_gem_object *obj, struct i915_address_space *vm, const struct i915_ggtt_view *view) { + struct i915_vma *pos = ERR_PTR(-E2BIG); struct i915_vma *vma; struct rb_node *rb, **p; @@ -184,7 +185,6 @@ vma_create(struct drm_i915_gem_object *obj, rb = NULL; p = &obj->vma.tree.rb_node; while (*p) { - struct i915_vma *pos; long cmp; rb = *p; @@ -196,17 +196,12 @@ vma_create(struct drm_i915_gem_object *obj, * and dispose of ours. */ cmp = i915_vma_compare(pos, vm, view); - if (cmp == 0) { - spin_unlock(&obj->vma.lock); - i915_vm_put(vm); - i915_vma_free(vma); - return pos; - } - if (cmp < 0) p = &rb->rb_right; - else + else if (cmp > 0) p = &rb->rb_left; + else + goto err_unlock; } rb_link_node(&vma->obj_node, rb, p); rb_insert_color(&vma->obj_node, &obj->vma.tree); @@ -229,8 +224,9 @@ vma_create(struct drm_i915_gem_object *obj, err_unlock: spin_unlock(&obj->vma.lock); err_vma: + i915_vm_put(vm); i915_vma_free(vma); - return ERR_PTR(-E2BIG); + return pos; } static struct i915_vma * From 786a2aa281f4c4ba424ea8b8ea1e85ab62c4a57c Mon Sep 17 00:00:00 2001 From: Jarkko Sakkinen Date: Mon, 6 Jul 2020 23:53:42 +0300 Subject: [PATCH 189/343] Revert commit e918e570415c ("tpm_tis: Remove the HID IFX0102") Removing IFX0102 from tpm_tis was not a right move because both tpm_tis and tpm_infineon use the same device ID. Revert the commit and add a remark about a bug caused by commit 93e1b7d42e1e ("[PATCH] tpm: add HID module parameter"). Fixes: e918e570415c ("tpm_tis: Remove the HID IFX0102") Reported-by: Peter Huewe Reviewed-by: Jerry Snitselaar Signed-off-by: Jarkko Sakkinen --- drivers/char/tpm/tpm_tis.c | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/drivers/char/tpm/tpm_tis.c b/drivers/char/tpm/tpm_tis.c index c58ea10fc92f..0b214963539d 100644 --- a/drivers/char/tpm/tpm_tis.c +++ b/drivers/char/tpm/tpm_tis.c @@ -235,9 +235,17 @@ static int tpm_tis_pnp_init(struct pnp_dev *pnp_dev, return tpm_tis_init(&pnp_dev->dev, &tpm_info); } +/* + * There is a known bug caused by 93e1b7d42e1e ("[PATCH] tpm: add HID module + * parameter"). This commit added IFX0102 device ID, which is also used by + * tpm_infineon but ignored to add quirks to probe which driver ought to be + * used. + */ + static struct pnp_device_id tpm_pnp_tbl[] = { {"PNP0C31", 0}, /* TPM */ {"ATM1200", 0}, /* Atmel */ + {"IFX0102", 0}, /* Infineon */ {"BCM0101", 0}, /* Broadcom */ {"BCM0102", 0}, /* Broadcom */ {"NSC1200", 0}, /* National */ From ad155712bb1ea2151944cf06a0e08c315c70c1e3 Mon Sep 17 00:00:00 2001 From: xidongwang Date: Sun, 5 Jul 2020 20:27:38 -0700 Subject: [PATCH 190/343] ALSA: opl3: fix infoleak in opl3 MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The stack object “info” in snd_opl3_ioctl() has a leaking problem. It has 2 padding bytes which are not initialized and leaked via “copy_to_user”. Signed-off-by: xidongwang Cc: Link: https://lore.kernel.org/r/1594006058-30362-1-git-send-email-wangxidong_97@163.com Signed-off-by: Takashi Iwai --- sound/drivers/opl3/opl3_synth.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/sound/drivers/opl3/opl3_synth.c b/sound/drivers/opl3/opl3_synth.c index e69a4ef0d6bd..08c10ac9d6c8 100644 --- a/sound/drivers/opl3/opl3_synth.c +++ b/sound/drivers/opl3/opl3_synth.c @@ -91,6 +91,8 @@ int snd_opl3_ioctl(struct snd_hwdep * hw, struct file *file, { struct snd_dm_fm_info info; + memset(&info, 0, sizeof(info)); + info.fm_mode = opl3->fm_mode; info.rhythm = opl3->rhythm; if (copy_to_user(argp, &info, sizeof(struct snd_dm_fm_info))) From 56275036d8185f92eceac7479d48b858ee3dab84 Mon Sep 17 00:00:00 2001 From: Kai Vehmanen Date: Fri, 3 Jul 2020 18:38:17 +0300 Subject: [PATCH 191/343] ALSA: hda/hdmi: fix failures at PCM open on Intel ICL and later When HDMI PCM devices are opened in a specific order, with at least one HDMI/DP receiver connected, ALSA PCM open fails to -EBUSY on the connected monitor, on recent Intel platforms (ICL/JSL and newer). While this is not a typical sequence, at least Pulseaudio does this every time when it is started, to discover the available PCMs. The rootcause is an invalid assumption in hdmi_add_pin(), where the total number of converters is assumed to be known at the time the function is called. On older Intel platforms this held true, but after ICL/JSL, the order how pins and converters are in the subnode list as returned by snd_hda_get_sub_nodes(), was changed. As a result, information for some converters was not stored to per_pin->mux_nids. And this means some pins cannot be connected to all converters, and application instead gets -EBUSY instead at open. The assumption that converters are always before pins in the subnode list, is not really a valid one. Fix the problem in hdmi_parse_codec() by introducing separate loops for discovering converters and pins. BugLink: https://github.com/thesofproject/linux/issues/1978 BugLink: https://github.com/thesofproject/linux/issues/2216 BugLink: https://github.com/thesofproject/linux/issues/2217 Reviewed-by: Ranjani Sridharan Reviewed-by: Pierre-Louis Bossart Signed-off-by: Kai Vehmanen Link: https://lore.kernel.org/r/20200703153818.2808592-1-kai.vehmanen@linux.intel.com Signed-off-by: Takashi Iwai --- sound/pci/hda/patch_hdmi.c | 36 +++++++++++++++++++++++------------- 1 file changed, 23 insertions(+), 13 deletions(-) diff --git a/sound/pci/hda/patch_hdmi.c b/sound/pci/hda/patch_hdmi.c index e2b21ef5d7d1..295cc5a989d5 100644 --- a/sound/pci/hda/patch_hdmi.c +++ b/sound/pci/hda/patch_hdmi.c @@ -1804,33 +1804,43 @@ static int hdmi_add_cvt(struct hda_codec *codec, hda_nid_t cvt_nid) static int hdmi_parse_codec(struct hda_codec *codec) { - hda_nid_t nid; + hda_nid_t start_nid; + unsigned int caps; int i, nodes; - nodes = snd_hda_get_sub_nodes(codec, codec->core.afg, &nid); - if (!nid || nodes < 0) { + nodes = snd_hda_get_sub_nodes(codec, codec->core.afg, &start_nid); + if (!start_nid || nodes < 0) { codec_warn(codec, "HDMI: failed to get afg sub nodes\n"); return -EINVAL; } - for (i = 0; i < nodes; i++, nid++) { - unsigned int caps; - unsigned int type; + /* + * hdmi_add_pin() assumes total amount of converters to + * be known, so first discover all converters + */ + for (i = 0; i < nodes; i++) { + hda_nid_t nid = start_nid + i; caps = get_wcaps(codec, nid); - type = get_wcaps_type(caps); if (!(caps & AC_WCAP_DIGITAL)) continue; - switch (type) { - case AC_WID_AUD_OUT: + if (get_wcaps_type(caps) == AC_WID_AUD_OUT) hdmi_add_cvt(codec, nid); - break; - case AC_WID_PIN: + } + + /* discover audio pins */ + for (i = 0; i < nodes; i++) { + hda_nid_t nid = start_nid + i; + + caps = get_wcaps(codec, nid); + + if (!(caps & AC_WCAP_DIGITAL)) + continue; + + if (get_wcaps_type(caps) == AC_WID_PIN) hdmi_add_pin(codec, nid); - break; - } } return 0; From 90670fdfcf210f997c18490922bd3603b248f349 Mon Sep 17 00:00:00 2001 From: Kai Vehmanen Date: Fri, 3 Jul 2020 18:38:18 +0300 Subject: [PATCH 192/343] ALSA: hda/hdmi: improve debug traces for stream lookups The HDMI codec driver has two debug traces printed from different functions but with identical message content: "HDMI: hinfo 000000006a6b84d9 not registered" Fix this duplication and also add a bit more context in addition to raw object pointer, to help analysis of kernel logs. Reviewed-by: Ranjani Sridharan Reviewed-by: Pierre-Louis Bossart Signed-off-by: Kai Vehmanen Link: https://lore.kernel.org/r/20200703153818.2808592-2-kai.vehmanen@linux.intel.com Signed-off-by: Takashi Iwai --- sound/pci/hda/patch_hdmi.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/sound/pci/hda/patch_hdmi.c b/sound/pci/hda/patch_hdmi.c index 295cc5a989d5..41eaa89660c3 100644 --- a/sound/pci/hda/patch_hdmi.c +++ b/sound/pci/hda/patch_hdmi.c @@ -259,7 +259,7 @@ static int hinfo_to_pcm_index(struct hda_codec *codec, if (get_pcm_rec(spec, pcm_idx)->stream == hinfo) return pcm_idx; - codec_warn(codec, "HDMI: hinfo %p not registered\n", hinfo); + codec_warn(codec, "HDMI: hinfo %p not tied to a PCM\n", hinfo); return -EINVAL; } @@ -277,7 +277,8 @@ static int hinfo_to_pin_index(struct hda_codec *codec, return pin_idx; } - codec_dbg(codec, "HDMI: hinfo %p not registered\n", hinfo); + codec_dbg(codec, "HDMI: hinfo %p (pcm %d) not registered\n", hinfo, + hinfo_to_pcm_index(codec, hinfo)); return -EINVAL; } From 9774dc218bb628974dcbc76412f970e9258e5f27 Mon Sep 17 00:00:00 2001 From: Benjamin Poirier Date: Fri, 3 Jul 2020 17:00:04 +0900 Subject: [PATCH 193/343] ALSA: hda/realtek - Fix Lenovo Thinkpad X1 Carbon 7th quirk subdevice id 1) In snd_hda_pick_fixup(), quirks are first matched by PCI SSID and then, if there is no match, by codec SSID. The Lenovo "ThinkPad X1 Carbon 7th" has an audio chip with PCI SSID 0x2292 and codec SSID 0x2293[1]. Therefore, fix the quirk meant for that device to match on .subdevice == 0x2292. 2) The "Thinkpad X1 Yoga 7th" does not exist. The companion product to the Carbon 7th is the Yoga 4th. That device has an audio chip with PCI SSID 0x2292 and codec SSID 0x2292[2]. Given the behavior of snd_hda_pick_fixup(), it is not possible to have a separate quirk for the Yoga based on SSID. Therefore, merge the quirks meant for the Carbon and Yoga. This preserves the current behavior for the Yoga. [1] This is the case on my own machine and can also be checked here https://github.com/linuxhw/LsPCI/tree/master/Notebook/Lenovo/ThinkPad https://gist.github.com/hamidzr/dd81e429dc86f4327ded7a2030e7d7d9#gistcomment-3225701 [2] https://github.com/linuxhw/LsPCI/tree/master/Convertible/Lenovo/ThinkPad https://gist.github.com/hamidzr/dd81e429dc86f4327ded7a2030e7d7d9#gistcomment-3176355 Fixes: d2cd795c4ece ("ALSA: hda - fixup for the bass speaker on Lenovo Carbon X1 7th gen") Fixes: 54a6a7dc107d ("ALSA: hda/realtek - Add quirk for the bass speaker on Lenovo Yoga X1 7th gen") Cc: Jaroslav Kysela Cc: Kailang Yang Tested-by: Vincent Bernat Tested-by: Even Brenden Signed-off-by: Benjamin Poirier Cc: Link: https://lore.kernel.org/r/20200703080005.8942-2-benjamin.poirier@gmail.com Signed-off-by: Takashi Iwai --- sound/pci/hda/patch_realtek.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/sound/pci/hda/patch_realtek.c b/sound/pci/hda/patch_realtek.c index 737ef82a75fd..d6dd2aea146d 100644 --- a/sound/pci/hda/patch_realtek.c +++ b/sound/pci/hda/patch_realtek.c @@ -7571,8 +7571,7 @@ static const struct snd_pci_quirk alc269_fixup_tbl[] = { SND_PCI_QUIRK(0x17aa, 0x224c, "Thinkpad", ALC298_FIXUP_TPT470_DOCK), SND_PCI_QUIRK(0x17aa, 0x224d, "Thinkpad", ALC298_FIXUP_TPT470_DOCK), SND_PCI_QUIRK(0x17aa, 0x225d, "Thinkpad T480", ALC269_FIXUP_LIMIT_INT_MIC_BOOST), - SND_PCI_QUIRK(0x17aa, 0x2292, "Thinkpad X1 Yoga 7th", ALC285_FIXUP_THINKPAD_HEADSET_JACK), - SND_PCI_QUIRK(0x17aa, 0x2293, "Thinkpad X1 Carbon 7th", ALC285_FIXUP_THINKPAD_HEADSET_JACK), + SND_PCI_QUIRK(0x17aa, 0x2292, "Thinkpad X1 Carbon 7th", ALC285_FIXUP_THINKPAD_HEADSET_JACK), SND_PCI_QUIRK(0x17aa, 0x22be, "Thinkpad X1 Carbon 8th", ALC285_FIXUP_THINKPAD_HEADSET_JACK), SND_PCI_QUIRK(0x17aa, 0x30bb, "ThinkCentre AIO", ALC233_FIXUP_LENOVO_LINE2_MIC_HOTKEY), SND_PCI_QUIRK(0x17aa, 0x30e2, "ThinkCentre AIO", ALC233_FIXUP_LENOVO_LINE2_MIC_HOTKEY), From 8eae7e9b3967f08efaa4d70403aec513cbe45ad0 Mon Sep 17 00:00:00 2001 From: Jian-Hong Pan Date: Mon, 6 Jul 2020 15:18:25 +0800 Subject: [PATCH 194/343] ALSA: hda/realtek - Enable audio jacks of Acer vCopperbox with ALC269VC The Acer desktop vCopperbox with ALC269VC cannot detect the MIC of headset, the line out and internal speaker until ALC269VC_FIXUP_ACER_VCOPPERBOX_PINS quirk applied. Signed-off-by: Jian-Hong Pan Signed-off-by: Chris Chiu Cc: Link: https://lore.kernel.org/r/20200706071826.39726-1-jian-hong@endlessm.com Signed-off-by: Takashi Iwai --- sound/pci/hda/patch_realtek.c | 13 +++++++++++++ 1 file changed, 13 insertions(+) diff --git a/sound/pci/hda/patch_realtek.c b/sound/pci/hda/patch_realtek.c index d6dd2aea146d..3bc93309208c 100644 --- a/sound/pci/hda/patch_realtek.c +++ b/sound/pci/hda/patch_realtek.c @@ -6149,6 +6149,7 @@ enum { ALC236_FIXUP_HP_MUTE_LED, ALC298_FIXUP_SAMSUNG_HEADPHONE_VERY_QUIET, ALC295_FIXUP_ASUS_MIC_NO_PRESENCE, + ALC269VC_FIXUP_ACER_VCOPPERBOX_PINS, }; static const struct hda_fixup alc269_fixups[] = { @@ -7327,6 +7328,17 @@ static const struct hda_fixup alc269_fixups[] = { .chained = true, .chain_id = ALC269_FIXUP_HEADSET_MODE }, + [ALC269VC_FIXUP_ACER_VCOPPERBOX_PINS] = { + .type = HDA_FIXUP_PINS, + .v.pins = (const struct hda_pintbl[]) { + { 0x14, 0x90100120 }, /* use as internal speaker */ + { 0x18, 0x02a111f0 }, /* use as headset mic, without its own jack detect */ + { 0x1a, 0x01011020 }, /* use as line out */ + { }, + }, + .chained = true, + .chain_id = ALC269_FIXUP_HEADSET_MIC + }, }; static const struct snd_pci_quirk alc269_fixup_tbl[] = { @@ -7346,6 +7358,7 @@ static const struct snd_pci_quirk alc269_fixup_tbl[] = { SND_PCI_QUIRK(0x1025, 0x1099, "Acer Aspire E5-523G", ALC255_FIXUP_ACER_MIC_NO_PRESENCE), SND_PCI_QUIRK(0x1025, 0x110e, "Acer Aspire ES1-432", ALC255_FIXUP_ACER_MIC_NO_PRESENCE), SND_PCI_QUIRK(0x1025, 0x1246, "Acer Predator Helios 500", ALC299_FIXUP_PREDATOR_SPK), + SND_PCI_QUIRK(0x1025, 0x1247, "Acer vCopperbox", ALC269VC_FIXUP_ACER_VCOPPERBOX_PINS), SND_PCI_QUIRK(0x1025, 0x128f, "Acer Veriton Z6860G", ALC286_FIXUP_ACER_AIO_HEADSET_MIC), SND_PCI_QUIRK(0x1025, 0x1290, "Acer Veriton Z4860G", ALC286_FIXUP_ACER_AIO_HEADSET_MIC), SND_PCI_QUIRK(0x1025, 0x1291, "Acer Veriton Z4660G", ALC286_FIXUP_ACER_AIO_HEADSET_MIC), From 6e15d1261d522d1d222f8f89b23c6966905e9049 Mon Sep 17 00:00:00 2001 From: Jian-Hong Pan Date: Mon, 6 Jul 2020 15:18:27 +0800 Subject: [PATCH 195/343] ALSA: hda/realtek: Enable headset mic of Acer C20-820 with ALC269VC The Acer Aspire C20-820 AIO's audio (1025:1065) with ALC269VC can't detect the headset microphone until ALC269VC_FIXUP_ACER_HEADSET_MIC quirk maps the NID 0x18 as the headset mic pin. Signed-off-by: Jian-Hong Pan Signed-off-by: Daniel Drake Cc: Link: https://lore.kernel.org/r/20200706071826.39726-2-jian-hong@endlessm.com Signed-off-by: Takashi Iwai --- sound/pci/hda/patch_realtek.c | 11 +++++++++++ 1 file changed, 11 insertions(+) diff --git a/sound/pci/hda/patch_realtek.c b/sound/pci/hda/patch_realtek.c index 3bc93309208c..bd4739a9e969 100644 --- a/sound/pci/hda/patch_realtek.c +++ b/sound/pci/hda/patch_realtek.c @@ -6150,6 +6150,7 @@ enum { ALC298_FIXUP_SAMSUNG_HEADPHONE_VERY_QUIET, ALC295_FIXUP_ASUS_MIC_NO_PRESENCE, ALC269VC_FIXUP_ACER_VCOPPERBOX_PINS, + ALC269VC_FIXUP_ACER_HEADSET_MIC, }; static const struct hda_fixup alc269_fixups[] = { @@ -7339,6 +7340,15 @@ static const struct hda_fixup alc269_fixups[] = { .chained = true, .chain_id = ALC269_FIXUP_HEADSET_MIC }, + [ALC269VC_FIXUP_ACER_HEADSET_MIC] = { + .type = HDA_FIXUP_PINS, + .v.pins = (const struct hda_pintbl[]) { + { 0x18, 0x02a11030 }, /* use as headset mic */ + { } + }, + .chained = true, + .chain_id = ALC269_FIXUP_HEADSET_MIC + }, }; static const struct snd_pci_quirk alc269_fixup_tbl[] = { @@ -7354,6 +7364,7 @@ static const struct snd_pci_quirk alc269_fixup_tbl[] = { SND_PCI_QUIRK(0x1025, 0x0775, "Acer Aspire E1-572", ALC271_FIXUP_HP_GATE_MIC_JACK_E1_572), SND_PCI_QUIRK(0x1025, 0x079b, "Acer Aspire V5-573G", ALC282_FIXUP_ASPIRE_V5_PINS), SND_PCI_QUIRK(0x1025, 0x102b, "Acer Aspire C24-860", ALC286_FIXUP_ACER_AIO_MIC_NO_PRESENCE), + SND_PCI_QUIRK(0x1025, 0x1065, "Acer Aspire C20-820", ALC269VC_FIXUP_ACER_HEADSET_MIC), SND_PCI_QUIRK(0x1025, 0x106d, "Acer Cloudbook 14", ALC283_FIXUP_CHROME_BOOK), SND_PCI_QUIRK(0x1025, 0x1099, "Acer Aspire E5-523G", ALC255_FIXUP_ACER_MIC_NO_PRESENCE), SND_PCI_QUIRK(0x1025, 0x110e, "Acer Aspire ES1-432", ALC255_FIXUP_ACER_MIC_NO_PRESENCE), From 781c90c034d994c6a4e2badf189128a95ed864c2 Mon Sep 17 00:00:00 2001 From: Jian-Hong Pan Date: Mon, 6 Jul 2020 15:18:29 +0800 Subject: [PATCH 196/343] ALSA: hda/realtek: Enable headset mic of Acer Veriton N4660G with ALC269VC The Acer Veriton N4660G desktop's audio (1025:1248) with ALC269VC cannot detect the headset microphone until ALC269VC_FIXUP_ACER_MIC_NO_PRESENCE quirk maps the NID 0x18 as the headset mic pin. Signed-off-by: Jian-Hong Pan Cc: Link: https://lore.kernel.org/r/20200706071826.39726-3-jian-hong@endlessm.com Signed-off-by: Takashi Iwai --- sound/pci/hda/patch_realtek.c | 11 +++++++++++ 1 file changed, 11 insertions(+) diff --git a/sound/pci/hda/patch_realtek.c b/sound/pci/hda/patch_realtek.c index bd4739a9e969..194ffa8c66ce 100644 --- a/sound/pci/hda/patch_realtek.c +++ b/sound/pci/hda/patch_realtek.c @@ -6151,6 +6151,7 @@ enum { ALC295_FIXUP_ASUS_MIC_NO_PRESENCE, ALC269VC_FIXUP_ACER_VCOPPERBOX_PINS, ALC269VC_FIXUP_ACER_HEADSET_MIC, + ALC269VC_FIXUP_ACER_MIC_NO_PRESENCE, }; static const struct hda_fixup alc269_fixups[] = { @@ -7349,6 +7350,15 @@ static const struct hda_fixup alc269_fixups[] = { .chained = true, .chain_id = ALC269_FIXUP_HEADSET_MIC }, + [ALC269VC_FIXUP_ACER_MIC_NO_PRESENCE] = { + .type = HDA_FIXUP_PINS, + .v.pins = (const struct hda_pintbl[]) { + { 0x18, 0x01a11130 }, /* use as headset mic, without its own jack detect */ + { } + }, + .chained = true, + .chain_id = ALC269_FIXUP_HEADSET_MIC + }, }; static const struct snd_pci_quirk alc269_fixup_tbl[] = { @@ -7370,6 +7380,7 @@ static const struct snd_pci_quirk alc269_fixup_tbl[] = { SND_PCI_QUIRK(0x1025, 0x110e, "Acer Aspire ES1-432", ALC255_FIXUP_ACER_MIC_NO_PRESENCE), SND_PCI_QUIRK(0x1025, 0x1246, "Acer Predator Helios 500", ALC299_FIXUP_PREDATOR_SPK), SND_PCI_QUIRK(0x1025, 0x1247, "Acer vCopperbox", ALC269VC_FIXUP_ACER_VCOPPERBOX_PINS), + SND_PCI_QUIRK(0x1025, 0x1248, "Acer Veriton N4660G", ALC269VC_FIXUP_ACER_MIC_NO_PRESENCE), SND_PCI_QUIRK(0x1025, 0x128f, "Acer Veriton Z6860G", ALC286_FIXUP_ACER_AIO_HEADSET_MIC), SND_PCI_QUIRK(0x1025, 0x1290, "Acer Veriton Z4860G", ALC286_FIXUP_ACER_AIO_HEADSET_MIC), SND_PCI_QUIRK(0x1025, 0x1291, "Acer Veriton Z4660G", ALC286_FIXUP_ACER_AIO_HEADSET_MIC), From e337bf19f6af38d5c3fa6d06cd594e0f890ca1ac Mon Sep 17 00:00:00 2001 From: Hector Martin Date: Thu, 2 Jul 2020 16:14:33 +0900 Subject: [PATCH 197/343] ALSA: usb-audio: add quirk for MacroSilicon MS2109 These devices claim to be 96kHz mono, but actually are 48kHz stereo with swapped channels and unaligned transfers. Cc: stable@vger.kernel.org Signed-off-by: Hector Martin Link: https://lore.kernel.org/r/20200702071433.237843-1-marcan@marcan.st Signed-off-by: Takashi Iwai --- sound/usb/quirks-table.h | 52 ++++++++++++++++++++++++++++++++++++++++ 1 file changed, 52 insertions(+) diff --git a/sound/usb/quirks-table.h b/sound/usb/quirks-table.h index 4ec491011b19..9092cc0aa807 100644 --- a/sound/usb/quirks-table.h +++ b/sound/usb/quirks-table.h @@ -3633,4 +3633,56 @@ ALC1220_VB_DESKTOP(0x26ce, 0x0a01), /* Asrock TRX40 Creator */ } }, +/* + * MacroSilicon MS2109 based HDMI capture cards + * + * These claim 96kHz 1ch in the descriptors, but are actually 48kHz 2ch. + * They also need QUIRK_AUDIO_ALIGN_TRANSFER, which makes one wonder if + * they pretend to be 96kHz mono as a workaround for stereo being broken + * by that... + * + * They also have swapped L-R channels, but that's for userspace to deal + * with. + */ +{ + USB_DEVICE(0x534d, 0x2109), + .driver_info = (unsigned long) &(const struct snd_usb_audio_quirk) { + .vendor_name = "MacroSilicon", + .product_name = "MS2109", + .ifnum = QUIRK_ANY_INTERFACE, + .type = QUIRK_COMPOSITE, + .data = &(const struct snd_usb_audio_quirk[]) { + { + .ifnum = 2, + .type = QUIRK_AUDIO_ALIGN_TRANSFER, + }, + { + .ifnum = 2, + .type = QUIRK_AUDIO_STANDARD_MIXER, + }, + { + .ifnum = 3, + .type = QUIRK_AUDIO_FIXED_ENDPOINT, + .data = &(const struct audioformat) { + .formats = SNDRV_PCM_FMTBIT_S16_LE, + .channels = 2, + .iface = 3, + .altsetting = 1, + .altset_idx = 1, + .attributes = 0, + .endpoint = 0x82, + .ep_attr = USB_ENDPOINT_XFER_ISOC | + USB_ENDPOINT_SYNC_ASYNC, + .rates = SNDRV_PCM_RATE_CONTINUOUS, + .rate_min = 48000, + .rate_max = 48000, + } + }, + { + .ifnum = -1 + } + } + } +}, + #undef USB_DEVICE_VENDOR_SPEC From b6a1e78b96a5d7f312f08b3a470eb911ab5feec0 Mon Sep 17 00:00:00 2001 From: Pavel Hofman Date: Fri, 3 Jul 2020 12:04:33 +0200 Subject: [PATCH 198/343] ALSA: usb-audio: Add implicit feedback quirk for RTX6001 USB Audio analyzer RTX6001 uses the same implicit feedback quirk as other XMOS-based devices. Signed-off-by: Pavel Hofman Tested-by: Pavel Hofman Cc: Link: https://lore.kernel.org/r/822f0f20-1886-6884-a6b2-d11c685cbafa@ivitera.com Signed-off-by: Takashi Iwai --- sound/usb/pcm.c | 1 + 1 file changed, 1 insertion(+) diff --git a/sound/usb/pcm.c b/sound/usb/pcm.c index a777d36c4f5a..40b7cd13fed9 100644 --- a/sound/usb/pcm.c +++ b/sound/usb/pcm.c @@ -368,6 +368,7 @@ static int set_sync_ep_implicit_fb_quirk(struct snd_usb_substream *subs, goto add_sync_ep_from_ifnum; case USB_ID(0x07fd, 0x0008): /* MOTU M Series */ case USB_ID(0x31e9, 0x0002): /* Solid State Logic SSL2+ */ + case USB_ID(0x0d9a, 0x00df): /* RTX6001 */ ep = 0x81; ifnum = 2; goto add_sync_ep_from_ifnum; From f79a732a8325dfbd570d87f1435019d7e5501c6d Mon Sep 17 00:00:00 2001 From: Vinod Koul Date: Mon, 29 Jun 2020 19:17:37 +0530 Subject: [PATCH 199/343] ALSA: compress: fix partial_drain completion state On partial_drain completion we should be in SNDRV_PCM_STATE_RUNNING state, so set that for partially draining streams in snd_compr_drain_notify() and use a flag for partially draining streams While at it, add locks for stream state change in snd_compr_drain_notify() as well. Fixes: f44f2a5417b2 ("ALSA: compress: fix drain calls blocking other compress functions (v6)") Reviewed-by: Srinivas Kandagatla Tested-by: Srinivas Kandagatla Reviewed-by: Charles Keepax Tested-by: Charles Keepax Signed-off-by: Vinod Koul Link: https://lore.kernel.org/r/20200629134737.105993-4-vkoul@kernel.org Signed-off-by: Takashi Iwai --- include/sound/compress_driver.h | 10 +++++++++- sound/core/compress_offload.c | 4 ++++ 2 files changed, 13 insertions(+), 1 deletion(-) diff --git a/include/sound/compress_driver.h b/include/sound/compress_driver.h index 6ce8effa0b12..70cbc5095e72 100644 --- a/include/sound/compress_driver.h +++ b/include/sound/compress_driver.h @@ -66,6 +66,7 @@ struct snd_compr_runtime { * @direction: stream direction, playback/recording * @metadata_set: metadata set flag, true when set * @next_track: has userspace signal next track transition, true when set + * @partial_drain: undergoing partial_drain for stream, true when set * @private_data: pointer to DSP private data * @dma_buffer: allocated buffer if any */ @@ -78,6 +79,7 @@ struct snd_compr_stream { enum snd_compr_direction direction; bool metadata_set; bool next_track; + bool partial_drain; void *private_data; struct snd_dma_buffer dma_buffer; }; @@ -182,7 +184,13 @@ static inline void snd_compr_drain_notify(struct snd_compr_stream *stream) if (snd_BUG_ON(!stream)) return; - stream->runtime->state = SNDRV_PCM_STATE_SETUP; + /* for partial_drain case we are back to running state on success */ + if (stream->partial_drain) { + stream->runtime->state = SNDRV_PCM_STATE_RUNNING; + stream->partial_drain = false; /* clear this flag as well */ + } else { + stream->runtime->state = SNDRV_PCM_STATE_SETUP; + } wake_up(&stream->runtime->sleep); } diff --git a/sound/core/compress_offload.c b/sound/core/compress_offload.c index 509290f2efa8..0e53f6f31916 100644 --- a/sound/core/compress_offload.c +++ b/sound/core/compress_offload.c @@ -764,6 +764,9 @@ static int snd_compr_stop(struct snd_compr_stream *stream) retval = stream->ops->trigger(stream, SNDRV_PCM_TRIGGER_STOP); if (!retval) { + /* clear flags and stop any drain wait */ + stream->partial_drain = false; + stream->metadata_set = false; snd_compr_drain_notify(stream); stream->runtime->total_bytes_available = 0; stream->runtime->total_bytes_transferred = 0; @@ -921,6 +924,7 @@ static int snd_compr_partial_drain(struct snd_compr_stream *stream) if (stream->next_track == false) return -EPERM; + stream->partial_drain = true; retval = stream->ops->trigger(stream, SND_COMPR_TRIGGER_PARTIAL_DRAIN); if (retval) { pr_debug("Partial drain returned failure\n"); From 04e484c5973ed0f9234c97685c3c5e1ebf0d6eb6 Mon Sep 17 00:00:00 2001 From: Qu Wenruo Date: Fri, 3 Jul 2020 15:05:50 +0800 Subject: [PATCH 200/343] btrfs: discard: add missing put when grabbing block group from unused list [BUG] The following small test script can trigger ASSERT() at unmount time: mkfs.btrfs -f $dev mount $dev $mnt mount -o remount,discard=async $mnt umount $mnt The call trace: assertion failed: atomic_read(&block_group->count) == 1, in fs/btrfs/block-group.c:3431 ------------[ cut here ]------------ kernel BUG at fs/btrfs/ctree.h:3204! invalid opcode: 0000 [#1] PREEMPT SMP NOPTI CPU: 4 PID: 10389 Comm: umount Tainted: G O 5.8.0-rc3-custom+ #68 Hardware name: QEMU Standard PC (Q35 + ICH9, 2009), BIOS 0.0.0 02/06/2015 Call Trace: btrfs_free_block_groups.cold+0x22/0x55 [btrfs] close_ctree+0x2cb/0x323 [btrfs] btrfs_put_super+0x15/0x17 [btrfs] generic_shutdown_super+0x72/0x110 kill_anon_super+0x18/0x30 btrfs_kill_super+0x17/0x30 [btrfs] deactivate_locked_super+0x3b/0xa0 deactivate_super+0x40/0x50 cleanup_mnt+0x135/0x190 __cleanup_mnt+0x12/0x20 task_work_run+0x64/0xb0 __prepare_exit_to_usermode+0x1bc/0x1c0 __syscall_return_slowpath+0x47/0x230 do_syscall_64+0x64/0xb0 entry_SYSCALL_64_after_hwframe+0x44/0xa9 The code: ASSERT(atomic_read(&block_group->count) == 1); btrfs_put_block_group(block_group); [CAUSE] Obviously it's some btrfs_get_block_group() call doesn't get its put call. The offending btrfs_get_block_group() happens here: void btrfs_mark_bg_unused(struct btrfs_block_group *bg) { if (list_empty(&bg->bg_list)) { btrfs_get_block_group(bg); list_add_tail(&bg->bg_list, &fs_info->unused_bgs); } } So every call sites removing the block group from unused_bgs list should reduce the ref count of that block group. However for async discard, it didn't follow the call convention: void btrfs_discard_punt_unused_bgs_list(struct btrfs_fs_info *fs_info) { list_for_each_entry_safe(block_group, next, &fs_info->unused_bgs, bg_list) { list_del_init(&block_group->bg_list); btrfs_discard_queue_work(&fs_info->discard_ctl, block_group); } } And in btrfs_discard_queue_work(), it doesn't call btrfs_put_block_group() either. [FIX] Fix the problem by reducing the reference count when we grab the block group from unused_bgs list. Reported-by: Marcos Paulo de Souza Fixes: 6e80d4f8c422 ("btrfs: handle empty block_group removal for async discard") CC: stable@vger.kernel.org # 5.6+ Tested-by: Marcos Paulo de Souza Reviewed-by: Anand Jain Signed-off-by: Qu Wenruo Reviewed-by: David Sterba Signed-off-by: David Sterba --- fs/btrfs/discard.c | 1 + 1 file changed, 1 insertion(+) diff --git a/fs/btrfs/discard.c b/fs/btrfs/discard.c index 5615320fa659..741c7e19c32f 100644 --- a/fs/btrfs/discard.c +++ b/fs/btrfs/discard.c @@ -619,6 +619,7 @@ void btrfs_discard_punt_unused_bgs_list(struct btrfs_fs_info *fs_info) list_for_each_entry_safe(block_group, next, &fs_info->unused_bgs, bg_list) { list_del_init(&block_group->bg_list); + btrfs_put_block_group(block_group); btrfs_discard_queue_work(&fs_info->discard_ctl, block_group); } spin_unlock(&fs_info->unused_bgs_lock); From 05a4fed69ff00a8bd83538684cb602a4636b07a7 Mon Sep 17 00:00:00 2001 From: Ming Lei Date: Tue, 7 Jul 2020 11:04:33 -0400 Subject: [PATCH 201/343] blk-mq: consider non-idle request as "inflight" in blk_mq_rq_inflight() dm-multipath is the only user of blk_mq_queue_inflight(). When dm-multipath calls blk_mq_queue_inflight() to check if it has outstanding IO it can get a false negative. The reason for this is blk_mq_rq_inflight() doesn't consider requests that are no longer MQ_RQ_IN_FLIGHT but that are now MQ_RQ_COMPLETE (->complete isn't called or finished yet) as "inflight". This causes request-based dm-multipath's dm_wait_for_completion() to return before all outstanding dm-multipath requests have actually completed. This breaks DM multipath's suspend functionality because blk-mq requests complete after DM's suspend has finished -- which shouldn't happen. Fix this by considering any request not in the MQ_RQ_IDLE state (so either MQ_RQ_COMPLETE or MQ_RQ_IN_FLIGHT) as "inflight" in blk_mq_rq_inflight(). Fixes: 3c94d83cb3526 ("blk-mq: change blk_mq_queue_busy() to blk_mq_queue_inflight()") Signed-off-by: Ming Lei Signed-off-by: Mike Snitzer Cc: stable@vger.kernel.org Signed-off-by: Jens Axboe --- block/blk-mq.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/block/blk-mq.c b/block/blk-mq.c index a9aa6d1e44cf..4e0d173beaa3 100644 --- a/block/blk-mq.c +++ b/block/blk-mq.c @@ -828,10 +828,10 @@ static bool blk_mq_rq_inflight(struct blk_mq_hw_ctx *hctx, struct request *rq, void *priv, bool reserved) { /* - * If we find a request that is inflight and the queue matches, + * If we find a request that isn't idle and the queue matches, * we know the queue is busy. Return false to stop the iteration. */ - if (rq->state == MQ_RQ_IN_FLIGHT && rq->q == hctx->queue) { + if (blk_mq_request_started(rq) && rq->q == hctx->queue) { bool *busy = priv; *busy = true; From 85067747cf9888249fa11fa49ef75af5192d3988 Mon Sep 17 00:00:00 2001 From: Ming Lei Date: Wed, 24 Jun 2020 16:00:58 -0400 Subject: [PATCH 202/343] dm: do not use waitqueue for request-based DM Given request-based DM now uses blk-mq's blk_mq_queue_inflight() to determine if outstanding IO has completed (and DM has no control over the blk-mq state machine used to track outstanding IO) it is unsafe to wakeup waiter (dm_wait_for_completion) before blk-mq has cleared a request's state bits (e.g. MQ_RQ_IN_FLIGHT or MQ_RQ_COMPLETE). As such dm_wait_for_completion() could be left to wait indefinitely if no other requests complete. Fix this by eliminating request-based DM's use of waitqueue to wait for blk-mq requests to complete in dm_wait_for_completion. Signed-off-by: Ming Lei Depends-on: 3c94d83cb3526 ("blk-mq: change blk_mq_queue_busy() to blk_mq_queue_inflight()") Cc: stable@vger.kernel.org Signed-off-by: Mike Snitzer --- drivers/md/dm-rq.c | 4 --- drivers/md/dm.c | 64 ++++++++++++++++++++++++++++------------------ 2 files changed, 39 insertions(+), 29 deletions(-) diff --git a/drivers/md/dm-rq.c b/drivers/md/dm-rq.c index f60c02512121..85e0daabad49 100644 --- a/drivers/md/dm-rq.c +++ b/drivers/md/dm-rq.c @@ -146,10 +146,6 @@ static void rq_end_stats(struct mapped_device *md, struct request *orig) */ static void rq_completed(struct mapped_device *md) { - /* nudge anyone waiting on suspend queue */ - if (unlikely(wq_has_sleeper(&md->wait))) - wake_up(&md->wait); - /* * dm_put() must be at the end of this function. See the comment above */ diff --git a/drivers/md/dm.c b/drivers/md/dm.c index e6807792fec8..446aff589732 100644 --- a/drivers/md/dm.c +++ b/drivers/md/dm.c @@ -654,28 +654,6 @@ static void free_tio(struct dm_target_io *tio) bio_put(&tio->clone); } -static bool md_in_flight_bios(struct mapped_device *md) -{ - int cpu; - struct hd_struct *part = &dm_disk(md)->part0; - long sum = 0; - - for_each_possible_cpu(cpu) { - sum += part_stat_local_read_cpu(part, in_flight[0], cpu); - sum += part_stat_local_read_cpu(part, in_flight[1], cpu); - } - - return sum != 0; -} - -static bool md_in_flight(struct mapped_device *md) -{ - if (queue_is_mq(md->queue)) - return blk_mq_queue_inflight(md->queue); - else - return md_in_flight_bios(md); -} - u64 dm_start_time_ns_from_clone(struct bio *bio) { struct dm_target_io *tio = container_of(bio, struct dm_target_io, clone); @@ -2470,15 +2448,29 @@ void dm_put(struct mapped_device *md) } EXPORT_SYMBOL_GPL(dm_put); -static int dm_wait_for_completion(struct mapped_device *md, long task_state) +static bool md_in_flight_bios(struct mapped_device *md) +{ + int cpu; + struct hd_struct *part = &dm_disk(md)->part0; + long sum = 0; + + for_each_possible_cpu(cpu) { + sum += part_stat_local_read_cpu(part, in_flight[0], cpu); + sum += part_stat_local_read_cpu(part, in_flight[1], cpu); + } + + return sum != 0; +} + +static int dm_wait_for_bios_completion(struct mapped_device *md, long task_state) { int r = 0; DEFINE_WAIT(wait); - while (1) { + while (true) { prepare_to_wait(&md->wait, &wait, task_state); - if (!md_in_flight(md)) + if (!md_in_flight_bios(md)) break; if (signal_pending_state(task_state, current)) { @@ -2493,6 +2485,28 @@ static int dm_wait_for_completion(struct mapped_device *md, long task_state) return r; } +static int dm_wait_for_completion(struct mapped_device *md, long task_state) +{ + int r = 0; + + if (!queue_is_mq(md->queue)) + return dm_wait_for_bios_completion(md, task_state); + + while (true) { + if (!blk_mq_queue_inflight(md->queue)) + break; + + if (signal_pending_state(task_state, current)) { + r = -EINTR; + break; + } + + msleep(5); + } + + return r; +} + /* * Process the deferred bios */ From 880bc529bae2257d05a767f7f5c24fd61a47b605 Mon Sep 17 00:00:00 2001 From: Miquel Raynal Date: Fri, 26 Jun 2020 08:55:11 +0200 Subject: [PATCH 203/343] mtd: rawnand: xway: Fix build issue This MIPS driver does not support COMPILE_TEST yet and failed to build under my radar. Replace 'mtd' chich is not defined in the scope of xway_nand_remove() by nand_to_mtd(chip). The mistake has been added in the long series dropping nand_release(). Tested with a 7.3.0 MIPS GCC toolchain built with Buildroot. Fixes: 9fdd78f7bcda ("mtd: rawnand: xway: Stop using nand_release()") Reported-by: kernel test robot Signed-off-by: Miquel Raynal Link: https://lore.kernel.org/linux-mtd/20200626065511.16424-1-miquel.raynal@bootlin.com --- drivers/mtd/nand/raw/xway_nand.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/mtd/nand/raw/xway_nand.c b/drivers/mtd/nand/raw/xway_nand.c index 94bfba994326..29255476afdb 100644 --- a/drivers/mtd/nand/raw/xway_nand.c +++ b/drivers/mtd/nand/raw/xway_nand.c @@ -224,7 +224,7 @@ static int xway_nand_remove(struct platform_device *pdev) struct nand_chip *chip = &data->chip; int ret; - ret = mtd_device_unregister(mtd); + ret = mtd_device_unregister(nand_to_mtd(chip)); WARN_ON(ret); nand_cleanup(chip); From 74478ea4ded519db35cb1f059948b1e713bb4abf Mon Sep 17 00:00:00 2001 From: Alex Elder Date: Mon, 6 Jul 2020 18:10:08 -0500 Subject: [PATCH 204/343] net: ipa: fix QMI structure definition bugs Building with "W=1" did exactly what it was supposed to do, namely point out some suspicious-looking code to be verified not to contain bugs. Some QMI message structures defined in "ipa_qmi_msg.c" contained some bad field names (duplicating the "elem_size" field instead of defining the "offset" field), almost certainly due to copy/paste errors that weren't obvious in a scan of the code. Fix these bugs. Fixes: 530f9216a953 ("soc: qcom: ipa: AP/modem communications") Signed-off-by: Alex Elder Signed-off-by: David S. Miller --- drivers/net/ipa/ipa_qmi_msg.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/drivers/net/ipa/ipa_qmi_msg.c b/drivers/net/ipa/ipa_qmi_msg.c index 03a1d0e55964..73413371e3d3 100644 --- a/drivers/net/ipa/ipa_qmi_msg.c +++ b/drivers/net/ipa/ipa_qmi_msg.c @@ -119,7 +119,7 @@ struct qmi_elem_info ipa_driver_init_complete_rsp_ei[] = { sizeof_field(struct ipa_driver_init_complete_rsp, rsp), .tlv_type = 0x02, - .elem_size = offsetof(struct ipa_driver_init_complete_rsp, + .offset = offsetof(struct ipa_driver_init_complete_rsp, rsp), .ei_array = qmi_response_type_v01_ei, }, @@ -137,7 +137,7 @@ struct qmi_elem_info ipa_init_complete_ind_ei[] = { sizeof_field(struct ipa_init_complete_ind, status), .tlv_type = 0x02, - .elem_size = offsetof(struct ipa_init_complete_ind, + .offset = offsetof(struct ipa_init_complete_ind, status), .ei_array = qmi_response_type_v01_ei, }, @@ -218,7 +218,7 @@ struct qmi_elem_info ipa_init_modem_driver_req_ei[] = { sizeof_field(struct ipa_init_modem_driver_req, platform_type_valid), .tlv_type = 0x10, - .elem_size = offsetof(struct ipa_init_modem_driver_req, + .offset = offsetof(struct ipa_init_modem_driver_req, platform_type_valid), }, { From 3c90e95bd958285d416e83cffda82ddc92d91294 Mon Sep 17 00:00:00 2001 From: Alex Elder Date: Mon, 6 Jul 2020 18:10:09 -0500 Subject: [PATCH 205/343] net: ipa: declare struct types in "ipa_gsi.h" Pointers to two struct types are used in "ipa_gsi.h", without those struct types being forward-declared. Add these declarations. Fixes: c3f398b141a8 ("soc: qcom: ipa: IPA interface to GSI") Signed-off-by: Alex Elder Signed-off-by: David S. Miller --- drivers/net/ipa/ipa_gsi.h | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/net/ipa/ipa_gsi.h b/drivers/net/ipa/ipa_gsi.h index 3cf18600c68e..0a40f3dc55fc 100644 --- a/drivers/net/ipa/ipa_gsi.h +++ b/drivers/net/ipa/ipa_gsi.h @@ -8,7 +8,9 @@ #include +struct gsi; struct gsi_trans; +struct ipa_gsi_endpoint_data; /** * ipa_gsi_trans_complete() - GSI transaction completion callback From a21c1f028fbae8c8e2e2602d3ea206fefa448d73 Mon Sep 17 00:00:00 2001 From: Alex Elder Date: Mon, 6 Jul 2020 18:10:10 -0500 Subject: [PATCH 206/343] net: ipa: include declarations in "ipa_gsi.c" Include "ipa_gsi.h" in "ipa_gsi.c", so the public functions are defined before they are used in "ipa_gsi.c". This addresses some warnings that are reported with a "W=1" build. Fixes: c3f398b141a8 ("soc: qcom: ipa: IPA interface to GSI") Signed-off-by: Alex Elder Signed-off-by: David S. Miller --- drivers/net/ipa/ipa_gsi.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/net/ipa/ipa_gsi.c b/drivers/net/ipa/ipa_gsi.c index dc4a5c2196ae..d323adb03383 100644 --- a/drivers/net/ipa/ipa_gsi.c +++ b/drivers/net/ipa/ipa_gsi.c @@ -6,6 +6,7 @@ #include +#include "ipa_gsi.h" #include "gsi_trans.h" #include "ipa.h" #include "ipa_endpoint.h" From f815dd5cf48b905eeecf0a2b990e9b7ab048b4f1 Mon Sep 17 00:00:00 2001 From: AceLan Kao Date: Tue, 7 Jul 2020 16:14:45 +0800 Subject: [PATCH 207/343] net: usb: qmi_wwan: add support for Quectel EG95 LTE modem MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Add support for Quectel Wireless Solutions Co., Ltd. EG95 LTE modem T: Bus=01 Lev=01 Prnt=01 Port=02 Cnt=02 Dev#= 5 Spd=480 MxCh= 0 D: Ver= 2.00 Cls=ef(misc ) Sub=02 Prot=01 MxPS=64 #Cfgs= 1 P: Vendor=2c7c ProdID=0195 Rev=03.18 S: Manufacturer=Android S: Product=Android C: #Ifs= 5 Cfg#= 1 Atr=a0 MxPwr=500mA I: If#=0x0 Alt= 0 #EPs= 2 Cls=ff(vend.) Sub=ff Prot=ff Driver=(none) I: If#=0x1 Alt= 0 #EPs= 3 Cls=ff(vend.) Sub=00 Prot=00 Driver=(none) I: If#=0x2 Alt= 0 #EPs= 3 Cls=ff(vend.) Sub=00 Prot=00 Driver=(none) I: If#=0x3 Alt= 0 #EPs= 3 Cls=ff(vend.) Sub=00 Prot=00 Driver=(none) I: If#=0x4 Alt= 0 #EPs= 3 Cls=ff(vend.) Sub=ff Prot=ff Driver=(none) Signed-off-by: AceLan Kao Acked-by: Bjørn Mork Signed-off-by: David S. Miller --- drivers/net/usb/qmi_wwan.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/net/usb/qmi_wwan.c b/drivers/net/usb/qmi_wwan.c index 31b1d4b959f6..07c42c0719f5 100644 --- a/drivers/net/usb/qmi_wwan.c +++ b/drivers/net/usb/qmi_wwan.c @@ -1370,6 +1370,7 @@ static const struct usb_device_id products[] = { {QMI_QUIRK_SET_DTR(0x1e0e, 0x9001, 5)}, /* SIMCom 7100E, 7230E, 7600E ++ */ {QMI_QUIRK_SET_DTR(0x2c7c, 0x0121, 4)}, /* Quectel EC21 Mini PCIe */ {QMI_QUIRK_SET_DTR(0x2c7c, 0x0191, 4)}, /* Quectel EG91 */ + {QMI_QUIRK_SET_DTR(0x2c7c, 0x0195, 4)}, /* Quectel EG95 */ {QMI_FIXED_INTF(0x2c7c, 0x0296, 4)}, /* Quectel BG96 */ {QMI_QUIRK_SET_DTR(0x2cb7, 0x0104, 4)}, /* Fibocom NL678 series */ {QMI_FIXED_INTF(0x0489, 0xe0b4, 0)}, /* Foxconn T77W968 LTE */ From aea23c323d89836bcdcee67e49def997ffca043b Mon Sep 17 00:00:00 2001 From: David Ahern Date: Tue, 7 Jul 2020 07:39:24 -0600 Subject: [PATCH 208/343] ipv6: Fix use of anycast address with loopback Thomas reported a regression with IPv6 and anycast using the following reproducer: echo 1 > /proc/sys/net/ipv6/conf/all/forwarding ip -6 a add fc12::1/16 dev lo sleep 2 echo "pinging lo" ping6 -c 2 fc12:: The conversion of addrconf_f6i_alloc to use ip6_route_info_create missed the use of fib6_is_reject which checks addresses added to the loopback interface and sets the REJECT flag as needed. Update fib6_is_reject for loopback checks to handle RTF_ANYCAST addresses. Fixes: c7a1ce397ada ("ipv6: Change addrconf_f6i_alloc to use ip6_route_info_create") Reported-by: thomas.gambier@nexedi.com Signed-off-by: David Ahern Signed-off-by: David S. Miller --- net/ipv6/route.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/ipv6/route.c b/net/ipv6/route.c index ea0be7cf3d93..f3279810d765 100644 --- a/net/ipv6/route.c +++ b/net/ipv6/route.c @@ -3405,7 +3405,7 @@ static bool fib6_is_reject(u32 flags, struct net_device *dev, int addr_type) if ((flags & RTF_REJECT) || (dev && (dev->flags & IFF_LOOPBACK) && !(addr_type & IPV6_ADDR_LOOPBACK) && - !(flags & RTF_LOCAL))) + !(flags & (RTF_ANYCAST | RTF_LOCAL)))) return true; return false; From ad0f75e5f57ccbceec13274e1e242f2b5a6397ed Mon Sep 17 00:00:00 2001 From: Cong Wang Date: Thu, 2 Jul 2020 11:52:56 -0700 Subject: [PATCH 209/343] cgroup: fix cgroup_sk_alloc() for sk_clone_lock() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit When we clone a socket in sk_clone_lock(), its sk_cgrp_data is copied, so the cgroup refcnt must be taken too. And, unlike the sk_alloc() path, sock_update_netprioidx() is not called here. Therefore, it is safe and necessary to grab the cgroup refcnt even when cgroup_sk_alloc is disabled. sk_clone_lock() is in BH context anyway, the in_interrupt() would terminate this function if called there. And for sk_alloc() skcd->val is always zero. So it's safe to factor out the code to make it more readable. The global variable 'cgroup_sk_alloc_disabled' is used to determine whether to take these reference counts. It is impossible to make the reference counting correct unless we save this bit of information in skcd->val. So, add a new bit there to record whether the socket has already taken the reference counts. This obviously relies on kmalloc() to align cgroup pointers to at least 4 bytes, ARCH_KMALLOC_MINALIGN is certainly larger than that. This bug seems to be introduced since the beginning, commit d979a39d7242 ("cgroup: duplicate cgroup reference when cloning sockets") tried to fix it but not compeletely. It seems not easy to trigger until the recent commit 090e28b229af ("netprio_cgroup: Fix unlimited memory leak of v2 cgroups") was merged. Fixes: bd1060a1d671 ("sock, cgroup: add sock->sk_cgroup") Reported-by: Cameron Berkenpas Reported-by: Peter Geis Reported-by: Lu Fengqi Reported-by: Daniël Sonck Reported-by: Zhang Qiang Tested-by: Cameron Berkenpas Tested-by: Peter Geis Tested-by: Thomas Lamprecht Cc: Daniel Borkmann Cc: Zefan Li Cc: Tejun Heo Cc: Roman Gushchin Signed-off-by: Cong Wang Signed-off-by: David S. Miller --- include/linux/cgroup-defs.h | 6 ++++-- include/linux/cgroup.h | 4 +++- kernel/cgroup/cgroup.c | 31 +++++++++++++++++++------------ net/core/sock.c | 2 +- 4 files changed, 27 insertions(+), 16 deletions(-) diff --git a/include/linux/cgroup-defs.h b/include/linux/cgroup-defs.h index 52661155f85f..4f1cd0edc57d 100644 --- a/include/linux/cgroup-defs.h +++ b/include/linux/cgroup-defs.h @@ -790,7 +790,8 @@ struct sock_cgroup_data { union { #ifdef __LITTLE_ENDIAN struct { - u8 is_data; + u8 is_data : 1; + u8 no_refcnt : 1; u8 padding; u16 prioidx; u32 classid; @@ -800,7 +801,8 @@ struct sock_cgroup_data { u32 classid; u16 prioidx; u8 padding; - u8 is_data; + u8 no_refcnt : 1; + u8 is_data : 1; } __packed; #endif u64 val; diff --git a/include/linux/cgroup.h b/include/linux/cgroup.h index 4598e4da6b1b..618838c48313 100644 --- a/include/linux/cgroup.h +++ b/include/linux/cgroup.h @@ -822,6 +822,7 @@ extern spinlock_t cgroup_sk_update_lock; void cgroup_sk_alloc_disable(void); void cgroup_sk_alloc(struct sock_cgroup_data *skcd); +void cgroup_sk_clone(struct sock_cgroup_data *skcd); void cgroup_sk_free(struct sock_cgroup_data *skcd); static inline struct cgroup *sock_cgroup_ptr(struct sock_cgroup_data *skcd) @@ -835,7 +836,7 @@ static inline struct cgroup *sock_cgroup_ptr(struct sock_cgroup_data *skcd) */ v = READ_ONCE(skcd->val); - if (v & 1) + if (v & 3) return &cgrp_dfl_root.cgrp; return (struct cgroup *)(unsigned long)v ?: &cgrp_dfl_root.cgrp; @@ -847,6 +848,7 @@ static inline struct cgroup *sock_cgroup_ptr(struct sock_cgroup_data *skcd) #else /* CONFIG_CGROUP_DATA */ static inline void cgroup_sk_alloc(struct sock_cgroup_data *skcd) {} +static inline void cgroup_sk_clone(struct sock_cgroup_data *skcd) {} static inline void cgroup_sk_free(struct sock_cgroup_data *skcd) {} #endif /* CONFIG_CGROUP_DATA */ diff --git a/kernel/cgroup/cgroup.c b/kernel/cgroup/cgroup.c index 1ea181a58465..dd247747ec14 100644 --- a/kernel/cgroup/cgroup.c +++ b/kernel/cgroup/cgroup.c @@ -6439,18 +6439,8 @@ void cgroup_sk_alloc_disable(void) void cgroup_sk_alloc(struct sock_cgroup_data *skcd) { - if (cgroup_sk_alloc_disabled) - return; - - /* Socket clone path */ - if (skcd->val) { - /* - * We might be cloning a socket which is left in an empty - * cgroup and the cgroup might have already been rmdir'd. - * Don't use cgroup_get_live(). - */ - cgroup_get(sock_cgroup_ptr(skcd)); - cgroup_bpf_get(sock_cgroup_ptr(skcd)); + if (cgroup_sk_alloc_disabled) { + skcd->no_refcnt = 1; return; } @@ -6475,10 +6465,27 @@ void cgroup_sk_alloc(struct sock_cgroup_data *skcd) rcu_read_unlock(); } +void cgroup_sk_clone(struct sock_cgroup_data *skcd) +{ + if (skcd->val) { + if (skcd->no_refcnt) + return; + /* + * We might be cloning a socket which is left in an empty + * cgroup and the cgroup might have already been rmdir'd. + * Don't use cgroup_get_live(). + */ + cgroup_get(sock_cgroup_ptr(skcd)); + cgroup_bpf_get(sock_cgroup_ptr(skcd)); + } +} + void cgroup_sk_free(struct sock_cgroup_data *skcd) { struct cgroup *cgrp = sock_cgroup_ptr(skcd); + if (skcd->no_refcnt) + return; cgroup_bpf_put(cgrp); cgroup_put(cgrp); } diff --git a/net/core/sock.c b/net/core/sock.c index d832c650287c..2e5b7870e5d3 100644 --- a/net/core/sock.c +++ b/net/core/sock.c @@ -1926,7 +1926,7 @@ struct sock *sk_clone_lock(const struct sock *sk, const gfp_t priority) /* sk->sk_memcg will be populated at accept() time */ newsk->sk_memcg = NULL; - cgroup_sk_alloc(&newsk->sk_cgrp_data); + cgroup_sk_clone(&newsk->sk_cgrp_data); rcu_read_lock(); filter = rcu_dereference(sk->sk_filter); From 41da51bce36f44eefc1e3d0f47d18841cbd065ba Mon Sep 17 00:00:00 2001 From: Andreas Gruenbacher Date: Thu, 21 Nov 2019 23:25:07 +0000 Subject: [PATCH 210/343] fs: Add IOCB_NOIO flag for generic_file_read_iter Add an IOCB_NOIO flag that indicates to generic_file_read_iter that it shouldn't trigger any filesystem I/O for the actual request or for readahead. This allows to do tentative reads out of the page cache as some filesystems allow, and to take the appropriate locks and retry the reads only if the requested pages are not cached. Signed-off-by: Andreas Gruenbacher --- include/linux/fs.h | 1 + mm/filemap.c | 23 +++++++++++++++++++++-- 2 files changed, 22 insertions(+), 2 deletions(-) diff --git a/include/linux/fs.h b/include/linux/fs.h index 3f881a892ea7..4b7cb76e5837 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -315,6 +315,7 @@ enum rw_hint { #define IOCB_SYNC (1 << 5) #define IOCB_WRITE (1 << 6) #define IOCB_NOWAIT (1 << 7) +#define IOCB_NOIO (1 << 9) struct kiocb { struct file *ki_filp; diff --git a/mm/filemap.c b/mm/filemap.c index f0ae9a6308cb..385759c4ce4b 100644 --- a/mm/filemap.c +++ b/mm/filemap.c @@ -2028,7 +2028,7 @@ ssize_t generic_file_buffered_read(struct kiocb *iocb, page = find_get_page(mapping, index); if (!page) { - if (iocb->ki_flags & IOCB_NOWAIT) + if (iocb->ki_flags & (IOCB_NOWAIT | IOCB_NOIO)) goto would_block; page_cache_sync_readahead(mapping, ra, filp, @@ -2038,6 +2038,10 @@ ssize_t generic_file_buffered_read(struct kiocb *iocb, goto no_cached_page; } if (PageReadahead(page)) { + if (iocb->ki_flags & IOCB_NOIO) { + put_page(page); + goto out; + } page_cache_async_readahead(mapping, ra, filp, page, index, last_index - index); @@ -2160,6 +2164,11 @@ ssize_t generic_file_buffered_read(struct kiocb *iocb, } readpage: + if (iocb->ki_flags & IOCB_NOIO) { + unlock_page(page); + put_page(page); + goto would_block; + } /* * A previous I/O error may have been due to temporary * failures, eg. multipath errors. @@ -2249,9 +2258,19 @@ EXPORT_SYMBOL_GPL(generic_file_buffered_read); * * This is the "read_iter()" routine for all filesystems * that can use the page cache directly. + * + * The IOCB_NOWAIT flag in iocb->ki_flags indicates that -EAGAIN shall + * be returned when no data can be read without waiting for I/O requests + * to complete; it doesn't prevent readahead. + * + * The IOCB_NOIO flag in iocb->ki_flags indicates that no new I/O + * requests shall be made for the read or for readahead. When no data + * can be read, -EAGAIN shall be returned. When readahead would be + * triggered, a partial, possibly empty read shall be returned. + * * Return: * * number of bytes copied, even for partial reads - * * negative error code if nothing was read + * * negative error code (or 0 if IOCB_NOIO) if nothing was read */ ssize_t generic_file_read_iter(struct kiocb *iocb, struct iov_iter *iter) From 20f829999c38b18e3d17f9e40dea3a28f721fac4 Mon Sep 17 00:00:00 2001 From: Andreas Gruenbacher Date: Wed, 1 Jul 2020 19:25:19 +0200 Subject: [PATCH 211/343] gfs2: Rework read and page fault locking So far, gfs2 has taken the inode glocks inside the ->readpage and ->readahead address space operations. Since commit d4388340ae0b ("fs: convert mpage_readpages to mpage_readahead"), gfs2_readahead is passed the pages to read ahead locked. With that, the current holder of the inode glock may be trying to lock one of those pages while gfs2_readahead is trying to take the inode glock, resulting in a deadlock. Fix that by moving the lock taking to the higher-level ->read_iter file and ->fault vm operations. This also gets rid of an ugly lock inversion workaround in gfs2_readpage. The cache consistency model of filesystems like gfs2 is such that if data is found in the page cache, the data is up to date and can be used without taking any filesystem locks. If a page is not cached, filesystem locks must be taken before populating the page cache. To avoid taking the inode glock when the data is already cached, gfs2_file_read_iter first tries to read the data with the IOCB_NOIO flag set. If that fails, the inode glock is taken and the operation is retried with the IOCB_NOIO flag cleared. Signed-off-by: Andreas Gruenbacher --- fs/gfs2/aops.c | 45 +------------------------------------------ fs/gfs2/file.c | 52 ++++++++++++++++++++++++++++++++++++++++++++++++-- 2 files changed, 51 insertions(+), 46 deletions(-) diff --git a/fs/gfs2/aops.c b/fs/gfs2/aops.c index 72c9560f4467..68cd700a2719 100644 --- a/fs/gfs2/aops.c +++ b/fs/gfs2/aops.c @@ -468,21 +468,10 @@ static int stuffed_readpage(struct gfs2_inode *ip, struct page *page) } -/** - * __gfs2_readpage - readpage - * @file: The file to read a page for - * @page: The page to read - * - * This is the core of gfs2's readpage. It's used by the internal file - * reading code as in that case we already hold the glock. Also it's - * called by gfs2_readpage() once the required lock has been granted. - */ - static int __gfs2_readpage(void *file, struct page *page) { struct gfs2_inode *ip = GFS2_I(page->mapping->host); struct gfs2_sbd *sdp = GFS2_SB(page->mapping->host); - int error; if (i_blocksize(page->mapping->host) == PAGE_SIZE && @@ -505,36 +494,11 @@ static int __gfs2_readpage(void *file, struct page *page) * gfs2_readpage - read a page of a file * @file: The file to read * @page: The page of the file - * - * This deals with the locking required. We have to unlock and - * relock the page in order to get the locking in the right - * order. */ static int gfs2_readpage(struct file *file, struct page *page) { - struct address_space *mapping = page->mapping; - struct gfs2_inode *ip = GFS2_I(mapping->host); - struct gfs2_holder gh; - int error; - - unlock_page(page); - gfs2_holder_init(ip->i_gl, LM_ST_SHARED, 0, &gh); - error = gfs2_glock_nq(&gh); - if (unlikely(error)) - goto out; - error = AOP_TRUNCATED_PAGE; - lock_page(page); - if (page->mapping == mapping && !PageUptodate(page)) - error = __gfs2_readpage(file, page); - else - unlock_page(page); - gfs2_glock_dq(&gh); -out: - gfs2_holder_uninit(&gh); - if (error && error != AOP_TRUNCATED_PAGE) - lock_page(page); - return error; + return __gfs2_readpage(file, page); } /** @@ -598,16 +562,9 @@ static void gfs2_readahead(struct readahead_control *rac) { struct inode *inode = rac->mapping->host; struct gfs2_inode *ip = GFS2_I(inode); - struct gfs2_holder gh; - gfs2_holder_init(ip->i_gl, LM_ST_SHARED, 0, &gh); - if (gfs2_glock_nq(&gh)) - goto out_uninit; if (!gfs2_is_stuffed(ip)) mpage_readahead(rac, gfs2_block_map); - gfs2_glock_dq(&gh); -out_uninit: - gfs2_holder_uninit(&gh); } /** diff --git a/fs/gfs2/file.c b/fs/gfs2/file.c index fe305e4bfd37..bebde537ac8c 100644 --- a/fs/gfs2/file.c +++ b/fs/gfs2/file.c @@ -558,8 +558,29 @@ static vm_fault_t gfs2_page_mkwrite(struct vm_fault *vmf) return block_page_mkwrite_return(ret); } +static vm_fault_t gfs2_fault(struct vm_fault *vmf) +{ + struct inode *inode = file_inode(vmf->vma->vm_file); + struct gfs2_inode *ip = GFS2_I(inode); + struct gfs2_holder gh; + vm_fault_t ret; + int err; + + gfs2_holder_init(ip->i_gl, LM_ST_SHARED, 0, &gh); + err = gfs2_glock_nq(&gh); + if (err) { + ret = block_page_mkwrite_return(err); + goto out_uninit; + } + ret = filemap_fault(vmf); + gfs2_glock_dq(&gh); +out_uninit: + gfs2_holder_uninit(&gh); + return ret; +} + static const struct vm_operations_struct gfs2_vm_ops = { - .fault = filemap_fault, + .fault = gfs2_fault, .map_pages = filemap_map_pages, .page_mkwrite = gfs2_page_mkwrite, }; @@ -824,6 +845,9 @@ static ssize_t gfs2_file_direct_write(struct kiocb *iocb, struct iov_iter *from) static ssize_t gfs2_file_read_iter(struct kiocb *iocb, struct iov_iter *to) { + struct gfs2_inode *ip; + struct gfs2_holder gh; + size_t written = 0; ssize_t ret; if (iocb->ki_flags & IOCB_DIRECT) { @@ -832,7 +856,31 @@ static ssize_t gfs2_file_read_iter(struct kiocb *iocb, struct iov_iter *to) return ret; iocb->ki_flags &= ~IOCB_DIRECT; } - return generic_file_read_iter(iocb, to); + iocb->ki_flags |= IOCB_NOIO; + ret = generic_file_read_iter(iocb, to); + iocb->ki_flags &= ~IOCB_NOIO; + if (ret >= 0) { + if (!iov_iter_count(to)) + return ret; + written = ret; + } else { + if (ret != -EAGAIN) + return ret; + if (iocb->ki_flags & IOCB_NOWAIT) + return ret; + } + ip = GFS2_I(iocb->ki_filp->f_mapping->host); + gfs2_holder_init(ip->i_gl, LM_ST_SHARED, 0, &gh); + ret = gfs2_glock_nq(&gh); + if (ret) + goto out_uninit; + ret = generic_file_read_iter(iocb, to); + if (ret > 0) + written += ret; + gfs2_glock_dq(&gh); +out_uninit: + gfs2_holder_uninit(&gh); + return written ? written : ret; } /** From 28b18e4eb515af7c6661c3995c6e3c34412c2874 Mon Sep 17 00:00:00 2001 From: Tom Rix Date: Fri, 3 Jul 2020 06:33:59 -0700 Subject: [PATCH 212/343] net: sky2: initialize return of gm_phy_read clang static analysis flags this garbage return drivers/net/ethernet/marvell/sky2.c:208:2: warning: Undefined or garbage value returned to caller [core.uninitialized.UndefReturn] return v; ^~~~~~~~ static inline u16 gm_phy_read( ... { u16 v; __gm_phy_read(hw, port, reg, &v); return v; } __gm_phy_read can return without setting v. So handle similar to skge.c's gm_phy_read, initialize v. Signed-off-by: Tom Rix Reviewed-by: Andrew Lunn Signed-off-by: David S. Miller --- drivers/net/ethernet/marvell/sky2.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/ethernet/marvell/sky2.c b/drivers/net/ethernet/marvell/sky2.c index 241f00716979..fe54764caea9 100644 --- a/drivers/net/ethernet/marvell/sky2.c +++ b/drivers/net/ethernet/marvell/sky2.c @@ -203,7 +203,7 @@ static int __gm_phy_read(struct sky2_hw *hw, unsigned port, u16 reg, u16 *val) static inline u16 gm_phy_read(struct sky2_hw *hw, unsigned port, u16 reg) { - u16 v; + u16 v = 0; __gm_phy_read(hw, port, reg, &v); return v; } From 7cdaa4cc4bdfa252d515b307863f5a1972246dd6 Mon Sep 17 00:00:00 2001 From: Tobias Waldekranz Date: Fri, 3 Jul 2020 16:10:58 +0200 Subject: [PATCH 213/343] net: ethernet: fec: prevent tx starvation under high rx load In the ISR, we poll the event register for the queues in need of service and then enter polled mode. After this point, the event register will never be read again until we exit polled mode. In a scenario where a UDP flow is routed back out through the same interface, i.e. "router-on-a-stick" we'll typically only see an rx queue event initially. Once we start to process the incoming flow we'll be locked polled mode, but we'll never clean the tx rings since that event is never caught. Eventually the netdev watchdog will trip, causing all buffers to be dropped and then the process starts over again. Rework the NAPI poll to keep trying to consome the entire budget as long as new events are coming in, making sure to service all rx/tx queues, in priority order, on each pass. Fixes: 4d494cdc92b3 ("net: fec: change data structure to support multiqueue") Signed-off-by: Tobias Waldekranz Tested-by: Fugang Duan Reviewed-by: Fugang Duan Signed-off-by: David S. Miller --- drivers/net/ethernet/freescale/fec.h | 5 -- drivers/net/ethernet/freescale/fec_main.c | 100 ++++++++-------------- 2 files changed, 34 insertions(+), 71 deletions(-) diff --git a/drivers/net/ethernet/freescale/fec.h b/drivers/net/ethernet/freescale/fec.h index a6cdd5b61921..d8d76da51c5e 100644 --- a/drivers/net/ethernet/freescale/fec.h +++ b/drivers/net/ethernet/freescale/fec.h @@ -525,11 +525,6 @@ struct fec_enet_private { unsigned int total_tx_ring_size; unsigned int total_rx_ring_size; - unsigned long work_tx; - unsigned long work_rx; - unsigned long work_ts; - unsigned long work_mdio; - struct platform_device *pdev; int dev_id; diff --git a/drivers/net/ethernet/freescale/fec_main.c b/drivers/net/ethernet/freescale/fec_main.c index 2d0d313ee7c5..3982285ed020 100644 --- a/drivers/net/ethernet/freescale/fec_main.c +++ b/drivers/net/ethernet/freescale/fec_main.c @@ -75,8 +75,6 @@ static void fec_enet_itr_coal_init(struct net_device *ndev); #define DRIVER_NAME "fec" -#define FEC_ENET_GET_QUQUE(_x) ((_x == 0) ? 1 : ((_x == 1) ? 2 : 0)) - /* Pause frame feild and FIFO threshold */ #define FEC_ENET_FCE (1 << 5) #define FEC_ENET_RSEM_V 0x84 @@ -1248,8 +1246,6 @@ fec_enet_tx_queue(struct net_device *ndev, u16 queue_id) fep = netdev_priv(ndev); - queue_id = FEC_ENET_GET_QUQUE(queue_id); - txq = fep->tx_queue[queue_id]; /* get next bdp of dirty_tx */ nq = netdev_get_tx_queue(ndev, queue_id); @@ -1340,17 +1336,14 @@ fec_enet_tx_queue(struct net_device *ndev, u16 queue_id) writel(0, txq->bd.reg_desc_active); } -static void -fec_enet_tx(struct net_device *ndev) +static void fec_enet_tx(struct net_device *ndev) { struct fec_enet_private *fep = netdev_priv(ndev); - u16 queue_id; - /* First process class A queue, then Class B and Best Effort queue */ - for_each_set_bit(queue_id, &fep->work_tx, FEC_ENET_MAX_TX_QS) { - clear_bit(queue_id, &fep->work_tx); - fec_enet_tx_queue(ndev, queue_id); - } - return; + int i; + + /* Make sure that AVB queues are processed first. */ + for (i = fep->num_tx_queues - 1; i >= 0; i--) + fec_enet_tx_queue(ndev, i); } static int @@ -1426,7 +1419,6 @@ fec_enet_rx_queue(struct net_device *ndev, int budget, u16 queue_id) #ifdef CONFIG_M532x flush_cache_all(); #endif - queue_id = FEC_ENET_GET_QUQUE(queue_id); rxq = fep->rx_queue[queue_id]; /* First, grab all of the stats for the incoming packet. @@ -1550,6 +1542,7 @@ fec_enet_rx_queue(struct net_device *ndev, int budget, u16 queue_id) htons(ETH_P_8021Q), vlan_tag); + skb_record_rx_queue(skb, queue_id); napi_gro_receive(&fep->napi, skb); if (is_copybreak) { @@ -1595,57 +1588,21 @@ fec_enet_rx_queue(struct net_device *ndev, int budget, u16 queue_id) return pkt_received; } -static int -fec_enet_rx(struct net_device *ndev, int budget) +static int fec_enet_rx(struct net_device *ndev, int budget) { - int pkt_received = 0; - u16 queue_id; struct fec_enet_private *fep = netdev_priv(ndev); + int i, done = 0; - for_each_set_bit(queue_id, &fep->work_rx, FEC_ENET_MAX_RX_QS) { - int ret; + /* Make sure that AVB queues are processed first. */ + for (i = fep->num_rx_queues - 1; i >= 0; i--) + done += fec_enet_rx_queue(ndev, budget - done, i); - ret = fec_enet_rx_queue(ndev, - budget - pkt_received, queue_id); - - if (ret < budget - pkt_received) - clear_bit(queue_id, &fep->work_rx); - - pkt_received += ret; - } - return pkt_received; + return done; } -static bool -fec_enet_collect_events(struct fec_enet_private *fep, uint int_events) +static bool fec_enet_collect_events(struct fec_enet_private *fep) { - if (int_events == 0) - return false; - - if (int_events & FEC_ENET_RXF_0) - fep->work_rx |= (1 << 2); - if (int_events & FEC_ENET_RXF_1) - fep->work_rx |= (1 << 0); - if (int_events & FEC_ENET_RXF_2) - fep->work_rx |= (1 << 1); - - if (int_events & FEC_ENET_TXF_0) - fep->work_tx |= (1 << 2); - if (int_events & FEC_ENET_TXF_1) - fep->work_tx |= (1 << 0); - if (int_events & FEC_ENET_TXF_2) - fep->work_tx |= (1 << 1); - - return true; -} - -static irqreturn_t -fec_enet_interrupt(int irq, void *dev_id) -{ - struct net_device *ndev = dev_id; - struct fec_enet_private *fep = netdev_priv(ndev); uint int_events; - irqreturn_t ret = IRQ_NONE; int_events = readl(fep->hwp + FEC_IEVENT); @@ -1653,9 +1610,18 @@ fec_enet_interrupt(int irq, void *dev_id) int_events &= ~FEC_ENET_MII; writel(int_events, fep->hwp + FEC_IEVENT); - fec_enet_collect_events(fep, int_events); - if ((fep->work_tx || fep->work_rx) && fep->link) { + return int_events != 0; +} + +static irqreturn_t +fec_enet_interrupt(int irq, void *dev_id) +{ + struct net_device *ndev = dev_id; + struct fec_enet_private *fep = netdev_priv(ndev); + irqreturn_t ret = IRQ_NONE; + + if (fec_enet_collect_events(fep) && fep->link) { ret = IRQ_HANDLED; if (napi_schedule_prep(&fep->napi)) { @@ -1672,17 +1638,19 @@ static int fec_enet_rx_napi(struct napi_struct *napi, int budget) { struct net_device *ndev = napi->dev; struct fec_enet_private *fep = netdev_priv(ndev); - int pkts; + int done = 0; - pkts = fec_enet_rx(ndev, budget); + do { + done += fec_enet_rx(ndev, budget - done); + fec_enet_tx(ndev); + } while ((done < budget) && fec_enet_collect_events(fep)); - fec_enet_tx(ndev); - - if (pkts < budget) { - napi_complete_done(napi, pkts); + if (done < budget) { + napi_complete_done(napi, done); writel(FEC_DEFAULT_IMASK, fep->hwp + FEC_IMASK); } - return pkts; + + return done; } /* ------------------------------------------------------------------------- */ From 5eff06902394425c722f0a44d9545909a8800f79 Mon Sep 17 00:00:00 2001 From: Sabrina Dubroca Date: Fri, 3 Jul 2020 17:00:32 +0200 Subject: [PATCH 214/343] ipv4: fill fl4_icmp_{type,code} in ping_v4_sendmsg IPv4 ping sockets don't set fl4.fl4_icmp_{type,code}, which leads to incomplete IPsec ACQUIRE messages being sent to userspace. Currently, both raw sockets and IPv6 ping sockets set those fields. Expected output of "ip xfrm monitor": acquire proto esp sel src 10.0.2.15/32 dst 8.8.8.8/32 proto icmp type 8 code 0 dev ens4 policy src 10.0.2.15/32 dst 8.8.8.8/32 Currently with ping sockets: acquire proto esp sel src 10.0.2.15/32 dst 8.8.8.8/32 proto icmp type 0 code 0 dev ens4 policy src 10.0.2.15/32 dst 8.8.8.8/32 The Libreswan test suite found this problem after Fedora changed the value for the sysctl net.ipv4.ping_group_range. Fixes: c319b4d76b9e ("net: ipv4: add IPPROTO_ICMP socket kind") Reported-by: Paul Wouters Tested-by: Paul Wouters Signed-off-by: Sabrina Dubroca Signed-off-by: David S. Miller --- net/ipv4/ping.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/net/ipv4/ping.c b/net/ipv4/ping.c index 535427292194..df6fbefe44d4 100644 --- a/net/ipv4/ping.c +++ b/net/ipv4/ping.c @@ -786,6 +786,9 @@ static int ping_v4_sendmsg(struct sock *sk, struct msghdr *msg, size_t len) inet_sk_flowi_flags(sk), faddr, saddr, 0, 0, sk->sk_uid); + fl4.fl4_icmp_type = user_icmph.type; + fl4.fl4_icmp_code = user_icmph.code; + security_sk_classify_flow(sk, flowi4_to_flowi(&fl4)); rt = ip_route_output_flow(net, &fl4, sk); if (IS_ERR(rt)) { From 9c29e36152748fd623fcff6cc8f538550f9eeafc Mon Sep 17 00:00:00 2001 From: Paolo Abeni Date: Fri, 3 Jul 2020 18:06:04 +0200 Subject: [PATCH 215/343] mptcp: fix DSS map generation on fin retransmission The RFC 8684 mandates that no-data DATA FIN packets should carry a DSS with 0 sequence number and data len equal to 1. Currently, on FIN retransmission we re-use the existing mapping; if the previous fin transmission was part of a partially acked data packet, we could end-up writing in the egress packet a non-compliant DSS. The above will be detected by a "Bad mapping" warning on the receiver side. This change addresses the issue explicitly checking for 0 len packet when adding the DATA_FIN option. Fixes: 6d0060f600ad ("mptcp: Write MPTCP DSS headers to outgoing data packets") Reported-by: syzbot+42a07faa5923cfaeb9c9@syzkaller.appspotmail.com Tested-by: Christoph Paasch Reviewed-by: Christoph Paasch Reviewed-by: Mat Martineau Signed-off-by: Paolo Abeni Signed-off-by: David S. Miller --- net/mptcp/options.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/net/mptcp/options.c b/net/mptcp/options.c index df9a51425c6f..8f940be42f98 100644 --- a/net/mptcp/options.c +++ b/net/mptcp/options.c @@ -449,9 +449,9 @@ static bool mptcp_established_options_mp(struct sock *sk, struct sk_buff *skb, } static void mptcp_write_data_fin(struct mptcp_subflow_context *subflow, - struct mptcp_ext *ext) + struct sk_buff *skb, struct mptcp_ext *ext) { - if (!ext->use_map) { + if (!ext->use_map || !skb->len) { /* RFC6824 requires a DSS mapping with specific values * if DATA_FIN is set but no data payload is mapped */ @@ -503,7 +503,7 @@ static bool mptcp_established_options_dss(struct sock *sk, struct sk_buff *skb, opts->ext_copy = *mpext; if (skb && tcp_fin && subflow->data_fin_tx_enable) - mptcp_write_data_fin(subflow, &opts->ext_copy); + mptcp_write_data_fin(subflow, skb, &opts->ext_copy); ret = true; } From f0b594dfa47555d8d69e6865c882d65a9054cb81 Mon Sep 17 00:00:00 2001 From: Sebastian Andrzej Siewior Date: Fri, 3 Jul 2020 18:44:32 +0200 Subject: [PATCH 216/343] net/mlx5e: Do not include rwlock.h directly rwlock.h should not be included directly. Instead linux/splinlock.h should be included. Including it directly will break the RT build. Fixes: 549c243e4e010 ("net/mlx5e: Extract neigh-specific code from en_rep.c to rep/neigh.c") Signed-off-by: Sebastian Andrzej Siewior Reviewed-by: Leon Romanovsky Acked-by: Saeed Mahameed Signed-off-by: David S. Miller --- drivers/net/ethernet/mellanox/mlx5/core/en/rep/neigh.c | 1 - 1 file changed, 1 deletion(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/rep/neigh.c b/drivers/net/ethernet/mellanox/mlx5/core/en/rep/neigh.c index baa162432e75..c3d167fa944c 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en/rep/neigh.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/rep/neigh.c @@ -6,7 +6,6 @@ #include #include #include -#include #include #include #include From 8367b3ab6e9a26dec5d5d07ea3eae17a21035322 Mon Sep 17 00:00:00 2001 From: wenxu Date: Sat, 4 Jul 2020 15:42:47 +0800 Subject: [PATCH 217/343] net/sched: act_ct: add miss tcf_lastuse_update. When tcf_ct_act execute the tcf_lastuse_update should be update or the used stats never update filter protocol ip pref 3 flower chain 0 filter protocol ip pref 3 flower chain 0 handle 0x1 eth_type ipv4 dst_ip 1.1.1.1 ip_flags frag/firstfrag skip_hw not_in_hw action order 1: ct zone 1 nat pipe index 1 ref 1 bind 1 installed 103 sec used 103 sec Action statistics: Sent 151500 bytes 101 pkt (dropped 0, overlimits 0 requeues 0) backlog 0b 0p requeues 0 cookie 4519c04dc64a1a295787aab13b6a50fb Signed-off-by: wenxu Signed-off-by: David S. Miller --- net/sched/act_ct.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/net/sched/act_ct.c b/net/sched/act_ct.c index 86ed02487467..67504aece9ae 100644 --- a/net/sched/act_ct.c +++ b/net/sched/act_ct.c @@ -925,6 +925,8 @@ static int tcf_ct_act(struct sk_buff *skb, const struct tc_action *a, force = p->ct_action & TCA_CT_ACT_FORCE; tmpl = p->tmpl; + tcf_lastuse_update(&c->tcf_tm); + if (clear) { ct = nf_ct_get(skb, &ctinfo); if (ct) { From 394de110a73395de2ca4516b0de435e91b11b604 Mon Sep 17 00:00:00 2001 From: Martin Varghese Date: Sun, 5 Jul 2020 14:23:49 +0530 Subject: [PATCH 218/343] net: Added pointer check for dst->ops->neigh_lookup in dst_neigh_lookup_skb The packets from tunnel devices (eg bareudp) may have only metadata in the dst pointer of skb. Hence a pointer check of neigh_lookup is needed in dst_neigh_lookup_skb Kernel crashes when packets from bareudp device is processed in the kernel neighbour subsytem. [ 133.384484] BUG: kernel NULL pointer dereference, address: 0000000000000000 [ 133.385240] #PF: supervisor instruction fetch in kernel mode [ 133.385828] #PF: error_code(0x0010) - not-present page [ 133.386603] PGD 0 P4D 0 [ 133.386875] Oops: 0010 [#1] SMP PTI [ 133.387275] CPU: 0 PID: 5045 Comm: ping Tainted: G W 5.8.0-rc2+ #15 [ 133.388052] Hardware name: Red Hat KVM, BIOS 0.5.1 01/01/2011 [ 133.391076] RIP: 0010:0x0 [ 133.392401] Code: Bad RIP value. [ 133.394029] RSP: 0018:ffffb79980003d50 EFLAGS: 00010246 [ 133.396656] RAX: 0000000080000102 RBX: ffff9de2fe0d6600 RCX: ffff9de2fe5e9d00 [ 133.399018] RDX: 0000000000000000 RSI: ffff9de2fe5e9d00 RDI: ffff9de2fc21b400 [ 133.399685] RBP: ffff9de2fe5e9d00 R08: 0000000000000000 R09: 0000000000000000 [ 133.400350] R10: ffff9de2fbc6be22 R11: ffff9de2fe0d6600 R12: ffff9de2fc21b400 [ 133.401010] R13: ffff9de2fe0d6628 R14: 0000000000000001 R15: 0000000000000003 [ 133.401667] FS: 00007fe014918740(0000) GS:ffff9de2fec00000(0000) knlGS:0000000000000000 [ 133.402412] CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 [ 133.402948] CR2: ffffffffffffffd6 CR3: 000000003bb72000 CR4: 00000000000006f0 [ 133.403611] DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000 [ 133.404270] DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400 [ 133.404933] Call Trace: [ 133.405169] [ 133.405367] __neigh_update+0x5a4/0x8f0 [ 133.405734] arp_process+0x294/0x820 [ 133.406076] ? __netif_receive_skb_core+0x866/0xe70 [ 133.406557] arp_rcv+0x129/0x1c0 [ 133.406882] __netif_receive_skb_one_core+0x95/0xb0 [ 133.407340] process_backlog+0xa7/0x150 [ 133.407705] net_rx_action+0x2af/0x420 [ 133.408457] __do_softirq+0xda/0x2a8 [ 133.408813] asm_call_on_stack+0x12/0x20 [ 133.409290] [ 133.409519] do_softirq_own_stack+0x39/0x50 [ 133.410036] do_softirq+0x50/0x60 [ 133.410401] __local_bh_enable_ip+0x50/0x60 [ 133.410871] ip_finish_output2+0x195/0x530 [ 133.411288] ip_output+0x72/0xf0 [ 133.411673] ? __ip_finish_output+0x1f0/0x1f0 [ 133.412122] ip_send_skb+0x15/0x40 [ 133.412471] raw_sendmsg+0x853/0xab0 [ 133.412855] ? insert_pfn+0xfe/0x270 [ 133.413827] ? vvar_fault+0xec/0x190 [ 133.414772] sock_sendmsg+0x57/0x80 [ 133.415685] __sys_sendto+0xdc/0x160 [ 133.416605] ? syscall_trace_enter+0x1d4/0x2b0 [ 133.417679] ? __audit_syscall_exit+0x1d9/0x280 [ 133.418753] ? __prepare_exit_to_usermode+0x5d/0x1a0 [ 133.419819] __x64_sys_sendto+0x24/0x30 [ 133.420848] do_syscall_64+0x4d/0x90 [ 133.421768] entry_SYSCALL_64_after_hwframe+0x44/0xa9 [ 133.422833] RIP: 0033:0x7fe013689c03 [ 133.423749] Code: Bad RIP value. [ 133.424624] RSP: 002b:00007ffc7288f418 EFLAGS: 00000246 ORIG_RAX: 000000000000002c [ 133.425940] RAX: ffffffffffffffda RBX: 000056151fc63720 RCX: 00007fe013689c03 [ 133.427225] RDX: 0000000000000040 RSI: 000056151fc63720 RDI: 0000000000000003 [ 133.428481] RBP: 00007ffc72890b30 R08: 000056151fc60500 R09: 0000000000000010 [ 133.429757] R10: 0000000000000000 R11: 0000000000000246 R12: 0000000000000040 [ 133.431041] R13: 000056151fc636e0 R14: 000056151fc616bc R15: 0000000000000080 [ 133.432481] Modules linked in: mpls_iptunnel act_mirred act_tunnel_key cls_flower sch_ingress veth mpls_router ip_tunnel bareudp ip6_udp_tunnel udp_tunnel macsec udp_diag inet_diag unix_diag af_packet_diag netlink_diag binfmt_misc xt_MASQUERADE iptable_nat xt_addrtype xt_conntrack nf_nat nf_conntrack nf_defrag_ipv6 nf_defrag_ipv4 br_netfilter bridge stp llc ebtable_filter ebtables overlay ip6table_filter ip6_tables iptable_filter sunrpc ext4 mbcache jbd2 pcspkr i2c_piix4 virtio_balloon joydev ip_tables xfs libcrc32c ata_generic qxl pata_acpi drm_ttm_helper ttm drm_kms_helper syscopyarea sysfillrect sysimgblt fb_sys_fops drm ata_piix libata virtio_net net_failover virtio_console failover virtio_blk i2c_core virtio_pci virtio_ring serio_raw floppy virtio dm_mirror dm_region_hash dm_log dm_mod [ 133.444045] CR2: 0000000000000000 [ 133.445082] ---[ end trace f4aeee1958fd1638 ]--- [ 133.446236] RIP: 0010:0x0 [ 133.447180] Code: Bad RIP value. [ 133.448152] RSP: 0018:ffffb79980003d50 EFLAGS: 00010246 [ 133.449363] RAX: 0000000080000102 RBX: ffff9de2fe0d6600 RCX: ffff9de2fe5e9d00 [ 133.450835] RDX: 0000000000000000 RSI: ffff9de2fe5e9d00 RDI: ffff9de2fc21b400 [ 133.452237] RBP: ffff9de2fe5e9d00 R08: 0000000000000000 R09: 0000000000000000 [ 133.453722] R10: ffff9de2fbc6be22 R11: ffff9de2fe0d6600 R12: ffff9de2fc21b400 [ 133.455149] R13: ffff9de2fe0d6628 R14: 0000000000000001 R15: 0000000000000003 [ 133.456520] FS: 00007fe014918740(0000) GS:ffff9de2fec00000(0000) knlGS:0000000000000000 [ 133.458046] CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 [ 133.459342] CR2: ffffffffffffffd6 CR3: 000000003bb72000 CR4: 00000000000006f0 [ 133.460782] DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000 [ 133.462240] DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400 [ 133.463697] Kernel panic - not syncing: Fatal exception in interrupt [ 133.465226] Kernel Offset: 0xfa00000 from 0xffffffff81000000 (relocation range: 0xffffffff80000000-0xffffffffbfffffff) [ 133.467025] ---[ end Kernel panic - not syncing: Fatal exception in interrupt ]--- Fixes: aaa0c23cb901 ("Fix dst_neigh_lookup/dst_neigh_lookup_skb return value handling bug") Signed-off-by: Martin Varghese Signed-off-by: David S. Miller --- include/net/dst.h | 10 +++++++++- 1 file changed, 9 insertions(+), 1 deletion(-) diff --git a/include/net/dst.h b/include/net/dst.h index 07adfacd8088..852d8fb36ab7 100644 --- a/include/net/dst.h +++ b/include/net/dst.h @@ -400,7 +400,15 @@ static inline struct neighbour *dst_neigh_lookup(const struct dst_entry *dst, co static inline struct neighbour *dst_neigh_lookup_skb(const struct dst_entry *dst, struct sk_buff *skb) { - struct neighbour *n = dst->ops->neigh_lookup(dst, skb, NULL); + struct neighbour *n = NULL; + + /* The packets from tunnel devices (eg bareudp) may have only + * metadata in the dst pointer of skb. Hence a pointer check of + * neigh_lookup is needed. + */ + if (dst->ops->neigh_lookup) + n = dst->ops->neigh_lookup(dst, skb, NULL); + return IS_ERR(n) ? NULL : n; } From 5fc6266af7b427243da24f3443a50cd4584aac06 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Linus=20L=C3=BCssing?= Date: Sun, 5 Jul 2020 21:10:17 +0200 Subject: [PATCH 219/343] bridge: mcast: Fix MLD2 Report IPv6 payload length check MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Commit e57f61858b7c ("net: bridge: mcast: fix stale nsrcs pointer in igmp3/mld2 report handling") introduced a bug in the IPv6 header payload length check which would potentially lead to rejecting a valid MLD2 Report: The check needs to take into account the 2 bytes for the "Number of Sources" field in the "Multicast Address Record" before reading it. And not the size of a pointer to this field. Fixes: e57f61858b7c ("net: bridge: mcast: fix stale nsrcs pointer in igmp3/mld2 report handling") Acked-by: Nikolay Aleksandrov Signed-off-by: Linus Lüssing Signed-off-by: David S. Miller --- net/bridge/br_multicast.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/bridge/br_multicast.c b/net/bridge/br_multicast.c index 83490bf73a13..4c4a93abde68 100644 --- a/net/bridge/br_multicast.c +++ b/net/bridge/br_multicast.c @@ -1007,7 +1007,7 @@ static int br_ip6_multicast_mld2_report(struct net_bridge *br, nsrcs_offset = len + offsetof(struct mld2_grec, grec_nsrcs); if (skb_transport_offset(skb) + ipv6_transport_len(skb) < - nsrcs_offset + sizeof(_nsrcs)) + nsrcs_offset + sizeof(__nsrcs)) return -EINVAL; _nsrcs = skb_header_pointer(skb, nsrcs_offset, From da3287111ab43b32cec54d7ca6b48640f210a196 Mon Sep 17 00:00:00 2001 From: Alexander Lobakin Date: Mon, 6 Jul 2020 12:25:53 +0300 Subject: [PATCH 220/343] net: qed: fix buffer overflow on ethtool -d When generating debug dump, driver firstly collects all data in binary form, and then performs per-feature formatting to human-readable if it is supported. For ethtool -d, this is roughly incorrect for two reasons. First of all, drivers should always provide only original raw dumps to Ethtool without any changes. The second, and more critical, is that Ethtool's output buffer size is strictly determined by ethtool_ops::get_regs_len(), and all data *must* fit in it. The current version of driver always returns the size of raw data, but the size of the formatted buffer exceeds it in most cases. This leads to out-of-bound writes and memory corruption. Address both issues by adding an option to return original, non-formatted debug data, and using it for Ethtool case. v2: - Expand commit message to make it more clear; - No functional changes. Fixes: c965db444629 ("qed: Add support for debug data collection") Signed-off-by: Alexander Lobakin Signed-off-by: Igor Russkikh Signed-off-by: David S. Miller --- drivers/net/ethernet/qlogic/qed/qed.h | 2 ++ drivers/net/ethernet/qlogic/qed/qed_debug.c | 13 ++++++++++++- 2 files changed, 14 insertions(+), 1 deletion(-) diff --git a/drivers/net/ethernet/qlogic/qed/qed.h b/drivers/net/ethernet/qlogic/qed/qed.h index a49743d56b9c..6c2f9ff4a53e 100644 --- a/drivers/net/ethernet/qlogic/qed/qed.h +++ b/drivers/net/ethernet/qlogic/qed/qed.h @@ -876,6 +876,8 @@ struct qed_dev { struct qed_dbg_feature dbg_features[DBG_FEATURE_NUM]; u8 engine_for_debug; bool disable_ilt_dump; + bool dbg_bin_dump; + DECLARE_HASHTABLE(connections, 10); const struct firmware *firmware; diff --git a/drivers/net/ethernet/qlogic/qed/qed_debug.c b/drivers/net/ethernet/qlogic/qed/qed_debug.c index 81e8fbe4a05b..cb80863d5a77 100644 --- a/drivers/net/ethernet/qlogic/qed/qed_debug.c +++ b/drivers/net/ethernet/qlogic/qed/qed_debug.c @@ -7506,6 +7506,12 @@ static enum dbg_status format_feature(struct qed_hwfn *p_hwfn, if (p_hwfn->cdev->print_dbg_data) qed_dbg_print_feature(text_buf, text_size_bytes); + /* Just return the original binary buffer if requested */ + if (p_hwfn->cdev->dbg_bin_dump) { + vfree(text_buf); + return DBG_STATUS_OK; + } + /* Free the old dump_buf and point the dump_buf to the newly allocagted * and formatted text buffer. */ @@ -7733,7 +7739,9 @@ int qed_dbg_mcp_trace_size(struct qed_dev *cdev) #define REGDUMP_HEADER_SIZE_SHIFT 0 #define REGDUMP_HEADER_SIZE_MASK 0xffffff #define REGDUMP_HEADER_FEATURE_SHIFT 24 -#define REGDUMP_HEADER_FEATURE_MASK 0x3f +#define REGDUMP_HEADER_FEATURE_MASK 0x1f +#define REGDUMP_HEADER_BIN_DUMP_SHIFT 29 +#define REGDUMP_HEADER_BIN_DUMP_MASK 0x1 #define REGDUMP_HEADER_OMIT_ENGINE_SHIFT 30 #define REGDUMP_HEADER_OMIT_ENGINE_MASK 0x1 #define REGDUMP_HEADER_ENGINE_SHIFT 31 @@ -7771,6 +7779,7 @@ static u32 qed_calc_regdump_header(struct qed_dev *cdev, feature, feature_size); SET_FIELD(res, REGDUMP_HEADER_FEATURE, feature); + SET_FIELD(res, REGDUMP_HEADER_BIN_DUMP, 1); SET_FIELD(res, REGDUMP_HEADER_OMIT_ENGINE, omit_engine); SET_FIELD(res, REGDUMP_HEADER_ENGINE, engine); @@ -7794,6 +7803,7 @@ int qed_dbg_all_data(struct qed_dev *cdev, void *buffer) omit_engine = 1; mutex_lock(&qed_dbg_lock); + cdev->dbg_bin_dump = true; org_engine = qed_get_debug_engine(cdev); for (cur_engine = 0; cur_engine < cdev->num_hwfns; cur_engine++) { @@ -7993,6 +8003,7 @@ int qed_dbg_all_data(struct qed_dev *cdev, void *buffer) QED_NVM_IMAGE_MDUMP, "QED_NVM_IMAGE_MDUMP", rc); } + cdev->dbg_bin_dump = false; mutex_unlock(&qed_dbg_lock); return 0; From 469aceddfa3ed16e17ee30533fae45e90f62efd8 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Toke=20H=C3=B8iland-J=C3=B8rgensen?= Date: Tue, 7 Jul 2020 13:03:25 +0200 Subject: [PATCH 221/343] vlan: consolidate VLAN parsing code and limit max parsing depth MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Toshiaki pointed out that we now have two very similar functions to extract the L3 protocol number in the presence of VLAN tags. And Daniel pointed out that the unbounded parsing loop makes it possible for maliciously crafted packets to loop through potentially hundreds of tags. Fix both of these issues by consolidating the two parsing functions and limiting the VLAN tag parsing to a max depth of 8 tags. As part of this, switch over __vlan_get_protocol() to use skb_header_pointer() instead of pskb_may_pull(), to avoid the possible side effects of the latter and keep the skb pointer 'const' through all the parsing functions. v2: - Use limit of 8 tags instead of 32 (matching XMIT_RECURSION_LIMIT) Reported-by: Toshiaki Makita Reported-by: Daniel Borkmann Fixes: d7bf2ebebc2b ("sched: consistently handle layer3 header accesses in the presence of VLANs") Signed-off-by: Toke Høiland-Jørgensen Signed-off-by: David S. Miller --- include/linux/if_vlan.h | 57 ++++++++++++++++------------------------- 1 file changed, 22 insertions(+), 35 deletions(-) diff --git a/include/linux/if_vlan.h b/include/linux/if_vlan.h index 427a5b8597c2..41a518336673 100644 --- a/include/linux/if_vlan.h +++ b/include/linux/if_vlan.h @@ -25,6 +25,8 @@ #define VLAN_ETH_DATA_LEN 1500 /* Max. octets in payload */ #define VLAN_ETH_FRAME_LEN 1518 /* Max. octets in frame sans FCS */ +#define VLAN_MAX_DEPTH 8 /* Max. number of nested VLAN tags parsed */ + /* * struct vlan_hdr - vlan header * @h_vlan_TCI: priority and VLAN ID @@ -308,34 +310,6 @@ static inline bool eth_type_vlan(__be16 ethertype) } } -/* A getter for the SKB protocol field which will handle VLAN tags consistently - * whether VLAN acceleration is enabled or not. - */ -static inline __be16 skb_protocol(const struct sk_buff *skb, bool skip_vlan) -{ - unsigned int offset = skb_mac_offset(skb) + sizeof(struct ethhdr); - __be16 proto = skb->protocol; - - if (!skip_vlan) - /* VLAN acceleration strips the VLAN header from the skb and - * moves it to skb->vlan_proto - */ - return skb_vlan_tag_present(skb) ? skb->vlan_proto : proto; - - while (eth_type_vlan(proto)) { - struct vlan_hdr vhdr, *vh; - - vh = skb_header_pointer(skb, offset, sizeof(vhdr), &vhdr); - if (!vh) - break; - - proto = vh->h_vlan_encapsulated_proto; - offset += sizeof(vhdr); - } - - return proto; -} - static inline bool vlan_hw_offload_capable(netdev_features_t features, __be16 proto) { @@ -605,10 +579,10 @@ static inline int vlan_get_tag(const struct sk_buff *skb, u16 *vlan_tci) * Returns the EtherType of the packet, regardless of whether it is * vlan encapsulated (normal or hardware accelerated) or not. */ -static inline __be16 __vlan_get_protocol(struct sk_buff *skb, __be16 type, +static inline __be16 __vlan_get_protocol(const struct sk_buff *skb, __be16 type, int *depth) { - unsigned int vlan_depth = skb->mac_len; + unsigned int vlan_depth = skb->mac_len, parse_depth = VLAN_MAX_DEPTH; /* if type is 802.1Q/AD then the header should already be * present at mac_len - VLAN_HLEN (if mac_len > 0), or at @@ -623,13 +597,12 @@ static inline __be16 __vlan_get_protocol(struct sk_buff *skb, __be16 type, vlan_depth = ETH_HLEN; } do { - struct vlan_hdr *vh; + struct vlan_hdr vhdr, *vh; - if (unlikely(!pskb_may_pull(skb, - vlan_depth + VLAN_HLEN))) + vh = skb_header_pointer(skb, vlan_depth, sizeof(vhdr), &vhdr); + if (unlikely(!vh || !--parse_depth)) return 0; - vh = (struct vlan_hdr *)(skb->data + vlan_depth); type = vh->h_vlan_encapsulated_proto; vlan_depth += VLAN_HLEN; } while (eth_type_vlan(type)); @@ -648,11 +621,25 @@ static inline __be16 __vlan_get_protocol(struct sk_buff *skb, __be16 type, * Returns the EtherType of the packet, regardless of whether it is * vlan encapsulated (normal or hardware accelerated) or not. */ -static inline __be16 vlan_get_protocol(struct sk_buff *skb) +static inline __be16 vlan_get_protocol(const struct sk_buff *skb) { return __vlan_get_protocol(skb, skb->protocol, NULL); } +/* A getter for the SKB protocol field which will handle VLAN tags consistently + * whether VLAN acceleration is enabled or not. + */ +static inline __be16 skb_protocol(const struct sk_buff *skb, bool skip_vlan) +{ + if (!skip_vlan) + /* VLAN acceleration strips the VLAN header from the skb and + * moves it to skb->vlan_proto + */ + return skb_vlan_tag_present(skb) ? skb->vlan_proto : skb->protocol; + + return vlan_get_protocol(skb); +} + static inline void vlan_set_encap_proto(struct sk_buff *skb, struct vlan_hdr *vhdr) { From 086c18f2452d0028f81e319f098bcb8e53133dbf Mon Sep 17 00:00:00 2001 From: Shannon Nelson Date: Tue, 7 Jul 2020 14:13:26 -0700 Subject: [PATCH 222/343] ionic: centralize queue reset code The queue reset pattern is used in a couple different places, only slightly different from each other, and could cause issues if one gets changed and the other didn't. This puts them together so that only one version is needed, yet each can have slighty different effects by passing in a pointer to a work function to do whatever configuration twiddling is needed in the middle of the reset. This specifically addresses issues seen where under loops of changing ring size or queue count parameters we could occasionally bump into the netdev watchdog. v2: added more commit message commentary Fixes: 4d03e00a2140 ("ionic: Add initial ethtool support") Signed-off-by: Shannon Nelson Acked-by: Jakub Kicinski Signed-off-by: David S. Miller --- .../ethernet/pensando/ionic/ionic_ethtool.c | 52 ++++++------------- .../net/ethernet/pensando/ionic/ionic_lif.c | 17 ++++-- .../net/ethernet/pensando/ionic/ionic_lif.h | 4 +- 3 files changed, 32 insertions(+), 41 deletions(-) diff --git a/drivers/net/ethernet/pensando/ionic/ionic_ethtool.c b/drivers/net/ethernet/pensando/ionic/ionic_ethtool.c index f7e3ce3de04d..e03ea9b18f95 100644 --- a/drivers/net/ethernet/pensando/ionic/ionic_ethtool.c +++ b/drivers/net/ethernet/pensando/ionic/ionic_ethtool.c @@ -468,12 +468,18 @@ static void ionic_get_ringparam(struct net_device *netdev, ring->rx_pending = lif->nrxq_descs; } +static void ionic_set_ringsize(struct ionic_lif *lif, void *arg) +{ + struct ethtool_ringparam *ring = arg; + + lif->ntxq_descs = ring->tx_pending; + lif->nrxq_descs = ring->rx_pending; +} + static int ionic_set_ringparam(struct net_device *netdev, struct ethtool_ringparam *ring) { struct ionic_lif *lif = netdev_priv(netdev); - bool running; - int err; if (ring->rx_mini_pending || ring->rx_jumbo_pending) { netdev_info(netdev, "Changing jumbo or mini descriptors not supported\n"); @@ -491,22 +497,7 @@ static int ionic_set_ringparam(struct net_device *netdev, ring->rx_pending == lif->nrxq_descs) return 0; - err = ionic_wait_for_bit(lif, IONIC_LIF_F_QUEUE_RESET); - if (err) - return err; - - running = test_bit(IONIC_LIF_F_UP, lif->state); - if (running) - ionic_stop(netdev); - - lif->ntxq_descs = ring->tx_pending; - lif->nrxq_descs = ring->rx_pending; - - if (running) - ionic_open(netdev); - clear_bit(IONIC_LIF_F_QUEUE_RESET, lif->state); - - return 0; + return ionic_reset_queues(lif, ionic_set_ringsize, ring); } static void ionic_get_channels(struct net_device *netdev, @@ -521,12 +512,17 @@ static void ionic_get_channels(struct net_device *netdev, ch->combined_count = lif->nxqs; } +static void ionic_set_queuecount(struct ionic_lif *lif, void *arg) +{ + struct ethtool_channels *ch = arg; + + lif->nxqs = ch->combined_count; +} + static int ionic_set_channels(struct net_device *netdev, struct ethtool_channels *ch) { struct ionic_lif *lif = netdev_priv(netdev); - bool running; - int err; if (!ch->combined_count || ch->other_count || ch->rx_count || ch->tx_count) @@ -535,21 +531,7 @@ static int ionic_set_channels(struct net_device *netdev, if (ch->combined_count == lif->nxqs) return 0; - err = ionic_wait_for_bit(lif, IONIC_LIF_F_QUEUE_RESET); - if (err) - return err; - - running = test_bit(IONIC_LIF_F_UP, lif->state); - if (running) - ionic_stop(netdev); - - lif->nxqs = ch->combined_count; - - if (running) - ionic_open(netdev); - clear_bit(IONIC_LIF_F_QUEUE_RESET, lif->state); - - return 0; + return ionic_reset_queues(lif, ionic_set_queuecount, ch); } static u32 ionic_get_priv_flags(struct net_device *netdev) diff --git a/drivers/net/ethernet/pensando/ionic/ionic_lif.c b/drivers/net/ethernet/pensando/ionic/ionic_lif.c index 3c9dde31f3fa..f49486b6d04d 100644 --- a/drivers/net/ethernet/pensando/ionic/ionic_lif.c +++ b/drivers/net/ethernet/pensando/ionic/ionic_lif.c @@ -1313,7 +1313,7 @@ static int ionic_change_mtu(struct net_device *netdev, int new_mtu) return err; netdev->mtu = new_mtu; - err = ionic_reset_queues(lif); + err = ionic_reset_queues(lif, NULL, NULL); return err; } @@ -1325,7 +1325,7 @@ static void ionic_tx_timeout_work(struct work_struct *ws) netdev_info(lif->netdev, "Tx Timeout recovery\n"); rtnl_lock(); - ionic_reset_queues(lif); + ionic_reset_queues(lif, NULL, NULL); rtnl_unlock(); } @@ -1988,7 +1988,7 @@ static const struct net_device_ops ionic_netdev_ops = { .ndo_get_vf_stats = ionic_get_vf_stats, }; -int ionic_reset_queues(struct ionic_lif *lif) +int ionic_reset_queues(struct ionic_lif *lif, ionic_reset_cb cb, void *arg) { bool running; int err = 0; @@ -2001,12 +2001,19 @@ int ionic_reset_queues(struct ionic_lif *lif) if (running) { netif_device_detach(lif->netdev); err = ionic_stop(lif->netdev); + if (err) + goto reset_out; } - if (!err && running) { - ionic_open(lif->netdev); + + if (cb) + cb(lif, arg); + + if (running) { + err = ionic_open(lif->netdev); netif_device_attach(lif->netdev); } +reset_out: clear_bit(IONIC_LIF_F_QUEUE_RESET, lif->state); return err; diff --git a/drivers/net/ethernet/pensando/ionic/ionic_lif.h b/drivers/net/ethernet/pensando/ionic/ionic_lif.h index c3428034a17b..ed126dd74e01 100644 --- a/drivers/net/ethernet/pensando/ionic/ionic_lif.h +++ b/drivers/net/ethernet/pensando/ionic/ionic_lif.h @@ -248,6 +248,8 @@ static inline u32 ionic_coal_hw_to_usec(struct ionic *ionic, u32 units) return (units * div) / mult; } +typedef void (*ionic_reset_cb)(struct ionic_lif *lif, void *arg); + void ionic_link_status_check_request(struct ionic_lif *lif); void ionic_get_stats64(struct net_device *netdev, struct rtnl_link_stats64 *ns); @@ -267,7 +269,7 @@ int ionic_lif_rss_config(struct ionic_lif *lif, u16 types, int ionic_open(struct net_device *netdev); int ionic_stop(struct net_device *netdev); -int ionic_reset_queues(struct ionic_lif *lif); +int ionic_reset_queues(struct ionic_lif *lif, ionic_reset_cb cb, void *arg); static inline void debug_stats_txq_post(struct ionic_qcq *qcq, struct ionic_txq_desc *desc, bool dbell) From 4ef9b4f1a76ea2370fbfe20e80fef141ab92b65e Mon Sep 17 00:00:00 2001 From: Steve French Date: Tue, 7 Jul 2020 18:08:46 -0500 Subject: [PATCH 223/343] smb3: fix access denied on change notify request to some servers read permission, not just read attributes permission, is required on the directory. See MS-SMB2 (protocol specification) section 3.3.5.19. Signed-off-by: Steve French CC: Stable # v5.6+ Reviewed-by: Pavel Shilovsky --- fs/cifs/smb2ops.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fs/cifs/smb2ops.c b/fs/cifs/smb2ops.c index d9fdafa5eb60..32f90dc82c84 100644 --- a/fs/cifs/smb2ops.c +++ b/fs/cifs/smb2ops.c @@ -2148,7 +2148,7 @@ smb3_notify(const unsigned int xid, struct file *pfile, tcon = cifs_sb_master_tcon(cifs_sb); oparms.tcon = tcon; - oparms.desired_access = FILE_READ_ATTRIBUTES; + oparms.desired_access = FILE_READ_ATTRIBUTES | FILE_READ_DATA; oparms.disposition = FILE_OPEN; oparms.create_options = cifs_create_options(cifs_sb, 0); oparms.fid = &fid; From 5a383d443b29a140094430f3ad1d02fa1acc2b80 Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Mon, 6 Jul 2020 18:01:38 +0100 Subject: [PATCH 224/343] drm/i915/gt: Pin the rings before marking active On eviction, we acquire the vm->mutex and then wait on the vma->active. Therefore when binding and pinning the vma, we must follow the same sequence, lock/pin the vma then mark it active. Otherwise, we mark the vma as active, then wait for the vm->mutex, and meanwhile the evictor holding the mutex waits upon us to complete our activity. Fixes: 8ccfc20a7d56 ("drm/i915/gt: Mark ring->vma as active while pinned") Signed-off-by: Chris Wilson Cc: Tvrtko Ursulin Cc: # v5.6+ Reviewed-by: Matthew Auld Link: https://patchwork.freedesktop.org/patch/msgid/20200706170138.8993-1-chris@chris-wilson.co.uk (cherry picked from commit 8567774e87e23a57155e5102f81208729b992ae6) Signed-off-by: Rodrigo Vivi --- drivers/gpu/drm/i915/gt/intel_context.c | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/drivers/gpu/drm/i915/gt/intel_context.c b/drivers/gpu/drm/i915/gt/intel_context.c index e4aece20bc80..52db2bde44a3 100644 --- a/drivers/gpu/drm/i915/gt/intel_context.c +++ b/drivers/gpu/drm/i915/gt/intel_context.c @@ -204,25 +204,25 @@ static int __ring_active(struct intel_ring *ring) { int err; - err = i915_active_acquire(&ring->vma->active); + err = intel_ring_pin(ring); if (err) return err; - err = intel_ring_pin(ring); + err = i915_active_acquire(&ring->vma->active); if (err) - goto err_active; + goto err_pin; return 0; -err_active: - i915_active_release(&ring->vma->active); +err_pin: + intel_ring_unpin(ring); return err; } static void __ring_retire(struct intel_ring *ring) { - intel_ring_unpin(ring); i915_active_release(&ring->vma->active); + intel_ring_unpin(ring); } __i915_active_call From 72923e24f98aa5d99adeb83b7b4f0ec1496e5b5e Mon Sep 17 00:00:00 2001 From: Ben Skeggs Date: Wed, 17 Jun 2020 11:08:41 +1000 Subject: [PATCH 225/343] drm/nouveau/kms/nv50-: bail from nv50_audio_disable() early if audio not enabled Prevents "snd_hda_codec_hdmi hdaudioC1D0: HDMI: pin nid 5 not registered" that occur on some configurations. Signed-off-by: Ben Skeggs --- drivers/gpu/drm/nouveau/dispnv50/disp.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/gpu/drm/nouveau/dispnv50/disp.c b/drivers/gpu/drm/nouveau/dispnv50/disp.c index d472942102f5..519f99868e35 100644 --- a/drivers/gpu/drm/nouveau/dispnv50/disp.c +++ b/drivers/gpu/drm/nouveau/dispnv50/disp.c @@ -601,6 +601,9 @@ nv50_audio_disable(struct drm_encoder *encoder, struct nouveau_crtc *nv_crtc) (0x0100 << nv_crtc->index), }; + if (!nv_encoder->audio) + return; + nv_encoder->audio = false; nvif_mthd(&disp->disp->object, 0, &args, sizeof(args)); From 0156e76d388310a490aeb0f2fbb5b284ded3aecc Mon Sep 17 00:00:00 2001 From: Ben Skeggs Date: Tue, 23 Jun 2020 16:39:13 +1000 Subject: [PATCH 226/343] drm/nouveau/i2c/g94-: increase NV_PMGR_DP_AUXCTL_TRANSACTREQ timeout Tegra TRM says worst-case reply time is 1216us, and this should fix some spurious timeouts that have been popping up. Signed-off-by: Ben Skeggs --- drivers/gpu/drm/nouveau/nvkm/subdev/i2c/auxg94.c | 4 ++-- drivers/gpu/drm/nouveau/nvkm/subdev/i2c/auxgm200.c | 4 ++-- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/drivers/gpu/drm/nouveau/nvkm/subdev/i2c/auxg94.c b/drivers/gpu/drm/nouveau/nvkm/subdev/i2c/auxg94.c index c8ab1b5741a3..db7769cb33eb 100644 --- a/drivers/gpu/drm/nouveau/nvkm/subdev/i2c/auxg94.c +++ b/drivers/gpu/drm/nouveau/nvkm/subdev/i2c/auxg94.c @@ -118,10 +118,10 @@ g94_i2c_aux_xfer(struct nvkm_i2c_aux *obj, bool retry, if (retries) udelay(400); - /* transaction request, wait up to 1ms for it to complete */ + /* transaction request, wait up to 2ms for it to complete */ nvkm_wr32(device, 0x00e4e4 + base, 0x00010000 | ctrl); - timeout = 1000; + timeout = 2000; do { ctrl = nvkm_rd32(device, 0x00e4e4 + base); udelay(1); diff --git a/drivers/gpu/drm/nouveau/nvkm/subdev/i2c/auxgm200.c b/drivers/gpu/drm/nouveau/nvkm/subdev/i2c/auxgm200.c index 7ef60895f43a..edb6148cbca0 100644 --- a/drivers/gpu/drm/nouveau/nvkm/subdev/i2c/auxgm200.c +++ b/drivers/gpu/drm/nouveau/nvkm/subdev/i2c/auxgm200.c @@ -118,10 +118,10 @@ gm200_i2c_aux_xfer(struct nvkm_i2c_aux *obj, bool retry, if (retries) udelay(400); - /* transaction request, wait up to 1ms for it to complete */ + /* transaction request, wait up to 2ms for it to complete */ nvkm_wr32(device, 0x00d954 + base, 0x00010000 | ctrl); - timeout = 1000; + timeout = 2000; do { ctrl = nvkm_rd32(device, 0x00d954 + base); udelay(1); From ad61f5f5e02e0aaf667efcd0c44ba5d70ea0b356 Mon Sep 17 00:00:00 2001 From: Ralph Campbell Date: Mon, 22 Jun 2020 16:38:52 -0700 Subject: [PATCH 227/343] drm/nouveau/svm: fix migrate page regression The patch to add zero page migration to GPU memory inadvertently included part of a future change which broke normal page migration to GPU memory by copying too much data and corrupting GPU memory. Fix this by only copying one page instead of a byte count. Fixes: 9d4296a7d4b3 ("drm/nouveau/nouveau/hmm: fix migrate zero page to GPU") Signed-off-by: Ralph Campbell Reviewed-by: John Hubbard Signed-off-by: Ben Skeggs --- drivers/gpu/drm/nouveau/nouveau_dmem.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/nouveau/nouveau_dmem.c b/drivers/gpu/drm/nouveau/nouveau_dmem.c index e5c230d9ae24..cc9993837508 100644 --- a/drivers/gpu/drm/nouveau/nouveau_dmem.c +++ b/drivers/gpu/drm/nouveau/nouveau_dmem.c @@ -550,7 +550,7 @@ static unsigned long nouveau_dmem_migrate_copy_one(struct nouveau_drm *drm, DMA_BIDIRECTIONAL); if (dma_mapping_error(dev, *dma_addr)) goto out_free_page; - if (drm->dmem->migrate.copy_func(drm, page_size(spage), + if (drm->dmem->migrate.copy_func(drm, 1, NOUVEAU_APER_VRAM, paddr, NOUVEAU_APER_HOST, *dma_addr)) goto out_dma_unmap; } else { From ed710a6ed797430026aa5116dd0ab22378798b69 Mon Sep 17 00:00:00 2001 From: Ralph Campbell Date: Fri, 26 Jun 2020 14:03:37 -0700 Subject: [PATCH 228/343] drm/nouveau/nouveau: fix page fault on device private memory If system memory is migrated to device private memory and no GPU MMU page table entry exists, the GPU will fault and call hmm_range_fault() to get the PFN for the page. Since the .dev_private_owner pointer in struct hmm_range is not set, hmm_range_fault returns an error which results in the GPU program stopping with a fatal fault. Fix this by setting .dev_private_owner appropriately. Fixes: 08ddddda667b ("mm/hmm: check the device private page owner in hmm_range_fault()") Cc: stable@vger.kernel.org Signed-off-by: Ralph Campbell Reviewed-by: Jason Gunthorpe Signed-off-by: Ben Skeggs --- drivers/gpu/drm/nouveau/nouveau_svm.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/gpu/drm/nouveau/nouveau_svm.c b/drivers/gpu/drm/nouveau/nouveau_svm.c index ba9f9359c30e..6586d9d39874 100644 --- a/drivers/gpu/drm/nouveau/nouveau_svm.c +++ b/drivers/gpu/drm/nouveau/nouveau_svm.c @@ -562,6 +562,7 @@ static int nouveau_range_fault(struct nouveau_svmm *svmm, .end = notifier->notifier.interval_tree.last + 1, .pfn_flags_mask = HMM_PFN_REQ_FAULT | HMM_PFN_REQ_WRITE, .hmm_pfns = hmm_pfns, + .dev_private_owner = drm->dev, }; struct mm_struct *mm = notifier->notifier.mm; int ret; From 0fd181456aa0826057adbfb6c79c40f4083cfd75 Mon Sep 17 00:00:00 2001 From: Johannes Thumshirn Date: Wed, 1 Jul 2020 22:14:54 +0900 Subject: [PATCH 229/343] scsi: mpt3sas: Fix error returns in BRM_status_show BRM_status_show() has several error branches, but none of them record the error in the error return. Also while at it remove the manual mutex_unlock() of the pci_access_mutex in case of an ongoing pci error recovery or host removal and jump to the cleanup label instead. Note: We can safely jump to out from here as io_unit_pg3 is initialized to NULL and if it hasn't been allocated, kfree() skips the NULL pointer. [mkp: compilation warning] Link: https://lore.kernel.org/r/20200701131454.5255-1-johannes.thumshirn@wdc.com Reviewed-by: Damien Le Moal Acked-by: Sreekanth Reddy Signed-off-by: Johannes Thumshirn Signed-off-by: Martin K. Petersen --- drivers/scsi/mpt3sas/mpt3sas_ctl.c | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) diff --git a/drivers/scsi/mpt3sas/mpt3sas_ctl.c b/drivers/scsi/mpt3sas/mpt3sas_ctl.c index e94e72de2fc6..983e568ff231 100644 --- a/drivers/scsi/mpt3sas/mpt3sas_ctl.c +++ b/drivers/scsi/mpt3sas/mpt3sas_ctl.c @@ -3149,15 +3149,14 @@ BRM_status_show(struct device *cdev, struct device_attribute *attr, } /* pci_access_mutex lock acquired by sysfs show path */ mutex_lock(&ioc->pci_access_mutex); - if (ioc->pci_error_recovery || ioc->remove_host) { - mutex_unlock(&ioc->pci_access_mutex); - return 0; - } + if (ioc->pci_error_recovery || ioc->remove_host) + goto out; /* allocate upto GPIOVal 36 entries */ sz = offsetof(Mpi2IOUnitPage3_t, GPIOVal) + (sizeof(u16) * 36); io_unit_pg3 = kzalloc(sz, GFP_KERNEL); if (!io_unit_pg3) { + rc = -ENOMEM; ioc_err(ioc, "%s: failed allocating memory for iounit_pg3: (%d) bytes\n", __func__, sz); goto out; @@ -3167,6 +3166,7 @@ BRM_status_show(struct device *cdev, struct device_attribute *attr, 0) { ioc_err(ioc, "%s: failed reading iounit_pg3\n", __func__); + rc = -EINVAL; goto out; } @@ -3174,12 +3174,14 @@ BRM_status_show(struct device *cdev, struct device_attribute *attr, if (ioc_status != MPI2_IOCSTATUS_SUCCESS) { ioc_err(ioc, "%s: iounit_pg3 failed with ioc_status(0x%04x)\n", __func__, ioc_status); + rc = -EINVAL; goto out; } if (io_unit_pg3->GPIOCount < 25) { ioc_err(ioc, "%s: iounit_pg3->GPIOCount less than 25 entries, detected (%d) entries\n", __func__, io_unit_pg3->GPIOCount); + rc = -EINVAL; goto out; } From 2e98c01846b2ce96cc41f60b70fd03cd47d3ac77 Mon Sep 17 00:00:00 2001 From: yangerkun Date: Thu, 2 Jul 2020 15:25:26 +0800 Subject: [PATCH 230/343] cifs: remove the retry in cifs_poxis_lock_set The caller of cifs_posix_lock_set will do retry(like fcntl_setlk64->do_lock_file_wait) if we will wait for any file_lock. So the retry in cifs_poxis_lock_set seems duplicated, remove it to make a cleanup. Signed-off-by: yangerkun Signed-off-by: Steve French Reviewed-by: NeilBrown --- fs/cifs/file.c | 19 ++++++------------- 1 file changed, 6 insertions(+), 13 deletions(-) diff --git a/fs/cifs/file.c b/fs/cifs/file.c index 9b0f8f33f832..be46fab4c96d 100644 --- a/fs/cifs/file.c +++ b/fs/cifs/file.c @@ -1149,20 +1149,20 @@ cifs_posix_lock_test(struct file *file, struct file_lock *flock) /* * Set the byte-range lock (posix style). Returns: - * 1) 0, if we set the lock and don't need to request to the server; - * 2) 1, if we need to request to the server; - * 3) <0, if the error occurs while setting the lock. + * 1) <0, if the error occurs while setting the lock; + * 2) 0, if we set the lock and don't need to request to the server; + * 3) FILE_LOCK_DEFERRED, if we will wait for some other file_lock; + * 4) FILE_LOCK_DEFERRED + 1, if we need to request to the server. */ static int cifs_posix_lock_set(struct file *file, struct file_lock *flock) { struct cifsInodeInfo *cinode = CIFS_I(file_inode(file)); - int rc = 1; + int rc = FILE_LOCK_DEFERRED + 1; if ((flock->fl_flags & FL_POSIX) == 0) return rc; -try_again: cifs_down_write(&cinode->lock_sem); if (!cinode->can_cache_brlcks) { up_write(&cinode->lock_sem); @@ -1171,13 +1171,6 @@ cifs_posix_lock_set(struct file *file, struct file_lock *flock) rc = posix_lock_file(file, flock, NULL); up_write(&cinode->lock_sem); - if (rc == FILE_LOCK_DEFERRED) { - rc = wait_event_interruptible(flock->fl_wait, - list_empty(&flock->fl_blocked_member)); - if (!rc) - goto try_again; - locks_delete_block(flock); - } return rc; } @@ -1652,7 +1645,7 @@ cifs_setlk(struct file *file, struct file_lock *flock, __u32 type, int posix_lock_type; rc = cifs_posix_lock_set(file, flock); - if (!rc || rc < 0) + if (rc <= FILE_LOCK_DEFERRED) return rc; if (type & server->vals->shared_lock_type) From e094fd346021b820f37188aaa6b502c7490ab5b5 Mon Sep 17 00:00:00 2001 From: Steve Schremmer Date: Tue, 7 Jul 2020 17:07:22 +0000 Subject: [PATCH 231/343] scsi: dh: Add Fujitsu device to devinfo and dh lists Add FUJITSU ETERNUS_AHB Link: https://lore.kernel.org/r/DM6PR06MB5276CCA765336BD312C4282E8C660@DM6PR06MB5276.namprd06.prod.outlook.com Signed-off-by: Steve Schremmer Signed-off-by: Martin K. Petersen --- drivers/scsi/scsi_devinfo.c | 1 + drivers/scsi/scsi_dh.c | 1 + 2 files changed, 2 insertions(+) diff --git a/drivers/scsi/scsi_devinfo.c b/drivers/scsi/scsi_devinfo.c index eed31021e788..ba84244c1b4f 100644 --- a/drivers/scsi/scsi_devinfo.c +++ b/drivers/scsi/scsi_devinfo.c @@ -239,6 +239,7 @@ static struct { {"LSI", "Universal Xport", "*", BLIST_NO_ULD_ATTACH}, {"ENGENIO", "Universal Xport", "*", BLIST_NO_ULD_ATTACH}, {"LENOVO", "Universal Xport", "*", BLIST_NO_ULD_ATTACH}, + {"FUJITSU", "Universal Xport", "*", BLIST_NO_ULD_ATTACH}, {"SanDisk", "Cruzer Blade", NULL, BLIST_TRY_VPD_PAGES | BLIST_INQUIRY_36}, {"SMSC", "USB 2 HS-CF", NULL, BLIST_SPARSELUN | BLIST_INQUIRY_36}, diff --git a/drivers/scsi/scsi_dh.c b/drivers/scsi/scsi_dh.c index 42f0550d6b11..6f41e4b5a2b8 100644 --- a/drivers/scsi/scsi_dh.c +++ b/drivers/scsi/scsi_dh.c @@ -63,6 +63,7 @@ static const struct scsi_dh_blist scsi_dh_blist[] = { {"LSI", "INF-01-00", "rdac", }, {"ENGENIO", "INF-01-00", "rdac", }, {"LENOVO", "DE_Series", "rdac", }, + {"FUJITSU", "ETERNUS_AHB", "rdac", }, {NULL, NULL, NULL }, }; From 97c7990c4bf6423578f5b7317027ff8b6fb7cdf6 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Wed, 13 May 2020 08:42:36 +0200 Subject: [PATCH 232/343] cachefiles: switch to kernel_write __kernel_write doesn't take a sb_writers references, which we need here. Signed-off-by: Christoph Hellwig Reviewed-by: David Howells --- fs/cachefiles/rdwr.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fs/cachefiles/rdwr.c b/fs/cachefiles/rdwr.c index e7726f5f1241..3080cda9e824 100644 --- a/fs/cachefiles/rdwr.c +++ b/fs/cachefiles/rdwr.c @@ -937,7 +937,7 @@ int cachefiles_write_page(struct fscache_storage *op, struct page *page) } data = kmap(page); - ret = __kernel_write(file, data, len, &pos); + ret = kernel_write(file, data, len, &pos); kunmap(page); fput(file); if (ret != len) From 13c164b1a186dfe17d104d9638b86a06be1e40bf Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Wed, 13 May 2020 08:45:59 +0200 Subject: [PATCH 233/343] autofs: switch to kernel_write While pipes don't really need sb_writers projection, __kernel_write is an interface better kept private, and the additional rw_verify_area does not hurt here. Signed-off-by: Christoph Hellwig Acked-by: Ian Kent --- fs/autofs/waitq.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fs/autofs/waitq.c b/fs/autofs/waitq.c index b04c528b19d3..74c886f7c51c 100644 --- a/fs/autofs/waitq.c +++ b/fs/autofs/waitq.c @@ -53,7 +53,7 @@ static int autofs_write(struct autofs_sb_info *sbi, mutex_lock(&sbi->pipe_mutex); while (bytes) { - wr = __kernel_write(file, data, bytes, &file->f_pos); + wr = kernel_write(file, data, bytes, &file->f_pos); if (wr <= 0) break; data += wr; From 6955a76fbcd56d27c84c01353101048e366d070f Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Wed, 13 May 2020 08:46:58 +0200 Subject: [PATCH 234/343] bpfilter: switch to kernel_write While pipes don't really need sb_writers projection, __kernel_write is an interface better kept private, and the additional rw_verify_area does not hurt here. Signed-off-by: Christoph Hellwig --- net/bpfilter/bpfilter_kern.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/bpfilter/bpfilter_kern.c b/net/bpfilter/bpfilter_kern.c index c0f0990f30b6..1905e01c3aa9 100644 --- a/net/bpfilter/bpfilter_kern.c +++ b/net/bpfilter/bpfilter_kern.c @@ -50,7 +50,7 @@ static int __bpfilter_process_sockopt(struct sock *sk, int optname, req.len = optlen; if (!bpfilter_ops.info.pid) goto out; - n = __kernel_write(bpfilter_ops.info.pipe_to_umh, &req, sizeof(req), + n = kernel_write(bpfilter_ops.info.pipe_to_umh, &req, sizeof(req), &pos); if (n != sizeof(req)) { pr_err("write fail %zd\n", n); From 9db97752244919c604eb21df7ef2da3bf58aec73 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Wed, 13 May 2020 08:47:17 +0200 Subject: [PATCH 235/343] fs: unexport __kernel_write This is a very special interface that skips sb_writes protection, and not used by modules anymore. Signed-off-by: Christoph Hellwig --- fs/read_write.c | 1 - 1 file changed, 1 deletion(-) diff --git a/fs/read_write.c b/fs/read_write.c index bbfa9b12b15e..2c601d853ff3 100644 --- a/fs/read_write.c +++ b/fs/read_write.c @@ -522,7 +522,6 @@ ssize_t __kernel_write(struct file *file, const void *buf, size_t count, loff_t inc_syscw(current); return ret; } -EXPORT_SYMBOL(__kernel_write); ssize_t kernel_write(struct file *file, const void *buf, size_t count, loff_t *pos) From a01ac27be4729f80176a45c54611b768dfbdc840 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Fri, 8 May 2020 08:55:03 +0200 Subject: [PATCH 236/343] fs: check FMODE_WRITE in __kernel_write Add a WARN_ON_ONCE if the file isn't actually open for write. This matches the check done in vfs_write, but actually warn warns as a kernel user calling write on a file not opened for writing is a pretty obvious programming error. Signed-off-by: Christoph Hellwig --- fs/read_write.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/fs/read_write.c b/fs/read_write.c index 2c601d853ff3..8f9fc05990ae 100644 --- a/fs/read_write.c +++ b/fs/read_write.c @@ -505,6 +505,8 @@ ssize_t __kernel_write(struct file *file, const void *buf, size_t count, loff_t const char __user *p; ssize_t ret; + if (WARN_ON_ONCE(!(file->f_mode & FMODE_WRITE))) + return -EBADF; if (!(file->f_mode & FMODE_CAN_WRITE)) return -EINVAL; From 81238b2cff1469ff5b94390d026cd075105d6dcd Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Thu, 7 May 2020 19:33:03 +0200 Subject: [PATCH 237/343] fs: implement kernel_write using __kernel_write Consolidate the two in-kernel write helpers to make upcoming changes easier. The only difference are the missing call to rw_verify_area in kernel_write, and an access_ok check that doesn't make sense for kernel buffers to start with. Signed-off-by: Christoph Hellwig --- fs/read_write.c | 17 +++++++++-------- 1 file changed, 9 insertions(+), 8 deletions(-) diff --git a/fs/read_write.c b/fs/read_write.c index 8f9fc05990ae..5110cd1e6e27 100644 --- a/fs/read_write.c +++ b/fs/read_write.c @@ -499,6 +499,7 @@ static ssize_t __vfs_write(struct file *file, const char __user *p, return -EINVAL; } +/* caller is responsible for file_start_write/file_end_write */ ssize_t __kernel_write(struct file *file, const void *buf, size_t count, loff_t *pos) { mm_segment_t old_fs; @@ -528,16 +529,16 @@ ssize_t __kernel_write(struct file *file, const void *buf, size_t count, loff_t ssize_t kernel_write(struct file *file, const void *buf, size_t count, loff_t *pos) { - mm_segment_t old_fs; - ssize_t res; + ssize_t ret; - old_fs = get_fs(); - set_fs(KERNEL_DS); - /* The cast to a user pointer is valid due to the set_fs() */ - res = vfs_write(file, (__force const char __user *)buf, count, pos); - set_fs(old_fs); + ret = rw_verify_area(WRITE, file, pos, count); + if (ret) + return ret; - return res; + file_start_write(file); + ret = __kernel_write(file, buf, count, pos); + file_end_write(file); + return ret; } EXPORT_SYMBOL(kernel_write); From 53ad86266bda973b526078227997ca3fcb92c9be Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Wed, 13 May 2020 08:51:46 +0200 Subject: [PATCH 238/343] fs: remove __vfs_write Fold it into the two callers. Signed-off-by: Christoph Hellwig --- fs/read_write.c | 46 ++++++++++++++++++++++------------------------ 1 file changed, 22 insertions(+), 24 deletions(-) diff --git a/fs/read_write.c b/fs/read_write.c index 5110cd1e6e27..96e8e354f99b 100644 --- a/fs/read_write.c +++ b/fs/read_write.c @@ -488,17 +488,6 @@ static ssize_t new_sync_write(struct file *filp, const char __user *buf, size_t return ret; } -static ssize_t __vfs_write(struct file *file, const char __user *p, - size_t count, loff_t *pos) -{ - if (file->f_op->write) - return file->f_op->write(file, p, count, pos); - else if (file->f_op->write_iter) - return new_sync_write(file, p, count, pos); - else - return -EINVAL; -} - /* caller is responsible for file_start_write/file_end_write */ ssize_t __kernel_write(struct file *file, const void *buf, size_t count, loff_t *pos) { @@ -516,7 +505,12 @@ ssize_t __kernel_write(struct file *file, const void *buf, size_t count, loff_t p = (__force const char __user *)buf; if (count > MAX_RW_COUNT) count = MAX_RW_COUNT; - ret = __vfs_write(file, p, count, pos); + if (file->f_op->write) + ret = file->f_op->write(file, p, count, pos); + else if (file->f_op->write_iter) + ret = new_sync_write(file, p, count, pos); + else + ret = -EINVAL; set_fs(old_fs); if (ret > 0) { fsnotify_modify(file); @@ -554,19 +548,23 @@ ssize_t vfs_write(struct file *file, const char __user *buf, size_t count, loff_ return -EFAULT; ret = rw_verify_area(WRITE, file, pos, count); - if (!ret) { - if (count > MAX_RW_COUNT) - count = MAX_RW_COUNT; - file_start_write(file); - ret = __vfs_write(file, buf, count, pos); - if (ret > 0) { - fsnotify_modify(file); - add_wchar(current, ret); - } - inc_syscw(current); - file_end_write(file); + if (ret) + return ret; + if (count > MAX_RW_COUNT) + count = MAX_RW_COUNT; + file_start_write(file); + if (file->f_op->write) + ret = file->f_op->write(file, buf, count, pos); + else if (file->f_op->write_iter) + ret = new_sync_write(file, buf, count, pos); + else + ret = -EINVAL; + if (ret > 0) { + fsnotify_modify(file); + add_wchar(current, ret); } - + inc_syscw(current); + file_end_write(file); return ret; } From 61a707c543e2afe3aa7e88f87267c5dafa4b5afa Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Fri, 8 May 2020 08:54:16 +0200 Subject: [PATCH 239/343] fs: add a __kernel_read helper This is the counterpart to __kernel_write, and skip the rw_verify_area call compared to kernel_read. Signed-off-by: Christoph Hellwig --- fs/read_write.c | 23 +++++++++++++++++++++++ include/linux/fs.h | 1 + 2 files changed, 24 insertions(+) diff --git a/fs/read_write.c b/fs/read_write.c index 96e8e354f99b..21c9d90a257e 100644 --- a/fs/read_write.c +++ b/fs/read_write.c @@ -430,6 +430,29 @@ ssize_t __vfs_read(struct file *file, char __user *buf, size_t count, return -EINVAL; } +ssize_t __kernel_read(struct file *file, void *buf, size_t count, loff_t *pos) +{ + mm_segment_t old_fs = get_fs(); + ssize_t ret; + + if (WARN_ON_ONCE(!(file->f_mode & FMODE_READ))) + return -EINVAL; + if (!(file->f_mode & FMODE_CAN_READ)) + return -EINVAL; + + if (count > MAX_RW_COUNT) + count = MAX_RW_COUNT; + set_fs(KERNEL_DS); + ret = __vfs_read(file, (void __user *)buf, count, pos); + set_fs(old_fs); + if (ret > 0) { + fsnotify_access(file); + add_rchar(current, ret); + } + inc_syscr(current); + return ret; +} + ssize_t kernel_read(struct file *file, void *buf, size_t count, loff_t *pos) { mm_segment_t old_fs; diff --git a/include/linux/fs.h b/include/linux/fs.h index 3f881a892ea7..22cbe7b2e919 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -3033,6 +3033,7 @@ extern int kernel_read_file_from_path_initns(const char *, void **, loff_t *, lo extern int kernel_read_file_from_fd(int, void **, loff_t *, loff_t, enum kernel_read_file_id); extern ssize_t kernel_read(struct file *, void *, size_t, loff_t *); +ssize_t __kernel_read(struct file *file, void *buf, size_t count, loff_t *pos); extern ssize_t kernel_write(struct file *, const void *, size_t, loff_t *); extern ssize_t __kernel_write(struct file *, const void *, size_t, loff_t *); extern struct file * open_exec(const char *); From a1f9b1c0439db47ea10418b8ee7092a0d970cc2d Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Fri, 8 May 2020 08:54:27 +0200 Subject: [PATCH 240/343] integrity/ima: switch to using __kernel_read __kernel_read has a bunch of additional sanity checks, and this moves the set_fs out of non-core code. Signed-off-by: Christoph Hellwig --- security/integrity/iint.c | 14 +------------- 1 file changed, 1 insertion(+), 13 deletions(-) diff --git a/security/integrity/iint.c b/security/integrity/iint.c index e12c4900510f..1d20003243c3 100644 --- a/security/integrity/iint.c +++ b/security/integrity/iint.c @@ -188,19 +188,7 @@ DEFINE_LSM(integrity) = { int integrity_kernel_read(struct file *file, loff_t offset, void *addr, unsigned long count) { - mm_segment_t old_fs; - char __user *buf = (char __user *)addr; - ssize_t ret; - - if (!(file->f_mode & FMODE_READ)) - return -EBADF; - - old_fs = get_fs(); - set_fs(KERNEL_DS); - ret = __vfs_read(file, buf, count, &offset); - set_fs(old_fs); - - return ret; + return __kernel_read(file, addr, count, &offset); } /* From 6209dd9132e8ea5545cffc84483841e88ea8cc5b Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Fri, 8 May 2020 09:00:28 +0200 Subject: [PATCH 241/343] fs: implement kernel_read using __kernel_read Consolidate the two in-kernel read helpers to make upcoming changes easier. The only difference are the missing call to rw_verify_area in kernel_read, and an access_ok check that doesn't make sense for kernel buffers to start with. Signed-off-by: Christoph Hellwig --- fs/read_write.c | 13 +++++-------- 1 file changed, 5 insertions(+), 8 deletions(-) diff --git a/fs/read_write.c b/fs/read_write.c index 21c9d90a257e..42a027719324 100644 --- a/fs/read_write.c +++ b/fs/read_write.c @@ -455,15 +455,12 @@ ssize_t __kernel_read(struct file *file, void *buf, size_t count, loff_t *pos) ssize_t kernel_read(struct file *file, void *buf, size_t count, loff_t *pos) { - mm_segment_t old_fs; - ssize_t result; + ssize_t ret; - old_fs = get_fs(); - set_fs(KERNEL_DS); - /* The cast to a user pointer is valid due to the set_fs() */ - result = vfs_read(file, (void __user *)buf, count, pos); - set_fs(old_fs); - return result; + ret = rw_verify_area(READ, file, pos, count); + if (ret) + return ret; + return __kernel_read(file, buf, count, pos); } EXPORT_SYMBOL(kernel_read); From 775802c0571fb438cd4f6548a323f9e4cb89f5aa Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Fri, 8 May 2020 11:17:46 +0200 Subject: [PATCH 242/343] fs: remove __vfs_read Fold it into the two callers. Signed-off-by: Christoph Hellwig --- fs/read_write.c | 43 +++++++++++++++++++++---------------------- include/linux/fs.h | 1 - 2 files changed, 21 insertions(+), 23 deletions(-) diff --git a/fs/read_write.c b/fs/read_write.c index 42a027719324..4fb797822567 100644 --- a/fs/read_write.c +++ b/fs/read_write.c @@ -419,17 +419,6 @@ static ssize_t new_sync_read(struct file *filp, char __user *buf, size_t len, lo return ret; } -ssize_t __vfs_read(struct file *file, char __user *buf, size_t count, - loff_t *pos) -{ - if (file->f_op->read) - return file->f_op->read(file, buf, count, pos); - else if (file->f_op->read_iter) - return new_sync_read(file, buf, count, pos); - else - return -EINVAL; -} - ssize_t __kernel_read(struct file *file, void *buf, size_t count, loff_t *pos) { mm_segment_t old_fs = get_fs(); @@ -443,7 +432,12 @@ ssize_t __kernel_read(struct file *file, void *buf, size_t count, loff_t *pos) if (count > MAX_RW_COUNT) count = MAX_RW_COUNT; set_fs(KERNEL_DS); - ret = __vfs_read(file, (void __user *)buf, count, pos); + if (file->f_op->read) + ret = file->f_op->read(file, (void __user *)buf, count, pos); + else if (file->f_op->read_iter) + ret = new_sync_read(file, (void __user *)buf, count, pos); + else + ret = -EINVAL; set_fs(old_fs); if (ret > 0) { fsnotify_access(file); @@ -476,17 +470,22 @@ ssize_t vfs_read(struct file *file, char __user *buf, size_t count, loff_t *pos) return -EFAULT; ret = rw_verify_area(READ, file, pos, count); - if (!ret) { - if (count > MAX_RW_COUNT) - count = MAX_RW_COUNT; - ret = __vfs_read(file, buf, count, pos); - if (ret > 0) { - fsnotify_access(file); - add_rchar(current, ret); - } - inc_syscr(current); - } + if (ret) + return ret; + if (count > MAX_RW_COUNT) + count = MAX_RW_COUNT; + if (file->f_op->read) + ret = file->f_op->read(file, buf, count, pos); + else if (file->f_op->read_iter) + ret = new_sync_read(file, buf, count, pos); + else + ret = -EINVAL; + if (ret > 0) { + fsnotify_access(file); + add_rchar(current, ret); + } + inc_syscr(current); return ret; } diff --git a/include/linux/fs.h b/include/linux/fs.h index 22cbe7b2e919..0c0ec76b600b 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -1917,7 +1917,6 @@ ssize_t rw_copy_check_uvector(int type, const struct iovec __user * uvector, struct iovec *fast_pointer, struct iovec **ret_pointer); -extern ssize_t __vfs_read(struct file *, char __user *, size_t, loff_t *); extern ssize_t vfs_read(struct file *, char __user *, size_t, loff_t *); extern ssize_t vfs_write(struct file *, const char __user *, size_t, loff_t *); extern ssize_t vfs_readv(struct file *, const struct iovec __user *, From ee769ebbe9e5fc7219e979fb7c5ed5bb5722649e Mon Sep 17 00:00:00 2001 From: Xu Wang Date: Wed, 8 Jul 2020 06:20:23 +0000 Subject: [PATCH 243/343] xtensa: simplify xtensa_pmu_irq_handler Use for_each_set_bit() instead of open-coding it to simplify the code. Signed-off-by: Xu Wang Message-Id: <20200708062023.7986-1-vulab@iscas.ac.cn> Signed-off-by: Max Filippov --- arch/xtensa/kernel/perf_event.c | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/arch/xtensa/kernel/perf_event.c b/arch/xtensa/kernel/perf_event.c index 9bae79f70301..99fcd63ce597 100644 --- a/arch/xtensa/kernel/perf_event.c +++ b/arch/xtensa/kernel/perf_event.c @@ -362,9 +362,7 @@ irqreturn_t xtensa_pmu_irq_handler(int irq, void *dev_id) struct xtensa_pmu_events *ev = this_cpu_ptr(&xtensa_pmu_events); unsigned i; - for (i = find_first_bit(ev->used_mask, XCHAL_NUM_PERF_COUNTERS); - i < XCHAL_NUM_PERF_COUNTERS; - i = find_next_bit(ev->used_mask, XCHAL_NUM_PERF_COUNTERS, i + 1)) { + for_each_set_bit(i, ev->used_mask, XCHAL_NUM_PERF_COUNTERS) { uint32_t v = get_er(XTENSA_PMU_PMSTAT(i)); struct perf_event *event = ev->event[i]; struct hw_perf_event *hwc = &event->hw; From 8668115cf2db40e22e7be02652a3673d8d30c9f0 Mon Sep 17 00:00:00 2001 From: Steve French Date: Tue, 7 Jul 2020 23:43:39 -0500 Subject: [PATCH 244/343] smb3: fix unneeded error message on change notify We should not be logging a warning repeatedly on change notify. CC: Stable # v5.6+ Signed-off-by: Steve French Reviewed-by: Ronnie Sahlberg --- fs/cifs/smb2misc.c | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/fs/cifs/smb2misc.c b/fs/cifs/smb2misc.c index 6a39451973f8..157992864ce7 100644 --- a/fs/cifs/smb2misc.c +++ b/fs/cifs/smb2misc.c @@ -354,9 +354,13 @@ smb2_get_data_area_len(int *off, int *len, struct smb2_sync_hdr *shdr) ((struct smb2_ioctl_rsp *)shdr)->OutputCount); break; case SMB2_CHANGE_NOTIFY: + *off = le16_to_cpu( + ((struct smb2_change_notify_rsp *)shdr)->OutputBufferOffset); + *len = le32_to_cpu( + ((struct smb2_change_notify_rsp *)shdr)->OutputBufferLength); + break; default: - /* BB FIXME for unimplemented cases above */ - cifs_dbg(VFS, "no length check for command\n"); + cifs_dbg(VFS, "no length check for command %d\n", le16_to_cpu(shdr->Command)); break; } From 00debf8109e5fad3db31375be2a3c515e1461b4a Mon Sep 17 00:00:00 2001 From: Zenghui Yu Date: Mon, 6 Jul 2020 22:47:13 +0800 Subject: [PATCH 245/343] drm/hisilicon/hibmc: Move drm_fbdev_generic_setup() down to avoid the splat The HiSilicon hibmc driver triggers a splat at boot time as below [ 14.137806] ------------[ cut here ]------------ [ 14.142405] hibmc-drm 0000:0a:00.0: Device has not been registered. [ 14.148661] WARNING: CPU: 0 PID: 496 at drivers/gpu/drm/drm_fb_helper.c:2233 drm_fbdev_generic_setup+0x15c/0x1b8 [ 14.158787] [...] [ 14.278307] Call trace: [ 14.280742] drm_fbdev_generic_setup+0x15c/0x1b8 [ 14.285337] hibmc_pci_probe+0x354/0x418 [ 14.289242] local_pci_probe+0x44/0x98 [ 14.292974] work_for_cpu_fn+0x20/0x30 [ 14.296708] process_one_work+0x1c4/0x4e0 [ 14.300698] worker_thread+0x2c8/0x528 [ 14.304431] kthread+0x138/0x140 [ 14.307646] ret_from_fork+0x10/0x18 [ 14.311205] ---[ end trace a2000ec2d838af4d ]--- This turned out to be due to the fbdev device hasn't been registered when drm_fbdev_generic_setup() is invoked. Let's fix the splat by moving it down after drm_dev_register() which will follow the "Display driver example" documented by commit de99f0600a79 ("drm/drv: DOC: Add driver example code"). Signed-off-by: Zenghui Yu Reviewed-by: Thomas Zimmermann Signed-off-by: Xinliang Liu Link: https://patchwork.freedesktop.org/patch/msgid/20200706144713.1123-1-yuzenghui@huawei.com --- drivers/gpu/drm/hisilicon/hibmc/hibmc_drm_drv.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/hisilicon/hibmc/hibmc_drm_drv.c b/drivers/gpu/drm/hisilicon/hibmc/hibmc_drm_drv.c index a6fd0c29e5b8..544b9993c99e 100644 --- a/drivers/gpu/drm/hisilicon/hibmc/hibmc_drm_drv.c +++ b/drivers/gpu/drm/hisilicon/hibmc/hibmc_drm_drv.c @@ -307,8 +307,6 @@ static int hibmc_load(struct drm_device *dev) /* reset all the states of crtc/plane/encoder/connector */ drm_mode_config_reset(dev); - drm_fbdev_generic_setup(dev, dev->mode_config.preferred_depth); - return 0; err: @@ -355,6 +353,9 @@ static int hibmc_pci_probe(struct pci_dev *pdev, ret); goto err_unload; } + + drm_fbdev_generic_setup(dev, dev->mode_config.preferred_depth); + return 0; err_unload: From 4557ac6b344b8cdf948ff8b007e8e1de34832f2e Mon Sep 17 00:00:00 2001 From: Nicholas Piggin Date: Wed, 8 Jul 2020 17:49:42 +1000 Subject: [PATCH 246/343] powerpc/64s/exception: Fix 0x1500 interrupt handler crash A typo caused the interrupt handler to branch immediately to the common "unknown interrupt" handler and skip the special case test for denormal cause. This does not affect KVM softpatch handling (e.g., for POWER9 TM assist) because the KVM test was moved to common code by commit 9600f261acaa ("powerpc/64s/exception: Move KVM test to common code") just before this bug was introduced. Fixes: 3f7fbd97d07d ("powerpc/64s/exception: Clean up SRR specifiers") Reported-by: Paul Menzel Signed-off-by: Nicholas Piggin Tested-by: Paul Menzel [mpe: Split selftest into a separate patch] Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20200708074942.1713396-1-npiggin@gmail.com --- arch/powerpc/kernel/exceptions-64s.S | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/powerpc/kernel/exceptions-64s.S b/arch/powerpc/kernel/exceptions-64s.S index fa080694e581..0fc8bad878b2 100644 --- a/arch/powerpc/kernel/exceptions-64s.S +++ b/arch/powerpc/kernel/exceptions-64s.S @@ -2551,7 +2551,7 @@ EXC_VIRT_NONE(0x5400, 0x100) INT_DEFINE_BEGIN(denorm_exception) IVEC=0x1500 IHSRR=1 - IBRANCH_COMMON=0 + IBRANCH_TO_COMMON=0 IKVM_REAL=1 INT_DEFINE_END(denorm_exception) From 04340645f69ab7abb6f9052688a60f0213b3f79c Mon Sep 17 00:00:00 2001 From: Kamal Heib Date: Tue, 7 Jul 2020 16:09:31 +0300 Subject: [PATCH 247/343] RDMA/siw: Fix reporting vendor_part_id Move the initialization of the vendor_part_id to be before calling ib_register_device(), this is needed because the query_device() callback is called from the context of ib_register_device() before initializing the vendor_part_id, so the reported value is wrong. Fixes: bdcf26bf9b3a ("rdma/siw: network and RDMA core interface") Link: https://lore.kernel.org/r/20200707130931.444724-1-kamalheib1@gmail.com Signed-off-by: Kamal Heib Reviewed-by: Bernard Metzler Signed-off-by: Jason Gunthorpe --- drivers/infiniband/sw/siw/siw_main.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/infiniband/sw/siw/siw_main.c b/drivers/infiniband/sw/siw/siw_main.c index a0b8cc643c5c..ed60c9e4643e 100644 --- a/drivers/infiniband/sw/siw/siw_main.c +++ b/drivers/infiniband/sw/siw/siw_main.c @@ -67,12 +67,13 @@ static int siw_device_register(struct siw_device *sdev, const char *name) static int dev_id = 1; int rv; + sdev->vendor_part_id = dev_id++; + rv = ib_register_device(base_dev, name); if (rv) { pr_warn("siw: device registration error %d\n", rv); return rv; } - sdev->vendor_part_id = dev_id++; siw_dbg(base_dev, "HWaddr=%pM\n", sdev->netdev->dev_addr); From 3ec2d5113ea5e182b759b613dc1f7108ba7aac9e Mon Sep 17 00:00:00 2001 From: Veerabhadrarao Badiganti Date: Wed, 8 Jul 2020 18:41:19 +0530 Subject: [PATCH 248/343] mmc: sdhci-msm: Override DLL_CONFIG only if the valid value is supplied During DLL initialization, the DLL_CONFIG register value would be updated with the value supplied from the device-tree. Override this register only if a valid value is supplied. Fixes: 03591160ca19 ("mmc: sdhci-msm: Read and use DLL Config property from device tree file") Signed-off-by: Veerabhadrarao Badiganti Link: https://lore.kernel.org/r/1594213888-2780-1-git-send-email-vbadigan@codeaurora.org Signed-off-by: Ulf Hansson --- drivers/mmc/host/sdhci-msm.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/drivers/mmc/host/sdhci-msm.c b/drivers/mmc/host/sdhci-msm.c index b277dd7fbdb5..c0d58e9fcc33 100644 --- a/drivers/mmc/host/sdhci-msm.c +++ b/drivers/mmc/host/sdhci-msm.c @@ -618,8 +618,9 @@ static int msm_init_cm_dll(struct sdhci_host *host) config &= ~CORE_CLK_PWRSAVE; writel_relaxed(config, host->ioaddr + msm_offset->core_vendor_spec); - config = msm_host->dll_config; - writel_relaxed(config, host->ioaddr + msm_offset->core_dll_config); + if (msm_host->dll_config) + writel_relaxed(msm_host->dll_config, + host->ioaddr + msm_offset->core_dll_config); if (msm_host->use_14lpp_dll_reset) { config = readl_relaxed(host->ioaddr + From 382761dc6312965a11f82f2217e16ec421bf17ae Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Sat, 27 Jun 2020 09:31:46 +0200 Subject: [PATCH 249/343] dm: use bio_uninit instead of bio_disassociate_blkg bio_uninit is the proper API to clean up a BIO that has been allocated on stack or inside a structure that doesn't come from the BIO allocator. Switch dm to use that instead of bio_disassociate_blkg, which really is an implementation detail. Note that the bio_uninit calls are also moved to the two callers of __send_empty_flush, so that they better pair with the bio_init calls used to initialize them. Signed-off-by: Christoph Hellwig Reviewed-by: Johannes Thumshirn Signed-off-by: Mike Snitzer --- drivers/md/dm.c | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/drivers/md/dm.c b/drivers/md/dm.c index 446aff589732..4cc7e2599664 100644 --- a/drivers/md/dm.c +++ b/drivers/md/dm.c @@ -1443,9 +1443,6 @@ static int __send_empty_flush(struct clone_info *ci) BUG_ON(bio_has_data(ci->bio)); while ((ti = dm_table_get_target(ci->map, target_nr++))) __send_duplicate_bios(ci, ti, ti->num_flush_bios, NULL); - - bio_disassociate_blkg(ci->bio); - return 0; } @@ -1633,6 +1630,7 @@ static blk_qc_t __split_and_process_bio(struct mapped_device *md, ci.bio = &flush_bio; ci.sector_count = 0; error = __send_empty_flush(&ci); + bio_uninit(ci.bio); /* dec_pending submits any data associated with flush */ } else if (op_is_zone_mgmt(bio_op(bio))) { ci.bio = bio; @@ -1707,6 +1705,7 @@ static blk_qc_t __process_bio(struct mapped_device *md, struct dm_table *map, ci.bio = &flush_bio; ci.sector_count = 0; error = __send_empty_flush(&ci); + bio_uninit(ci.bio); /* dec_pending submits any data associated with flush */ } else { struct dm_target_io *tio; From a46624580376a3a0beb218d94cbc7f258696e29f Mon Sep 17 00:00:00 2001 From: Michal Suchanek Date: Tue, 30 Jun 2020 17:49:24 +0200 Subject: [PATCH 250/343] dm writecache: reject asynchronous pmem devices DM writecache does not handle asynchronous pmem. Reject it when supplied as cache. Link: https://lore.kernel.org/linux-nvdimm/87lfk5hahc.fsf@linux.ibm.com/ Fixes: 6e84200c0a29 ("virtio-pmem: Add virtio pmem driver") Signed-off-by: Michal Suchanek Acked-by: Mikulas Patocka Cc: stable@vger.kernel.org # 5.3+ Signed-off-by: Mike Snitzer --- drivers/md/dm-writecache.c | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/drivers/md/dm-writecache.c b/drivers/md/dm-writecache.c index 30505d70f423..5358894bb9fd 100644 --- a/drivers/md/dm-writecache.c +++ b/drivers/md/dm-writecache.c @@ -2266,6 +2266,12 @@ static int writecache_ctr(struct dm_target *ti, unsigned argc, char **argv) } if (WC_MODE_PMEM(wc)) { + if (!dax_synchronous(wc->ssd_dev->dax_dev)) { + r = -EOPNOTSUPP; + ti->error = "Asynchronous persistent memory not supported as pmem cache"; + goto bad; + } + r = persistent_memory_claim(wc); if (r) { ti->error = "Unable to map persistent memory for cache"; From ce34c9b461b50001892b0b348e024d2275014ede Mon Sep 17 00:00:00 2001 From: Wei Yongjun Date: Thu, 2 Jul 2020 23:11:40 +0800 Subject: [PATCH 251/343] dm zoned: fix unused but set variable warnings Fix unused but set variable warnings: drivers/md/dm-zoned-reclaim.c:504:42: warning: variable nr_rnd set but not used [-Wunused-but-set-variable] 504 | unsigned int p_unmap, nr_unmap_rnd = 0, nr_rnd = 0; | ^~~~~~ drivers/md/dm-zoned-reclaim.c:504:24: warning: variable nr_unmap_rnd set but not used [-Wunused-but-set-variable] 504 | unsigned int p_unmap, nr_unmap_rnd = 0, nr_rnd = 0; | ^~~~~~~~~~~~ Fixes: f97809aec589 ("dm zoned: per-device reclaim") Signed-off-by: Wei Yongjun Signed-off-by: Mike Snitzer --- drivers/md/dm-zoned-reclaim.c | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) diff --git a/drivers/md/dm-zoned-reclaim.c b/drivers/md/dm-zoned-reclaim.c index dd1eebf6e50f..7e0cc2d732cf 100644 --- a/drivers/md/dm-zoned-reclaim.c +++ b/drivers/md/dm-zoned-reclaim.c @@ -501,7 +501,7 @@ static void dmz_reclaim_work(struct work_struct *work) { struct dmz_reclaim *zrc = container_of(work, struct dmz_reclaim, work.work); struct dmz_metadata *zmd = zrc->metadata; - unsigned int p_unmap, nr_unmap_rnd = 0, nr_rnd = 0; + unsigned int p_unmap; int ret; if (dmz_dev_is_dying(zmd)) @@ -527,9 +527,6 @@ static void dmz_reclaim_work(struct work_struct *work) zrc->kc_throttle.throttle = min(75U, 100U - p_unmap / 2); } - nr_unmap_rnd = dmz_nr_unmap_rnd_zones(zmd, zrc->dev_idx); - nr_rnd = dmz_nr_rnd_zones(zmd, zrc->dev_idx); - DMDEBUG("(%s/%u): Reclaim (%u): %s, %u%% free zones (%u/%u cache %u/%u random)", dmz_metadata_label(zmd), zrc->dev_idx, zrc->kc_throttle.throttle, From 174364f6a8979655f71b04b6492657aec3762703 Mon Sep 17 00:00:00 2001 From: Damien Le Moal Date: Wed, 8 Jul 2020 09:20:22 +0900 Subject: [PATCH 252/343] dm zoned: Fix zone reclaim trigger Only triggering reclaim based on the percentage of unmapped cache zones can fail to detect cases where reclaim is needed, e.g. if the target has only 2 or 3 cache zones and only one unmapped cache zone, the percentage of free cache zones is higher than DMZ_RECLAIM_LOW_UNMAP_ZONES (30%) and reclaim does not trigger. This problem, combined with the fact that dmz_schedule_reclaim() is called from dmz_handle_bio() without the map lock held, leads to a race between zone allocation and dmz_should_reclaim() result. Depending on the workload applied, this race can lead to the write path waiting forever for a free zone without reclaim being triggered. Fix this by moving dmz_schedule_reclaim() inside dmz_alloc_zone() under the map lock. This results in checking the need for zone reclaim whenever a new data or buffer zone needs to be allocated. Also fix dmz_reclaim_percentage() to always return 0 if the number of unmapped cache (or random) zones is less than or equal to 1. Suggested-by: Shin'ichiro Kawasaki Signed-off-by: Damien Le Moal Reviewed-by: Hannes Reinecke Signed-off-by: Mike Snitzer --- drivers/md/dm-zoned-metadata.c | 9 ++++++++- drivers/md/dm-zoned-reclaim.c | 2 ++ drivers/md/dm-zoned-target.c | 10 +--------- 3 files changed, 11 insertions(+), 10 deletions(-) diff --git a/drivers/md/dm-zoned-metadata.c b/drivers/md/dm-zoned-metadata.c index 5cf6f5f552e0..b298fefb022e 100644 --- a/drivers/md/dm-zoned-metadata.c +++ b/drivers/md/dm-zoned-metadata.c @@ -2217,8 +2217,15 @@ struct dm_zone *dmz_alloc_zone(struct dmz_metadata *zmd, unsigned int dev_idx, { struct list_head *list; struct dm_zone *zone; - int i = 0; + int i; + /* Schedule reclaim to ensure free zones are available */ + if (!(flags & DMZ_ALLOC_RECLAIM)) { + for (i = 0; i < zmd->nr_devs; i++) + dmz_schedule_reclaim(zmd->dev[i].reclaim); + } + + i = 0; again: if (flags & DMZ_ALLOC_CACHE) list = &zmd->unmap_cache_list; diff --git a/drivers/md/dm-zoned-reclaim.c b/drivers/md/dm-zoned-reclaim.c index 7e0cc2d732cf..9c0ecc9568a4 100644 --- a/drivers/md/dm-zoned-reclaim.c +++ b/drivers/md/dm-zoned-reclaim.c @@ -456,6 +456,8 @@ static unsigned int dmz_reclaim_percentage(struct dmz_reclaim *zrc) nr_zones = dmz_nr_rnd_zones(zmd, zrc->dev_idx); nr_unmap = dmz_nr_unmap_rnd_zones(zmd, zrc->dev_idx); } + if (nr_unmap <= 1) + return 0; return nr_unmap * 100 / nr_zones; } diff --git a/drivers/md/dm-zoned-target.c b/drivers/md/dm-zoned-target.c index cf915009c306..42aa5139df7c 100644 --- a/drivers/md/dm-zoned-target.c +++ b/drivers/md/dm-zoned-target.c @@ -400,15 +400,7 @@ static void dmz_handle_bio(struct dmz_target *dmz, struct dm_chunk_work *cw, dm_per_bio_data(bio, sizeof(struct dmz_bioctx)); struct dmz_metadata *zmd = dmz->metadata; struct dm_zone *zone; - int i, ret; - - /* - * Write may trigger a zone allocation. So make sure the - * allocation can succeed. - */ - if (bio_op(bio) == REQ_OP_WRITE) - for (i = 0; i < dmz->nr_ddevs; i++) - dmz_schedule_reclaim(dmz->dev[i].reclaim); + int ret; dmz_lock_metadata(zmd); From 6958c1c640af8c3f40fa8a2eee3b5b905d95b677 Mon Sep 17 00:00:00 2001 From: Mikulas Patocka Date: Wed, 8 Jul 2020 12:25:20 -0400 Subject: [PATCH 253/343] dm: use noio when sending kobject event kobject_uevent may allocate memory and it may be called while there are dm devices suspended. The allocation may recurse into a suspended device, causing a deadlock. We must set the noio flag when sending a uevent. The observed deadlock was reported here: https://www.redhat.com/archives/dm-devel/2020-March/msg00025.html Reported-by: Khazhismel Kumykov Reported-by: Tahsin Erdogan Reported-by: Gabriel Krisman Bertazi Signed-off-by: Mikulas Patocka Cc: stable@vger.kernel.org Signed-off-by: Mike Snitzer --- drivers/md/dm.c | 15 ++++++++++++--- 1 file changed, 12 insertions(+), 3 deletions(-) diff --git a/drivers/md/dm.c b/drivers/md/dm.c index 4cc7e2599664..52449afd58eb 100644 --- a/drivers/md/dm.c +++ b/drivers/md/dm.c @@ -12,6 +12,7 @@ #include #include #include +#include #include #include #include @@ -2939,17 +2940,25 @@ EXPORT_SYMBOL_GPL(dm_internal_resume_fast); int dm_kobject_uevent(struct mapped_device *md, enum kobject_action action, unsigned cookie) { + int r; + unsigned noio_flag; char udev_cookie[DM_COOKIE_LENGTH]; char *envp[] = { udev_cookie, NULL }; + noio_flag = memalloc_noio_save(); + if (!cookie) - return kobject_uevent(&disk_to_dev(md->disk)->kobj, action); + r = kobject_uevent(&disk_to_dev(md->disk)->kobj, action); else { snprintf(udev_cookie, DM_COOKIE_LENGTH, "%s=%u", DM_COOKIE_ENV_VAR_NAME, cookie); - return kobject_uevent_env(&disk_to_dev(md->disk)->kobj, - action, envp); + r = kobject_uevent_env(&disk_to_dev(md->disk)->kobj, + action, envp); } + + memalloc_noio_restore(noio_flag); + + return r; } uint32_t dm_next_uevent_seq(struct mapped_device *md) From 6ec4476ac82512f09c94aff5972654b70f3772b2 Mon Sep 17 00:00:00 2001 From: Linus Torvalds Date: Wed, 8 Jul 2020 10:48:35 -0700 Subject: [PATCH 254/343] Raise gcc version requirement to 4.9 I realize that we fairly recently raised it to 4.8, but the fact is, 4.9 is a much better minimum version to target. We have a number of workarounds for actual bugs in pre-4.9 gcc versions (including things like internal compiler errors on ARM), but we also have some syntactic workarounds for lacking features. In particular, raising the minimum to 4.9 means that we can now just assume _Generic() exists, which is likely the much better replacement for a lot of very convoluted built-time magic with conditionals on sizeof and/or __builtin_choose_expr() with same_type() etc. Using _Generic also means that you will need to have a very recent version of 'sparse', but thats easy to build yourself, and much less of a hassle than some old gcc version can be. The latest (in a long string) of reasons for minimum compiler version upgrades was commit 5435f73d5c4a ("efi/x86: Fix build with gcc 4"). Ard points out that RHEL 7 uses gcc-4.8, but the people who stay back on old RHEL versions persumably also don't build their own kernels anyway. And maybe they should cross-built or just have a little side affair with a newer compiler? Acked-by: Ard Biesheuvel Acked-by: Peter Zijlstra Signed-off-by: Linus Torvalds --- arch/arm/kernel/asm-offsets.c | 9 --------- arch/mips/include/asm/unroll.h | 7 +++---- include/linux/bits.h | 3 +-- include/linux/compiler-gcc.h | 2 +- include/linux/compiler_types.h | 27 +-------------------------- mm/migrate.c | 13 +------------ tools/include/linux/bits.h | 3 +-- 7 files changed, 8 insertions(+), 56 deletions(-) diff --git a/arch/arm/kernel/asm-offsets.c b/arch/arm/kernel/asm-offsets.c index c036a4a2f8e2..a1570c8bab25 100644 --- a/arch/arm/kernel/asm-offsets.c +++ b/arch/arm/kernel/asm-offsets.c @@ -31,15 +31,6 @@ #if defined(__APCS_26__) #error Sorry, your compiler targets APCS-26 but this kernel requires APCS-32 #endif -/* - * GCC 4.8.0-4.8.2: https://gcc.gnu.org/bugzilla/show_bug.cgi?id=58854 - * miscompiles find_get_entry(), and can result in EXT3 and EXT4 - * filesystem corruption (possibly other FS too). - */ -#if defined(GCC_VERSION) && GCC_VERSION >= 40800 && GCC_VERSION < 40803 -#error Your compiler is too buggy; it is known to miscompile kernels -#error and result in filesystem corruption and oopses. -#endif int main(void) { diff --git a/arch/mips/include/asm/unroll.h b/arch/mips/include/asm/unroll.h index c628747d4ecd..8ed660adc84f 100644 --- a/arch/mips/include/asm/unroll.h +++ b/arch/mips/include/asm/unroll.h @@ -19,14 +19,13 @@ \ /* \ * We can't unroll if the number of iterations isn't \ - * compile-time constant. Unfortunately GCC versions \ - * up until 4.6 tend to miss obvious constants & cause \ + * compile-time constant. Unfortunately clang versions \ + * up until 8.0 tend to miss obvious constants & cause \ * this check to fail, even though they go on to \ * generate reasonable code for the switch statement, \ * so we skip the sanity check for those compilers. \ */ \ - BUILD_BUG_ON((CONFIG_GCC_VERSION >= 40700 || \ - CONFIG_CLANG_VERSION >= 80000) && \ + BUILD_BUG_ON((CONFIG_CLANG_VERSION >= 80000) && \ !__builtin_constant_p(times)); \ \ switch (times) { \ diff --git a/include/linux/bits.h b/include/linux/bits.h index 4671fbf28842..7f475d59a097 100644 --- a/include/linux/bits.h +++ b/include/linux/bits.h @@ -18,8 +18,7 @@ * position @h. For example * GENMASK_ULL(39, 21) gives us the 64bit vector 0x000000ffffe00000. */ -#if !defined(__ASSEMBLY__) && \ - (!defined(CONFIG_CC_IS_GCC) || CONFIG_GCC_VERSION >= 49000) +#if !defined(__ASSEMBLY__) #include #define GENMASK_INPUT_CHECK(h, l) \ (BUILD_BUG_ON_ZERO(__builtin_choose_expr( \ diff --git a/include/linux/compiler-gcc.h b/include/linux/compiler-gcc.h index 1c74464c80c6..0b1dc61f3955 100644 --- a/include/linux/compiler-gcc.h +++ b/include/linux/compiler-gcc.h @@ -11,7 +11,7 @@ + __GNUC_PATCHLEVEL__) /* https://gcc.gnu.org/bugzilla/show_bug.cgi?id=58145 */ -#if GCC_VERSION < 40800 +#if GCC_VERSION < 40900 # error Sorry, your compiler is too old - please upgrade it. #endif diff --git a/include/linux/compiler_types.h b/include/linux/compiler_types.h index c3bf7710f69a..01dd58c74d80 100644 --- a/include/linux/compiler_types.h +++ b/include/linux/compiler_types.h @@ -252,32 +252,8 @@ struct ftrace_likely_data { * __unqual_scalar_typeof(x) - Declare an unqualified scalar type, leaving * non-scalar types unchanged. */ -#if (defined(CONFIG_CC_IS_GCC) && CONFIG_GCC_VERSION < 40900) || defined(__CHECKER__) /* - * We build this out of a couple of helper macros in a vain attempt to - * help you keep your lunch down while reading it. - */ -#define __pick_scalar_type(x, type, otherwise) \ - __builtin_choose_expr(__same_type(x, type), (type)0, otherwise) - -/* - * 'char' is not type-compatible with either 'signed char' or 'unsigned char', - * so we include the naked type here as well as the signed/unsigned variants. - */ -#define __pick_integer_type(x, type, otherwise) \ - __pick_scalar_type(x, type, \ - __pick_scalar_type(x, unsigned type, \ - __pick_scalar_type(x, signed type, otherwise))) - -#define __unqual_scalar_typeof(x) typeof( \ - __pick_integer_type(x, char, \ - __pick_integer_type(x, short, \ - __pick_integer_type(x, int, \ - __pick_integer_type(x, long, \ - __pick_integer_type(x, long long, x)))))) -#else -/* - * If supported, prefer C11 _Generic for better compile-times. As above, 'char' + * Prefer C11 _Generic for better compile-times and simpler code. Note: 'char' * is not type-compatible with 'signed char', and we define a separate case. */ #define __scalar_type_to_expr_cases(type) \ @@ -293,7 +269,6 @@ struct ftrace_likely_data { __scalar_type_to_expr_cases(long), \ __scalar_type_to_expr_cases(long long), \ default: (x))) -#endif /* Is this type a native word size -- useful for atomic operations */ #define __native_word(t) \ diff --git a/mm/migrate.c b/mm/migrate.c index f37729673558..40cd7016ae6f 100644 --- a/mm/migrate.c +++ b/mm/migrate.c @@ -1160,22 +1160,11 @@ static int __unmap_and_move(struct page *page, struct page *newpage, return rc; } -/* - * gcc 4.7 and 4.8 on arm get an ICEs when inlining unmap_and_move(). Work - * around it. - */ -#if defined(CONFIG_ARM) && \ - defined(GCC_VERSION) && GCC_VERSION < 40900 && GCC_VERSION >= 40700 -#define ICE_noinline noinline -#else -#define ICE_noinline -#endif - /* * Obtain the lock on page, remove all ptes and migrate the page * to the newly allocated page in newpage. */ -static ICE_noinline int unmap_and_move(new_page_t get_new_page, +static int unmap_and_move(new_page_t get_new_page, free_page_t put_new_page, unsigned long private, struct page *page, int force, enum migrate_mode mode, diff --git a/tools/include/linux/bits.h b/tools/include/linux/bits.h index 4671fbf28842..7f475d59a097 100644 --- a/tools/include/linux/bits.h +++ b/tools/include/linux/bits.h @@ -18,8 +18,7 @@ * position @h. For example * GENMASK_ULL(39, 21) gives us the 64bit vector 0x000000ffffe00000. */ -#if !defined(__ASSEMBLY__) && \ - (!defined(CONFIG_CC_IS_GCC) || CONFIG_GCC_VERSION >= 49000) +#if !defined(__ASSEMBLY__) #include #define GENMASK_INPUT_CHECK(h, l) \ (BUILD_BUG_ON_ZERO(__builtin_choose_expr( \ From 0bddd227f3dc55975e2b8dfa7fc6f959b062a2c7 Mon Sep 17 00:00:00 2001 From: Randy Dunlap Date: Wed, 8 Jul 2020 11:44:59 -0700 Subject: [PATCH 255/343] Documentation: update for gcc 4.9 requirement Update Documentation for the gcc v4.9 upgrade requirement. Fixes: 5429ef62bcf3 ("compiler/gcc: Raise minimum GCC version for kernel builds to 4.8") Fixes: 6ec4476ac825 ("Raise gcc version requirement to 4.9") Signed-off-by: Randy Dunlap Acked-by: Jonathan Corbet Signed-off-by: Linus Torvalds --- Documentation/admin-guide/README.rst | 2 +- Documentation/process/changes.rst | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/Documentation/admin-guide/README.rst b/Documentation/admin-guide/README.rst index 5fb526900023..5aad534233cd 100644 --- a/Documentation/admin-guide/README.rst +++ b/Documentation/admin-guide/README.rst @@ -258,7 +258,7 @@ Configuring the kernel Compiling the kernel -------------------- - - Make sure you have at least gcc 4.6 available. + - Make sure you have at least gcc 4.9 available. For more information, refer to :ref:`Documentation/process/changes.rst `. Please note that you can still run a.out user programs with this kernel. diff --git a/Documentation/process/changes.rst b/Documentation/process/changes.rst index 5cfb54c2aaa6..8f68e728ae6b 100644 --- a/Documentation/process/changes.rst +++ b/Documentation/process/changes.rst @@ -29,7 +29,7 @@ you probably needn't concern yourself with pcmciautils. ====================== =============== ======================================== Program Minimal version Command to check the version ====================== =============== ======================================== -GNU C 4.8 gcc --version +GNU C 4.9 gcc --version GNU make 3.81 make --version binutils 2.23 ld -v flex 2.5.35 flex --version From a42e6aee7f47a8a68d09923c720fc8f605a04207 Mon Sep 17 00:00:00 2001 From: Dmitry Bogdanov Date: Wed, 8 Jul 2020 17:17:10 +0300 Subject: [PATCH 256/343] net: atlantic: fix ip dst and ipv6 address filters This patch fixes ip dst and ipv6 address filters. There were 2 mistakes in the code, which led to the issue: * invalid register was used for ipv4 dst address; * incorrect write order of dwords for ipv6 addresses. Fixes: 23e7a718a49b ("net: aquantia: add rx-flow filter definitions") Signed-off-by: Dmitry Bogdanov Signed-off-by: Mark Starovoytov Signed-off-by: Alexander Lobakin Signed-off-by: David S. Miller --- drivers/net/ethernet/aquantia/atlantic/hw_atl/hw_atl_llh.c | 4 ++-- .../ethernet/aquantia/atlantic/hw_atl/hw_atl_llh_internal.h | 2 +- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/drivers/net/ethernet/aquantia/atlantic/hw_atl/hw_atl_llh.c b/drivers/net/ethernet/aquantia/atlantic/hw_atl/hw_atl_llh.c index 3c8e8047ea1e..d775b23025c1 100644 --- a/drivers/net/ethernet/aquantia/atlantic/hw_atl/hw_atl_llh.c +++ b/drivers/net/ethernet/aquantia/atlantic/hw_atl/hw_atl_llh.c @@ -1700,7 +1700,7 @@ void hw_atl_rpfl3l4_ipv6_src_addr_set(struct aq_hw_s *aq_hw, u8 location, for (i = 0; i < 4; ++i) aq_hw_write_reg(aq_hw, HW_ATL_RPF_L3_SRCA_ADR(location + i), - ipv6_src[i]); + ipv6_src[3 - i]); } void hw_atl_rpfl3l4_ipv6_dest_addr_set(struct aq_hw_s *aq_hw, u8 location, @@ -1711,7 +1711,7 @@ void hw_atl_rpfl3l4_ipv6_dest_addr_set(struct aq_hw_s *aq_hw, u8 location, for (i = 0; i < 4; ++i) aq_hw_write_reg(aq_hw, HW_ATL_RPF_L3_DSTA_ADR(location + i), - ipv6_dest[i]); + ipv6_dest[3 - i]); } u32 hw_atl_sem_ram_get(struct aq_hw_s *self) diff --git a/drivers/net/ethernet/aquantia/atlantic/hw_atl/hw_atl_llh_internal.h b/drivers/net/ethernet/aquantia/atlantic/hw_atl/hw_atl_llh_internal.h index 06220792daf1..7430ff025134 100644 --- a/drivers/net/ethernet/aquantia/atlantic/hw_atl/hw_atl_llh_internal.h +++ b/drivers/net/ethernet/aquantia/atlantic/hw_atl/hw_atl_llh_internal.h @@ -1360,7 +1360,7 @@ */ /* Register address for bitfield pif_rpf_l3_da0_i[31:0] */ -#define HW_ATL_RPF_L3_DSTA_ADR(filter) (0x000053B0 + (filter) * 0x4) +#define HW_ATL_RPF_L3_DSTA_ADR(filter) (0x000053D0 + (filter) * 0x4) /* Bitmask for bitfield l3_da0[1F:0] */ #define HW_ATL_RPF_L3_DSTA_MSK 0xFFFFFFFFu /* Inverted bitmask for bitfield l3_da0[1F:0] */ From 6778a6bed09b58beca936a675e9dd195c0986580 Mon Sep 17 00:00:00 2001 From: Karsten Graul Date: Wed, 8 Jul 2020 17:05:11 +0200 Subject: [PATCH 257/343] net/smc: separate LLC wait queues for flow and messages There might be races in scenarios where both SMC link groups are on the same system. Prevent that by creating separate wait queues for LLC flows and messages. Switch to non-interruptable versions of wait_event() and wake_up() for the llc flow waiter to make sure the waiters get control sequentially. Fine tune the llc_flow_lock to include the assignment of the message. Write to system log when an unexpected message was dropped. And remove an extra indirection and use the existing local variable lgr in smc_llc_enqueue(). Fixes: 555da9af827d ("net/smc: add event-based llc_flow framework") Reviewed-by: Ursula Braun Signed-off-by: Karsten Graul Signed-off-by: David S. Miller --- net/smc/smc_core.c | 38 ++++++++++++--------- net/smc/smc_core.h | 4 ++- net/smc/smc_llc.c | 83 +++++++++++++++++++++++++++++----------------- 3 files changed, 77 insertions(+), 48 deletions(-) diff --git a/net/smc/smc_core.c b/net/smc/smc_core.c index 7964a21e5e6f..d695ce71837e 100644 --- a/net/smc/smc_core.c +++ b/net/smc/smc_core.c @@ -247,7 +247,8 @@ static void smcr_lgr_link_deactivate_all(struct smc_link_group *lgr) if (smc_link_usable(lnk)) lnk->state = SMC_LNK_INACTIVE; } - wake_up_interruptible_all(&lgr->llc_waiter); + wake_up_all(&lgr->llc_msg_waiter); + wake_up_all(&lgr->llc_flow_waiter); } static void smc_lgr_free(struct smc_link_group *lgr); @@ -1130,18 +1131,19 @@ static void smcr_link_up(struct smc_link_group *lgr, return; if (lgr->llc_flow_lcl.type != SMC_LLC_FLOW_NONE) { /* some other llc task is ongoing */ - wait_event_interruptible_timeout(lgr->llc_waiter, - (lgr->llc_flow_lcl.type == SMC_LLC_FLOW_NONE), + wait_event_timeout(lgr->llc_flow_waiter, + (list_empty(&lgr->list) || + lgr->llc_flow_lcl.type == SMC_LLC_FLOW_NONE), SMC_LLC_WAIT_TIME); } - if (list_empty(&lgr->list) || - !smc_ib_port_active(smcibdev, ibport)) - return; /* lgr or device no longer active */ - link = smc_llc_usable_link(lgr); - if (!link) - return; - smc_llc_send_add_link(link, smcibdev->mac[ibport - 1], gid, - NULL, SMC_LLC_REQ); + /* lgr or device no longer active? */ + if (!list_empty(&lgr->list) && + smc_ib_port_active(smcibdev, ibport)) + link = smc_llc_usable_link(lgr); + if (link) + smc_llc_send_add_link(link, smcibdev->mac[ibport - 1], + gid, NULL, SMC_LLC_REQ); + wake_up(&lgr->llc_flow_waiter); /* wake up next waiter */ } } @@ -1195,13 +1197,17 @@ static void smcr_link_down(struct smc_link *lnk) if (lgr->llc_flow_lcl.type != SMC_LLC_FLOW_NONE) { /* another llc task is ongoing */ mutex_unlock(&lgr->llc_conf_mutex); - wait_event_interruptible_timeout(lgr->llc_waiter, - (lgr->llc_flow_lcl.type == SMC_LLC_FLOW_NONE), + wait_event_timeout(lgr->llc_flow_waiter, + (list_empty(&lgr->list) || + lgr->llc_flow_lcl.type == SMC_LLC_FLOW_NONE), SMC_LLC_WAIT_TIME); mutex_lock(&lgr->llc_conf_mutex); } - smc_llc_send_delete_link(to_lnk, del_link_id, SMC_LLC_REQ, true, - SMC_LLC_DEL_LOST_PATH); + if (!list_empty(&lgr->list)) + smc_llc_send_delete_link(to_lnk, del_link_id, + SMC_LLC_REQ, true, + SMC_LLC_DEL_LOST_PATH); + wake_up(&lgr->llc_flow_waiter); /* wake up next waiter */ } } @@ -1262,7 +1268,7 @@ static void smc_link_down_work(struct work_struct *work) if (list_empty(&lgr->list)) return; - wake_up_interruptible_all(&lgr->llc_waiter); + wake_up_all(&lgr->llc_msg_waiter); mutex_lock(&lgr->llc_conf_mutex); smcr_link_down(link); mutex_unlock(&lgr->llc_conf_mutex); diff --git a/net/smc/smc_core.h b/net/smc/smc_core.h index 86d160f0d187..c3ff512fd891 100644 --- a/net/smc/smc_core.h +++ b/net/smc/smc_core.h @@ -262,8 +262,10 @@ struct smc_link_group { struct work_struct llc_del_link_work; struct work_struct llc_event_work; /* llc event worker */ - wait_queue_head_t llc_waiter; + wait_queue_head_t llc_flow_waiter; /* w4 next llc event */ + wait_queue_head_t llc_msg_waiter; + /* w4 next llc msg */ struct smc_llc_flow llc_flow_lcl; /* llc local control field */ struct smc_llc_flow llc_flow_rmt; diff --git a/net/smc/smc_llc.c b/net/smc/smc_llc.c index 391237b601fe..df164232574b 100644 --- a/net/smc/smc_llc.c +++ b/net/smc/smc_llc.c @@ -186,6 +186,26 @@ static inline void smc_llc_flow_qentry_set(struct smc_llc_flow *flow, flow->qentry = qentry; } +static void smc_llc_flow_parallel(struct smc_link_group *lgr, u8 flow_type, + struct smc_llc_qentry *qentry) +{ + u8 msg_type = qentry->msg.raw.hdr.common.type; + + if ((msg_type == SMC_LLC_ADD_LINK || msg_type == SMC_LLC_DELETE_LINK) && + flow_type != msg_type && !lgr->delayed_event) { + lgr->delayed_event = qentry; + return; + } + /* drop parallel or already-in-progress llc requests */ + if (flow_type != msg_type) + pr_warn_once("smc: SMC-R lg %*phN dropped parallel " + "LLC msg: msg %d flow %d role %d\n", + SMC_LGR_ID_SIZE, &lgr->id, + qentry->msg.raw.hdr.common.type, + flow_type, lgr->role); + kfree(qentry); +} + /* try to start a new llc flow, initiated by an incoming llc msg */ static bool smc_llc_flow_start(struct smc_llc_flow *flow, struct smc_llc_qentry *qentry) @@ -195,14 +215,7 @@ static bool smc_llc_flow_start(struct smc_llc_flow *flow, spin_lock_bh(&lgr->llc_flow_lock); if (flow->type) { /* a flow is already active */ - if ((qentry->msg.raw.hdr.common.type == SMC_LLC_ADD_LINK || - qentry->msg.raw.hdr.common.type == SMC_LLC_DELETE_LINK) && - !lgr->delayed_event) { - lgr->delayed_event = qentry; - } else { - /* forget this llc request */ - kfree(qentry); - } + smc_llc_flow_parallel(lgr, flow->type, qentry); spin_unlock_bh(&lgr->llc_flow_lock); return false; } @@ -222,8 +235,8 @@ static bool smc_llc_flow_start(struct smc_llc_flow *flow, } if (qentry == lgr->delayed_event) lgr->delayed_event = NULL; - spin_unlock_bh(&lgr->llc_flow_lock); smc_llc_flow_qentry_set(flow, qentry); + spin_unlock_bh(&lgr->llc_flow_lock); return true; } @@ -251,11 +264,11 @@ int smc_llc_flow_initiate(struct smc_link_group *lgr, return 0; } spin_unlock_bh(&lgr->llc_flow_lock); - rc = wait_event_interruptible_timeout(lgr->llc_waiter, - (lgr->llc_flow_lcl.type == SMC_LLC_FLOW_NONE && - (lgr->llc_flow_rmt.type == SMC_LLC_FLOW_NONE || - lgr->llc_flow_rmt.type == allowed_remote)), - SMC_LLC_WAIT_TIME); + rc = wait_event_timeout(lgr->llc_flow_waiter, (list_empty(&lgr->list) || + (lgr->llc_flow_lcl.type == SMC_LLC_FLOW_NONE && + (lgr->llc_flow_rmt.type == SMC_LLC_FLOW_NONE || + lgr->llc_flow_rmt.type == allowed_remote))), + SMC_LLC_WAIT_TIME * 10); if (!rc) return -ETIMEDOUT; goto again; @@ -272,7 +285,7 @@ void smc_llc_flow_stop(struct smc_link_group *lgr, struct smc_llc_flow *flow) flow == &lgr->llc_flow_lcl) schedule_work(&lgr->llc_event_work); else - wake_up_interruptible(&lgr->llc_waiter); + wake_up(&lgr->llc_flow_waiter); } /* lnk is optional and used for early wakeup when link goes down, useful in @@ -283,26 +296,32 @@ struct smc_llc_qentry *smc_llc_wait(struct smc_link_group *lgr, int time_out, u8 exp_msg) { struct smc_llc_flow *flow = &lgr->llc_flow_lcl; + u8 rcv_msg; - wait_event_interruptible_timeout(lgr->llc_waiter, - (flow->qentry || - (lnk && !smc_link_usable(lnk)) || - list_empty(&lgr->list)), - time_out); + wait_event_timeout(lgr->llc_msg_waiter, + (flow->qentry || + (lnk && !smc_link_usable(lnk)) || + list_empty(&lgr->list)), + time_out); if (!flow->qentry || (lnk && !smc_link_usable(lnk)) || list_empty(&lgr->list)) { smc_llc_flow_qentry_del(flow); goto out; } - if (exp_msg && flow->qentry->msg.raw.hdr.common.type != exp_msg) { + rcv_msg = flow->qentry->msg.raw.hdr.common.type; + if (exp_msg && rcv_msg != exp_msg) { if (exp_msg == SMC_LLC_ADD_LINK && - flow->qentry->msg.raw.hdr.common.type == - SMC_LLC_DELETE_LINK) { + rcv_msg == SMC_LLC_DELETE_LINK) { /* flow_start will delay the unexpected msg */ smc_llc_flow_start(&lgr->llc_flow_lcl, smc_llc_flow_qentry_clr(flow)); return NULL; } + pr_warn_once("smc: SMC-R lg %*phN dropped unexpected LLC msg: " + "msg %d exp %d flow %d role %d flags %x\n", + SMC_LGR_ID_SIZE, &lgr->id, rcv_msg, exp_msg, + flow->type, lgr->role, + flow->qentry->msg.raw.hdr.flags); smc_llc_flow_qentry_del(flow); } out: @@ -1459,7 +1478,7 @@ static void smc_llc_event_handler(struct smc_llc_qentry *qentry) /* a flow is waiting for this message */ smc_llc_flow_qentry_set(&lgr->llc_flow_lcl, qentry); - wake_up_interruptible(&lgr->llc_waiter); + wake_up(&lgr->llc_msg_waiter); } else if (smc_llc_flow_start(&lgr->llc_flow_lcl, qentry)) { schedule_work(&lgr->llc_add_link_work); @@ -1474,7 +1493,7 @@ static void smc_llc_event_handler(struct smc_llc_qentry *qentry) if (lgr->llc_flow_lcl.type != SMC_LLC_FLOW_NONE) { /* a flow is waiting for this message */ smc_llc_flow_qentry_set(&lgr->llc_flow_lcl, qentry); - wake_up_interruptible(&lgr->llc_waiter); + wake_up(&lgr->llc_msg_waiter); return; } break; @@ -1485,7 +1504,7 @@ static void smc_llc_event_handler(struct smc_llc_qentry *qentry) /* DEL LINK REQ during ADD LINK SEQ */ smc_llc_flow_qentry_set(&lgr->llc_flow_lcl, qentry); - wake_up_interruptible(&lgr->llc_waiter); + wake_up(&lgr->llc_msg_waiter); } else if (smc_llc_flow_start(&lgr->llc_flow_lcl, qentry)) { schedule_work(&lgr->llc_del_link_work); @@ -1496,7 +1515,7 @@ static void smc_llc_event_handler(struct smc_llc_qentry *qentry) /* DEL LINK REQ during ADD LINK SEQ */ smc_llc_flow_qentry_set(&lgr->llc_flow_lcl, qentry); - wake_up_interruptible(&lgr->llc_waiter); + wake_up(&lgr->llc_msg_waiter); } else if (smc_llc_flow_start(&lgr->llc_flow_lcl, qentry)) { schedule_work(&lgr->llc_del_link_work); @@ -1581,7 +1600,7 @@ static void smc_llc_rx_response(struct smc_link *link, case SMC_LLC_DELETE_RKEY: /* assign responses to the local flow, we requested them */ smc_llc_flow_qentry_set(&link->lgr->llc_flow_lcl, qentry); - wake_up_interruptible(&link->lgr->llc_waiter); + wake_up(&link->lgr->llc_msg_waiter); return; case SMC_LLC_CONFIRM_RKEY_CONT: /* not used because max links is 3 */ @@ -1616,7 +1635,7 @@ static void smc_llc_enqueue(struct smc_link *link, union smc_llc_msg *llc) spin_lock_irqsave(&lgr->llc_event_q_lock, flags); list_add_tail(&qentry->list, &lgr->llc_event_q); spin_unlock_irqrestore(&lgr->llc_event_q_lock, flags); - schedule_work(&link->lgr->llc_event_work); + schedule_work(&lgr->llc_event_work); } /* copy received msg and add it to the event queue */ @@ -1677,7 +1696,8 @@ void smc_llc_lgr_init(struct smc_link_group *lgr, struct smc_sock *smc) INIT_LIST_HEAD(&lgr->llc_event_q); spin_lock_init(&lgr->llc_event_q_lock); spin_lock_init(&lgr->llc_flow_lock); - init_waitqueue_head(&lgr->llc_waiter); + init_waitqueue_head(&lgr->llc_flow_waiter); + init_waitqueue_head(&lgr->llc_msg_waiter); mutex_init(&lgr->llc_conf_mutex); lgr->llc_testlink_time = net->ipv4.sysctl_tcp_keepalive_time; } @@ -1686,7 +1706,8 @@ void smc_llc_lgr_init(struct smc_link_group *lgr, struct smc_sock *smc) void smc_llc_lgr_clear(struct smc_link_group *lgr) { smc_llc_event_flush(lgr); - wake_up_interruptible_all(&lgr->llc_waiter); + wake_up_all(&lgr->llc_flow_waiter); + wake_up_all(&lgr->llc_msg_waiter); cancel_work_sync(&lgr->llc_event_work); cancel_work_sync(&lgr->llc_add_link_work); cancel_work_sync(&lgr->llc_del_link_work); From b7eede757883a9892dcb7bf0280f4890fc74bcf6 Mon Sep 17 00:00:00 2001 From: Karsten Graul Date: Wed, 8 Jul 2020 17:05:12 +0200 Subject: [PATCH 258/343] net/smc: fix work request handling Wait for pending sends only when smc_switch_conns() found a link to move the connections to. Do not wait during link freeing, this can lead to permanent hang situations. And refuse to provide a new tx slot on an unusable link. Fixes: c6f02ebeea3a ("net/smc: switch connections to alternate link") Reviewed-by: Ursula Braun Signed-off-by: Karsten Graul Signed-off-by: David S. Miller --- net/smc/smc_llc.c | 8 ++++---- net/smc/smc_wr.c | 10 ++++++---- 2 files changed, 10 insertions(+), 8 deletions(-) diff --git a/net/smc/smc_llc.c b/net/smc/smc_llc.c index df164232574b..c1a038689c63 100644 --- a/net/smc/smc_llc.c +++ b/net/smc/smc_llc.c @@ -1241,8 +1241,8 @@ static void smc_llc_process_cli_delete_link(struct smc_link_group *lgr) smc_llc_send_message(lnk, &qentry->msg); /* response */ if (smc_link_downing(&lnk_del->state)) { - smc_switch_conns(lgr, lnk_del, false); - smc_wr_tx_wait_no_pending_sends(lnk_del); + if (smc_switch_conns(lgr, lnk_del, false)) + smc_wr_tx_wait_no_pending_sends(lnk_del); } smcr_link_clear(lnk_del, true); @@ -1316,8 +1316,8 @@ static void smc_llc_process_srv_delete_link(struct smc_link_group *lgr) goto out; /* asymmetric link already deleted */ if (smc_link_downing(&lnk_del->state)) { - smc_switch_conns(lgr, lnk_del, false); - smc_wr_tx_wait_no_pending_sends(lnk_del); + if (smc_switch_conns(lgr, lnk_del, false)) + smc_wr_tx_wait_no_pending_sends(lnk_del); } if (!list_empty(&lgr->list)) { /* qentry is either a request from peer (send it back to diff --git a/net/smc/smc_wr.c b/net/smc/smc_wr.c index 7239ba9b99dc..1e23cdd41eb1 100644 --- a/net/smc/smc_wr.c +++ b/net/smc/smc_wr.c @@ -169,6 +169,8 @@ void smc_wr_tx_cq_handler(struct ib_cq *ib_cq, void *cq_context) static inline int smc_wr_tx_get_free_slot_index(struct smc_link *link, u32 *idx) { *idx = link->wr_tx_cnt; + if (!smc_link_usable(link)) + return -ENOLINK; for_each_clear_bit(*idx, link->wr_tx_mask, link->wr_tx_cnt) { if (!test_and_set_bit(*idx, link->wr_tx_mask)) return 0; @@ -560,15 +562,15 @@ void smc_wr_free_link(struct smc_link *lnk) { struct ib_device *ibdev; + if (!lnk->smcibdev) + return; + ibdev = lnk->smcibdev->ibdev; + if (smc_wr_tx_wait_no_pending_sends(lnk)) memset(lnk->wr_tx_mask, 0, BITS_TO_LONGS(SMC_WR_BUF_CNT) * sizeof(*lnk->wr_tx_mask)); - if (!lnk->smcibdev) - return; - ibdev = lnk->smcibdev->ibdev; - if (lnk->wr_rx_dma_addr) { ib_dma_unmap_single(ibdev, lnk->wr_rx_dma_addr, SMC_WR_BUF_SIZE * lnk->wr_rx_cnt, From 92f3cb0e11dda530d1daa42d7a11af5a92ed89e4 Mon Sep 17 00:00:00 2001 From: Ursula Braun Date: Wed, 8 Jul 2020 17:05:13 +0200 Subject: [PATCH 259/343] net/smc: fix sleep bug in smc_pnet_find_roce_resource() Tests showed this BUG: [572555.252867] BUG: sleeping function called from invalid context at kernel/locking/mutex.c:935 [572555.252876] in_atomic(): 1, irqs_disabled(): 0, non_block: 0, pid: 131031, name: smcapp [572555.252879] INFO: lockdep is turned off. [572555.252883] CPU: 1 PID: 131031 Comm: smcapp Tainted: G O 5.7.0-rc3uschi+ #356 [572555.252885] Hardware name: IBM 3906 M03 703 (LPAR) [572555.252887] Call Trace: [572555.252896] [<00000000ac364554>] show_stack+0x94/0xe8 [572555.252901] [<00000000aca1f400>] dump_stack+0xa0/0xe0 [572555.252906] [<00000000ac3c8c10>] ___might_sleep+0x260/0x280 [572555.252910] [<00000000acdc0c98>] __mutex_lock+0x48/0x940 [572555.252912] [<00000000acdc15c2>] mutex_lock_nested+0x32/0x40 [572555.252975] [<000003ff801762d0>] mlx5_lag_get_roce_netdev+0x30/0xc0 [mlx5_core] [572555.252996] [<000003ff801fb3aa>] mlx5_ib_get_netdev+0x3a/0xe0 [mlx5_ib] [572555.253007] [<000003ff80063848>] smc_pnet_find_roce_resource+0x1d8/0x310 [smc] [572555.253011] [<000003ff800602f0>] __smc_connect+0x1f0/0x3e0 [smc] [572555.253015] [<000003ff80060634>] smc_connect+0x154/0x190 [smc] [572555.253022] [<00000000acbed8d4>] __sys_connect+0x94/0xd0 [572555.253025] [<00000000acbef620>] __s390x_sys_socketcall+0x170/0x360 [572555.253028] [<00000000acdc6800>] system_call+0x298/0x2b8 [572555.253030] INFO: lockdep is turned off. Function smc_pnet_find_rdma_dev() might be called from smc_pnet_find_roce_resource(). It holds the smc_ib_devices list spinlock while calling infiniband op get_netdev(). At least for mlx5 the get_netdev operation wants mutex serialization, which conflicts with the smc_ib_devices spinlock. This patch switches the smc_ib_devices spinlock into a mutex to allow sleeping when calling get_netdev(). Fixes: a4cf0443c414 ("smc: introduce SMC as an IB-client") Signed-off-by: Ursula Braun Signed-off-by: Karsten Graul Signed-off-by: David S. Miller --- net/smc/smc_core.c | 5 +++-- net/smc/smc_ib.c | 11 ++++++----- net/smc/smc_ib.h | 3 ++- net/smc/smc_pnet.c | 21 +++++++++++---------- 4 files changed, 22 insertions(+), 18 deletions(-) diff --git a/net/smc/smc_core.c b/net/smc/smc_core.c index d695ce71837e..8bf34d9f27e5 100644 --- a/net/smc/smc_core.c +++ b/net/smc/smc_core.c @@ -15,6 +15,7 @@ #include #include #include +#include #include #include #include @@ -1961,14 +1962,14 @@ static void smc_core_going_away(void) struct smc_ib_device *smcibdev; struct smcd_dev *smcd; - spin_lock(&smc_ib_devices.lock); + mutex_lock(&smc_ib_devices.mutex); list_for_each_entry(smcibdev, &smc_ib_devices.list, list) { int i; for (i = 0; i < SMC_MAX_PORTS; i++) set_bit(i, smcibdev->ports_going_away); } - spin_unlock(&smc_ib_devices.lock); + mutex_unlock(&smc_ib_devices.mutex); spin_lock(&smcd_dev_list.lock); list_for_each_entry(smcd, &smcd_dev_list.list, list) { diff --git a/net/smc/smc_ib.c b/net/smc/smc_ib.c index 562a52d01ad1..7637fdebbb78 100644 --- a/net/smc/smc_ib.c +++ b/net/smc/smc_ib.c @@ -16,6 +16,7 @@ #include #include #include +#include #include #include @@ -33,7 +34,7 @@ #define SMC_QP_RNR_RETRY 7 /* 7: infinite */ struct smc_ib_devices smc_ib_devices = { /* smc-registered ib devices */ - .lock = __SPIN_LOCK_UNLOCKED(smc_ib_devices.lock), + .mutex = __MUTEX_INITIALIZER(smc_ib_devices.mutex), .list = LIST_HEAD_INIT(smc_ib_devices.list), }; @@ -565,9 +566,9 @@ static int smc_ib_add_dev(struct ib_device *ibdev) INIT_WORK(&smcibdev->port_event_work, smc_ib_port_event_work); atomic_set(&smcibdev->lnk_cnt, 0); init_waitqueue_head(&smcibdev->lnks_deleted); - spin_lock(&smc_ib_devices.lock); + mutex_lock(&smc_ib_devices.mutex); list_add_tail(&smcibdev->list, &smc_ib_devices.list); - spin_unlock(&smc_ib_devices.lock); + mutex_unlock(&smc_ib_devices.mutex); ib_set_client_data(ibdev, &smc_ib_client, smcibdev); INIT_IB_EVENT_HANDLER(&smcibdev->event_handler, smcibdev->ibdev, smc_ib_global_event_handler); @@ -602,9 +603,9 @@ static void smc_ib_remove_dev(struct ib_device *ibdev, void *client_data) { struct smc_ib_device *smcibdev = client_data; - spin_lock(&smc_ib_devices.lock); + mutex_lock(&smc_ib_devices.mutex); list_del_init(&smcibdev->list); /* remove from smc_ib_devices */ - spin_unlock(&smc_ib_devices.lock); + mutex_unlock(&smc_ib_devices.mutex); pr_warn_ratelimited("smc: removing ib device %s\n", smcibdev->ibdev->name); smc_smcr_terminate_all(smcibdev); diff --git a/net/smc/smc_ib.h b/net/smc/smc_ib.h index e6a696ae15f3..ae6776e1e726 100644 --- a/net/smc/smc_ib.h +++ b/net/smc/smc_ib.h @@ -14,6 +14,7 @@ #include #include +#include #include #include #include @@ -25,7 +26,7 @@ struct smc_ib_devices { /* list of smc ib devices definition */ struct list_head list; - spinlock_t lock; /* protects list of smc ib devices */ + struct mutex mutex; /* protects list of smc ib devices */ }; extern struct smc_ib_devices smc_ib_devices; /* list of smc ib devices */ diff --git a/net/smc/smc_pnet.c b/net/smc/smc_pnet.c index 014d91b9778e..d4aac31d39f5 100644 --- a/net/smc/smc_pnet.c +++ b/net/smc/smc_pnet.c @@ -12,6 +12,7 @@ #include #include #include +#include #include #include @@ -129,7 +130,7 @@ static int smc_pnet_remove_by_pnetid(struct net *net, char *pnet_name) return rc; /* remove ib devices */ - spin_lock(&smc_ib_devices.lock); + mutex_lock(&smc_ib_devices.mutex); list_for_each_entry(ibdev, &smc_ib_devices.list, list) { for (ibport = 0; ibport < SMC_MAX_PORTS; ibport++) { if (ibdev->pnetid_by_user[ibport] && @@ -149,7 +150,7 @@ static int smc_pnet_remove_by_pnetid(struct net *net, char *pnet_name) } } } - spin_unlock(&smc_ib_devices.lock); + mutex_unlock(&smc_ib_devices.mutex); /* remove smcd devices */ spin_lock(&smcd_dev_list.lock); list_for_each_entry(smcd_dev, &smcd_dev_list.list, list) { @@ -240,14 +241,14 @@ static bool smc_pnet_apply_ib(struct smc_ib_device *ib_dev, u8 ib_port, u8 pnet_null[SMC_MAX_PNETID_LEN] = {0}; bool applied = false; - spin_lock(&smc_ib_devices.lock); + mutex_lock(&smc_ib_devices.mutex); if (smc_pnet_match(ib_dev->pnetid[ib_port - 1], pnet_null)) { memcpy(ib_dev->pnetid[ib_port - 1], pnet_name, SMC_MAX_PNETID_LEN); ib_dev->pnetid_by_user[ib_port - 1] = true; applied = true; } - spin_unlock(&smc_ib_devices.lock); + mutex_unlock(&smc_ib_devices.mutex); return applied; } @@ -300,7 +301,7 @@ static struct smc_ib_device *smc_pnet_find_ib(char *ib_name) { struct smc_ib_device *ibdev; - spin_lock(&smc_ib_devices.lock); + mutex_lock(&smc_ib_devices.mutex); list_for_each_entry(ibdev, &smc_ib_devices.list, list) { if (!strncmp(ibdev->ibdev->name, ib_name, sizeof(ibdev->ibdev->name)) || @@ -311,7 +312,7 @@ static struct smc_ib_device *smc_pnet_find_ib(char *ib_name) } ibdev = NULL; out: - spin_unlock(&smc_ib_devices.lock); + mutex_unlock(&smc_ib_devices.mutex); return ibdev; } @@ -825,7 +826,7 @@ static void _smc_pnet_find_roce_by_pnetid(u8 *pnet_id, int i; ini->ib_dev = NULL; - spin_lock(&smc_ib_devices.lock); + mutex_lock(&smc_ib_devices.mutex); list_for_each_entry(ibdev, &smc_ib_devices.list, list) { if (ibdev == known_dev) continue; @@ -844,7 +845,7 @@ static void _smc_pnet_find_roce_by_pnetid(u8 *pnet_id, } } out: - spin_unlock(&smc_ib_devices.lock); + mutex_unlock(&smc_ib_devices.mutex); } /* find alternate roce device with same pnet_id and vlan_id */ @@ -863,7 +864,7 @@ static void smc_pnet_find_rdma_dev(struct net_device *netdev, { struct smc_ib_device *ibdev; - spin_lock(&smc_ib_devices.lock); + mutex_lock(&smc_ib_devices.mutex); list_for_each_entry(ibdev, &smc_ib_devices.list, list) { struct net_device *ndev; int i; @@ -888,7 +889,7 @@ static void smc_pnet_find_rdma_dev(struct net_device *netdev, } } } - spin_unlock(&smc_ib_devices.lock); + mutex_unlock(&smc_ib_devices.mutex); } /* Determine the corresponding IB device port based on the hardware PNETID. From 82087c0330534d18e6db25869871e589d214b7fa Mon Sep 17 00:00:00 2001 From: Ursula Braun Date: Wed, 8 Jul 2020 17:05:14 +0200 Subject: [PATCH 260/343] net/smc: switch smcd_dev_list spinlock to mutex The similar smc_ib_devices spinlock has been converted to a mutex. Protecting the smcd_dev_list by a mutex is possible as well. This patch converts the smcd_dev_list spinlock to a mutex. Fixes: c6ba7c9ba43d ("net/smc: add base infrastructure for SMC-D and ISM") Signed-off-by: Ursula Braun Signed-off-by: Karsten Graul Signed-off-by: David S. Miller --- net/smc/smc_core.c | 8 ++++---- net/smc/smc_ism.c | 11 ++++++----- net/smc/smc_ism.h | 3 ++- net/smc/smc_pnet.c | 16 ++++++++-------- 4 files changed, 20 insertions(+), 18 deletions(-) diff --git a/net/smc/smc_core.c b/net/smc/smc_core.c index 8bf34d9f27e5..f69d205b3e11 100644 --- a/net/smc/smc_core.c +++ b/net/smc/smc_core.c @@ -1971,11 +1971,11 @@ static void smc_core_going_away(void) } mutex_unlock(&smc_ib_devices.mutex); - spin_lock(&smcd_dev_list.lock); + mutex_lock(&smcd_dev_list.mutex); list_for_each_entry(smcd, &smcd_dev_list.list, list) { smcd->going_away = 1; } - spin_unlock(&smcd_dev_list.lock); + mutex_unlock(&smcd_dev_list.mutex); } /* Clean up all SMC link groups */ @@ -1987,10 +1987,10 @@ static void smc_lgrs_shutdown(void) smc_smcr_terminate_all(NULL); - spin_lock(&smcd_dev_list.lock); + mutex_lock(&smcd_dev_list.mutex); list_for_each_entry(smcd, &smcd_dev_list.list, list) smc_smcd_terminate_all(smcd); - spin_unlock(&smcd_dev_list.lock); + mutex_unlock(&smcd_dev_list.mutex); } static int smc_core_reboot_event(struct notifier_block *this, diff --git a/net/smc/smc_ism.c b/net/smc/smc_ism.c index 91f85fc09fb8..998c525de785 100644 --- a/net/smc/smc_ism.c +++ b/net/smc/smc_ism.c @@ -7,6 +7,7 @@ */ #include +#include #include #include @@ -17,7 +18,7 @@ struct smcd_dev_list smcd_dev_list = { .list = LIST_HEAD_INIT(smcd_dev_list.list), - .lock = __SPIN_LOCK_UNLOCKED(smcd_dev_list.lock) + .mutex = __MUTEX_INITIALIZER(smcd_dev_list.mutex) }; /* Test if an ISM communication is possible. */ @@ -317,9 +318,9 @@ EXPORT_SYMBOL_GPL(smcd_alloc_dev); int smcd_register_dev(struct smcd_dev *smcd) { - spin_lock(&smcd_dev_list.lock); + mutex_lock(&smcd_dev_list.mutex); list_add_tail(&smcd->list, &smcd_dev_list.list); - spin_unlock(&smcd_dev_list.lock); + mutex_unlock(&smcd_dev_list.mutex); pr_warn_ratelimited("smc: adding smcd device %s with pnetid %.16s%s\n", dev_name(&smcd->dev), smcd->pnetid, @@ -333,9 +334,9 @@ void smcd_unregister_dev(struct smcd_dev *smcd) { pr_warn_ratelimited("smc: removing smcd device %s\n", dev_name(&smcd->dev)); - spin_lock(&smcd_dev_list.lock); + mutex_lock(&smcd_dev_list.mutex); list_del_init(&smcd->list); - spin_unlock(&smcd_dev_list.lock); + mutex_unlock(&smcd_dev_list.mutex); smcd->going_away = 1; smc_smcd_terminate_all(smcd); flush_workqueue(smcd->event_wq); diff --git a/net/smc/smc_ism.h b/net/smc/smc_ism.h index 4da946cbfa29..81cc4537efd3 100644 --- a/net/smc/smc_ism.h +++ b/net/smc/smc_ism.h @@ -10,12 +10,13 @@ #define SMCD_ISM_H #include +#include #include "smc.h" struct smcd_dev_list { /* List of SMCD devices */ struct list_head list; - spinlock_t lock; /* Protects list of devices */ + struct mutex mutex; /* Protects list of devices */ }; extern struct smcd_dev_list smcd_dev_list; /* list of smcd devices */ diff --git a/net/smc/smc_pnet.c b/net/smc/smc_pnet.c index d4aac31d39f5..30e5fac7034e 100644 --- a/net/smc/smc_pnet.c +++ b/net/smc/smc_pnet.c @@ -152,7 +152,7 @@ static int smc_pnet_remove_by_pnetid(struct net *net, char *pnet_name) } mutex_unlock(&smc_ib_devices.mutex); /* remove smcd devices */ - spin_lock(&smcd_dev_list.lock); + mutex_lock(&smcd_dev_list.mutex); list_for_each_entry(smcd_dev, &smcd_dev_list.list, list) { if (smcd_dev->pnetid_by_user && (!pnet_name || @@ -166,7 +166,7 @@ static int smc_pnet_remove_by_pnetid(struct net *net, char *pnet_name) rc = 0; } } - spin_unlock(&smcd_dev_list.lock); + mutex_unlock(&smcd_dev_list.mutex); return rc; } @@ -259,13 +259,13 @@ static bool smc_pnet_apply_smcd(struct smcd_dev *smcd_dev, char *pnet_name) u8 pnet_null[SMC_MAX_PNETID_LEN] = {0}; bool applied = false; - spin_lock(&smcd_dev_list.lock); + mutex_lock(&smcd_dev_list.mutex); if (smc_pnet_match(smcd_dev->pnetid, pnet_null)) { memcpy(smcd_dev->pnetid, pnet_name, SMC_MAX_PNETID_LEN); smcd_dev->pnetid_by_user = true; applied = true; } - spin_unlock(&smcd_dev_list.lock); + mutex_unlock(&smcd_dev_list.mutex); return applied; } @@ -321,7 +321,7 @@ static struct smcd_dev *smc_pnet_find_smcd(char *smcd_name) { struct smcd_dev *smcd_dev; - spin_lock(&smcd_dev_list.lock); + mutex_lock(&smcd_dev_list.mutex); list_for_each_entry(smcd_dev, &smcd_dev_list.list, list) { if (!strncmp(dev_name(&smcd_dev->dev), smcd_name, IB_DEVICE_NAME_MAX - 1)) @@ -329,7 +329,7 @@ static struct smcd_dev *smc_pnet_find_smcd(char *smcd_name) } smcd_dev = NULL; out: - spin_unlock(&smcd_dev_list.lock); + mutex_unlock(&smcd_dev_list.mutex); return smcd_dev; } @@ -925,7 +925,7 @@ static void smc_pnet_find_ism_by_pnetid(struct net_device *ndev, smc_pnet_find_ndev_pnetid_by_table(ndev, ndev_pnetid)) return; /* pnetid could not be determined */ - spin_lock(&smcd_dev_list.lock); + mutex_lock(&smcd_dev_list.mutex); list_for_each_entry(ismdev, &smcd_dev_list.list, list) { if (smc_pnet_match(ismdev->pnetid, ndev_pnetid) && !ismdev->going_away) { @@ -933,7 +933,7 @@ static void smc_pnet_find_ism_by_pnetid(struct net_device *ndev, break; } } - spin_unlock(&smcd_dev_list.lock); + mutex_unlock(&smcd_dev_list.mutex); } /* PNET table analysis for a given sock: From fb4f79264c0fc6fd5a68ffe3e31bfff97311e1f1 Mon Sep 17 00:00:00 2001 From: Ursula Braun Date: Wed, 8 Jul 2020 17:05:15 +0200 Subject: [PATCH 261/343] net/smc: tolerate future SMCD versions CLC proposal messages of future SMCD versions could be larger than SMCD V1 CLC proposal messages. To enable toleration in SMC V1 the receival of CLC proposal messages is adapted: * accept larger length values in CLC proposal * check trailing eye catcher for incoming CLC proposal with V1 length only * receive the whole CLC proposal even in cases it does not fit into the V1 buffer Fixes: e7b7a64a8493d ("smc: support variable CLC proposal messages") Signed-off-by: Ursula Braun Signed-off-by: Karsten Graul Signed-off-by: David S. Miller --- net/smc/smc_clc.c | 45 ++++++++++++++++++++++++++++++++------------- net/smc/smc_clc.h | 2 ++ 2 files changed, 34 insertions(+), 13 deletions(-) diff --git a/net/smc/smc_clc.c b/net/smc/smc_clc.c index d5627df24215..779f4142a11d 100644 --- a/net/smc/smc_clc.c +++ b/net/smc/smc_clc.c @@ -27,6 +27,7 @@ #define SMCR_CLC_ACCEPT_CONFIRM_LEN 68 #define SMCD_CLC_ACCEPT_CONFIRM_LEN 48 +#define SMC_CLC_RECV_BUF_LEN 100 /* eye catcher "SMCR" EBCDIC for CLC messages */ static const char SMC_EYECATCHER[4] = {'\xe2', '\xd4', '\xc3', '\xd9'}; @@ -36,7 +37,7 @@ static const char SMCD_EYECATCHER[4] = {'\xe2', '\xd4', '\xc3', '\xc4'}; /* check if received message has a correct header length and contains valid * heading and trailing eyecatchers */ -static bool smc_clc_msg_hdr_valid(struct smc_clc_msg_hdr *clcm) +static bool smc_clc_msg_hdr_valid(struct smc_clc_msg_hdr *clcm, bool check_trl) { struct smc_clc_msg_proposal_prefix *pclc_prfx; struct smc_clc_msg_accept_confirm *clc; @@ -49,12 +50,9 @@ static bool smc_clc_msg_hdr_valid(struct smc_clc_msg_hdr *clcm) return false; switch (clcm->type) { case SMC_CLC_PROPOSAL: - if (clcm->path != SMC_TYPE_R && clcm->path != SMC_TYPE_D && - clcm->path != SMC_TYPE_B) - return false; pclc = (struct smc_clc_msg_proposal *)clcm; pclc_prfx = smc_clc_proposal_get_prefix(pclc); - if (ntohs(pclc->hdr.length) != + if (ntohs(pclc->hdr.length) < sizeof(*pclc) + ntohs(pclc->iparea_offset) + sizeof(*pclc_prfx) + pclc_prfx->ipv6_prefixes_cnt * @@ -86,7 +84,8 @@ static bool smc_clc_msg_hdr_valid(struct smc_clc_msg_hdr *clcm) default: return false; } - if (memcmp(trl->eyecatcher, SMC_EYECATCHER, sizeof(SMC_EYECATCHER)) && + if (check_trl && + memcmp(trl->eyecatcher, SMC_EYECATCHER, sizeof(SMC_EYECATCHER)) && memcmp(trl->eyecatcher, SMCD_EYECATCHER, sizeof(SMCD_EYECATCHER))) return false; return true; @@ -276,7 +275,8 @@ int smc_clc_wait_msg(struct smc_sock *smc, void *buf, int buflen, struct msghdr msg = {NULL, 0}; int reason_code = 0; struct kvec vec = {buf, buflen}; - int len, datlen; + int len, datlen, recvlen; + bool check_trl = true; int krflags; /* peek the first few bytes to determine length of data to receive @@ -320,10 +320,7 @@ int smc_clc_wait_msg(struct smc_sock *smc, void *buf, int buflen, } datlen = ntohs(clcm->length); if ((len < sizeof(struct smc_clc_msg_hdr)) || - (datlen > buflen) || - (clcm->version != SMC_CLC_V1) || - (clcm->path != SMC_TYPE_R && clcm->path != SMC_TYPE_D && - clcm->path != SMC_TYPE_B) || + (clcm->version < SMC_CLC_V1) || ((clcm->type != SMC_CLC_DECLINE) && (clcm->type != expected_type))) { smc->sk.sk_err = EPROTO; @@ -331,16 +328,38 @@ int smc_clc_wait_msg(struct smc_sock *smc, void *buf, int buflen, goto out; } + if (clcm->type == SMC_CLC_PROPOSAL && clcm->path == SMC_TYPE_N) + reason_code = SMC_CLC_DECL_VERSMISMAT; /* just V2 offered */ + /* receive the complete CLC message */ memset(&msg, 0, sizeof(struct msghdr)); - iov_iter_kvec(&msg.msg_iter, READ, &vec, 1, datlen); + if (datlen > buflen) { + check_trl = false; + recvlen = buflen; + } else { + recvlen = datlen; + } + iov_iter_kvec(&msg.msg_iter, READ, &vec, 1, recvlen); krflags = MSG_WAITALL; len = sock_recvmsg(smc->clcsock, &msg, krflags); - if (len < datlen || !smc_clc_msg_hdr_valid(clcm)) { + if (len < recvlen || !smc_clc_msg_hdr_valid(clcm, check_trl)) { smc->sk.sk_err = EPROTO; reason_code = -EPROTO; goto out; } + datlen -= len; + while (datlen) { + u8 tmp[SMC_CLC_RECV_BUF_LEN]; + + vec.iov_base = &tmp; + vec.iov_len = SMC_CLC_RECV_BUF_LEN; + /* receive remaining proposal message */ + recvlen = datlen > SMC_CLC_RECV_BUF_LEN ? + SMC_CLC_RECV_BUF_LEN : datlen; + iov_iter_kvec(&msg.msg_iter, READ, &vec, 1, recvlen); + len = sock_recvmsg(smc->clcsock, &msg, krflags); + datlen -= len; + } if (clcm->type == SMC_CLC_DECLINE) { struct smc_clc_msg_decline *dclc; diff --git a/net/smc/smc_clc.h b/net/smc/smc_clc.h index 465876701b75..76c2b150d040 100644 --- a/net/smc/smc_clc.h +++ b/net/smc/smc_clc.h @@ -25,6 +25,7 @@ #define SMC_CLC_V1 0x1 /* SMC version */ #define SMC_TYPE_R 0 /* SMC-R only */ #define SMC_TYPE_D 1 /* SMC-D only */ +#define SMC_TYPE_N 2 /* neither SMC-R nor SMC-D */ #define SMC_TYPE_B 3 /* SMC-R and SMC-D */ #define CLC_WAIT_TIME (6 * HZ) /* max. wait time on clcsock */ #define CLC_WAIT_TIME_SHORT HZ /* short wait time on clcsock */ @@ -46,6 +47,7 @@ #define SMC_CLC_DECL_ISMVLANERR 0x03090000 /* err to reg vlan id on ism dev */ #define SMC_CLC_DECL_NOACTLINK 0x030a0000 /* no active smc-r link in lgr */ #define SMC_CLC_DECL_NOSRVLINK 0x030b0000 /* SMC-R link from srv not found */ +#define SMC_CLC_DECL_VERSMISMAT 0x030c0000 /* SMC version mismatch */ #define SMC_CLC_DECL_SYNCERR 0x04000000 /* synchronization error */ #define SMC_CLC_DECL_PEERDECL 0x05000000 /* peer declined during handshake */ #define SMC_CLC_DECL_INTERR 0x09990000 /* internal error */ From 68cf617309b5f6f3a651165f49f20af1494753ae Mon Sep 17 00:00:00 2001 From: Will Deacon Date: Wed, 8 Jul 2020 17:25:46 +0100 Subject: [PATCH 262/343] KVM: arm64: Fix definition of PAGE_HYP_DEVICE PAGE_HYP_DEVICE is intended to encode attribute bits for an EL2 stage-1 pte mapping a device. Unfortunately, it includes PROT_DEVICE_nGnRE which encodes attributes for EL1 stage-1 mappings such as UXN and nG, which are RES0 for EL2, and DBM which is meaningless as TCR_EL2.HD is not set. Fix the definition of PAGE_HYP_DEVICE so that it doesn't set RES0 bits at EL2. Acked-by: Marc Zyngier Cc: Marc Zyngier Cc: Catalin Marinas Cc: James Morse Cc: Link: https://lore.kernel.org/r/20200708162546.26176-1-will@kernel.org Signed-off-by: Will Deacon --- arch/arm64/include/asm/pgtable-prot.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/arm64/include/asm/pgtable-prot.h b/arch/arm64/include/asm/pgtable-prot.h index 2e7e0f452301..4d867c6446c4 100644 --- a/arch/arm64/include/asm/pgtable-prot.h +++ b/arch/arm64/include/asm/pgtable-prot.h @@ -67,7 +67,7 @@ extern bool arm64_use_ng_mappings; #define PAGE_HYP __pgprot(_HYP_PAGE_DEFAULT | PTE_HYP | PTE_HYP_XN) #define PAGE_HYP_EXEC __pgprot(_HYP_PAGE_DEFAULT | PTE_HYP | PTE_RDONLY) #define PAGE_HYP_RO __pgprot(_HYP_PAGE_DEFAULT | PTE_HYP | PTE_RDONLY | PTE_HYP_XN) -#define PAGE_HYP_DEVICE __pgprot(PROT_DEVICE_nGnRE | PTE_HYP) +#define PAGE_HYP_DEVICE __pgprot(_PROT_DEFAULT | PTE_ATTRINDX(MT_DEVICE_nGnRE) | PTE_HYP | PTE_HYP_XN) #define PAGE_S2_MEMATTR(attr) \ ({ \ From c377e67c6271954969384f9be1b1b71de13eba30 Mon Sep 17 00:00:00 2001 From: Gavin Shan Date: Tue, 30 Jun 2020 17:52:27 +1000 Subject: [PATCH 263/343] drivers/firmware/psci: Fix memory leakage in alloc_init_cpu_groups() The CPU mask (@tmp) should be released on failing to allocate @cpu_groups or any of its elements. Otherwise, it leads to memory leakage because the CPU mask variable is dynamically allocated when CONFIG_CPUMASK_OFFSTACK is enabled. Signed-off-by: Gavin Shan Reviewed-by: Sudeep Holla Link: https://lore.kernel.org/r/20200630075227.199624-1-gshan@redhat.com Signed-off-by: Will Deacon --- drivers/firmware/psci/psci_checker.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/drivers/firmware/psci/psci_checker.c b/drivers/firmware/psci/psci_checker.c index 873841af8d57..d9b1a2d71223 100644 --- a/drivers/firmware/psci/psci_checker.c +++ b/drivers/firmware/psci/psci_checker.c @@ -157,8 +157,10 @@ static int alloc_init_cpu_groups(cpumask_var_t **pcpu_groups) cpu_groups = kcalloc(nb_available_cpus, sizeof(cpu_groups), GFP_KERNEL); - if (!cpu_groups) + if (!cpu_groups) { + free_cpumask_var(tmp); return -ENOMEM; + } cpumask_copy(tmp, cpu_online_mask); @@ -167,6 +169,7 @@ static int alloc_init_cpu_groups(cpumask_var_t **pcpu_groups) topology_core_cpumask(cpumask_any(tmp)); if (!alloc_cpumask_var(&cpu_groups[num_groups], GFP_KERNEL)) { + free_cpumask_var(tmp); free_cpu_groups(num_groups, &cpu_groups); return -ENOMEM; } From 132330f8044c8e0cfa83b5eee41ade52708390dc Mon Sep 17 00:00:00 2001 From: Gavin Shan Date: Tue, 30 Jun 2020 17:59:43 +1000 Subject: [PATCH 264/343] drivers/firmware/psci: Assign @err directly in hotplug_tests() The return value of down_and_up_cpus() can be assigned to @err directly. With that, the useless assignment to @err with zero can be dropped. Signed-off-by: Gavin Shan Reviewed-by: Sudeep Holla Link: https://lore.kernel.org/r/20200630075943.203954-1-gshan@redhat.com Signed-off-by: Will Deacon --- drivers/firmware/psci/psci_checker.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/drivers/firmware/psci/psci_checker.c b/drivers/firmware/psci/psci_checker.c index d9b1a2d71223..3d6ba425dbb9 100644 --- a/drivers/firmware/psci/psci_checker.c +++ b/drivers/firmware/psci/psci_checker.c @@ -199,13 +199,12 @@ static int hotplug_tests(void) if (!page_buf) goto out_free_cpu_groups; - err = 0; /* * Of course the last CPU cannot be powered down and cpu_down() should * refuse doing that. */ pr_info("Trying to turn off and on again all CPUs\n"); - err += down_and_up_cpus(cpu_online_mask, offlined_cpus); + err = down_and_up_cpus(cpu_online_mask, offlined_cpus); /* * Take down CPUs by cpu group this time. When the last CPU is turned From b8c1c9fe6a042dfbb169d14ab2000d9163f06d10 Mon Sep 17 00:00:00 2001 From: Kevin Hao Date: Fri, 17 Apr 2020 18:32:11 +0800 Subject: [PATCH 265/343] arm64: entry: Fix the typo in the comment of el1_dbg() The function name should be local_daif_mask(). Signed-off-by: Kevin Hao Acked-by: Mark Rutlamd Link: https://lore.kernel.org/r/20200417103212.45812-2-haokexin@gmail.com Signed-off-by: Will Deacon --- arch/arm64/kernel/entry-common.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/arm64/kernel/entry-common.c b/arch/arm64/kernel/entry-common.c index 3dbdf9752b11..d3be9dbf5490 100644 --- a/arch/arm64/kernel/entry-common.c +++ b/arch/arm64/kernel/entry-common.c @@ -57,7 +57,7 @@ static void notrace el1_dbg(struct pt_regs *regs, unsigned long esr) /* * The CPU masked interrupts, and we are leaving them masked during * do_debug_exception(). Update PMR as if we had called - * local_mask_daif(). + * local_daif_mask(). */ if (system_uses_irq_prio_masking()) gic_write_pmr(GIC_PRIO_IRQON | GIC_PRIO_PSR_I_SET); From 97884ca8c2925d14c32188e865069f21378b4b4f Mon Sep 17 00:00:00 2001 From: Marc Zyngier Date: Mon, 6 Jul 2020 17:37:59 +0100 Subject: [PATCH 266/343] arm64: Introduce a way to disable the 32bit vdso We have a class of errata (grouped under the ARM64_WORKAROUND_1418040 banner) that force the trapping of counter access from 32bit EL0. We would normally disable the whole vdso for such defect, except that it would disable it for 64bit userspace as well, which is a shame. Instead, add a new vdso_clock_mode, which signals that the vdso isn't usable for compat tasks. This gets checked in the new vdso_clocksource_ok() helper, now provided for the 32bit vdso. Signed-off-by: Marc Zyngier Acked-by: Mark Rutland Cc: stable@vger.kernel.org Link: https://lore.kernel.org/r/20200706163802.1836732-2-maz@kernel.org Signed-off-by: Will Deacon --- arch/arm64/include/asm/vdso/clocksource.h | 7 +++++-- arch/arm64/include/asm/vdso/compat_gettimeofday.h | 8 +++++++- 2 files changed, 12 insertions(+), 3 deletions(-) diff --git a/arch/arm64/include/asm/vdso/clocksource.h b/arch/arm64/include/asm/vdso/clocksource.h index df6ea65c1dec..b054d9febfb5 100644 --- a/arch/arm64/include/asm/vdso/clocksource.h +++ b/arch/arm64/include/asm/vdso/clocksource.h @@ -2,7 +2,10 @@ #ifndef __ASM_VDSOCLOCKSOURCE_H #define __ASM_VDSOCLOCKSOURCE_H -#define VDSO_ARCH_CLOCKMODES \ - VDSO_CLOCKMODE_ARCHTIMER +#define VDSO_ARCH_CLOCKMODES \ + /* vdso clocksource for both 32 and 64bit tasks */ \ + VDSO_CLOCKMODE_ARCHTIMER, \ + /* vdso clocksource for 64bit tasks only */ \ + VDSO_CLOCKMODE_ARCHTIMER_NOCOMPAT #endif diff --git a/arch/arm64/include/asm/vdso/compat_gettimeofday.h b/arch/arm64/include/asm/vdso/compat_gettimeofday.h index b6907ae78e53..9a625e8947ff 100644 --- a/arch/arm64/include/asm/vdso/compat_gettimeofday.h +++ b/arch/arm64/include/asm/vdso/compat_gettimeofday.h @@ -111,7 +111,7 @@ static __always_inline u64 __arch_get_hw_counter(s32 clock_mode) * update. Return something. Core will do another round and then * see the mode change and fallback to the syscall. */ - if (clock_mode == VDSO_CLOCKMODE_NONE) + if (clock_mode != VDSO_CLOCKMODE_ARCHTIMER) return 0; /* @@ -152,6 +152,12 @@ static __always_inline const struct vdso_data *__arch_get_vdso_data(void) return ret; } +static inline bool vdso_clocksource_ok(const struct vdso_data *vd) +{ + return vd->clock_mode == VDSO_CLOCKMODE_ARCHTIMER; +} +#define vdso_clocksource_ok vdso_clocksource_ok + #endif /* !__ASSEMBLY__ */ #endif /* __ASM_VDSO_GETTIMEOFDAY_H */ From c1fbec4ac0d701f350a581941d35643d5a9cd184 Mon Sep 17 00:00:00 2001 From: Marc Zyngier Date: Mon, 6 Jul 2020 17:38:00 +0100 Subject: [PATCH 267/343] arm64: arch_timer: Allow an workaround descriptor to disable compat vdso As we are about to disable the vdso for compat tasks in some circumstances, let's allow a workaround descriptor to express exactly that. Signed-off-by: Marc Zyngier Acked-by: Mark Rutland Cc: stable@vger.kernel.org Link: https://lore.kernel.org/r/20200706163802.1836732-3-maz@kernel.org Signed-off-by: Will Deacon --- arch/arm64/include/asm/arch_timer.h | 1 + drivers/clocksource/arm_arch_timer.c | 3 +++ 2 files changed, 4 insertions(+) diff --git a/arch/arm64/include/asm/arch_timer.h b/arch/arm64/include/asm/arch_timer.h index 7ae54d7d333a..9f0ec21d6327 100644 --- a/arch/arm64/include/asm/arch_timer.h +++ b/arch/arm64/include/asm/arch_timer.h @@ -58,6 +58,7 @@ struct arch_timer_erratum_workaround { u64 (*read_cntvct_el0)(void); int (*set_next_event_phys)(unsigned long, struct clock_event_device *); int (*set_next_event_virt)(unsigned long, struct clock_event_device *); + bool disable_compat_vdso; }; DECLARE_PER_CPU(const struct arch_timer_erratum_workaround *, diff --git a/drivers/clocksource/arm_arch_timer.c b/drivers/clocksource/arm_arch_timer.c index ecf7b7db2d05..a8e4fb429f52 100644 --- a/drivers/clocksource/arm_arch_timer.c +++ b/drivers/clocksource/arm_arch_timer.c @@ -566,6 +566,9 @@ void arch_timer_enable_workaround(const struct arch_timer_erratum_workaround *wa if (wa->read_cntvct_el0) { clocksource_counter.vdso_clock_mode = VDSO_CLOCKMODE_NONE; vdso_default = VDSO_CLOCKMODE_NONE; + } else if (wa->disable_compat_vdso && vdso_default != VDSO_CLOCKMODE_NONE) { + vdso_default = VDSO_CLOCKMODE_ARCHTIMER_NOCOMPAT; + clocksource_counter.vdso_clock_mode = vdso_default; } } From 4b661d6133c5d3a7c9aca0b4ee5a78c7766eff3f Mon Sep 17 00:00:00 2001 From: Marc Zyngier Date: Mon, 6 Jul 2020 17:38:01 +0100 Subject: [PATCH 268/343] arm64: arch_timer: Disable the compat vdso for cores affected by ARM64_WORKAROUND_1418040 ARM64_WORKAROUND_1418040 requires that AArch32 EL0 accesses to the virtual counter register are trapped and emulated by the kernel. This makes the vdso pretty pointless, and in some cases livelock prone. Provide a workaround entry that limits the vdso to 64bit tasks. Signed-off-by: Marc Zyngier Acked-by: Mark Rutland Cc: stable@vger.kernel.org Link: https://lore.kernel.org/r/20200706163802.1836732-4-maz@kernel.org Signed-off-by: Will Deacon --- drivers/clocksource/arm_arch_timer.c | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/drivers/clocksource/arm_arch_timer.c b/drivers/clocksource/arm_arch_timer.c index a8e4fb429f52..6c3e84180146 100644 --- a/drivers/clocksource/arm_arch_timer.c +++ b/drivers/clocksource/arm_arch_timer.c @@ -480,6 +480,14 @@ static const struct arch_timer_erratum_workaround ool_workarounds[] = { .set_next_event_virt = erratum_set_next_event_tval_virt, }, #endif +#ifdef CONFIG_ARM64_ERRATUM_1418040 + { + .match_type = ate_match_local_cap_id, + .id = (void *)ARM64_WORKAROUND_1418040, + .desc = "ARM erratum 1418040", + .disable_compat_vdso = true, + }, +#endif }; typedef bool (*ate_match_fn_t)(const struct arch_timer_erratum_workaround *, From dc802f2bc0208f4abca420705a860c5175db4bee Mon Sep 17 00:00:00 2001 From: Marc Zyngier Date: Mon, 6 Jul 2020 17:38:02 +0100 Subject: [PATCH 269/343] arm64: Rework ARM_ERRATUM_1414080 handling The current handling of erratum 1414080 has the side effect that cntkctl_el1 can get changed for both 32 and 64bit tasks. This isn't a problem so far, but if we ever need to mitigate another of these errata on the 64bit side, we'd better keep the messing with cntkctl_el1 local to 32bit tasks. For that, make sure that on entering the kernel from a 32bit tasks, userspace access to cntvct gets enabled, and disabled returning to userspace, while it never gets changed for 64bit tasks. Signed-off-by: Marc Zyngier Reviewed-by: Mark Rutland Link: https://lore.kernel.org/r/20200706163802.1836732-5-maz@kernel.org [will: removed branch instructions per Mark's review comments] Signed-off-by: Will Deacon --- arch/arm64/kernel/entry.S | 34 +++++++++++++++++++--------------- 1 file changed, 19 insertions(+), 15 deletions(-) diff --git a/arch/arm64/kernel/entry.S b/arch/arm64/kernel/entry.S index 5304d193c79d..9757a8d5fd94 100644 --- a/arch/arm64/kernel/entry.S +++ b/arch/arm64/kernel/entry.S @@ -167,6 +167,17 @@ alternative_cb_end stp x28, x29, [sp, #16 * 14] .if \el == 0 + .if \regsize == 32 + // If we're returning from a 32-bit task on a system affected by + // 1418040 then re-enable userspace access to the virtual counter. +#ifdef CONFIG_ARM64_ERRATUM_1418040 +alternative_if ARM64_WORKAROUND_1418040 + mrs x0, cntkctl_el1 + orr x0, x0, #2 // ARCH_TIMER_USR_VCT_ACCESS_EN + msr cntkctl_el1, x0 +alternative_else_nop_endif +#endif + .endif clear_gp_regs mrs x21, sp_el0 ldr_this_cpu tsk, __entry_task, x20 @@ -320,6 +331,14 @@ alternative_else_nop_endif tst x22, #PSR_MODE32_BIT // native task? b.eq 3f +#ifdef CONFIG_ARM64_ERRATUM_1418040 +alternative_if ARM64_WORKAROUND_1418040 + mrs x0, cntkctl_el1 + bic x0, x0, #2 // ARCH_TIMER_USR_VCT_ACCESS_EN + msr cntkctl_el1, x0 +alternative_else_nop_endif +#endif + #ifdef CONFIG_ARM64_ERRATUM_845719 alternative_if ARM64_WORKAROUND_845719 #ifdef CONFIG_PID_IN_CONTEXTIDR @@ -331,21 +350,6 @@ alternative_if ARM64_WORKAROUND_845719 alternative_else_nop_endif #endif 3: -#ifdef CONFIG_ARM64_ERRATUM_1418040 -alternative_if_not ARM64_WORKAROUND_1418040 - b 4f -alternative_else_nop_endif - /* - * if (x22.mode32 == cntkctl_el1.el0vcten) - * cntkctl_el1.el0vcten = ~cntkctl_el1.el0vcten - */ - mrs x1, cntkctl_el1 - eon x0, x1, x22, lsr #3 - tbz x0, #1, 4f - eor x1, x1, #2 // ARCH_TIMER_USR_VCT_ACCESS_EN - msr cntkctl_el1, x1 -4: -#endif scs_save tsk, x0 /* No kernel C function calls after this as user keys are set. */ From 8c3001b9252d8dbf72289d3590a723eea8cfe824 Mon Sep 17 00:00:00 2001 From: Will Deacon Date: Wed, 8 Jul 2020 22:10:01 +0100 Subject: [PATCH 270/343] arm64: entry: Tidy up block comments and label numbers Continually butchering our entry code with CPU errata workarounds has led to it looking a little scruffy. Consistently used /* */ comment style for multi-line block comments and ensure that small numeric labels use consecutive integers. No functional change, but the state of things was irritating. Signed-off-by: Will Deacon --- arch/arm64/kernel/entry.S | 22 ++++++++++++++-------- 1 file changed, 14 insertions(+), 8 deletions(-) diff --git a/arch/arm64/kernel/entry.S b/arch/arm64/kernel/entry.S index 9757a8d5fd94..35de8ba60e3d 100644 --- a/arch/arm64/kernel/entry.S +++ b/arch/arm64/kernel/entry.S @@ -126,8 +126,10 @@ alternative_else_nop_endif add \dst, \dst, #(\sym - .entry.tramp.text) .endm - // This macro corrupts x0-x3. It is the caller's duty - // to save/restore them if required. + /* + * This macro corrupts x0-x3. It is the caller's duty to save/restore + * them if required. + */ .macro apply_ssbd, state, tmp1, tmp2 #ifdef CONFIG_ARM64_SSBD alternative_cb arm64_enable_wa2_handling @@ -168,8 +170,10 @@ alternative_cb_end .if \el == 0 .if \regsize == 32 - // If we're returning from a 32-bit task on a system affected by - // 1418040 then re-enable userspace access to the virtual counter. + /* + * If we're returning from a 32-bit task on a system affected by + * 1418040 then re-enable userspace access to the virtual counter. + */ #ifdef CONFIG_ARM64_ERRATUM_1418040 alternative_if ARM64_WORKAROUND_1418040 mrs x0, cntkctl_el1 @@ -183,8 +187,10 @@ alternative_else_nop_endif ldr_this_cpu tsk, __entry_task, x20 msr sp_el0, tsk - // Ensure MDSCR_EL1.SS is clear, since we can unmask debug exceptions - // when scheduling. + /* + * Ensure MDSCR_EL1.SS is clear, since we can unmask debug exceptions + * when scheduling. + */ ldr x19, [tsk, #TSK_TI_FLAGS] disable_step_tsk x19, x20 @@ -381,11 +387,11 @@ alternative_else_nop_endif .if \el == 0 alternative_insn eret, nop, ARM64_UNMAP_KERNEL_AT_EL0 #ifdef CONFIG_UNMAP_KERNEL_AT_EL0 - bne 5f + bne 4f msr far_el1, x30 tramp_alias x30, tramp_exit_native br x30 -5: +4: tramp_alias x30, tramp_exit_compat br x30 #endif From 8523c006264df65aac7d77284cc69aac46a6f842 Mon Sep 17 00:00:00 2001 From: Wei Li Date: Sun, 10 May 2020 05:41:56 +0800 Subject: [PATCH 271/343] arm64: kgdb: Fix single-step exception handling oops After entering kdb due to breakpoint, when we execute 'ss' or 'go' (will delay installing breakpoints, do single-step first), it won't work correctly, and it will enter kdb due to oops. It's because the reason gotten in kdb_stub() is not as expected, and it seems that the ex_vector for single-step should be 0, like what arch powerpc/sh/parisc has implemented. Before the patch: Entering kdb (current=0xffff8000119e2dc0, pid 0) on processor 0 due to Keyboard Entry [0]kdb> bp printk Instruction(i) BP #0 at 0xffff8000101486cc (printk) is enabled addr at ffff8000101486cc, hardtype=0 installed=0 [0]kdb> g / # echo h > /proc/sysrq-trigger Entering kdb (current=0xffff0000fa878040, pid 266) on processor 3 due to Breakpoint @ 0xffff8000101486cc [3]kdb> ss Entering kdb (current=0xffff0000fa878040, pid 266) on processor 3 Oops: (null) due to oops @ 0xffff800010082ab8 CPU: 3 PID: 266 Comm: sh Not tainted 5.7.0-rc4-13839-gf0e5ad491718 #6 Hardware name: linux,dummy-virt (DT) pstate: 00000085 (nzcv daIf -PAN -UAO) pc : el1_irq+0x78/0x180 lr : __handle_sysrq+0x80/0x190 sp : ffff800015003bf0 x29: ffff800015003d20 x28: ffff0000fa878040 x27: 0000000000000000 x26: ffff80001126b1f0 x25: ffff800011b6a0d8 x24: 0000000000000000 x23: 0000000080200005 x22: ffff8000101486cc x21: ffff800015003d30 x20: 0000ffffffffffff x19: ffff8000119f2000 x18: 0000000000000000 x17: 0000000000000000 x16: 0000000000000000 x15: 0000000000000000 x14: 0000000000000000 x13: 0000000000000000 x12: 0000000000000000 x11: 0000000000000000 x10: 0000000000000000 x9 : 0000000000000000 x8 : ffff800015003e50 x7 : 0000000000000002 x6 : 00000000380b9990 x5 : ffff8000106e99e8 x4 : ffff0000fadd83c0 x3 : 0000ffffffffffff x2 : ffff800011b6a0d8 x1 : ffff800011b6a000 x0 : ffff80001130c9d8 Call trace: el1_irq+0x78/0x180 printk+0x0/0x84 write_sysrq_trigger+0xb0/0x118 proc_reg_write+0xb4/0xe0 __vfs_write+0x18/0x40 vfs_write+0xb0/0x1b8 ksys_write+0x64/0xf0 __arm64_sys_write+0x14/0x20 el0_svc_common.constprop.2+0xb0/0x168 do_el0_svc+0x20/0x98 el0_sync_handler+0xec/0x1a8 el0_sync+0x140/0x180 [3]kdb> After the patch: Entering kdb (current=0xffff8000119e2dc0, pid 0) on processor 0 due to Keyboard Entry [0]kdb> bp printk Instruction(i) BP #0 at 0xffff8000101486cc (printk) is enabled addr at ffff8000101486cc, hardtype=0 installed=0 [0]kdb> g / # echo h > /proc/sysrq-trigger Entering kdb (current=0xffff0000fa852bc0, pid 268) on processor 0 due to Breakpoint @ 0xffff8000101486cc [0]kdb> g Entering kdb (current=0xffff0000fa852bc0, pid 268) on processor 0 due to Breakpoint @ 0xffff8000101486cc [0]kdb> ss Entering kdb (current=0xffff0000fa852bc0, pid 268) on processor 0 due to SS trap @ 0xffff800010082ab8 [0]kdb> Fixes: 44679a4f142b ("arm64: KGDB: Add step debugging support") Signed-off-by: Wei Li Tested-by: Douglas Anderson Reviewed-by: Douglas Anderson Link: https://lore.kernel.org/r/20200509214159.19680-2-liwei391@huawei.com Signed-off-by: Will Deacon --- arch/arm64/kernel/kgdb.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/arm64/kernel/kgdb.c b/arch/arm64/kernel/kgdb.c index 43119922341f..1a157ca33262 100644 --- a/arch/arm64/kernel/kgdb.c +++ b/arch/arm64/kernel/kgdb.c @@ -252,7 +252,7 @@ static int kgdb_step_brk_fn(struct pt_regs *regs, unsigned int esr) if (!kgdb_single_step) return DBG_HOOK_ERROR; - kgdb_handle_exception(1, SIGTRAP, 0, regs); + kgdb_handle_exception(0, SIGTRAP, 0, regs); return DBG_HOOK_HANDLED; } NOKPROBE_SYMBOL(kgdb_step_brk_fn); From 581fce373581772470af8fb4fe13505dc66281e6 Mon Sep 17 00:00:00 2001 From: Suzuki K Poulose Date: Tue, 7 Jul 2020 15:31:52 +0100 Subject: [PATCH 272/343] arm64: Documentation: Fix broken table in generated HTML cpu-feature-registers.rst is missing a new line before a couple of tables listing the visible fields, causing broken tables in the HTML documentation generated by "make htmldocs". Fix this by adding the missing new line. Reported-by: Peter Maydell Signed-off-by: Suzuki K Poulose Cc: Mark Rutland Cc: Will Deacon Cc: Catalin Marinas Link: https://lore.kernel.org/r/20200707143152.154541-1-suzuki.poulose@arm.com Signed-off-by: Will Deacon --- Documentation/arm64/cpu-feature-registers.rst | 2 ++ 1 file changed, 2 insertions(+) diff --git a/Documentation/arm64/cpu-feature-registers.rst b/Documentation/arm64/cpu-feature-registers.rst index 314fa5bc2655..f28853f80089 100644 --- a/Documentation/arm64/cpu-feature-registers.rst +++ b/Documentation/arm64/cpu-feature-registers.rst @@ -171,6 +171,7 @@ infrastructure: 3) ID_AA64PFR1_EL1 - Processor Feature Register 1 + +------------------------------+---------+---------+ | Name | bits | visible | +------------------------------+---------+---------+ @@ -181,6 +182,7 @@ infrastructure: 4) MIDR_EL1 - Main ID Register + +------------------------------+---------+---------+ | Name | bits | visible | +------------------------------+---------+---------+ From 579dd91ab3a5446b148e7f179b6596b270dace46 Mon Sep 17 00:00:00 2001 From: Zheng Bin Date: Mon, 29 Jun 2020 09:23:49 +0800 Subject: [PATCH 273/343] nbd: Fix memory leak in nbd_add_socket When adding first socket to nbd, if nsock's allocation failed, the data structure member "config->socks" was reallocated, but the data structure member "config->num_connections" was not updated. A memory leak will occur then because the function "nbd_config_put" will free "config->socks" only when "config->num_connections" is not zero. Fixes: 03bf73c315ed ("nbd: prevent memory leak") Reported-by: syzbot+934037347002901b8d2a@syzkaller.appspotmail.com Signed-off-by: Zheng Bin Reviewed-by: Eric Biggers Signed-off-by: Jens Axboe --- drivers/block/nbd.c | 25 +++++++++++++++---------- 1 file changed, 15 insertions(+), 10 deletions(-) diff --git a/drivers/block/nbd.c b/drivers/block/nbd.c index 43cff01a5a67..ce7e9f223b20 100644 --- a/drivers/block/nbd.c +++ b/drivers/block/nbd.c @@ -1033,25 +1033,26 @@ static int nbd_add_socket(struct nbd_device *nbd, unsigned long arg, test_bit(NBD_RT_BOUND, &config->runtime_flags))) { dev_err(disk_to_dev(nbd->disk), "Device being setup by another task"); - sockfd_put(sock); - return -EBUSY; + err = -EBUSY; + goto put_socket; + } + + nsock = kzalloc(sizeof(*nsock), GFP_KERNEL); + if (!nsock) { + err = -ENOMEM; + goto put_socket; } socks = krealloc(config->socks, (config->num_connections + 1) * sizeof(struct nbd_sock *), GFP_KERNEL); if (!socks) { - sockfd_put(sock); - return -ENOMEM; + kfree(nsock); + err = -ENOMEM; + goto put_socket; } config->socks = socks; - nsock = kzalloc(sizeof(struct nbd_sock), GFP_KERNEL); - if (!nsock) { - sockfd_put(sock); - return -ENOMEM; - } - nsock->fallback_index = -1; nsock->dead = false; mutex_init(&nsock->tx_lock); @@ -1063,6 +1064,10 @@ static int nbd_add_socket(struct nbd_device *nbd, unsigned long arg, atomic_inc(&config->live_connections); return 0; + +put_socket: + sockfd_put(sock); + return err; } static int nbd_reconnect_socket(struct nbd_device *nbd, unsigned long arg) From 27d53323664c549b5bb2dfaaf6f7ad6e0376a64e Mon Sep 17 00:00:00 2001 From: Xin Long Date: Tue, 7 Jul 2020 02:02:32 +0800 Subject: [PATCH 274/343] l2tp: remove skb_dst_set() from l2tp_xmit_skb() In the tx path of l2tp, l2tp_xmit_skb() calls skb_dst_set() to set skb's dst. However, it will eventually call inet6_csk_xmit() or ip_queue_xmit() where skb's dst will be overwritten by: skb_dst_set_noref(skb, dst); without releasing the old dst in skb. Then it causes dst/dev refcnt leak: unregister_netdevice: waiting for eth0 to become free. Usage count = 1 This can be reproduced by simply running: # modprobe l2tp_eth && modprobe l2tp_ip # sh ./tools/testing/selftests/net/l2tp.sh So before going to inet6_csk_xmit() or ip_queue_xmit(), skb's dst should be dropped. This patch is to fix it by removing skb_dst_set() from l2tp_xmit_skb() and moving skb_dst_drop() into l2tp_xmit_core(). Fixes: 3557baabf280 ("[L2TP]: PPP over L2TP driver core") Reported-by: Hangbin Liu Signed-off-by: Xin Long Acked-by: James Chapman Tested-by: James Chapman Signed-off-by: David S. Miller --- net/l2tp/l2tp_core.c | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) diff --git a/net/l2tp/l2tp_core.c b/net/l2tp/l2tp_core.c index 6d7ef78c88af..6434d17e6e8e 100644 --- a/net/l2tp/l2tp_core.c +++ b/net/l2tp/l2tp_core.c @@ -1028,6 +1028,7 @@ static void l2tp_xmit_core(struct l2tp_session *session, struct sk_buff *skb, /* Queue the packet to IP for output */ skb->ignore_df = 1; + skb_dst_drop(skb); #if IS_ENABLED(CONFIG_IPV6) if (l2tp_sk_is_v6(tunnel->sock)) error = inet6_csk_xmit(tunnel->sock, skb, NULL); @@ -1099,10 +1100,6 @@ int l2tp_xmit_skb(struct l2tp_session *session, struct sk_buff *skb, int hdr_len goto out_unlock; } - /* Get routing info from the tunnel socket */ - skb_dst_drop(skb); - skb_dst_set(skb, sk_dst_check(sk, 0)); - inet = inet_sk(sk); fl = &inet->cork.fl; switch (tunnel->encap) { From a34f829164f3c70d7f53bb532ddcc39fa890b722 Mon Sep 17 00:00:00 2001 From: Hamish Martin Date: Thu, 9 Jul 2020 09:06:44 +1200 Subject: [PATCH 275/343] tipc: fix retransmission on unicast links A scenario has been observed where a 'bc_init' message for a link is not retransmitted if it fails to be received by the peer. This leads to the peer never establishing the link fully and it discarding all other data received on the link. In this scenario the message is lost in transit to the peer. The issue is traced to the 'nxt_retr' field of the skb not being initialised for links that aren't a bc_sndlink. This leads to the comparison in tipc_link_advance_transmq() that gates whether to attempt retransmission of a message performing in an undesirable way. Depending on the relative value of 'jiffies', this comparison: time_before(jiffies, TIPC_SKB_CB(skb)->nxt_retr) may return true or false given that 'nxt_retr' remains at the uninitialised value of 0 for non bc_sndlinks. This is most noticeable shortly after boot when jiffies is initialised to a high value (to flush out rollover bugs) and we compare a jiffies of, say, 4294940189 to zero. In that case time_before returns 'true' leading to the skb not being retransmitted. The fix is to ensure that all skbs have a valid 'nxt_retr' time set for them and this is achieved by refactoring the setting of this value into a central function. With this fix, transmission losses of 'bc_init' messages do not stall the link establishment forever because the 'bc_init' message is retransmitted and the link eventually establishes correctly. Fixes: 382f598fb66b ("tipc: reduce duplicate packets for unicast traffic") Acked-by: Jon Maloy Signed-off-by: Hamish Martin Signed-off-by: David S. Miller --- net/tipc/link.c | 26 ++++++++++++++++++-------- 1 file changed, 18 insertions(+), 8 deletions(-) diff --git a/net/tipc/link.c b/net/tipc/link.c index ee3b8d0576b8..263d950e70e9 100644 --- a/net/tipc/link.c +++ b/net/tipc/link.c @@ -921,6 +921,21 @@ static void link_prepare_wakeup(struct tipc_link *l) } +/** + * tipc_link_set_skb_retransmit_time - set the time at which retransmission of + * the given skb should be next attempted + * @skb: skb to set a future retransmission time for + * @l: link the skb will be transmitted on + */ +static void tipc_link_set_skb_retransmit_time(struct sk_buff *skb, + struct tipc_link *l) +{ + if (link_is_bc_sndlink(l)) + TIPC_SKB_CB(skb)->nxt_retr = TIPC_BC_RETR_LIM; + else + TIPC_SKB_CB(skb)->nxt_retr = TIPC_UC_RETR_TIME; +} + void tipc_link_reset(struct tipc_link *l) { struct sk_buff_head list; @@ -1036,9 +1051,7 @@ int tipc_link_xmit(struct tipc_link *l, struct sk_buff_head *list, return -ENOBUFS; } __skb_queue_tail(transmq, skb); - /* next retransmit attempt */ - if (link_is_bc_sndlink(l)) - TIPC_SKB_CB(skb)->nxt_retr = TIPC_BC_RETR_LIM; + tipc_link_set_skb_retransmit_time(skb, l); __skb_queue_tail(xmitq, _skb); TIPC_SKB_CB(skb)->ackers = l->ackers; l->rcv_unacked = 0; @@ -1139,9 +1152,7 @@ static void tipc_link_advance_backlog(struct tipc_link *l, if (unlikely(skb == l->backlog[imp].target_bskb)) l->backlog[imp].target_bskb = NULL; __skb_queue_tail(&l->transmq, skb); - /* next retransmit attempt */ - if (link_is_bc_sndlink(l)) - TIPC_SKB_CB(skb)->nxt_retr = TIPC_BC_RETR_LIM; + tipc_link_set_skb_retransmit_time(skb, l); __skb_queue_tail(xmitq, _skb); TIPC_SKB_CB(skb)->ackers = l->ackers; @@ -1584,8 +1595,7 @@ static int tipc_link_advance_transmq(struct tipc_link *l, struct tipc_link *r, /* retransmit skb if unrestricted*/ if (time_before(jiffies, TIPC_SKB_CB(skb)->nxt_retr)) continue; - TIPC_SKB_CB(skb)->nxt_retr = (is_uc) ? - TIPC_UC_RETR_TIME : TIPC_BC_RETR_LIM; + tipc_link_set_skb_retransmit_time(skb, l); _skb = pskb_copy(skb, GFP_ATOMIC); if (!_skb) continue; From 76c4d85c9260c3d741cbd194c30c61983d0a4303 Mon Sep 17 00:00:00 2001 From: Rahul Lakkireddy Date: Thu, 9 Jul 2020 03:14:27 +0530 Subject: [PATCH 276/343] cxgb4: fix all-mask IP address comparison Convert all-mask IP address to Big Endian, instead, for comparison. Fixes: f286dd8eaad5 ("cxgb4: use correct type for all-mask IP address comparison") Signed-off-by: Rahul Lakkireddy Signed-off-by: David S. Miller --- drivers/net/ethernet/chelsio/cxgb4/cxgb4_filter.c | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_filter.c b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_filter.c index 7a7f61a8cdf4..d02d346629b3 100644 --- a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_filter.c +++ b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_filter.c @@ -1112,16 +1112,16 @@ static bool is_addr_all_mask(u8 *ipmask, int family) struct in_addr *addr; addr = (struct in_addr *)ipmask; - if (ntohl(addr->s_addr) == 0xffffffff) + if (addr->s_addr == htonl(0xffffffff)) return true; } else if (family == AF_INET6) { struct in6_addr *addr6; addr6 = (struct in6_addr *)ipmask; - if (ntohl(addr6->s6_addr32[0]) == 0xffffffff && - ntohl(addr6->s6_addr32[1]) == 0xffffffff && - ntohl(addr6->s6_addr32[2]) == 0xffffffff && - ntohl(addr6->s6_addr32[3]) == 0xffffffff) + if (addr6->s6_addr32[0] == htonl(0xffffffff) && + addr6->s6_addr32[1] == htonl(0xffffffff) && + addr6->s6_addr32[2] == htonl(0xffffffff) && + addr6->s6_addr32[3] == htonl(0xffffffff)) return true; } return false; From 160251842cd35a75edfb0a1d76afa3eb674ff40a Mon Sep 17 00:00:00 2001 From: Kees Cook Date: Thu, 2 Jul 2020 11:49:23 -0700 Subject: [PATCH 277/343] kallsyms: Refactor kallsyms_show_value() to take cred In order to perform future tests against the cred saved during open(), switch kallsyms_show_value() to operate on a cred, and have all current callers pass current_cred(). This makes it very obvious where callers are checking the wrong credential in their "read" contexts. These will be fixed in the coming patches. Additionally switch return value to bool, since it is always used as a direct permission check, not a 0-on-success, negative-on-error style function return. Cc: stable@vger.kernel.org Signed-off-by: Kees Cook --- include/linux/filter.h | 2 +- include/linux/kallsyms.h | 5 +++-- kernel/kallsyms.c | 17 +++++++++++------ kernel/kprobes.c | 4 ++-- kernel/module.c | 2 +- 5 files changed, 18 insertions(+), 12 deletions(-) diff --git a/include/linux/filter.h b/include/linux/filter.h index 259377723603..55104f6c78e8 100644 --- a/include/linux/filter.h +++ b/include/linux/filter.h @@ -889,7 +889,7 @@ static inline bool bpf_dump_raw_ok(void) /* Reconstruction of call-sites is dependent on kallsyms, * thus make dump the same restriction. */ - return kallsyms_show_value() == 1; + return kallsyms_show_value(current_cred()); } struct bpf_prog *bpf_patch_insn_single(struct bpf_prog *prog, u32 off, diff --git a/include/linux/kallsyms.h b/include/linux/kallsyms.h index 98338dc6b5d2..481273f0c72d 100644 --- a/include/linux/kallsyms.h +++ b/include/linux/kallsyms.h @@ -18,6 +18,7 @@ #define KSYM_SYMBOL_LEN (sizeof("%s+%#lx/%#lx [%s]") + (KSYM_NAME_LEN - 1) + \ 2*(BITS_PER_LONG*3/10) + (MODULE_NAME_LEN - 1) + 1) +struct cred; struct module; static inline int is_kernel_inittext(unsigned long addr) @@ -98,7 +99,7 @@ int lookup_symbol_name(unsigned long addr, char *symname); int lookup_symbol_attrs(unsigned long addr, unsigned long *size, unsigned long *offset, char *modname, char *name); /* How and when do we show kallsyms values? */ -extern int kallsyms_show_value(void); +extern bool kallsyms_show_value(const struct cred *cred); #else /* !CONFIG_KALLSYMS */ @@ -158,7 +159,7 @@ static inline int lookup_symbol_attrs(unsigned long addr, unsigned long *size, u return -ERANGE; } -static inline int kallsyms_show_value(void) +static inline bool kallsyms_show_value(const struct cred *cred) { return false; } diff --git a/kernel/kallsyms.c b/kernel/kallsyms.c index 16c8c605f4b0..bb14e64f62a4 100644 --- a/kernel/kallsyms.c +++ b/kernel/kallsyms.c @@ -644,19 +644,20 @@ static inline int kallsyms_for_perf(void) * Otherwise, require CAP_SYSLOG (assuming kptr_restrict isn't set to * block even that). */ -int kallsyms_show_value(void) +bool kallsyms_show_value(const struct cred *cred) { switch (kptr_restrict) { case 0: if (kallsyms_for_perf()) - return 1; + return true; /* fallthrough */ case 1: - if (has_capability_noaudit(current, CAP_SYSLOG)) - return 1; + if (security_capable(cred, &init_user_ns, CAP_SYSLOG, + CAP_OPT_NOAUDIT) == 0) + return true; /* fallthrough */ default: - return 0; + return false; } } @@ -673,7 +674,11 @@ static int kallsyms_open(struct inode *inode, struct file *file) return -ENOMEM; reset_iter(iter, 0); - iter->show_value = kallsyms_show_value(); + /* + * Instead of checking this on every s_show() call, cache + * the result here at open time. + */ + iter->show_value = kallsyms_show_value(file->f_cred); return 0; } diff --git a/kernel/kprobes.c b/kernel/kprobes.c index 4a904cc56d68..d4de217e4a91 100644 --- a/kernel/kprobes.c +++ b/kernel/kprobes.c @@ -2448,7 +2448,7 @@ static void report_probe(struct seq_file *pi, struct kprobe *p, else kprobe_type = "k"; - if (!kallsyms_show_value()) + if (!kallsyms_show_value(current_cred())) addr = NULL; if (sym) @@ -2540,7 +2540,7 @@ static int kprobe_blacklist_seq_show(struct seq_file *m, void *v) * If /proc/kallsyms is not showing kernel address, we won't * show them here either. */ - if (!kallsyms_show_value()) + if (!kallsyms_show_value(current_cred())) seq_printf(m, "0x%px-0x%px\t%ps\n", NULL, NULL, (void *)ent->start_addr); else diff --git a/kernel/module.c b/kernel/module.c index e8a198588f26..a5022ae84e50 100644 --- a/kernel/module.c +++ b/kernel/module.c @@ -4377,7 +4377,7 @@ static int modules_open(struct inode *inode, struct file *file) if (!err) { struct seq_file *m = file->private_data; - m->private = kallsyms_show_value() ? NULL : (void *)8ul; + m->private = kallsyms_show_value(current_cred()) ? NULL : (void *)8ul; } return err; From ed66f991bb19d94cae5d38f77de81f96aac7813f Mon Sep 17 00:00:00 2001 From: Kees Cook Date: Thu, 2 Jul 2020 13:47:20 -0700 Subject: [PATCH 278/343] module: Refactor section attr into bin attribute In order to gain access to the open file's f_cred for kallsym visibility permission checks, refactor the module section attributes to use the bin_attribute instead of attribute interface. Additionally removes the redundant "name" struct member. Cc: stable@vger.kernel.org Reviewed-by: Greg Kroah-Hartman Tested-by: Jessica Yu Acked-by: Jessica Yu Signed-off-by: Kees Cook --- kernel/module.c | 45 ++++++++++++++++++++++++--------------------- 1 file changed, 24 insertions(+), 21 deletions(-) diff --git a/kernel/module.c b/kernel/module.c index a5022ae84e50..9e2954519259 100644 --- a/kernel/module.c +++ b/kernel/module.c @@ -1510,8 +1510,7 @@ static inline bool sect_empty(const Elf_Shdr *sect) } struct module_sect_attr { - struct module_attribute mattr; - char *name; + struct bin_attribute battr; unsigned long address; }; @@ -1521,11 +1520,16 @@ struct module_sect_attrs { struct module_sect_attr attrs[]; }; -static ssize_t module_sect_show(struct module_attribute *mattr, - struct module_kobject *mk, char *buf) +static ssize_t module_sect_read(struct file *file, struct kobject *kobj, + struct bin_attribute *battr, + char *buf, loff_t pos, size_t count) { struct module_sect_attr *sattr = - container_of(mattr, struct module_sect_attr, mattr); + container_of(battr, struct module_sect_attr, battr); + + if (pos != 0) + return -EINVAL; + return sprintf(buf, "0x%px\n", kptr_restrict < 2 ? (void *)sattr->address : NULL); } @@ -1535,7 +1539,7 @@ static void free_sect_attrs(struct module_sect_attrs *sect_attrs) unsigned int section; for (section = 0; section < sect_attrs->nsections; section++) - kfree(sect_attrs->attrs[section].name); + kfree(sect_attrs->attrs[section].battr.attr.name); kfree(sect_attrs); } @@ -1544,42 +1548,41 @@ static void add_sect_attrs(struct module *mod, const struct load_info *info) unsigned int nloaded = 0, i, size[2]; struct module_sect_attrs *sect_attrs; struct module_sect_attr *sattr; - struct attribute **gattr; + struct bin_attribute **gattr; /* Count loaded sections and allocate structures */ for (i = 0; i < info->hdr->e_shnum; i++) if (!sect_empty(&info->sechdrs[i])) nloaded++; size[0] = ALIGN(struct_size(sect_attrs, attrs, nloaded), - sizeof(sect_attrs->grp.attrs[0])); - size[1] = (nloaded + 1) * sizeof(sect_attrs->grp.attrs[0]); + sizeof(sect_attrs->grp.bin_attrs[0])); + size[1] = (nloaded + 1) * sizeof(sect_attrs->grp.bin_attrs[0]); sect_attrs = kzalloc(size[0] + size[1], GFP_KERNEL); if (sect_attrs == NULL) return; /* Setup section attributes. */ sect_attrs->grp.name = "sections"; - sect_attrs->grp.attrs = (void *)sect_attrs + size[0]; + sect_attrs->grp.bin_attrs = (void *)sect_attrs + size[0]; sect_attrs->nsections = 0; sattr = §_attrs->attrs[0]; - gattr = §_attrs->grp.attrs[0]; + gattr = §_attrs->grp.bin_attrs[0]; for (i = 0; i < info->hdr->e_shnum; i++) { Elf_Shdr *sec = &info->sechdrs[i]; if (sect_empty(sec)) continue; + sysfs_bin_attr_init(&sattr->battr); sattr->address = sec->sh_addr; - sattr->name = kstrdup(info->secstrings + sec->sh_name, - GFP_KERNEL); - if (sattr->name == NULL) + sattr->battr.attr.name = + kstrdup(info->secstrings + sec->sh_name, GFP_KERNEL); + if (sattr->battr.attr.name == NULL) goto out; sect_attrs->nsections++; - sysfs_attr_init(&sattr->mattr.attr); - sattr->mattr.show = module_sect_show; - sattr->mattr.store = NULL; - sattr->mattr.attr.name = sattr->name; - sattr->mattr.attr.mode = S_IRUSR; - *(gattr++) = &(sattr++)->mattr.attr; + sattr->battr.read = module_sect_read; + sattr->battr.size = 3 /* "0x", "\n" */ + (BITS_PER_LONG / 4); + sattr->battr.attr.mode = 0400; + *(gattr++) = &(sattr++)->battr; } *gattr = NULL; @@ -1669,7 +1672,7 @@ static void add_notes_attrs(struct module *mod, const struct load_info *info) continue; if (info->sechdrs[i].sh_type == SHT_NOTE) { sysfs_bin_attr_init(nattr); - nattr->attr.name = mod->sect_attrs->attrs[loaded].name; + nattr->attr.name = mod->sect_attrs->attrs[loaded].battr.attr.name; nattr->attr.mode = S_IRUGO; nattr->size = info->sechdrs[i].sh_size; nattr->private = (void *) info->sechdrs[i].sh_addr; From b25a7c5af9051850d4f3d93ca500056ab6ec724b Mon Sep 17 00:00:00 2001 From: Kees Cook Date: Thu, 2 Jul 2020 14:43:59 -0700 Subject: [PATCH 279/343] module: Do not expose section addresses to non-CAP_SYSLOG The printing of section addresses in /sys/module/*/sections/* was not using the correct credentials to evaluate visibility. Before: # cat /sys/module/*/sections/.*text 0xffffffffc0458000 ... # capsh --drop=CAP_SYSLOG -- -c "cat /sys/module/*/sections/.*text" 0xffffffffc0458000 ... After: # cat /sys/module/*/sections/*.text 0xffffffffc0458000 ... # capsh --drop=CAP_SYSLOG -- -c "cat /sys/module/*/sections/.*text" 0x0000000000000000 ... Additionally replaces the existing (safe) /proc/modules check with file->f_cred for consistency. Reported-by: Dominik Czarnota Fixes: be71eda5383f ("module: Fix display of wrong module .text address") Cc: stable@vger.kernel.org Tested-by: Jessica Yu Acked-by: Jessica Yu Signed-off-by: Kees Cook --- kernel/module.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/kernel/module.c b/kernel/module.c index 9e2954519259..e6c7571092cb 100644 --- a/kernel/module.c +++ b/kernel/module.c @@ -1530,8 +1530,8 @@ static ssize_t module_sect_read(struct file *file, struct kobject *kobj, if (pos != 0) return -EINVAL; - return sprintf(buf, "0x%px\n", kptr_restrict < 2 ? - (void *)sattr->address : NULL); + return sprintf(buf, "0x%px\n", + kallsyms_show_value(file->f_cred) ? (void *)sattr->address : NULL); } static void free_sect_attrs(struct module_sect_attrs *sect_attrs) @@ -4380,7 +4380,7 @@ static int modules_open(struct inode *inode, struct file *file) if (!err) { struct seq_file *m = file->private_data; - m->private = kallsyms_show_value(current_cred()) ? NULL : (void *)8ul; + m->private = kallsyms_show_value(file->f_cred) ? NULL : (void *)8ul; } return err; From 60f7bb66b88b649433bf700acfc60c3f24953871 Mon Sep 17 00:00:00 2001 From: Kees Cook Date: Thu, 2 Jul 2020 15:20:22 -0700 Subject: [PATCH 280/343] kprobes: Do not expose probe addresses to non-CAP_SYSLOG The kprobe show() functions were using "current"'s creds instead of the file opener's creds for kallsyms visibility. Fix to use seq_file->file->f_cred. Cc: Masami Hiramatsu Cc: stable@vger.kernel.org Fixes: 81365a947de4 ("kprobes: Show address of kprobes if kallsyms does") Fixes: ffb9bd68ebdb ("kprobes: Show blacklist addresses as same as kallsyms does") Signed-off-by: Kees Cook --- kernel/kprobes.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/kernel/kprobes.c b/kernel/kprobes.c index d4de217e4a91..2e97febeef77 100644 --- a/kernel/kprobes.c +++ b/kernel/kprobes.c @@ -2448,7 +2448,7 @@ static void report_probe(struct seq_file *pi, struct kprobe *p, else kprobe_type = "k"; - if (!kallsyms_show_value(current_cred())) + if (!kallsyms_show_value(pi->file->f_cred)) addr = NULL; if (sym) @@ -2540,7 +2540,7 @@ static int kprobe_blacklist_seq_show(struct seq_file *m, void *v) * If /proc/kallsyms is not showing kernel address, we won't * show them here either. */ - if (!kallsyms_show_value(current_cred())) + if (!kallsyms_show_value(m->file->f_cred)) seq_printf(m, "0x%px-0x%px\t%ps\n", NULL, NULL, (void *)ent->start_addr); else From 63960260457a02af2a6cb35d75e6bdb17299c882 Mon Sep 17 00:00:00 2001 From: Kees Cook Date: Thu, 2 Jul 2020 15:45:23 -0700 Subject: [PATCH 281/343] bpf: Check correct cred for CAP_SYSLOG in bpf_dump_raw_ok() When evaluating access control over kallsyms visibility, credentials at open() time need to be used, not the "current" creds (though in BPF's case, this has likely always been the same). Plumb access to associated file->f_cred down through bpf_dump_raw_ok() and its callers now that kallsysm_show_value() has been refactored to take struct cred. Cc: Alexei Starovoitov Cc: Daniel Borkmann Cc: bpf@vger.kernel.org Cc: stable@vger.kernel.org Fixes: 7105e828c087 ("bpf: allow for correlation of maps and helpers in dump") Signed-off-by: Kees Cook --- include/linux/filter.h | 4 ++-- kernel/bpf/syscall.c | 37 +++++++++++++++++++++---------------- net/core/sysctl_net_core.c | 2 +- 3 files changed, 24 insertions(+), 19 deletions(-) diff --git a/include/linux/filter.h b/include/linux/filter.h index 55104f6c78e8..0b0144752d78 100644 --- a/include/linux/filter.h +++ b/include/linux/filter.h @@ -884,12 +884,12 @@ void bpf_jit_compile(struct bpf_prog *prog); bool bpf_jit_needs_zext(void); bool bpf_helper_changes_pkt_data(void *func); -static inline bool bpf_dump_raw_ok(void) +static inline bool bpf_dump_raw_ok(const struct cred *cred) { /* Reconstruction of call-sites is dependent on kallsyms, * thus make dump the same restriction. */ - return kallsyms_show_value(current_cred()); + return kallsyms_show_value(cred); } struct bpf_prog *bpf_patch_insn_single(struct bpf_prog *prog, u32 off, diff --git a/kernel/bpf/syscall.c b/kernel/bpf/syscall.c index 8da159936bab..859053ddf05b 100644 --- a/kernel/bpf/syscall.c +++ b/kernel/bpf/syscall.c @@ -3139,7 +3139,8 @@ static const struct bpf_map *bpf_map_from_imm(const struct bpf_prog *prog, return NULL; } -static struct bpf_insn *bpf_insn_prepare_dump(const struct bpf_prog *prog) +static struct bpf_insn *bpf_insn_prepare_dump(const struct bpf_prog *prog, + const struct cred *f_cred) { const struct bpf_map *map; struct bpf_insn *insns; @@ -3165,7 +3166,7 @@ static struct bpf_insn *bpf_insn_prepare_dump(const struct bpf_prog *prog) code == (BPF_JMP | BPF_CALL_ARGS)) { if (code == (BPF_JMP | BPF_CALL_ARGS)) insns[i].code = BPF_JMP | BPF_CALL; - if (!bpf_dump_raw_ok()) + if (!bpf_dump_raw_ok(f_cred)) insns[i].imm = 0; continue; } @@ -3221,7 +3222,8 @@ static int set_info_rec_size(struct bpf_prog_info *info) return 0; } -static int bpf_prog_get_info_by_fd(struct bpf_prog *prog, +static int bpf_prog_get_info_by_fd(struct file *file, + struct bpf_prog *prog, const union bpf_attr *attr, union bpf_attr __user *uattr) { @@ -3290,11 +3292,11 @@ static int bpf_prog_get_info_by_fd(struct bpf_prog *prog, struct bpf_insn *insns_sanitized; bool fault; - if (prog->blinded && !bpf_dump_raw_ok()) { + if (prog->blinded && !bpf_dump_raw_ok(file->f_cred)) { info.xlated_prog_insns = 0; goto done; } - insns_sanitized = bpf_insn_prepare_dump(prog); + insns_sanitized = bpf_insn_prepare_dump(prog, file->f_cred); if (!insns_sanitized) return -ENOMEM; uinsns = u64_to_user_ptr(info.xlated_prog_insns); @@ -3328,7 +3330,7 @@ static int bpf_prog_get_info_by_fd(struct bpf_prog *prog, } if (info.jited_prog_len && ulen) { - if (bpf_dump_raw_ok()) { + if (bpf_dump_raw_ok(file->f_cred)) { uinsns = u64_to_user_ptr(info.jited_prog_insns); ulen = min_t(u32, info.jited_prog_len, ulen); @@ -3363,7 +3365,7 @@ static int bpf_prog_get_info_by_fd(struct bpf_prog *prog, ulen = info.nr_jited_ksyms; info.nr_jited_ksyms = prog->aux->func_cnt ? : 1; if (ulen) { - if (bpf_dump_raw_ok()) { + if (bpf_dump_raw_ok(file->f_cred)) { unsigned long ksym_addr; u64 __user *user_ksyms; u32 i; @@ -3394,7 +3396,7 @@ static int bpf_prog_get_info_by_fd(struct bpf_prog *prog, ulen = info.nr_jited_func_lens; info.nr_jited_func_lens = prog->aux->func_cnt ? : 1; if (ulen) { - if (bpf_dump_raw_ok()) { + if (bpf_dump_raw_ok(file->f_cred)) { u32 __user *user_lens; u32 func_len, i; @@ -3451,7 +3453,7 @@ static int bpf_prog_get_info_by_fd(struct bpf_prog *prog, else info.nr_jited_line_info = 0; if (info.nr_jited_line_info && ulen) { - if (bpf_dump_raw_ok()) { + if (bpf_dump_raw_ok(file->f_cred)) { __u64 __user *user_linfo; u32 i; @@ -3497,7 +3499,8 @@ static int bpf_prog_get_info_by_fd(struct bpf_prog *prog, return 0; } -static int bpf_map_get_info_by_fd(struct bpf_map *map, +static int bpf_map_get_info_by_fd(struct file *file, + struct bpf_map *map, const union bpf_attr *attr, union bpf_attr __user *uattr) { @@ -3540,7 +3543,8 @@ static int bpf_map_get_info_by_fd(struct bpf_map *map, return 0; } -static int bpf_btf_get_info_by_fd(struct btf *btf, +static int bpf_btf_get_info_by_fd(struct file *file, + struct btf *btf, const union bpf_attr *attr, union bpf_attr __user *uattr) { @@ -3555,7 +3559,8 @@ static int bpf_btf_get_info_by_fd(struct btf *btf, return btf_get_info_by_fd(btf, attr, uattr); } -static int bpf_link_get_info_by_fd(struct bpf_link *link, +static int bpf_link_get_info_by_fd(struct file *file, + struct bpf_link *link, const union bpf_attr *attr, union bpf_attr __user *uattr) { @@ -3608,15 +3613,15 @@ static int bpf_obj_get_info_by_fd(const union bpf_attr *attr, return -EBADFD; if (f.file->f_op == &bpf_prog_fops) - err = bpf_prog_get_info_by_fd(f.file->private_data, attr, + err = bpf_prog_get_info_by_fd(f.file, f.file->private_data, attr, uattr); else if (f.file->f_op == &bpf_map_fops) - err = bpf_map_get_info_by_fd(f.file->private_data, attr, + err = bpf_map_get_info_by_fd(f.file, f.file->private_data, attr, uattr); else if (f.file->f_op == &btf_fops) - err = bpf_btf_get_info_by_fd(f.file->private_data, attr, uattr); + err = bpf_btf_get_info_by_fd(f.file, f.file->private_data, attr, uattr); else if (f.file->f_op == &bpf_link_fops) - err = bpf_link_get_info_by_fd(f.file->private_data, + err = bpf_link_get_info_by_fd(f.file, f.file->private_data, attr, uattr); else err = -EINVAL; diff --git a/net/core/sysctl_net_core.c b/net/core/sysctl_net_core.c index f93f8ace6c56..6ada114bbcca 100644 --- a/net/core/sysctl_net_core.c +++ b/net/core/sysctl_net_core.c @@ -274,7 +274,7 @@ static int proc_dointvec_minmax_bpf_enable(struct ctl_table *table, int write, ret = proc_dointvec_minmax(&tmp, write, buffer, lenp, ppos); if (write && !ret) { if (jit_enable < 2 || - (jit_enable == 2 && bpf_dump_raw_ok())) { + (jit_enable == 2 && bpf_dump_raw_ok(current_cred()))) { *(int *)table->data = jit_enable; if (jit_enable == 2) pr_warn("bpf_jit_enable = 2 was set! NEVER use this in production, only for JIT debugging!\n"); From 2c79583927bb8154ecaa45a67dde97661d895ecd Mon Sep 17 00:00:00 2001 From: Kees Cook Date: Fri, 3 Jul 2020 11:25:09 -0700 Subject: [PATCH 282/343] selftests: kmod: Add module address visibility test Make sure we don't regress the CAP_SYSLOG behavior of the module address visibility via /proc/modules nor /sys/module/*/sections/*. Reviewed-by: Luis Chamberlain Signed-off-by: Kees Cook --- tools/testing/selftests/kmod/kmod.sh | 36 ++++++++++++++++++++++++++++ 1 file changed, 36 insertions(+) diff --git a/tools/testing/selftests/kmod/kmod.sh b/tools/testing/selftests/kmod/kmod.sh index 3702dbcc90a7..c82aa77958e5 100755 --- a/tools/testing/selftests/kmod/kmod.sh +++ b/tools/testing/selftests/kmod/kmod.sh @@ -63,6 +63,8 @@ ALL_TESTS="$ALL_TESTS 0008:150:1" ALL_TESTS="$ALL_TESTS 0009:150:1" ALL_TESTS="$ALL_TESTS 0010:1:1" ALL_TESTS="$ALL_TESTS 0011:1:1" +ALL_TESTS="$ALL_TESTS 0012:1:1" +ALL_TESTS="$ALL_TESTS 0013:1:1" # Kselftest framework requirement - SKIP code is 4. ksft_skip=4 @@ -470,6 +472,38 @@ kmod_test_0011() echo "$MODPROBE" > /proc/sys/kernel/modprobe } +kmod_check_visibility() +{ + local name="$1" + local cmd="$2" + + modprobe $DEFAULT_KMOD_DRIVER + + local priv=$(eval $cmd) + local unpriv=$(capsh --drop=CAP_SYSLOG -- -c "$cmd") + + if [ "$priv" = "$unpriv" ] || \ + [ "${priv:0:3}" = "0x0" ] || \ + [ "${unpriv:0:3}" != "0x0" ] ; then + echo "${FUNCNAME[0]}: FAIL, $name visible to unpriv: '$priv' vs '$unpriv'" >&2 + exit 1 + else + echo "${FUNCNAME[0]}: OK!" + fi +} + +kmod_test_0012() +{ + kmod_check_visibility /proc/modules \ + "grep '^${DEFAULT_KMOD_DRIVER}\b' /proc/modules | awk '{print \$NF}'" +} + +kmod_test_0013() +{ + kmod_check_visibility '/sys/module/*/sections/*' \ + "cat /sys/module/${DEFAULT_KMOD_DRIVER}/sections/.*text | head -n1" +} + list_tests() { echo "Test ID list:" @@ -489,6 +523,8 @@ list_tests() echo "0009 x $(get_test_count 0009) - multithreaded - push kmod_concurrent over max_modprobes for get_fs_type()" echo "0010 x $(get_test_count 0010) - test nonexistent modprobe path" echo "0011 x $(get_test_count 0011) - test completely disabling module autoloading" + echo "0012 x $(get_test_count 0012) - test /proc/modules address visibility under CAP_SYSLOG" + echo "0013 x $(get_test_count 0013) - test /sys/module/*/sections/* visibility under CAP_SYSLOG" } usage() From 530c8632b547ff72f11ff83654b22462a73f1f7b Mon Sep 17 00:00:00 2001 From: Aya Levin Date: Tue, 7 Jul 2020 14:06:11 +0300 Subject: [PATCH 283/343] IB/mlx5: Fix 50G per lane indication Some released FW versions mistakenly don't set the capability that 50G per lane link-modes are supported for VFs (ptys_extended_ethernet capability bit). Use PTYS.ext_eth_proto_capability instead, as this indication is always accurate. If PTYS.ext_eth_proto_capability is valid (has a non-zero value) conclude that the HCA supports 50G per lane. Otherwise, conclude that the HCA doesn't support 50G per lane. Fixes: 08e8676f1607 ("IB/mlx5: Add support for 50Gbps per lane link modes") Link: https://lore.kernel.org/r/20200707110612.882962-3-leon@kernel.org Signed-off-by: Aya Levin Reviewed-by: Eran Ben Elisha Reviewed-by: Saeed Mahameed Signed-off-by: Leon Romanovsky Signed-off-by: Jason Gunthorpe --- drivers/infiniband/hw/mlx5/main.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/infiniband/hw/mlx5/main.c b/drivers/infiniband/hw/mlx5/main.c index 343a8b8361e7..6f99ed03d88e 100644 --- a/drivers/infiniband/hw/mlx5/main.c +++ b/drivers/infiniband/hw/mlx5/main.c @@ -511,7 +511,7 @@ static int mlx5_query_port_roce(struct ib_device *device, u8 port_num, mdev_port_num); if (err) goto out; - ext = MLX5_CAP_PCAM_FEATURE(dev->mdev, ptys_extended_ethernet); + ext = !!MLX5_GET_ETH_PROTO(ptys_reg, out, true, eth_proto_capability); eth_prot_oper = MLX5_GET_ETH_PROTO(ptys_reg, out, ext, eth_proto_oper); props->active_width = IB_WIDTH_4X; From 0a03715068794e4b524f66ebbf412ab1f2933f3f Mon Sep 17 00:00:00 2001 From: Leon Romanovsky Date: Tue, 7 Jul 2020 14:06:12 +0300 Subject: [PATCH 284/343] RDMA/mlx5: Set PD pointers for the error flow unwind ib_pd is accessed internally during destroy of the TIR/TIS, but PD can be not set yet. This leading to the following kernel panic. BUG: kernel NULL pointer dereference, address: 0000000000000074 PGD 8000000079eaa067 P4D 8000000079eaa067 PUD 7ae81067 PMD 0 Oops: 0000 [#1] SMP PTI CPU: 1 PID: 709 Comm: syz-executor.0 Not tainted 5.8.0-rc3 #41 Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS rel-1.12.1-0-ga5cab58e9a3f-prebuilt.qemu.org 04/01/2014 RIP: 0010:destroy_raw_packet_qp_tis drivers/infiniband/hw/mlx5/qp.c:1189 [inline] RIP: 0010:destroy_raw_packet_qp drivers/infiniband/hw/mlx5/qp.c:1527 [inline] RIP: 0010:destroy_qp_common+0x2ca/0x4f0 drivers/infiniband/hw/mlx5/qp.c:2397 Code: 00 85 c0 74 2e e8 56 18 55 ff 48 8d b3 28 01 00 00 48 89 ef e8 d7 d3 ff ff 48 8b 43 08 8b b3 c0 01 00 00 48 8b bd a8 0a 00 00 <0f> b7 50 74 e8 0d 6a fe ff e8 28 18 55 ff 49 8d 55 50 4c 89 f1 48 RSP: 0018:ffffc900007bbac8 EFLAGS: 00010293 RAX: 0000000000000000 RBX: ffff88807949e800 RCX: 0000000000000998 RDX: 0000000000000000 RSI: 0000000000000008 RDI: ffff88807c180140 RBP: ffff88807b50c000 R08: 000000000002d379 R09: ffffc900007bba00 R10: 0000000000000001 R11: 000000000002d358 R12: ffff888076f37000 R13: ffff88807949e9c8 R14: ffffc900007bbe08 R15: ffff888076f37000 FS: 00000000019bf940(0000) GS:ffff88807dd00000(0000) knlGS:0000000000000000 CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 CR2: 0000000000000074 CR3: 0000000076d68004 CR4: 0000000000360ee0 DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000 DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400 Call Trace: mlx5_ib_create_qp+0xf36/0xf90 drivers/infiniband/hw/mlx5/qp.c:3014 _ib_create_qp drivers/infiniband/core/core_priv.h:333 [inline] create_qp+0x57f/0xd20 drivers/infiniband/core/uverbs_cmd.c:1443 ib_uverbs_create_qp+0xcf/0x100 drivers/infiniband/core/uverbs_cmd.c:1564 ib_uverbs_write+0x5fa/0x780 drivers/infiniband/core/uverbs_main.c:664 __vfs_write+0x3f/0x90 fs/read_write.c:495 vfs_write+0xc7/0x1f0 fs/read_write.c:559 ksys_write+0x5e/0x110 fs/read_write.c:612 do_syscall_64+0x3e/0x70 arch/x86/entry/common.c:359 entry_SYSCALL_64_after_hwframe+0x44/0xa9 RIP: 0033:0x466479 Code: Bad RIP value. RSP: 002b:00007ffd057b62b8 EFLAGS: 00000246 ORIG_RAX: 0000000000000001 RAX: ffffffffffffffda RBX: 000000000073bf00 RCX: 0000000000466479 RDX: 0000000000000070 RSI: 0000000020000240 RDI: 0000000000000003 RBP: 00000000019bf8fc R08: 0000000000000000 R09: 0000000000000000 R10: 0000000000000000 R11: 0000000000000246 R12: 00000000ffffffff R13: 0000000000000bf6 R14: 00000000004cb859 R15: 00000000006fefc0 Fixes: 6c41965d647a ("RDMA/mlx5: Don't access ib_qp fields in internal destroy QP path") Link: https://lore.kernel.org/r/20200707110612.882962-4-leon@kernel.org Signed-off-by: Leon Romanovsky Signed-off-by: Jason Gunthorpe --- drivers/infiniband/hw/mlx5/qp.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/infiniband/hw/mlx5/qp.c b/drivers/infiniband/hw/mlx5/qp.c index b316c9cafbc5..e050eade97a1 100644 --- a/drivers/infiniband/hw/mlx5/qp.c +++ b/drivers/infiniband/hw/mlx5/qp.c @@ -3005,11 +3005,12 @@ struct ib_qp *mlx5_ib_create_qp(struct ib_pd *pd, struct ib_qp_init_attr *attr, mlx5_ib_destroy_dct(qp); } else { /* - * The two lines below are temp solution till QP allocation + * These lines below are temp solution till QP allocation * will be moved to be under IB/core responsiblity. */ qp->ibqp.send_cq = attr->send_cq; qp->ibqp.recv_cq = attr->recv_cq; + qp->ibqp.pd = pd; destroy_qp_common(dev, qp, udata); } From 813357fead4adee73f7eca6bbe0e69dfcf514dc6 Mon Sep 17 00:00:00 2001 From: Dan Williams Date: Tue, 23 Jun 2020 21:35:26 -0700 Subject: [PATCH 285/343] libnvdimm/security: Fix key lookup permissions As of commit 8c0637e950d6 ("keys: Make the KEY_NEED_* perms an enum rather than a mask") lookup_user_key() needs an explicit declaration of what it wants to do with the key. Add KEY_NEED_SEARCH to fix a warning with the below signature, and fixes the inability to retrieve a key. WARNING: CPU: 15 PID: 6276 at security/keys/permission.c:35 key_task_permission+0xd3/0x140 [..] RIP: 0010:key_task_permission+0xd3/0x140 [..] Call Trace: lookup_user_key+0xeb/0x6b0 ? vsscanf+0x3df/0x840 ? key_validate+0x50/0x50 ? key_default_cmp+0x20/0x20 nvdimm_get_user_key_payload.part.0+0x21/0x110 [libnvdimm] nvdimm_security_store+0x67d/0xb20 [libnvdimm] security_store+0x67/0x1a0 [libnvdimm] kernfs_fop_write+0xcf/0x1c0 vfs_write+0xde/0x1d0 ksys_write+0x68/0xe0 do_syscall_64+0x5c/0xa0 entry_SYSCALL_64_after_hwframe+0x49/0xb3 Fixes: 8c0637e950d6 ("keys: Make the KEY_NEED_* perms an enum rather than a mask") Suggested-by: David Howells Reviewed-by: Dave Jiang Reviewed-by: Ira Weiny Cc: Dan Williams Cc: Vishal Verma Cc: Dave Jiang Cc: Ira Weiny Link: https://lore.kernel.org/r/159297332630.1304143.237026690015653759.stgit@dwillia2-desk3.amr.corp.intel.com Signed-off-by: Dan Williams --- drivers/nvdimm/security.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/nvdimm/security.c b/drivers/nvdimm/security.c index 89b85970912d..4cef69bd3c1b 100644 --- a/drivers/nvdimm/security.c +++ b/drivers/nvdimm/security.c @@ -95,7 +95,7 @@ static struct key *nvdimm_lookup_user_key(struct nvdimm *nvdimm, struct encrypted_key_payload *epayload; struct device *dev = &nvdimm->dev; - keyref = lookup_user_key(id, 0, 0); + keyref = lookup_user_key(id, 0, KEY_NEED_SEARCH); if (IS_ERR(keyref)) return NULL; From 6d5f904904608a9cd32854d7d0a4dd65b27f9935 Mon Sep 17 00:00:00 2001 From: Xiaoguang Wang Date: Thu, 9 Jul 2020 09:15:29 +0800 Subject: [PATCH 286/343] io_uring: export cq overflow status to userspace For those applications which are not willing to use io_uring_enter() to reap and handle cqes, they may completely rely on liburing's io_uring_peek_cqe(), but if cq ring has overflowed, currently because io_uring_peek_cqe() is not aware of this overflow, it won't enter kernel to flush cqes, below test program can reveal this bug: static void test_cq_overflow(struct io_uring *ring) { struct io_uring_cqe *cqe; struct io_uring_sqe *sqe; int issued = 0; int ret = 0; do { sqe = io_uring_get_sqe(ring); if (!sqe) { fprintf(stderr, "get sqe failed\n"); break;; } ret = io_uring_submit(ring); if (ret <= 0) { if (ret != -EBUSY) fprintf(stderr, "sqe submit failed: %d\n", ret); break; } issued++; } while (ret > 0); assert(ret == -EBUSY); printf("issued requests: %d\n", issued); while (issued) { ret = io_uring_peek_cqe(ring, &cqe); if (ret) { if (ret != -EAGAIN) { fprintf(stderr, "peek completion failed: %s\n", strerror(ret)); break; } printf("left requets: %d\n", issued); continue; } io_uring_cqe_seen(ring, cqe); issued--; printf("left requets: %d\n", issued); } } int main(int argc, char *argv[]) { int ret; struct io_uring ring; ret = io_uring_queue_init(16, &ring, 0); if (ret) { fprintf(stderr, "ring setup failed: %d\n", ret); return 1; } test_cq_overflow(&ring); return 0; } To fix this issue, export cq overflow status to userspace by adding new IORING_SQ_CQ_OVERFLOW flag, then helper functions() in liburing, such as io_uring_peek_cqe, can be aware of this cq overflow and do flush accordingly. Signed-off-by: Xiaoguang Wang Signed-off-by: Jens Axboe --- fs/io_uring.c | 11 +++++++++-- include/uapi/linux/io_uring.h | 1 + 2 files changed, 10 insertions(+), 2 deletions(-) diff --git a/fs/io_uring.c b/fs/io_uring.c index d37d7ea5ebe5..32e37c38f274 100644 --- a/fs/io_uring.c +++ b/fs/io_uring.c @@ -1274,6 +1274,7 @@ static bool io_cqring_overflow_flush(struct io_ring_ctx *ctx, bool force) if (cqe) { clear_bit(0, &ctx->sq_check_overflow); clear_bit(0, &ctx->cq_check_overflow); + ctx->rings->sq_flags &= ~IORING_SQ_CQ_OVERFLOW; } spin_unlock_irqrestore(&ctx->completion_lock, flags); io_cqring_ev_posted(ctx); @@ -1311,6 +1312,7 @@ static void __io_cqring_fill_event(struct io_kiocb *req, long res, long cflags) if (list_empty(&ctx->cq_overflow_list)) { set_bit(0, &ctx->sq_check_overflow); set_bit(0, &ctx->cq_check_overflow); + ctx->rings->sq_flags |= IORING_SQ_CQ_OVERFLOW; } req->flags |= REQ_F_OVERFLOW; refcount_inc(&req->refs); @@ -6080,9 +6082,9 @@ static int io_sq_thread(void *data) } /* Tell userspace we may need a wakeup call */ + spin_lock_irq(&ctx->completion_lock); ctx->rings->sq_flags |= IORING_SQ_NEED_WAKEUP; - /* make sure to read SQ tail after writing flags */ - smp_mb(); + spin_unlock_irq(&ctx->completion_lock); to_submit = io_sqring_entries(ctx); if (!to_submit || ret == -EBUSY) { @@ -6100,13 +6102,17 @@ static int io_sq_thread(void *data) schedule(); finish_wait(&ctx->sqo_wait, &wait); + spin_lock_irq(&ctx->completion_lock); ctx->rings->sq_flags &= ~IORING_SQ_NEED_WAKEUP; + spin_unlock_irq(&ctx->completion_lock); ret = 0; continue; } finish_wait(&ctx->sqo_wait, &wait); + spin_lock_irq(&ctx->completion_lock); ctx->rings->sq_flags &= ~IORING_SQ_NEED_WAKEUP; + spin_unlock_irq(&ctx->completion_lock); } mutex_lock(&ctx->uring_lock); @@ -7488,6 +7494,7 @@ static void io_uring_cancel_files(struct io_ring_ctx *ctx, if (list_empty(&ctx->cq_overflow_list)) { clear_bit(0, &ctx->sq_check_overflow); clear_bit(0, &ctx->cq_check_overflow); + ctx->rings->sq_flags &= ~IORING_SQ_CQ_OVERFLOW; } spin_unlock_irq(&ctx->completion_lock); diff --git a/include/uapi/linux/io_uring.h b/include/uapi/linux/io_uring.h index 92c22699a5a7..7843742b8b74 100644 --- a/include/uapi/linux/io_uring.h +++ b/include/uapi/linux/io_uring.h @@ -197,6 +197,7 @@ struct io_sqring_offsets { * sq_ring->flags */ #define IORING_SQ_NEED_WAKEUP (1U << 0) /* needs io_uring_enter wakeup */ +#define IORING_SQ_CQ_OVERFLOW (1U << 1) /* CQ ring is overflown */ struct io_cqring_offsets { __u32 head; From f3bd9dae3708a0ff6b067e766073ffeb853301f9 Mon Sep 17 00:00:00 2001 From: Yang Yingliang Date: Thu, 9 Jul 2020 10:11:41 +0000 Subject: [PATCH 287/343] io_uring: fix memleak in __io_sqe_files_update() I got a memleak report when doing some fuzz test: BUG: memory leak unreferenced object 0xffff888113e02300 (size 488): comm "syz-executor401", pid 356, jiffies 4294809529 (age 11.954s) hex dump (first 32 bytes): 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 ................ a0 a4 ce 19 81 88 ff ff 60 ce 09 0d 81 88 ff ff ........`....... backtrace: [<00000000129a84ec>] kmem_cache_zalloc include/linux/slab.h:659 [inline] [<00000000129a84ec>] __alloc_file+0x25/0x310 fs/file_table.c:101 [<000000003050ad84>] alloc_empty_file+0x4f/0x120 fs/file_table.c:151 [<000000004d0a41a3>] alloc_file+0x5e/0x550 fs/file_table.c:193 [<000000002cb242f0>] alloc_file_pseudo+0x16a/0x240 fs/file_table.c:233 [<00000000046a4baa>] anon_inode_getfile fs/anon_inodes.c:91 [inline] [<00000000046a4baa>] anon_inode_getfile+0xac/0x1c0 fs/anon_inodes.c:74 [<0000000035beb745>] __do_sys_perf_event_open+0xd4a/0x2680 kernel/events/core.c:11720 [<0000000049009dc7>] do_syscall_64+0x56/0xa0 arch/x86/entry/common.c:359 [<00000000353731ca>] entry_SYSCALL_64_after_hwframe+0x44/0xa9 BUG: memory leak unreferenced object 0xffff8881152dd5e0 (size 16): comm "syz-executor401", pid 356, jiffies 4294809529 (age 11.954s) hex dump (first 16 bytes): 01 00 00 00 01 00 00 00 00 00 00 00 00 00 00 00 ................ backtrace: [<0000000074caa794>] kmem_cache_zalloc include/linux/slab.h:659 [inline] [<0000000074caa794>] lsm_file_alloc security/security.c:567 [inline] [<0000000074caa794>] security_file_alloc+0x32/0x160 security/security.c:1440 [<00000000c6745ea3>] __alloc_file+0xba/0x310 fs/file_table.c:106 [<000000003050ad84>] alloc_empty_file+0x4f/0x120 fs/file_table.c:151 [<000000004d0a41a3>] alloc_file+0x5e/0x550 fs/file_table.c:193 [<000000002cb242f0>] alloc_file_pseudo+0x16a/0x240 fs/file_table.c:233 [<00000000046a4baa>] anon_inode_getfile fs/anon_inodes.c:91 [inline] [<00000000046a4baa>] anon_inode_getfile+0xac/0x1c0 fs/anon_inodes.c:74 [<0000000035beb745>] __do_sys_perf_event_open+0xd4a/0x2680 kernel/events/core.c:11720 [<0000000049009dc7>] do_syscall_64+0x56/0xa0 arch/x86/entry/common.c:359 [<00000000353731ca>] entry_SYSCALL_64_after_hwframe+0x44/0xa9 If io_sqe_file_register() failed, we need put the file that get by fget() to avoid the memleak. Fixes: c3a31e605620 ("io_uring: add support for IORING_REGISTER_FILES_UPDATE") Cc: stable@vger.kernel.org Reported-by: Hulk Robot Signed-off-by: Yang Yingliang Signed-off-by: Jens Axboe --- fs/io_uring.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/fs/io_uring.c b/fs/io_uring.c index 32e37c38f274..a9ce2e6f03dd 100644 --- a/fs/io_uring.c +++ b/fs/io_uring.c @@ -6851,8 +6851,10 @@ static int __io_sqe_files_update(struct io_ring_ctx *ctx, } table->files[index] = file; err = io_sqe_file_register(ctx, file, i); - if (err) + if (err) { + fput(file); break; + } } nr_args--; done++; From 09c717c92b52df54918e12cbfe6a4658233fda69 Mon Sep 17 00:00:00 2001 From: Florian Fainelli Date: Wed, 8 Jul 2020 22:13:40 -0700 Subject: [PATCH 288/343] arm64: Add missing sentinel to erratum_1463225 When the erratum_1463225 array was introduced a sentinel at the end was missing thus causing a KASAN: global-out-of-bounds in is_affected_midr_range_list on arm64 error. Fixes: a9e821b89daa ("arm64: Add KRYO4XX gold CPU cores to erratum list 1463225 and 1418040") Signed-off-by: Florian Fainelli Reviewed-by: Sai Prakash Ranjan Link: https://lore.kernel.org/linux-arm-kernel/CA+G9fYs3EavpU89-rTQfqQ9GgxAMgMAk7jiiVrfP0yxj5s+Q6g@mail.gmail.com/ Link: https://lore.kernel.org/r/20200709051345.14544-1-f.fainelli@gmail.com Signed-off-by: Will Deacon --- arch/arm64/kernel/cpu_errata.c | 1 + 1 file changed, 1 insertion(+) diff --git a/arch/arm64/kernel/cpu_errata.c b/arch/arm64/kernel/cpu_errata.c index 8e302dc093d0..79728bfb5351 100644 --- a/arch/arm64/kernel/cpu_errata.c +++ b/arch/arm64/kernel/cpu_errata.c @@ -782,6 +782,7 @@ static const struct midr_range erratum_1463225[] = { MIDR_RANGE(MIDR_CORTEX_A76, 0, 0, 3, 1), /* Kryo4xx Gold (rcpe to rfpf) => (r0p0 to r3p1) */ MIDR_RANGE(MIDR_QCOM_KRYO_4XX_GOLD, 0xc, 0xe, 0xf, 0xf), + {}, }; #endif From 528a9539348a0234375dfaa1ca5dbbb2f8f8e8d2 Mon Sep 17 00:00:00 2001 From: Janosch Frank Date: Tue, 7 Jul 2020 15:38:54 +0200 Subject: [PATCH 289/343] s390/mm: fix huge pte soft dirty copying If the pmd is soft dirty we must mark the pte as soft dirty (and not dirty). This fixes some cases for guest migration with huge page backings. Cc: # 4.8 Fixes: bc29b7ac1d9f ("s390/mm: clean up pte/pmd encoding") Reviewed-by: Christian Borntraeger Reviewed-by: Gerald Schaefer Signed-off-by: Janosch Frank Signed-off-by: Heiko Carstens --- arch/s390/mm/hugetlbpage.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/s390/mm/hugetlbpage.c b/arch/s390/mm/hugetlbpage.c index 82df06d720e8..3b5a4d25ca9b 100644 --- a/arch/s390/mm/hugetlbpage.c +++ b/arch/s390/mm/hugetlbpage.c @@ -117,7 +117,7 @@ static inline pte_t __rste_to_pte(unsigned long rste) _PAGE_YOUNG); #ifdef CONFIG_MEM_SOFT_DIRTY pte_val(pte) |= move_set_bit(rste, _SEGMENT_ENTRY_SOFT_DIRTY, - _PAGE_DIRTY); + _PAGE_SOFT_DIRTY); #endif pte_val(pte) |= move_set_bit(rste, _SEGMENT_ENTRY_NOEXEC, _PAGE_NOEXEC); From 5679b28142193a62f6af93249c0477be9f0c669b Mon Sep 17 00:00:00 2001 From: Ard Biesheuvel Date: Thu, 9 Jul 2020 15:59:53 +0300 Subject: [PATCH 290/343] arm64/alternatives: don't patch up internal branches Commit f7b93d42945c ("arm64/alternatives: use subsections for replacement sequences") moved the alternatives replacement sequences into subsections, in order to keep the as close as possible to the code that they replace. Unfortunately, this broke the logic in branch_insn_requires_update, which assumed that any branch into kernel executable code was a branch that required updating, which is no longer the case now that the code sequences that are patched in are in the same section as the patch site itself. So the only way to discriminate branches that require updating and ones that don't is to check whether the branch targets the replacement sequence itself, and so we can drop the call to kernel_text_address() entirely. Fixes: f7b93d42945c ("arm64/alternatives: use subsections for replacement sequences") Reported-by: Alexandru Elisei Signed-off-by: Ard Biesheuvel Tested-by: Alexandru Elisei Link: https://lore.kernel.org/r/20200709125953.30918-1-ardb@kernel.org Signed-off-by: Will Deacon --- arch/arm64/kernel/alternative.c | 16 ++-------------- 1 file changed, 2 insertions(+), 14 deletions(-) diff --git a/arch/arm64/kernel/alternative.c b/arch/arm64/kernel/alternative.c index d1757ef1b1e7..73039949b5ce 100644 --- a/arch/arm64/kernel/alternative.c +++ b/arch/arm64/kernel/alternative.c @@ -43,20 +43,8 @@ bool alternative_is_applied(u16 cpufeature) */ static bool branch_insn_requires_update(struct alt_instr *alt, unsigned long pc) { - unsigned long replptr; - - if (kernel_text_address(pc)) - return true; - - replptr = (unsigned long)ALT_REPL_PTR(alt); - if (pc >= replptr && pc <= (replptr + alt->alt_len)) - return false; - - /* - * Branching into *another* alternate sequence is doomed, and - * we're not even trying to fix it up. - */ - BUG(); + unsigned long replptr = (unsigned long)ALT_REPL_PTR(alt); + return !(pc >= replptr && pc <= (replptr + alt->alt_len)); } #define align_down(x, a) ((unsigned long)(x) & ~(((unsigned long)(a)) - 1)) From a77592a70081edb58a95b9da18fd5a2882a25666 Mon Sep 17 00:00:00 2001 From: Ronnie Sahlberg Date: Thu, 9 Jul 2020 20:39:49 +1000 Subject: [PATCH 291/343] cifs: fix reference leak for tlink Don't leak a reference to tlink during the NOTIFY ioctl Signed-off-by: Ronnie Sahlberg Signed-off-by: Steve French Reviewed-by: Aurelien Aptel CC: Stable # v5.6+ --- fs/cifs/ioctl.c | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) diff --git a/fs/cifs/ioctl.c b/fs/cifs/ioctl.c index 4a73e63c4d43..dcde44ff6cf9 100644 --- a/fs/cifs/ioctl.c +++ b/fs/cifs/ioctl.c @@ -169,6 +169,7 @@ long cifs_ioctl(struct file *filep, unsigned int command, unsigned long arg) unsigned int xid; struct cifsFileInfo *pSMBFile = filep->private_data; struct cifs_tcon *tcon; + struct tcon_link *tlink; struct cifs_sb_info *cifs_sb; __u64 ExtAttrBits = 0; __u64 caps; @@ -307,13 +308,19 @@ long cifs_ioctl(struct file *filep, unsigned int command, unsigned long arg) break; } cifs_sb = CIFS_SB(inode->i_sb); - tcon = tlink_tcon(cifs_sb_tlink(cifs_sb)); + tlink = cifs_sb_tlink(cifs_sb); + if (IS_ERR(tlink)) { + rc = PTR_ERR(tlink); + break; + } + tcon = tlink_tcon(tlink); if (tcon && tcon->ses->server->ops->notify) { rc = tcon->ses->server->ops->notify(xid, filep, (void __user *)arg); cifs_dbg(FYI, "ioctl notify rc %d\n", rc); } else rc = -EOPNOTSUPP; + cifs_put_tlink(tlink); break; default: cifs_dbg(FYI, "unsupported ioctl\n"); From a8dab63ea623610bb258d93649e30330dd1b7c8b Mon Sep 17 00:00:00 2001 From: Steve French Date: Tue, 7 Jul 2020 23:52:07 -0500 Subject: [PATCH 292/343] cifs: update internal module version number To 2.28 Signed-off-by: Steve French --- fs/cifs/cifsfs.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fs/cifs/cifsfs.h b/fs/cifs/cifsfs.h index c7a311d28d3d..99b3180c613a 100644 --- a/fs/cifs/cifsfs.h +++ b/fs/cifs/cifsfs.h @@ -156,5 +156,5 @@ extern int cifs_truncate_page(struct address_space *mapping, loff_t from); extern const struct export_operations cifs_export_ops; #endif /* CONFIG_CIFS_NFSD_EXPORT */ -#define CIFS_VERSION "2.27" +#define CIFS_VERSION "2.28" #endif /* _CIFSFS_H */ From 230ed397435e85b54f055c524fcb267ae2ce3bc4 Mon Sep 17 00:00:00 2001 From: Josef Bacik Date: Mon, 6 Jul 2020 09:14:12 -0400 Subject: [PATCH 293/343] btrfs: fix double put of block group with nocow While debugging a patch that I wrote I was hitting use-after-free panics when accessing block groups on unmount. This turned out to be because in the nocow case if we bail out of doing the nocow for whatever reason we need to call btrfs_dec_nocow_writers() if we called the inc. This puts our block group, but a few error cases does if (nocow) { btrfs_dec_nocow_writers(); goto error; } unfortunately, error is error: if (nocow) btrfs_dec_nocow_writers(); so we get a double put on our block group. Fix this by dropping the error cases calling of btrfs_dec_nocow_writers(), as it's handled at the error label now. Fixes: 762bf09893b4 ("btrfs: improve error handling in run_delalloc_nocow") CC: stable@vger.kernel.org # 5.4+ Reviewed-by: Filipe Manana Signed-off-by: Josef Bacik Reviewed-by: David Sterba Signed-off-by: David Sterba --- fs/btrfs/inode.c | 9 +-------- 1 file changed, 1 insertion(+), 8 deletions(-) diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c index cfa863d2d97c..11f81a148350 100644 --- a/fs/btrfs/inode.c +++ b/fs/btrfs/inode.c @@ -1690,12 +1690,8 @@ static noinline int run_delalloc_nocow(struct inode *inode, ret = fallback_to_cow(inode, locked_page, cow_start, found_key.offset - 1, page_started, nr_written); - if (ret) { - if (nocow) - btrfs_dec_nocow_writers(fs_info, - disk_bytenr); + if (ret) goto error; - } cow_start = (u64)-1; } @@ -1711,9 +1707,6 @@ static noinline int run_delalloc_nocow(struct inode *inode, ram_bytes, BTRFS_COMPRESS_NONE, BTRFS_ORDERED_PREALLOC); if (IS_ERR(em)) { - if (nocow) - btrfs_dec_nocow_writers(fs_info, - disk_bytenr); ret = PTR_ERR(em); goto error; } From d77765911385b65fc82d74ab71b8983cddfe0b58 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Thu, 9 Jul 2020 18:22:06 +0200 Subject: [PATCH 294/343] btrfs: wire up iter_file_splice_write btrfs implements the iter_write op and thus can use the more efficient iov_iter based splice implementation. For now falling back to the less efficient default is pretty harmless, but I have a pending series that removes the default, and thus would cause btrfs to not support splice at all. Reported-by: Andy Lavr Tested-by: Andy Lavr Signed-off-by: Christoph Hellwig Reviewed-by: David Sterba Signed-off-by: David Sterba --- fs/btrfs/file.c | 1 + 1 file changed, 1 insertion(+) diff --git a/fs/btrfs/file.c b/fs/btrfs/file.c index 2520605afc25..b0d2c976587e 100644 --- a/fs/btrfs/file.c +++ b/fs/btrfs/file.c @@ -3509,6 +3509,7 @@ const struct file_operations btrfs_file_operations = { .read_iter = generic_file_read_iter, .splice_read = generic_file_splice_read, .write_iter = btrfs_file_write_iter, + .splice_write = iter_file_splice_write, .mmap = btrfs_file_mmap, .open = btrfs_file_open, .release = btrfs_release_file, From 20303ec5d2165ee6344190274bc59118921f71d9 Mon Sep 17 00:00:00 2001 From: Huang Rui Date: Tue, 30 Jun 2020 10:43:21 +0800 Subject: [PATCH 295/343] drm/amdgpu: asd function needs to be unloaded in suspend phase Unload ASD function in suspend phase. Signed-off-by: Huang Rui Reviewed-by: Alex Deucher Signed-off-by: Alex Deucher Cc: stable@vger.kernel.org --- drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c index 7301fdcfb8bc..83bceb65ff80 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c @@ -1840,6 +1840,12 @@ static int psp_suspend(void *handle) } } + ret = psp_asd_unload(psp); + if (ret) { + DRM_ERROR("Failed to unload asd\n"); + return ret; + } + ret = psp_ring_stop(psp, PSP_RING_TYPE__KM); if (ret) { DRM_ERROR("PSP ring stop failed\n"); From c564b8601ae917086751d90f464d5f19d731ece7 Mon Sep 17 00:00:00 2001 From: Huang Rui Date: Tue, 30 Jun 2020 12:32:57 +0800 Subject: [PATCH 296/343] drm/amdgpu: add TMR destory function for psp TMR is required to be destoried with GFX_CMD_ID_DESTROY_TMR while the system goes to suspend. Otherwise, PSP may return the failure state (0xFFFF007) on Gfx-2-PSP command GFX_CMD_ID_SETUP_TMR after do multiple times suspend/resume. Signed-off-by: Huang Rui Reviewed-by: Alex Deucher Signed-off-by: Alex Deucher Cc: stable@vger.kernel.org --- drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c | 57 +++++++++++++++++++++++-- 1 file changed, 53 insertions(+), 4 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c index 83bceb65ff80..ef3269c43d4f 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c @@ -372,6 +372,52 @@ static int psp_tmr_load(struct psp_context *psp) return ret; } +static void psp_prep_tmr_unload_cmd_buf(struct psp_context *psp, + struct psp_gfx_cmd_resp *cmd) +{ + if (amdgpu_sriov_vf(psp->adev)) + cmd->cmd_id = GFX_CMD_ID_DESTROY_VMR; + else + cmd->cmd_id = GFX_CMD_ID_DESTROY_TMR; +} + +static int psp_tmr_unload(struct psp_context *psp) +{ + int ret; + struct psp_gfx_cmd_resp *cmd; + + cmd = kzalloc(sizeof(struct psp_gfx_cmd_resp), GFP_KERNEL); + if (!cmd) + return -ENOMEM; + + psp_prep_tmr_unload_cmd_buf(psp, cmd); + DRM_INFO("free PSP TMR buffer\n"); + + ret = psp_cmd_submit_buf(psp, NULL, cmd, + psp->fence_buf_mc_addr); + + kfree(cmd); + + return ret; +} + +static int psp_tmr_terminate(struct psp_context *psp) +{ + int ret; + void *tmr_buf; + void **pptr; + + ret = psp_tmr_unload(psp); + if (ret) + return ret; + + /* free TMR memory buffer */ + pptr = amdgpu_sriov_vf(psp->adev) ? &tmr_buf : NULL; + amdgpu_bo_free_kernel(&psp->tmr_bo, &psp->tmr_mc_addr, pptr); + + return 0; +} + static void psp_prep_asd_load_cmd_buf(struct psp_gfx_cmd_resp *cmd, uint64_t asd_mc, uint32_t size) { @@ -1779,8 +1825,6 @@ static int psp_hw_fini(void *handle) { struct amdgpu_device *adev = (struct amdgpu_device *)handle; struct psp_context *psp = &adev->psp; - void *tmr_buf; - void **pptr; if (psp->adev->psp.ta_fw) { psp_ras_terminate(psp); @@ -1790,10 +1834,9 @@ static int psp_hw_fini(void *handle) psp_asd_unload(psp); + psp_tmr_terminate(psp); psp_ring_destroy(psp, PSP_RING_TYPE__KM); - pptr = amdgpu_sriov_vf(psp->adev) ? &tmr_buf : NULL; - amdgpu_bo_free_kernel(&psp->tmr_bo, &psp->tmr_mc_addr, pptr); amdgpu_bo_free_kernel(&psp->fw_pri_bo, &psp->fw_pri_mc_addr, &psp->fw_pri_buf); amdgpu_bo_free_kernel(&psp->fence_buf_bo, @@ -1846,6 +1889,12 @@ static int psp_suspend(void *handle) return ret; } + ret = psp_tmr_terminate(psp); + if (ret) { + DRM_ERROR("Falied to terminate tmr\n"); + return ret; + } + ret = psp_ring_stop(psp, PSP_RING_TYPE__KM); if (ret) { DRM_ERROR("PSP ring stop failed\n"); From 3b2e973dff59d88bee1d814ddf8762a24fc02b60 Mon Sep 17 00:00:00 2001 From: Aaron Ma Date: Wed, 8 Jul 2020 04:16:22 -0400 Subject: [PATCH 297/343] drm/amd/display: add dmcub check on RENOIR RENOIR loads dmub fw not dmcu, check dmcu only will prevent loading iram, it breaks backlight control. Bug: https://bugzilla.kernel.org/show_bug.cgi?id=208277 Acked-by: Alex Deucher Reviewed-by: Nicholas Kazlauskas Signed-off-by: Aaron Ma Signed-off-by: Alex Deucher Cc: stable@vger.kernel.org --- drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c index 10ac8076d4f2..db5e0bb0d935 100644 --- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c +++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c @@ -1358,7 +1358,7 @@ static int dm_late_init(void *handle) struct dmcu *dmcu = NULL; bool ret; - if (!adev->dm.fw_dmcu) + if (!adev->dm.fw_dmcu && !adev->dm.dmub_fw) return detect_mst_link_for_all_connectors(adev->ddev); dmcu = adev->dm.dc->res_pool->dmcu; From 41855a898650803e24b284173354cc3e44d07725 Mon Sep 17 00:00:00 2001 From: Tom Rix Date: Mon, 6 Jul 2020 05:28:57 -0700 Subject: [PATCH 298/343] drm/radeon: fix double free clang static analysis flags this error drivers/gpu/drm/radeon/ci_dpm.c:5652:9: warning: Use of memory after it is freed [unix.Malloc] kfree(rdev->pm.dpm.ps[i].ps_priv); ^~~~~~~~~~~~~~~~~~~~~~~~~~ drivers/gpu/drm/radeon/ci_dpm.c:5654:2: warning: Attempt to free released memory [unix.Malloc] kfree(rdev->pm.dpm.ps); ^~~~~~~~~~~~~~~~~~~~~~ problem is reported in ci_dpm_fini, with these code blocks. for (i = 0; i < rdev->pm.dpm.num_ps; i++) { kfree(rdev->pm.dpm.ps[i].ps_priv); } kfree(rdev->pm.dpm.ps); The first free happens in ci_parse_power_table where it cleans up locally on a failure. ci_dpm_fini also does a cleanup. ret = ci_parse_power_table(rdev); if (ret) { ci_dpm_fini(rdev); return ret; } So remove the cleanup in ci_parse_power_table and move the num_ps calculation to inside the loop so ci_dpm_fini will know how many array elements to free. Fixes: cc8dbbb4f62a ("drm/radeon: add dpm support for CI dGPUs (v2)") Signed-off-by: Tom Rix Signed-off-by: Alex Deucher Cc: stable@vger.kernel.org --- drivers/gpu/drm/radeon/ci_dpm.c | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/drivers/gpu/drm/radeon/ci_dpm.c b/drivers/gpu/drm/radeon/ci_dpm.c index 134aa2b01f90..f434efdeca44 100644 --- a/drivers/gpu/drm/radeon/ci_dpm.c +++ b/drivers/gpu/drm/radeon/ci_dpm.c @@ -5563,6 +5563,7 @@ static int ci_parse_power_table(struct radeon_device *rdev) if (!rdev->pm.dpm.ps) return -ENOMEM; power_state_offset = (u8 *)state_array->states; + rdev->pm.dpm.num_ps = 0; for (i = 0; i < state_array->ucNumEntries; i++) { u8 *idx; power_state = (union pplib_power_state *)power_state_offset; @@ -5572,10 +5573,8 @@ static int ci_parse_power_table(struct radeon_device *rdev) if (!rdev->pm.power_state[i].clock_info) return -EINVAL; ps = kzalloc(sizeof(struct ci_ps), GFP_KERNEL); - if (ps == NULL) { - kfree(rdev->pm.dpm.ps); + if (ps == NULL) return -ENOMEM; - } rdev->pm.dpm.ps[i].ps_priv = ps; ci_parse_pplib_non_clock_info(rdev, &rdev->pm.dpm.ps[i], non_clock_info, @@ -5597,8 +5596,8 @@ static int ci_parse_power_table(struct radeon_device *rdev) k++; } power_state_offset += 2 + power_state->v2.ucNumDPMLevels; + rdev->pm.dpm.num_ps = i + 1; } - rdev->pm.dpm.num_ps = state_array->ucNumEntries; /* fill in the vce power states */ for (i = 0; i < RADEON_MAX_VCE_LEVELS; i++) { From f4892c327a8e5df7ce16cab40897daf90baf6bec Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Marek=20Ol=C5=A1=C3=A1k?= Date: Mon, 6 Jul 2020 18:23:17 -0400 Subject: [PATCH 299/343] drm/amdgpu: don't do soft recovery if gpu_recovery=0 MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit It's impossible to debug shader hangs with soft recovery. Signed-off-by: Marek Olšák Reviewed-by: Alex Deucher Reviewed-by: Christian König Signed-off-by: Alex Deucher Cc: stable@vger.kernel.org --- drivers/gpu/drm/amd/amdgpu/amdgpu_job.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_job.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_job.c index 47207188c569..4fb4c3b69687 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_job.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_job.c @@ -37,7 +37,8 @@ static void amdgpu_job_timedout(struct drm_sched_job *s_job) memset(&ti, 0, sizeof(struct amdgpu_task_info)); - if (amdgpu_ring_soft_recovery(ring, job->vmid, s_job->s_fence->parent)) { + if (amdgpu_gpu_recovery && + amdgpu_ring_soft_recovery(ring, job->vmid, s_job->s_fence->parent)) { DRM_ERROR("ring %s timeout, but soft recovered\n", s_job->sched->name); return; From 13cf8aab7425a253070433b5a55b4209ceac8b19 Mon Sep 17 00:00:00 2001 From: Sudarsana Reddy Kalluru Date: Wed, 8 Jul 2020 20:14:29 -0700 Subject: [PATCH 300/343] qed: Populate nvm-file attributes while reading nvm config partition. NVM config file address will be modified when the MBI image is upgraded. Driver would return stale config values if user reads the nvm-config (via ethtool -d) in this state. The fix is to re-populate nvm attribute info while reading the nvm config values/partition. Changes from previous version: ------------------------------- v3: Corrected the formatting in 'Fixes' tag. v2: Added 'Fixes' tag. Fixes: 1ac4329a1cff ("qed: Add configuration information to register dump and debug data") Signed-off-by: Sudarsana Reddy Kalluru Signed-off-by: Igor Russkikh Signed-off-by: David S. Miller --- drivers/net/ethernet/qlogic/qed/qed_debug.c | 4 ++++ drivers/net/ethernet/qlogic/qed/qed_dev.c | 12 +++--------- drivers/net/ethernet/qlogic/qed/qed_mcp.c | 7 +++++++ drivers/net/ethernet/qlogic/qed/qed_mcp.h | 7 +++++++ 4 files changed, 21 insertions(+), 9 deletions(-) diff --git a/drivers/net/ethernet/qlogic/qed/qed_debug.c b/drivers/net/ethernet/qlogic/qed/qed_debug.c index cb80863d5a77..3b9bbafafe68 100644 --- a/drivers/net/ethernet/qlogic/qed/qed_debug.c +++ b/drivers/net/ethernet/qlogic/qed/qed_debug.c @@ -7941,6 +7941,10 @@ int qed_dbg_all_data(struct qed_dev *cdev, void *buffer) DP_ERR(cdev, "qed_dbg_mcp_trace failed. rc = %d\n", rc); } + /* Re-populate nvm attribute info */ + qed_mcp_nvm_info_free(p_hwfn); + qed_mcp_nvm_info_populate(p_hwfn); + /* nvm cfg1 */ rc = qed_dbg_nvm_image(cdev, (u8 *)buffer + offset + diff --git a/drivers/net/ethernet/qlogic/qed/qed_dev.c b/drivers/net/ethernet/qlogic/qed/qed_dev.c index 3aa51374e727..9c26fde663b3 100644 --- a/drivers/net/ethernet/qlogic/qed/qed_dev.c +++ b/drivers/net/ethernet/qlogic/qed/qed_dev.c @@ -4472,12 +4472,6 @@ static int qed_get_dev_info(struct qed_hwfn *p_hwfn, struct qed_ptt *p_ptt) return 0; } -static void qed_nvm_info_free(struct qed_hwfn *p_hwfn) -{ - kfree(p_hwfn->nvm_info.image_att); - p_hwfn->nvm_info.image_att = NULL; -} - static int qed_hw_prepare_single(struct qed_hwfn *p_hwfn, void __iomem *p_regview, void __iomem *p_doorbells, @@ -4562,7 +4556,7 @@ static int qed_hw_prepare_single(struct qed_hwfn *p_hwfn, return rc; err3: if (IS_LEAD_HWFN(p_hwfn)) - qed_nvm_info_free(p_hwfn); + qed_mcp_nvm_info_free(p_hwfn); err2: if (IS_LEAD_HWFN(p_hwfn)) qed_iov_free_hw_info(p_hwfn->cdev); @@ -4623,7 +4617,7 @@ int qed_hw_prepare(struct qed_dev *cdev, if (rc) { if (IS_PF(cdev)) { qed_init_free(p_hwfn); - qed_nvm_info_free(p_hwfn); + qed_mcp_nvm_info_free(p_hwfn); qed_mcp_free(p_hwfn); qed_hw_hwfn_free(p_hwfn); } @@ -4657,7 +4651,7 @@ void qed_hw_remove(struct qed_dev *cdev) qed_iov_free_hw_info(cdev); - qed_nvm_info_free(p_hwfn); + qed_mcp_nvm_info_free(p_hwfn); } static void qed_chain_free_next_ptr(struct qed_dev *cdev, diff --git a/drivers/net/ethernet/qlogic/qed/qed_mcp.c b/drivers/net/ethernet/qlogic/qed/qed_mcp.c index 9624616806e7..0fd4520d0666 100644 --- a/drivers/net/ethernet/qlogic/qed/qed_mcp.c +++ b/drivers/net/ethernet/qlogic/qed/qed_mcp.c @@ -3280,6 +3280,13 @@ int qed_mcp_nvm_info_populate(struct qed_hwfn *p_hwfn) return rc; } +void qed_mcp_nvm_info_free(struct qed_hwfn *p_hwfn) +{ + kfree(p_hwfn->nvm_info.image_att); + p_hwfn->nvm_info.image_att = NULL; + p_hwfn->nvm_info.valid = false; +} + int qed_mcp_get_nvm_image_att(struct qed_hwfn *p_hwfn, enum qed_nvm_images image_id, diff --git a/drivers/net/ethernet/qlogic/qed/qed_mcp.h b/drivers/net/ethernet/qlogic/qed/qed_mcp.h index 5750b4c5ef63..12a705ed4bac 100644 --- a/drivers/net/ethernet/qlogic/qed/qed_mcp.h +++ b/drivers/net/ethernet/qlogic/qed/qed_mcp.h @@ -1220,6 +1220,13 @@ void qed_mcp_read_ufp_config(struct qed_hwfn *p_hwfn, struct qed_ptt *p_ptt); */ int qed_mcp_nvm_info_populate(struct qed_hwfn *p_hwfn); +/** + * @brief Delete nvm info shadow in the given hardware function + * + * @param p_hwfn + */ +void qed_mcp_nvm_info_free(struct qed_hwfn *p_hwfn); + /** * @brief Get the engine affinity configuration. * From 306381aec7c2b5a658eebca008c8a1b666536cba Mon Sep 17 00:00:00 2001 From: Cong Wang Date: Wed, 8 Jul 2020 20:13:59 -0700 Subject: [PATCH 301/343] net_sched: fix a memory leak in atm_tc_init() When tcf_block_get() fails inside atm_tc_init(), atm_tc_put() is called to release the qdisc p->link.q. But the flow->ref prevents it to do so, as the flow->ref is still zero. Fix this by moving the p->link.ref initialization before tcf_block_get(). Fixes: 6529eaba33f0 ("net: sched: introduce tcf block infractructure") Reported-and-tested-by: syzbot+d411cff6ab29cc2c311b@syzkaller.appspotmail.com Cc: Jamal Hadi Salim Cc: Jiri Pirko Signed-off-by: Cong Wang Signed-off-by: David S. Miller --- net/sched/sch_atm.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/net/sched/sch_atm.c b/net/sched/sch_atm.c index ee12ca9f55b4..1c281cc81f57 100644 --- a/net/sched/sch_atm.c +++ b/net/sched/sch_atm.c @@ -553,16 +553,16 @@ static int atm_tc_init(struct Qdisc *sch, struct nlattr *opt, if (!p->link.q) p->link.q = &noop_qdisc; pr_debug("atm_tc_init: link (%p) qdisc %p\n", &p->link, p->link.q); + p->link.vcc = NULL; + p->link.sock = NULL; + p->link.common.classid = sch->handle; + p->link.ref = 1; err = tcf_block_get(&p->link.block, &p->link.filter_list, sch, extack); if (err) return err; - p->link.vcc = NULL; - p->link.sock = NULL; - p->link.common.classid = sch->handle; - p->link.ref = 1; tasklet_init(&p->task, sch_atm_dequeue, (unsigned long)sch); return 0; } From 365f9ae4ee36037e2a9268fe7296065356840b4c Mon Sep 17 00:00:00 2001 From: Michal Kubecek Date: Thu, 9 Jul 2020 12:11:50 +0200 Subject: [PATCH 302/343] ethtool: fix genlmsg_put() failure handling in ethnl_default_dumpit() If the genlmsg_put() call in ethnl_default_dumpit() fails, we bail out without checking if we already have some messages in current skb like we do with ethnl_default_dump_one() failure later. Therefore if existing messages almost fill up the buffer so that there is not enough space even for netlink and genetlink header, we lose all prepared messages and return and error. Rather than duplicating the skb->len check, move the genlmsg_put(), genlmsg_cancel() and genlmsg_end() calls into ethnl_default_dump_one(). This is also more logical as all message composition will be in ethnl_default_dump_one() and only iteration logic will be left in ethnl_default_dumpit(). Fixes: 728480f12442 ("ethtool: default handlers for GET requests") Reported-by: Jakub Kicinski Signed-off-by: Michal Kubecek Reviewed-by: Jakub Kicinski Signed-off-by: David S. Miller --- net/ethtool/netlink.c | 27 +++++++++++++-------------- 1 file changed, 13 insertions(+), 14 deletions(-) diff --git a/net/ethtool/netlink.c b/net/ethtool/netlink.c index 88fd07f47040..dd8a1c1dc07d 100644 --- a/net/ethtool/netlink.c +++ b/net/ethtool/netlink.c @@ -376,10 +376,17 @@ static int ethnl_default_doit(struct sk_buff *skb, struct genl_info *info) } static int ethnl_default_dump_one(struct sk_buff *skb, struct net_device *dev, - const struct ethnl_dump_ctx *ctx) + const struct ethnl_dump_ctx *ctx, + struct netlink_callback *cb) { + void *ehdr; int ret; + ehdr = genlmsg_put(skb, NETLINK_CB(cb->skb).portid, cb->nlh->nlmsg_seq, + ðtool_genl_family, 0, ctx->ops->reply_cmd); + if (!ehdr) + return -EMSGSIZE; + ethnl_init_reply_data(ctx->reply_data, ctx->ops, dev); rtnl_lock(); ret = ctx->ops->prepare_data(ctx->req_info, ctx->reply_data, NULL); @@ -395,6 +402,10 @@ static int ethnl_default_dump_one(struct sk_buff *skb, struct net_device *dev, if (ctx->ops->cleanup_data) ctx->ops->cleanup_data(ctx->reply_data); ctx->reply_data->dev = NULL; + if (ret < 0) + genlmsg_cancel(skb, ehdr); + else + genlmsg_end(skb, ehdr); return ret; } @@ -411,7 +422,6 @@ static int ethnl_default_dumpit(struct sk_buff *skb, int s_idx = ctx->pos_idx; int h, idx = 0; int ret = 0; - void *ehdr; rtnl_lock(); for (h = ctx->pos_hash; h < NETDEV_HASHENTRIES; h++, s_idx = 0) { @@ -431,26 +441,15 @@ static int ethnl_default_dumpit(struct sk_buff *skb, dev_hold(dev); rtnl_unlock(); - ehdr = genlmsg_put(skb, NETLINK_CB(cb->skb).portid, - cb->nlh->nlmsg_seq, - ðtool_genl_family, 0, - ctx->ops->reply_cmd); - if (!ehdr) { - dev_put(dev); - ret = -EMSGSIZE; - goto out; - } - ret = ethnl_default_dump_one(skb, dev, ctx); + ret = ethnl_default_dump_one(skb, dev, ctx, cb); dev_put(dev); if (ret < 0) { - genlmsg_cancel(skb, ehdr); if (ret == -EOPNOTSUPP) goto lock_and_cont; if (likely(skb->len)) ret = skb->len; goto out; } - genlmsg_end(skb, ehdr); lock_and_cont: rtnl_lock(); if (net->dev_base_seq != seq) { From f3dda7a679df183e798b86e7b6ec05ab35476de3 Mon Sep 17 00:00:00 2001 From: Martin KaFai Lau Date: Wed, 8 Jul 2020 23:11:04 -0700 Subject: [PATCH 303/343] bpf: net: Avoid copying sk_user_data of reuseport_array during sk_clone It makes little sense for copying sk_user_data of reuseport_array during sk_clone_lock(). This patch reuses the SK_USER_DATA_NOCOPY bit introduced in commit f1ff5ce2cd5e ("net, sk_msg: Clear sk_user_data pointer on clone if tagged"). It is used to mark the sk_user_data is not supposed to be copied to its clone. Although the cloned sk's sk_user_data will not be used/freed in bpf_sk_reuseport_detach(), this change can still allow the cloned sk's sk_user_data to be used by some other means. Freeing the reuseport_array's sk_user_data does not require a rcu grace period. Thus, the existing rcu_assign_sk_user_data_nocopy() is not used. Fixes: 5dc4c4b7d4e8 ("bpf: Introduce BPF_MAP_TYPE_REUSEPORT_SOCKARRAY") Signed-off-by: Martin KaFai Lau Signed-off-by: Daniel Borkmann Reviewed-by: Jakub Sitnicki Link: https://lore.kernel.org/bpf/20200709061104.4018798-1-kafai@fb.com --- kernel/bpf/reuseport_array.c | 13 +++++++++---- 1 file changed, 9 insertions(+), 4 deletions(-) diff --git a/kernel/bpf/reuseport_array.c b/kernel/bpf/reuseport_array.c index 21cde24386db..a95bc8d7e812 100644 --- a/kernel/bpf/reuseport_array.c +++ b/kernel/bpf/reuseport_array.c @@ -20,11 +20,14 @@ static struct reuseport_array *reuseport_array(struct bpf_map *map) /* The caller must hold the reuseport_lock */ void bpf_sk_reuseport_detach(struct sock *sk) { - struct sock __rcu **socks; + uintptr_t sk_user_data; write_lock_bh(&sk->sk_callback_lock); - socks = sk->sk_user_data; - if (socks) { + sk_user_data = (uintptr_t)sk->sk_user_data; + if (sk_user_data) { + struct sock __rcu **socks; + + socks = (void *)(sk_user_data & SK_USER_DATA_PTRMASK); WRITE_ONCE(sk->sk_user_data, NULL); /* * Do not move this NULL assignment outside of @@ -252,6 +255,7 @@ int bpf_fd_reuseport_array_update_elem(struct bpf_map *map, void *key, struct sock *free_osk = NULL, *osk, *nsk; struct sock_reuseport *reuse; u32 index = *(u32 *)key; + uintptr_t sk_user_data; struct socket *socket; int err, fd; @@ -305,7 +309,8 @@ int bpf_fd_reuseport_array_update_elem(struct bpf_map *map, void *key, if (err) goto put_file_unlock; - WRITE_ONCE(nsk->sk_user_data, &array->ptrs[index]); + sk_user_data = (uintptr_t)&array->ptrs[index] | SK_USER_DATA_NOCOPY; + WRITE_ONCE(nsk->sk_user_data, (void *)sk_user_data); rcu_assign_pointer(array->ptrs[index], nsk); free_osk = osk; err = 0; From c9a368f1c0fbe2e3a21ebf231caeae58b18b2681 Mon Sep 17 00:00:00 2001 From: Martin KaFai Lau Date: Wed, 8 Jul 2020 23:11:10 -0700 Subject: [PATCH 304/343] bpf: net: Avoid incorrect bpf_sk_reuseport_detach call bpf_sk_reuseport_detach is currently called when sk->sk_user_data is not NULL. It is incorrect because sk->sk_user_data may not be managed by the bpf's reuseport_array. It has been reported in [1] that, the bpf_sk_reuseport_detach() which is called from udp_lib_unhash() has corrupted the sk_user_data managed by l2tp. This patch solves it by using another bit (defined as SK_USER_DATA_BPF) of the sk_user_data pointer value. It marks that a sk_user_data is managed/owned by BPF. The patch depends on a PTRMASK introduced in commit f1ff5ce2cd5e ("net, sk_msg: Clear sk_user_data pointer on clone if tagged"). [ Note: sk->sk_user_data is used by bpf's reuseport_array only when a sk is added to the bpf's reuseport_array. i.e. doing setsockopt(SO_REUSEPORT) and having "sk->sk_reuseport == 1" alone will not stop sk->sk_user_data being used by other means. ] [1]: https://lore.kernel.org/netdev/20200706121259.GA20199@katalix.com/ Fixes: 5dc4c4b7d4e8 ("bpf: Introduce BPF_MAP_TYPE_REUSEPORT_SOCKARRAY") Reported-by: James Chapman Reported-by: syzbot+9f092552ba9a5efca5df@syzkaller.appspotmail.com Signed-off-by: Martin KaFai Lau Signed-off-by: Daniel Borkmann Tested-by: James Chapman Acked-by: James Chapman Link: https://lore.kernel.org/bpf/20200709061110.4019316-1-kafai@fb.com --- include/net/sock.h | 3 ++- kernel/bpf/reuseport_array.c | 5 +++-- 2 files changed, 5 insertions(+), 3 deletions(-) diff --git a/include/net/sock.h b/include/net/sock.h index 3428619faae4..1183507df95b 100644 --- a/include/net/sock.h +++ b/include/net/sock.h @@ -533,7 +533,8 @@ enum sk_pacing { * be copied. */ #define SK_USER_DATA_NOCOPY 1UL -#define SK_USER_DATA_PTRMASK ~(SK_USER_DATA_NOCOPY) +#define SK_USER_DATA_BPF 2UL /* Managed by BPF */ +#define SK_USER_DATA_PTRMASK ~(SK_USER_DATA_NOCOPY | SK_USER_DATA_BPF) /** * sk_user_data_is_nocopy - Test if sk_user_data pointer must not be copied diff --git a/kernel/bpf/reuseport_array.c b/kernel/bpf/reuseport_array.c index a95bc8d7e812..cae9d505e04a 100644 --- a/kernel/bpf/reuseport_array.c +++ b/kernel/bpf/reuseport_array.c @@ -24,7 +24,7 @@ void bpf_sk_reuseport_detach(struct sock *sk) write_lock_bh(&sk->sk_callback_lock); sk_user_data = (uintptr_t)sk->sk_user_data; - if (sk_user_data) { + if (sk_user_data & SK_USER_DATA_BPF) { struct sock __rcu **socks; socks = (void *)(sk_user_data & SK_USER_DATA_PTRMASK); @@ -309,7 +309,8 @@ int bpf_fd_reuseport_array_update_elem(struct bpf_map *map, void *key, if (err) goto put_file_unlock; - sk_user_data = (uintptr_t)&array->ptrs[index] | SK_USER_DATA_NOCOPY; + sk_user_data = (uintptr_t)&array->ptrs[index] | SK_USER_DATA_NOCOPY | + SK_USER_DATA_BPF; WRITE_ONCE(nsk->sk_user_data, (void *)sk_user_data); rcu_assign_pointer(array->ptrs[index], nsk); free_osk = osk; From ce69e563b325f620863830c246a8698ccea52048 Mon Sep 17 00:00:00 2001 From: Christoph Paasch Date: Wed, 8 Jul 2020 16:18:34 -0700 Subject: [PATCH 305/343] tcp: make sure listeners don't initialize congestion-control state syzkaller found its way into setsockopt with TCP_CONGESTION "cdg". tcp_cdg_init() does a kcalloc to store the gradients. As sk_clone_lock just copies all the memory, the allocated pointer will be copied as well, if the app called setsockopt(..., TCP_CONGESTION) on the listener. If now the socket will be destroyed before the congestion-control has properly been initialized (through a call to tcp_init_transfer), we will end up freeing memory that does not belong to that particular socket, opening the door to a double-free: [ 11.413102] ================================================================== [ 11.414181] BUG: KASAN: double-free or invalid-free in tcp_cleanup_congestion_control+0x58/0xd0 [ 11.415329] [ 11.415560] CPU: 3 PID: 4884 Comm: syz-executor.5 Not tainted 5.8.0-rc2 #80 [ 11.416544] Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS rel-1.12.1-0-ga5cab58e9a3f-prebuilt.qemu.org 04/01/2014 [ 11.418148] Call Trace: [ 11.418534] [ 11.418834] dump_stack+0x7d/0xb0 [ 11.419297] print_address_description.constprop.0+0x1a/0x210 [ 11.422079] kasan_report_invalid_free+0x51/0x80 [ 11.423433] __kasan_slab_free+0x15e/0x170 [ 11.424761] kfree+0x8c/0x230 [ 11.425157] tcp_cleanup_congestion_control+0x58/0xd0 [ 11.425872] tcp_v4_destroy_sock+0x57/0x5a0 [ 11.426493] inet_csk_destroy_sock+0x153/0x2c0 [ 11.427093] tcp_v4_syn_recv_sock+0xb29/0x1100 [ 11.427731] tcp_get_cookie_sock+0xc3/0x4a0 [ 11.429457] cookie_v4_check+0x13d0/0x2500 [ 11.433189] tcp_v4_do_rcv+0x60e/0x780 [ 11.433727] tcp_v4_rcv+0x2869/0x2e10 [ 11.437143] ip_protocol_deliver_rcu+0x23/0x190 [ 11.437810] ip_local_deliver+0x294/0x350 [ 11.439566] __netif_receive_skb_one_core+0x15d/0x1a0 [ 11.441995] process_backlog+0x1b1/0x6b0 [ 11.443148] net_rx_action+0x37e/0xc40 [ 11.445361] __do_softirq+0x18c/0x61a [ 11.445881] asm_call_on_stack+0x12/0x20 [ 11.446409] [ 11.446716] do_softirq_own_stack+0x34/0x40 [ 11.447259] do_softirq.part.0+0x26/0x30 [ 11.447827] __local_bh_enable_ip+0x46/0x50 [ 11.448406] ip_finish_output2+0x60f/0x1bc0 [ 11.450109] __ip_queue_xmit+0x71c/0x1b60 [ 11.451861] __tcp_transmit_skb+0x1727/0x3bb0 [ 11.453789] tcp_rcv_state_process+0x3070/0x4d3a [ 11.456810] tcp_v4_do_rcv+0x2ad/0x780 [ 11.457995] __release_sock+0x14b/0x2c0 [ 11.458529] release_sock+0x4a/0x170 [ 11.459005] __inet_stream_connect+0x467/0xc80 [ 11.461435] inet_stream_connect+0x4e/0xa0 [ 11.462043] __sys_connect+0x204/0x270 [ 11.465515] __x64_sys_connect+0x6a/0xb0 [ 11.466088] do_syscall_64+0x3e/0x70 [ 11.466617] entry_SYSCALL_64_after_hwframe+0x44/0xa9 [ 11.467341] RIP: 0033:0x7f56046dc469 [ 11.467844] Code: Bad RIP value. [ 11.468282] RSP: 002b:00007f5604dccdd8 EFLAGS: 00000246 ORIG_RAX: 000000000000002a [ 11.469326] RAX: ffffffffffffffda RBX: 000000000068bf00 RCX: 00007f56046dc469 [ 11.470379] RDX: 0000000000000010 RSI: 0000000020000000 RDI: 0000000000000004 [ 11.471311] RBP: 00000000ffffffff R08: 0000000000000000 R09: 0000000000000000 [ 11.472286] R10: 0000000000000000 R11: 0000000000000246 R12: 0000000000000000 [ 11.473341] R13: 000000000041427c R14: 00007f5604dcd5c0 R15: 0000000000000003 [ 11.474321] [ 11.474527] Allocated by task 4884: [ 11.475031] save_stack+0x1b/0x40 [ 11.475548] __kasan_kmalloc.constprop.0+0xc2/0xd0 [ 11.476182] tcp_cdg_init+0xf0/0x150 [ 11.476744] tcp_init_congestion_control+0x9b/0x3a0 [ 11.477435] tcp_set_congestion_control+0x270/0x32f [ 11.478088] do_tcp_setsockopt.isra.0+0x521/0x1a00 [ 11.478744] __sys_setsockopt+0xff/0x1e0 [ 11.479259] __x64_sys_setsockopt+0xb5/0x150 [ 11.479895] do_syscall_64+0x3e/0x70 [ 11.480395] entry_SYSCALL_64_after_hwframe+0x44/0xa9 [ 11.481097] [ 11.481321] Freed by task 4872: [ 11.481783] save_stack+0x1b/0x40 [ 11.482230] __kasan_slab_free+0x12c/0x170 [ 11.482839] kfree+0x8c/0x230 [ 11.483240] tcp_cleanup_congestion_control+0x58/0xd0 [ 11.483948] tcp_v4_destroy_sock+0x57/0x5a0 [ 11.484502] inet_csk_destroy_sock+0x153/0x2c0 [ 11.485144] tcp_close+0x932/0xfe0 [ 11.485642] inet_release+0xc1/0x1c0 [ 11.486131] __sock_release+0xc0/0x270 [ 11.486697] sock_close+0xc/0x10 [ 11.487145] __fput+0x277/0x780 [ 11.487632] task_work_run+0xeb/0x180 [ 11.488118] __prepare_exit_to_usermode+0x15a/0x160 [ 11.488834] do_syscall_64+0x4a/0x70 [ 11.489326] entry_SYSCALL_64_after_hwframe+0x44/0xa9 Wei Wang fixed a part of these CDG-malloc issues with commit c12014440750 ("tcp: memset ca_priv data to 0 properly"). This patch here fixes the listener-scenario: We make sure that listeners setting the congestion-control through setsockopt won't initialize it (thus CDG never allocates on listeners). For those who use AF_UNSPEC to reuse a socket, tcp_disconnect() is changed to cleanup afterwards. (The issue can be reproduced at least down to v4.4.x.) Cc: Wei Wang Cc: Eric Dumazet Fixes: 2b0a8c9eee81 ("tcp: add CDG congestion control") Signed-off-by: Christoph Paasch Signed-off-by: Eric Dumazet Signed-off-by: David S. Miller --- net/ipv4/tcp.c | 3 +++ net/ipv4/tcp_cong.c | 2 +- 2 files changed, 4 insertions(+), 1 deletion(-) diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c index 861fbd84c9cf..6f0caf9a866d 100644 --- a/net/ipv4/tcp.c +++ b/net/ipv4/tcp.c @@ -2691,6 +2691,9 @@ int tcp_disconnect(struct sock *sk, int flags) tp->window_clamp = 0; tp->delivered = 0; tp->delivered_ce = 0; + if (icsk->icsk_ca_ops->release) + icsk->icsk_ca_ops->release(sk); + memset(icsk->icsk_ca_priv, 0, sizeof(icsk->icsk_ca_priv)); tcp_set_ca_state(sk, TCP_CA_Open); tp->is_sack_reneg = 0; tcp_clear_retrans(tp); diff --git a/net/ipv4/tcp_cong.c b/net/ipv4/tcp_cong.c index 3172e31987be..62878cf26d9c 100644 --- a/net/ipv4/tcp_cong.c +++ b/net/ipv4/tcp_cong.c @@ -197,7 +197,7 @@ static void tcp_reinit_congestion_control(struct sock *sk, icsk->icsk_ca_setsockopt = 1; memset(icsk->icsk_ca_priv, 0, sizeof(icsk->icsk_ca_priv)); - if (sk->sk_state != TCP_CLOSE) + if (!((1 << sk->sk_state) & (TCPF_CLOSE | TCPF_LISTEN))) tcp_init_congestion_control(sk); } From f43cb0d672aa8eb09bfdb779de5900c040487d1d Mon Sep 17 00:00:00 2001 From: Lorenz Bauer Date: Thu, 9 Jul 2020 12:51:51 +0100 Subject: [PATCH 306/343] selftests: bpf: Fix detach from sockmap tests Fix sockmap tests which rely on old bpf_prog_dispatch behaviour. In the first case, the tests check that detaching without giving a program succeeds. Since these are not the desired semantics, invert the condition. In the second case, the clean up code doesn't supply the necessary program fds. Fixes: bb0de3131f4c ("bpf: sockmap: Require attach_bpf_fd when detaching a program") Reported-by: Martin KaFai Lau Signed-off-by: Lorenz Bauer Signed-off-by: Daniel Borkmann Reviewed-by: Jakub Sitnicki Link: https://lore.kernel.org/bpf/20200709115151.75829-1-lmb@cloudflare.com --- tools/testing/selftests/bpf/test_maps.c | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/tools/testing/selftests/bpf/test_maps.c b/tools/testing/selftests/bpf/test_maps.c index 6a12a0e01e07..754cf611723e 100644 --- a/tools/testing/selftests/bpf/test_maps.c +++ b/tools/testing/selftests/bpf/test_maps.c @@ -789,19 +789,19 @@ static void test_sockmap(unsigned int tasks, void *data) } err = bpf_prog_detach(fd, BPF_SK_SKB_STREAM_PARSER); - if (err) { + if (!err) { printf("Failed empty parser prog detach\n"); goto out_sockmap; } err = bpf_prog_detach(fd, BPF_SK_SKB_STREAM_VERDICT); - if (err) { + if (!err) { printf("Failed empty verdict prog detach\n"); goto out_sockmap; } err = bpf_prog_detach(fd, BPF_SK_MSG_VERDICT); - if (err) { + if (!err) { printf("Failed empty msg verdict prog detach\n"); goto out_sockmap; } @@ -1090,19 +1090,19 @@ static void test_sockmap(unsigned int tasks, void *data) assert(status == 0); } - err = bpf_prog_detach(map_fd_rx, __MAX_BPF_ATTACH_TYPE); + err = bpf_prog_detach2(parse_prog, map_fd_rx, __MAX_BPF_ATTACH_TYPE); if (!err) { printf("Detached an invalid prog type.\n"); goto out_sockmap; } - err = bpf_prog_detach(map_fd_rx, BPF_SK_SKB_STREAM_PARSER); + err = bpf_prog_detach2(parse_prog, map_fd_rx, BPF_SK_SKB_STREAM_PARSER); if (err) { printf("Failed parser prog detach\n"); goto out_sockmap; } - err = bpf_prog_detach(map_fd_rx, BPF_SK_SKB_STREAM_VERDICT); + err = bpf_prog_detach2(verdict_prog, map_fd_rx, BPF_SK_SKB_STREAM_VERDICT); if (err) { printf("Failed parser prog detach\n"); goto out_sockmap; From 14b032b8f8fce03a546dcf365454bec8c4a58d7d Mon Sep 17 00:00:00 2001 From: Cong Wang Date: Thu, 9 Jul 2020 16:28:44 -0700 Subject: [PATCH 307/343] cgroup: Fix sock_cgroup_data on big-endian. In order for no_refcnt and is_data to be the lowest order two bits in the 'val' we have to pad out the bitfield of the u8. Fixes: ad0f75e5f57c ("cgroup: fix cgroup_sk_alloc() for sk_clone_lock()") Reported-by: Guenter Roeck Signed-off-by: David S. Miller --- include/linux/cgroup-defs.h | 2 ++ 1 file changed, 2 insertions(+) diff --git a/include/linux/cgroup-defs.h b/include/linux/cgroup-defs.h index 4f1cd0edc57d..fee0b5547cd0 100644 --- a/include/linux/cgroup-defs.h +++ b/include/linux/cgroup-defs.h @@ -792,6 +792,7 @@ struct sock_cgroup_data { struct { u8 is_data : 1; u8 no_refcnt : 1; + u8 unused : 6; u8 padding; u16 prioidx; u32 classid; @@ -801,6 +802,7 @@ struct sock_cgroup_data { u32 classid; u16 prioidx; u8 padding; + u8 unused : 6; u8 no_refcnt : 1; u8 is_data : 1; } __packed; From 355a3587d4ca09f2b1014778a7c8908351a91468 Mon Sep 17 00:00:00 2001 From: Rikard Falkeborn Date: Wed, 8 Jul 2020 21:07:56 +0200 Subject: [PATCH 308/343] kbuild: Move -Wtype-limits to W=2 -Wtype-limits is included in -Wextra which is added at W=1. It warns (among other things) that 'comparison of an unsigned variable `< 0` is always false. This causes noisy warnings, especially when used in macros, hence it is more suitable for W=2. Link: https://lore.kernel.org/lkml/CAHk-=wiKCXEWKJ9dWUimGbrVRo_N2RosESUw8E7m9AEtyZcu=w@mail.gmail.com/ Signed-off-by: Rikard Falkeborn Suggested-by: Arnd Bergmann Acked-by: Andy Shevchenko Signed-off-by: Linus Torvalds --- scripts/Makefile.extrawarn | 2 ++ 1 file changed, 2 insertions(+) diff --git a/scripts/Makefile.extrawarn b/scripts/Makefile.extrawarn index 4aea7cf71d11..62c275685b75 100644 --- a/scripts/Makefile.extrawarn +++ b/scripts/Makefile.extrawarn @@ -35,6 +35,7 @@ KBUILD_CFLAGS += $(call cc-option, -Wstringop-truncation) # The following turn off the warnings enabled by -Wextra KBUILD_CFLAGS += -Wno-missing-field-initializers KBUILD_CFLAGS += -Wno-sign-compare +KBUILD_CFLAGS += -Wno-type-limits KBUILD_CPPFLAGS += -DKBUILD_EXTRA_WARN1 @@ -66,6 +67,7 @@ KBUILD_CFLAGS += -Wshadow KBUILD_CFLAGS += $(call cc-option, -Wlogical-op) KBUILD_CFLAGS += -Wmissing-field-initializers KBUILD_CFLAGS += -Wsign-compare +KBUILD_CFLAGS += -Wtype-limits KBUILD_CFLAGS += $(call cc-option, -Wmaybe-uninitialized) KBUILD_CFLAGS += $(call cc-option, -Wunused-macros) From d4e60453266b95b9dc19e0af2a819617e556bc4e Mon Sep 17 00:00:00 2001 From: Cesar Eduardo Barros Date: Thu, 9 Jul 2020 19:11:02 -0300 Subject: [PATCH 309/343] Restore gcc check in mips asm/unroll.h While raising the gcc version requirement to 4.9, the compile-time check in the unroll macro was accidentally changed from being used on gcc and clang to being used on clang only. Restore the gcc check, changing it from "gcc >= 4.7" to "all gcc". [ We should probably remove this all entirely: if we remove the check for CLANG, then the check for GCC can go away. Older versions of clang are not really appropriate or supported for kernel builds - Linus ] Fixes: 6ec4476ac825 ("Raise gcc version requirement to 4.9") Signed-off-by: Cesar Eduardo Barros Signed-off-by: Linus Torvalds --- arch/mips/include/asm/unroll.h | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/arch/mips/include/asm/unroll.h b/arch/mips/include/asm/unroll.h index 8ed660adc84f..49009319ac2c 100644 --- a/arch/mips/include/asm/unroll.h +++ b/arch/mips/include/asm/unroll.h @@ -25,7 +25,8 @@ * generate reasonable code for the switch statement, \ * so we skip the sanity check for those compilers. \ */ \ - BUILD_BUG_ON((CONFIG_CLANG_VERSION >= 80000) && \ + BUILD_BUG_ON((CONFIG_CC_IS_GCC || \ + CONFIG_CLANG_VERSION >= 80000) && \ !__builtin_constant_p(times)); \ \ switch (times) { \ From 47afbdd2fa4c5775c383ba376a3d1da7d7f694dc Mon Sep 17 00:00:00 2001 From: Eran Ben Elisha Date: Sun, 14 Jun 2020 17:31:26 +0300 Subject: [PATCH 310/343] net/mlx5: Fix eeprom support for SFP module Fix eeprom SFP query support by setting i2c_addr, offset and page number correctly. Unlike QSFP modules, SFP eeprom params are as follow: - i2c_addr is 0x50 for offset 0 - 255 and 0x51 for offset 256 - 511. - Page number is always zero. - Page offset is always relative to zero. As part of eeprom query, query the module ID (SFP / QSFP*) via helper function to set the params accordingly. In addition, change mlx5_qsfp_eeprom_page() input type to be u16 to avoid unnecessary casting. Fixes: a708fb7b1f8d ("net/mlx5e: ethtool, Add support for EEPROM high pages query") Signed-off-by: Eran Ben Elisha Signed-off-by: Huy Nguyen Signed-off-by: Saeed Mahameed --- .../net/ethernet/mellanox/mlx5/core/port.c | 93 +++++++++++++++---- 1 file changed, 77 insertions(+), 16 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/port.c b/drivers/net/ethernet/mellanox/mlx5/core/port.c index 9f829e68fc73..e4186e84b3ff 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/port.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/port.c @@ -293,7 +293,40 @@ static int mlx5_query_module_num(struct mlx5_core_dev *dev, int *module_num) return 0; } -static int mlx5_eeprom_page(int offset) +static int mlx5_query_module_id(struct mlx5_core_dev *dev, int module_num, + u8 *module_id) +{ + u32 in[MLX5_ST_SZ_DW(mcia_reg)] = {}; + u32 out[MLX5_ST_SZ_DW(mcia_reg)]; + int err, status; + u8 *ptr; + + MLX5_SET(mcia_reg, in, i2c_device_address, MLX5_I2C_ADDR_LOW); + MLX5_SET(mcia_reg, in, module, module_num); + MLX5_SET(mcia_reg, in, device_address, 0); + MLX5_SET(mcia_reg, in, page_number, 0); + MLX5_SET(mcia_reg, in, size, 1); + MLX5_SET(mcia_reg, in, l, 0); + + err = mlx5_core_access_reg(dev, in, sizeof(in), out, + sizeof(out), MLX5_REG_MCIA, 0, 0); + if (err) + return err; + + status = MLX5_GET(mcia_reg, out, status); + if (status) { + mlx5_core_err(dev, "query_mcia_reg failed: status: 0x%x\n", + status); + return -EIO; + } + ptr = MLX5_ADDR_OF(mcia_reg, out, dword_0); + + *module_id = ptr[0]; + + return 0; +} + +static int mlx5_qsfp_eeprom_page(u16 offset) { if (offset < MLX5_EEPROM_PAGE_LENGTH) /* Addresses between 0-255 - page 00 */ @@ -307,7 +340,7 @@ static int mlx5_eeprom_page(int offset) MLX5_EEPROM_HIGH_PAGE_LENGTH); } -static int mlx5_eeprom_high_page_offset(int page_num) +static int mlx5_qsfp_eeprom_high_page_offset(int page_num) { if (!page_num) /* Page 0 always start from low page */ return 0; @@ -316,35 +349,62 @@ static int mlx5_eeprom_high_page_offset(int page_num) return page_num * MLX5_EEPROM_HIGH_PAGE_LENGTH; } +static void mlx5_qsfp_eeprom_params_set(u16 *i2c_addr, int *page_num, u16 *offset) +{ + *i2c_addr = MLX5_I2C_ADDR_LOW; + *page_num = mlx5_qsfp_eeprom_page(*offset); + *offset -= mlx5_qsfp_eeprom_high_page_offset(*page_num); +} + +static void mlx5_sfp_eeprom_params_set(u16 *i2c_addr, int *page_num, u16 *offset) +{ + *i2c_addr = MLX5_I2C_ADDR_LOW; + *page_num = 0; + + if (*offset < MLX5_EEPROM_PAGE_LENGTH) + return; + + *i2c_addr = MLX5_I2C_ADDR_HIGH; + *offset -= MLX5_EEPROM_PAGE_LENGTH; +} + int mlx5_query_module_eeprom(struct mlx5_core_dev *dev, u16 offset, u16 size, u8 *data) { - int module_num, page_num, status, err; + int module_num, status, err, page_num = 0; + u32 in[MLX5_ST_SZ_DW(mcia_reg)] = {}; u32 out[MLX5_ST_SZ_DW(mcia_reg)]; - u32 in[MLX5_ST_SZ_DW(mcia_reg)]; - u16 i2c_addr; - void *ptr = MLX5_ADDR_OF(mcia_reg, out, dword_0); + u16 i2c_addr = 0; + u8 module_id; + void *ptr; err = mlx5_query_module_num(dev, &module_num); if (err) return err; - memset(in, 0, sizeof(in)); - size = min_t(int, size, MLX5_EEPROM_MAX_BYTES); + err = mlx5_query_module_id(dev, module_num, &module_id); + if (err) + return err; - /* Get the page number related to the given offset */ - page_num = mlx5_eeprom_page(offset); - - /* Set the right offset according to the page number, - * For page_num > 0, relative offset is always >= 128 (high page). - */ - offset -= mlx5_eeprom_high_page_offset(page_num); + switch (module_id) { + case MLX5_MODULE_ID_SFP: + mlx5_sfp_eeprom_params_set(&i2c_addr, &page_num, &offset); + break; + case MLX5_MODULE_ID_QSFP: + case MLX5_MODULE_ID_QSFP_PLUS: + case MLX5_MODULE_ID_QSFP28: + mlx5_qsfp_eeprom_params_set(&i2c_addr, &page_num, &offset); + break; + default: + mlx5_core_err(dev, "Module ID not recognized: 0x%x\n", module_id); + return -EINVAL; + } if (offset + size > MLX5_EEPROM_PAGE_LENGTH) /* Cross pages read, read until offset 256 in low page */ size -= offset + size - MLX5_EEPROM_PAGE_LENGTH; - i2c_addr = MLX5_I2C_ADDR_LOW; + size = min_t(int, size, MLX5_EEPROM_MAX_BYTES); MLX5_SET(mcia_reg, in, l, 0); MLX5_SET(mcia_reg, in, module, module_num); @@ -365,6 +425,7 @@ int mlx5_query_module_eeprom(struct mlx5_core_dev *dev, return -EIO; } + ptr = MLX5_ADDR_OF(mcia_reg, out, dword_0); memcpy(data, ptr, size); return size; From 01f3d5db4a9add67f53630a7f71664538b3b9783 Mon Sep 17 00:00:00 2001 From: Vu Pham Date: Wed, 17 Jun 2020 15:11:24 -0700 Subject: [PATCH 311/343] net/mlx5: E-Switch, Fix vlan or qos setting in legacy mode Refactoring eswitch ingress acl codes accidentally inserts extra memset zero that removes vlan and/or qos setting in legacy mode. Fixes: 07bab9502641 ("net/mlx5: E-Switch, Refactor eswitch ingress acl codes") Signed-off-by: Vu Pham Signed-off-by: Saeed Mahameed --- drivers/net/ethernet/mellanox/mlx5/core/esw/acl/ingress_lgcy.c | 1 - 1 file changed, 1 deletion(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/esw/acl/ingress_lgcy.c b/drivers/net/ethernet/mellanox/mlx5/core/esw/acl/ingress_lgcy.c index 5dc335e621c5..b68976b378b8 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/esw/acl/ingress_lgcy.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/esw/acl/ingress_lgcy.c @@ -217,7 +217,6 @@ int esw_acl_ingress_lgcy_setup(struct mlx5_eswitch *esw, } /* Create ingress allow rule */ - memset(spec, 0, sizeof(*spec)); spec->match_criteria_enable = MLX5_MATCH_OUTER_HEADERS; flow_act.action = MLX5_FLOW_CONTEXT_ACTION_ALLOW; vport->ingress.allow_rule = mlx5_add_flow_rules(vport->ingress.acl, spec, From 2fb15e72c0d7fc5fb05aefd3a7f0d70cf39d3ad4 Mon Sep 17 00:00:00 2001 From: Vlad Buslov Date: Wed, 17 Jun 2020 17:26:33 +0300 Subject: [PATCH 312/343] net/mxl5e: Verify that rpriv is not NULL In helper function is_flow_rule_duplicate_allowed() verify that rpviv pointer is not NULL before dereferencing it. This can happen when device is in NIC mode and leads to following crash: [90444.046419] BUG: kernel NULL pointer dereference, address: 0000000000000000 [90444.048149] #PF: supervisor read access in kernel mode [90444.049781] #PF: error_code(0x0000) - not-present page [90444.051386] PGD 80000003d35a4067 P4D 80000003d35a4067 PUD 3d35a3067 PMD 0 [90444.053051] Oops: 0000 [#1] SMP PTI [90444.054683] CPU: 16 PID: 31736 Comm: tc Not tainted 5.8.0-rc1+ #1157 [90444.056340] Hardware name: Supermicro SYS-2028TP-DECR/X10DRT-P, BIOS 2.0b 03/30/2017 [90444.058079] RIP: 0010:mlx5e_configure_flower+0x3aa/0x9b0 [mlx5_core] [90444.059753] Code: 24 50 49 8b 95 08 02 00 00 48 b8 00 08 00 00 04 00 00 00 48 21 c2 48 39 c2 74 0a 41 f6 85 0d 02 00 00 20 74 16 48 8b 44 24 20 <48> 8b 00 66 83 78 20 ff 74 07 4d 89 aa e0 00 00 00 48 83 7d 28 00 [90444.063232] RSP: 0018:ffffabe9c61ff768 EFLAGS: 00010246 [90444.065014] RAX: 0000000000000000 RBX: ffff9b13c4c91e80 RCX: 00000000000093fa [90444.066784] RDX: 0000000400000800 RSI: 0000000000000000 RDI: 000000000002d5e0 [90444.068533] RBP: ffff9b174d308468 R08: 0000000000000000 R09: ffff9b17d63003f0 [90444.070285] R10: ffff9b17ea288600 R11: 0000000000000000 R12: ffffabe9c61ff878 [90444.072032] R13: ffff9b174d300000 R14: ffffabe9c61ffbb8 R15: ffff9b174d300880 [90444.073760] FS: 00007f3c23775480(0000) GS:ffff9b13efc80000(0000) knlGS:0000000000000000 [90444.075492] CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 [90444.077266] CR2: 0000000000000000 CR3: 00000003e2a60002 CR4: 00000000001606e0 [90444.079024] Call Trace: [90444.080753] tc_setup_cb_add+0xca/0x1e0 [90444.082415] fl_hw_replace_filter+0x15f/0x1f0 [cls_flower] [90444.084119] fl_change+0xa59/0x13dc [cls_flower] [90444.085772] ? wait_for_completion+0xa8/0xf0 [90444.087364] tc_new_tfilter+0x3f5/0xa60 [90444.088960] rtnetlink_rcv_msg+0xeb/0x360 [90444.090514] ? __d_lookup_done+0x76/0xe0 [90444.092034] ? proc_alloc_inode+0x16/0x70 [90444.093560] ? prep_new_page+0x8c/0xf0 [90444.095048] ? _cond_resched+0x15/0x30 [90444.096483] ? rtnl_calcit.isra.0+0x110/0x110 [90444.097907] netlink_rcv_skb+0x49/0x110 [90444.099289] netlink_unicast+0x191/0x230 [90444.100629] netlink_sendmsg+0x243/0x480 [90444.101984] sock_sendmsg+0x5e/0x60 [90444.103305] ____sys_sendmsg+0x1f3/0x260 [90444.104597] ? copy_msghdr_from_user+0x5c/0x90 [90444.105916] ? __mod_lruvec_state+0x3c/0xe0 [90444.107210] ___sys_sendmsg+0x81/0xc0 [90444.108484] ? do_filp_open+0xa5/0x100 [90444.109732] ? handle_mm_fault+0x117b/0x1e00 [90444.110970] ? __check_object_size+0x46/0x147 [90444.112205] ? __check_object_size+0x136/0x147 [90444.113402] __sys_sendmsg+0x59/0xa0 [90444.114587] do_syscall_64+0x4d/0x90 [90444.115782] entry_SYSCALL_64_after_hwframe+0x44/0xa9 [90444.116953] RIP: 0033:0x7f3c2393b7b8 [90444.118101] Code: Bad RIP value. [90444.119240] RSP: 002b:00007ffc6ad8e6c8 EFLAGS: 00000246 ORIG_RAX: 000000000000002e [90444.120408] RAX: ffffffffffffffda RBX: 0000000000000000 RCX: 00007f3c2393b7b8 [90444.121583] RDX: 0000000000000000 RSI: 00007ffc6ad8e740 RDI: 0000000000000003 [90444.122750] RBP: 000000005eea0c3a R08: 0000000000000001 R09: 00007ffc6ad8e68c [90444.123928] R10: 0000000000404fa8 R11: 0000000000000246 R12: 0000000000000001 [90444.125073] R13: 0000000000000000 R14: 00007ffc6ad92a00 R15: 00000000004866a0 [90444.126221] Modules linked in: act_skbedit act_tunnel_key act_mirred bonding vxlan ip6_udp_tunnel udp_tunnel nfnetlink act_gact cls_flower sch_ingress openvswitch nsh nf_conncount nfsv3 nfs_acl nfs lockd grace fscache tun bridge stp llc sunrpc rdma_ucm rdma_cm iw_cm ib_cm mlx5_ib ib_uverbs ib_core mlx5_core intel_r apl_msr intel_rapl_common sb_edac x86_pkg_temp_thermal intel_powerclamp coretemp kvm_intel mlxfw kvm act_ct nf_flow_table nf_nat nf_conntrack irqbypass crct10dif_pclmul nf_defrag_ipv6 igb ipmi_ssif libcrc32c crc32_pclmul crc32c_intel ipmi_si nf_defrag_ipv4 ptp ghash_clmulni_intel mei_me ses iTCO_wdt i2c_i801 pps_core ioatdma iTCO_vendor_support joydev mei enclosure intel_cstate i2c_smbus wmi dca ipmi_devintf intel_uncore lpc_ich ipmi_msghandler pcspkr acpi_pad acpi_power_meter ast i2c_algo_bit drm_vram_helper drm_kms_helper drm_ttm_helper ttm drm mpt3sas raid_class scsi_transport_sas [90444.136253] CR2: 0000000000000000 [90444.137621] ---[ end trace 924af62aa2b151bd ]--- Fixes: 553f9328385d ("net/mlx5e: Support tc block sharing for representors") Reported-by: David Ahern Signed-off-by: Vlad Buslov Reviewed-by: Roi Dayan Signed-off-by: Saeed Mahameed --- drivers/net/ethernet/mellanox/mlx5/core/en_tc.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c b/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c index 7fc84f58e28a..75f169aef1cf 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c @@ -4670,9 +4670,10 @@ static bool is_flow_rule_duplicate_allowed(struct net_device *dev, struct mlx5e_rep_priv *rpriv) { /* Offloaded flow rule is allowed to duplicate on non-uplink representor - * sharing tc block with other slaves of a lag device. + * sharing tc block with other slaves of a lag device. Rpriv can be NULL if this + * function is called from NIC mode. */ - return netif_is_lag_port(dev) && rpriv->rep->vport != MLX5_VPORT_UPLINK; + return netif_is_lag_port(dev) && rpriv && rpriv->rep->vport != MLX5_VPORT_UPLINK; } int mlx5e_configure_flower(struct net_device *dev, struct mlx5e_priv *priv, From c1aea9e1765a047c7397da30f52315c43e3501fb Mon Sep 17 00:00:00 2001 From: Vlad Buslov Date: Wed, 17 Jun 2020 17:51:53 +0300 Subject: [PATCH 313/343] net/mlx5e: Fix usage of rcu-protected pointer In mlx5e_configure_flower() flow pointer is protected by rcu read lock. However, after cited commit the pointer is being used outside of rcu read block. Extend the block to protect all pointer accesses. Fixes: 553f9328385d ("net/mlx5e: Support tc block sharing for representors") Signed-off-by: Vlad Buslov Reviewed-by: Roi Dayan Signed-off-by: Saeed Mahameed --- drivers/net/ethernet/mellanox/mlx5/core/en_tc.c | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c b/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c index 75f169aef1cf..cc8412151ca0 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c @@ -4687,13 +4687,12 @@ int mlx5e_configure_flower(struct net_device *dev, struct mlx5e_priv *priv, rcu_read_lock(); flow = rhashtable_lookup(tc_ht, &f->cookie, tc_ht_params); - rcu_read_unlock(); if (flow) { /* Same flow rule offloaded to non-uplink representor sharing tc block, * just return 0. */ if (is_flow_rule_duplicate_allowed(dev, rpriv) && flow->orig_dev != dev) - goto out; + goto rcu_unlock; NL_SET_ERR_MSG_MOD(extack, "flow cookie already exists, ignoring"); @@ -4701,8 +4700,12 @@ int mlx5e_configure_flower(struct net_device *dev, struct mlx5e_priv *priv, "flow cookie %lx already exists, ignoring\n", f->cookie); err = -EEXIST; - goto out; + goto rcu_unlock; } +rcu_unlock: + rcu_read_unlock(); + if (flow) + goto out; trace_mlx5e_configure_flower(f); err = mlx5e_tc_add_flow(priv, f, flags, dev, &flow); From b3c2ed21c0bdf35ba498a9974aa587f99a03b658 Mon Sep 17 00:00:00 2001 From: Aya Levin Date: Wed, 24 Jun 2020 19:04:03 +0300 Subject: [PATCH 314/343] net/mlx5e: Fix VXLAN configuration restore after function reload When detaching netdev, remove vxlan port configuration using udp_tunnel_drop_rx_info. During function reload, configuration will be restored using udp_tunnel_get_rx_info. This ensures sync between firmware and driver. Use udp_tunnel_get_rx_info even if its physical interface is down. Fixes: 4383cfcc65e7 ("net/mlx5: Add devlink reload") Signed-off-by: Aya Levin Reviewed-by: Eran Ben Elisha Signed-off-by: Saeed Mahameed --- drivers/net/ethernet/mellanox/mlx5/core/en_main.c | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c index a836a02a2116..888e38b21c3d 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c @@ -3104,9 +3104,6 @@ int mlx5e_open(struct net_device *netdev) mlx5_set_port_admin_status(priv->mdev, MLX5_PORT_UP); mutex_unlock(&priv->state_lock); - if (mlx5_vxlan_allowed(priv->mdev->vxlan)) - udp_tunnel_get_rx_info(netdev); - return err; } @@ -5202,6 +5199,8 @@ static void mlx5e_nic_enable(struct mlx5e_priv *priv) rtnl_lock(); if (netif_running(netdev)) mlx5e_open(netdev); + if (mlx5_vxlan_allowed(priv->mdev->vxlan)) + udp_tunnel_get_rx_info(netdev); netif_device_attach(netdev); rtnl_unlock(); } @@ -5216,6 +5215,8 @@ static void mlx5e_nic_disable(struct mlx5e_priv *priv) rtnl_lock(); if (netif_running(priv->netdev)) mlx5e_close(priv->netdev); + if (mlx5_vxlan_allowed(priv->mdev->vxlan)) + udp_tunnel_drop_rx_info(priv->netdev); netif_device_detach(priv->netdev); rtnl_unlock(); From f4aebbfb56ed0c186adbeb2799df836da50f78e3 Mon Sep 17 00:00:00 2001 From: Aya Levin Date: Wed, 20 May 2020 10:37:42 +0300 Subject: [PATCH 315/343] net/mlx5e: Fix CPU mapping after function reload to avoid aRFS RX crash After function reload, CPU mapping used by aRFS RX is broken, leading to a kernel panic. Fix by moving initialization of rx_cpu_rmap from netdev_init to netdev_attach. IRQ table is re-allocated on mlx5_load, but netdev is not re-initialize. Trace of the panic: [ 22.055672] general protection fault, probably for non-canonical address 0x785634120000ff1c: 0000 [#1] SMP PTI [ 22.065010] CPU: 4 PID: 0 Comm: swapper/4 Not tainted 5.7.0-rc2-for-upstream-perf-2020-04-21_16-34-03-31 #1 [ 22.067967] Hardware name: QEMU Standard PC (Q35 + ICH9, 2009), BIOS rel-1.12.1-0-ga5cab58e9a3f-prebuilt.qemu.org 04/01/2014 [ 22.071174] RIP: 0010:get_rps_cpu+0x267/0x300 [ 22.075692] RSP: 0018:ffffc90000244d60 EFLAGS: 00010202 [ 22.076888] RAX: ffff888459b0e400 RBX: 0000000000000000 RCX:0000000000000007 [ 22.078364] RDX: 0000000000008884 RSI: ffff888467cb5b00 RDI:0000000000000000 [ 22.079815] RBP: 00000000ff342b27 R08: 0000000000000007 R09:0000000000000003 [ 22.081289] R10: ffffffffffffffff R11: 00000000000070cc R12:ffff888454900000 [ 22.082767] R13: ffffc90000e5a950 R14: ffffc90000244dc0 R15:0000000000000007 [ 22.084190] FS: 0000000000000000(0000) GS:ffff88846fc80000(0000)knlGS:0000000000000000 [ 22.086161] CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 [ 22.087427] CR2: ffffffffffffffff CR3: 0000000464426003 CR4:0000000000760ee0 [ 22.088888] DR0: 0000000000000000 DR1: 0000000000000000 DR2:0000000000000000 [ 22.090336] DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7:0000000000000400 [ 22.091764] PKRU: 55555554 [ 22.092618] Call Trace: [ 22.093442] [ 22.094211] ? kvm_clock_get_cycles+0xd/0x10 [ 22.095272] netif_receive_skb_list_internal+0x258/0x2a0 [ 22.096460] gro_normal_list.part.137+0x19/0x40 [ 22.097547] napi_complete_done+0xc6/0x110 [ 22.098685] mlx5e_napi_poll+0x190/0x670 [mlx5_core] [ 22.099859] net_rx_action+0x2a0/0x400 [ 22.100848] __do_softirq+0xd8/0x2a8 [ 22.101829] irq_exit+0xa5/0xb0 [ 22.102750] do_IRQ+0x52/0xd0 [ 22.103654] common_interrupt+0xf/0xf [ 22.104641] Fixes: 4383cfcc65e7 ("net/mlx5: Add devlink reload") Signed-off-by: Aya Levin Reviewed-by: Eran Ben Elisha Signed-off-by: Saeed Mahameed --- drivers/net/ethernet/mellanox/mlx5/core/en_main.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c index 888e38b21c3d..081f15074cac 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c @@ -5118,6 +5118,10 @@ static int mlx5e_init_nic_rx(struct mlx5e_priv *priv) if (err) goto err_destroy_flow_steering; +#ifdef CONFIG_MLX5_EN_ARFS + priv->netdev->rx_cpu_rmap = mlx5_eq_table_get_rmap(priv->mdev); +#endif + return 0; err_destroy_flow_steering: @@ -5289,10 +5293,6 @@ int mlx5e_netdev_init(struct net_device *netdev, /* netdev init */ netif_carrier_off(netdev); -#ifdef CONFIG_MLX5_EN_ARFS - netdev->rx_cpu_rmap = mlx5_eq_table_get_rmap(mdev); -#endif - return 0; err_free_cpumask: From 6a1cf4e443a3b0a4d690d3c93b84b1e9cbfcb1bd Mon Sep 17 00:00:00 2001 From: Aya Levin Date: Mon, 15 Jun 2020 12:48:47 +0300 Subject: [PATCH 316/343] net/mlx5e: Fix 50G per lane indication Some released FW versions mistakenly don't set the capability that 50G per lane link-modes are supported for VFs (ptys_extended_ethernet capability bit). When the capability is unset, read PTYS.ext_eth_proto_capability (always reliable). If PTYS.ext_eth_proto_capability is valid (has a non-zero value) conclude that the HCA supports 50G per lane. Otherwise, conclude that the HCA doesn't support 50G per lane. Fixes: a08b4ed1373d ("net/mlx5: Add support to ext_* fields introduced in Port Type and Speed register") Signed-off-by: Aya Levin Reviewed-by: Eran Ben Elisha Signed-off-by: Saeed Mahameed --- .../net/ethernet/mellanox/mlx5/core/en/port.c | 21 ++++++++++++++++--- .../net/ethernet/mellanox/mlx5/core/en/port.h | 2 +- .../ethernet/mellanox/mlx5/core/en_ethtool.c | 8 +++---- 3 files changed, 23 insertions(+), 8 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/port.c b/drivers/net/ethernet/mellanox/mlx5/core/en/port.c index 2a8950b3056f..3cf3e35053f7 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en/port.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/port.c @@ -78,11 +78,26 @@ static const u32 mlx5e_ext_link_speed[MLX5E_EXT_LINK_MODES_NUMBER] = { [MLX5E_400GAUI_8] = 400000, }; +bool mlx5e_ptys_ext_supported(struct mlx5_core_dev *mdev) +{ + struct mlx5e_port_eth_proto eproto; + int err; + + if (MLX5_CAP_PCAM_FEATURE(mdev, ptys_extended_ethernet)) + return true; + + err = mlx5_port_query_eth_proto(mdev, 1, true, &eproto); + if (err) + return false; + + return !!eproto.cap; +} + static void mlx5e_port_get_speed_arr(struct mlx5_core_dev *mdev, const u32 **arr, u32 *size, bool force_legacy) { - bool ext = force_legacy ? false : MLX5_CAP_PCAM_FEATURE(mdev, ptys_extended_ethernet); + bool ext = force_legacy ? false : mlx5e_ptys_ext_supported(mdev); *size = ext ? ARRAY_SIZE(mlx5e_ext_link_speed) : ARRAY_SIZE(mlx5e_link_speed); @@ -177,7 +192,7 @@ int mlx5e_port_linkspeed(struct mlx5_core_dev *mdev, u32 *speed) bool ext; int err; - ext = MLX5_CAP_PCAM_FEATURE(mdev, ptys_extended_ethernet); + ext = mlx5e_ptys_ext_supported(mdev); err = mlx5_port_query_eth_proto(mdev, 1, ext, &eproto); if (err) goto out; @@ -205,7 +220,7 @@ int mlx5e_port_max_linkspeed(struct mlx5_core_dev *mdev, u32 *speed) int err; int i; - ext = MLX5_CAP_PCAM_FEATURE(mdev, ptys_extended_ethernet); + ext = mlx5e_ptys_ext_supported(mdev); err = mlx5_port_query_eth_proto(mdev, 1, ext, &eproto); if (err) return err; diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/port.h b/drivers/net/ethernet/mellanox/mlx5/core/en/port.h index a2ddd446dd59..7a7defe60792 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en/port.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/port.h @@ -54,7 +54,7 @@ int mlx5e_port_linkspeed(struct mlx5_core_dev *mdev, u32 *speed); int mlx5e_port_max_linkspeed(struct mlx5_core_dev *mdev, u32 *speed); u32 mlx5e_port_speed2linkmodes(struct mlx5_core_dev *mdev, u32 speed, bool force_legacy); - +bool mlx5e_ptys_ext_supported(struct mlx5_core_dev *mdev); int mlx5e_port_query_pbmc(struct mlx5_core_dev *mdev, void *out); int mlx5e_port_set_pbmc(struct mlx5_core_dev *mdev, void *in); int mlx5e_port_query_priority2buffer(struct mlx5_core_dev *mdev, u8 *buffer); diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_ethtool.c b/drivers/net/ethernet/mellanox/mlx5/core/en_ethtool.c index ec5658bbe3c5..c2464c349117 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_ethtool.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_ethtool.c @@ -200,7 +200,7 @@ static void mlx5e_ethtool_get_speed_arr(struct mlx5_core_dev *mdev, struct ptys2ethtool_config **arr, u32 *size) { - bool ext = MLX5_CAP_PCAM_FEATURE(mdev, ptys_extended_ethernet); + bool ext = mlx5e_ptys_ext_supported(mdev); *arr = ext ? ptys2ext_ethtool_table : ptys2legacy_ethtool_table; *size = ext ? ARRAY_SIZE(ptys2ext_ethtool_table) : @@ -883,7 +883,7 @@ static void get_lp_advertising(struct mlx5_core_dev *mdev, u32 eth_proto_lp, struct ethtool_link_ksettings *link_ksettings) { unsigned long *lp_advertising = link_ksettings->link_modes.lp_advertising; - bool ext = MLX5_CAP_PCAM_FEATURE(mdev, ptys_extended_ethernet); + bool ext = mlx5e_ptys_ext_supported(mdev); ptys2ethtool_adver_link(lp_advertising, eth_proto_lp, ext); } @@ -913,7 +913,7 @@ int mlx5e_ethtool_get_link_ksettings(struct mlx5e_priv *priv, __func__, err); goto err_query_regs; } - ext = MLX5_CAP_PCAM_FEATURE(mdev, ptys_extended_ethernet); + ext = !!MLX5_GET_ETH_PROTO(ptys_reg, out, true, eth_proto_capability); eth_proto_cap = MLX5_GET_ETH_PROTO(ptys_reg, out, ext, eth_proto_capability); eth_proto_admin = MLX5_GET_ETH_PROTO(ptys_reg, out, ext, @@ -1066,7 +1066,7 @@ int mlx5e_ethtool_set_link_ksettings(struct mlx5e_priv *priv, autoneg = link_ksettings->base.autoneg; speed = link_ksettings->base.speed; - ext_supported = MLX5_CAP_PCAM_FEATURE(mdev, ptys_extended_ethernet); + ext_supported = mlx5e_ptys_ext_supported(mdev); ext = ext_requested(autoneg, adver, ext_supported); if (!ext_supported && ext) return -EOPNOTSUPP; From 88b3d5c90e9685be54dd5bc441970044020eca76 Mon Sep 17 00:00:00 2001 From: Eran Ben Elisha Date: Mon, 22 Jun 2020 09:03:31 +0300 Subject: [PATCH 317/343] net/mlx5e: Fix port buffers cell size value Device unit for port buffers size, xoff_threshold and xon_threshold is cells. Fix a bug in driver where cell unit size was hard-coded to 128 bytes. This hard-coded value is buggy, as it is wrong for some hardware versions. Driver to read cell size from SBCAM register and translate bytes to cell units accordingly. In order to fix the bug, this patch exposes SBCAM (Shared buffer capabilities mask) layout and defines. If SBCAM.cap_cell_size is valid, use it for all bytes to cells calculations. If not valid, fallback to 128. Cell size do not change on the fly per device. Instead of issuing SBCAM access reg command every time such translation is needed, cache it in mlx5e_dcbx as part of mlx5e_dcbnl_initialize(). Pass dcbx.port_buff_cell_sz as a param to every function that needs bytes to cells translation. While fixing the bug, move MLX5E_BUFFER_CELL_SHIFT macro to en_dcbnl.c, as it is only used by that file. Fixes: 0696d60853d5 ("net/mlx5e: Receive buffer configuration") Signed-off-by: Eran Ben Elisha Reviewed-by: Huy Nguyen Signed-off-by: Saeed Mahameed --- .../ethernet/mellanox/mlx5/core/en/dcbnl.h | 1 + .../mellanox/mlx5/core/en/port_buffer.c | 51 ++++++++++--------- .../mellanox/mlx5/core/en/port_buffer.h | 1 - .../ethernet/mellanox/mlx5/core/en_dcbnl.c | 19 +++++++ include/linux/mlx5/driver.h | 1 + include/linux/mlx5/mlx5_ifc.h | 28 ++++++++++ 6 files changed, 77 insertions(+), 24 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/dcbnl.h b/drivers/net/ethernet/mellanox/mlx5/core/en/dcbnl.h index 7be6b2d36b60..9976de8b9047 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en/dcbnl.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/dcbnl.h @@ -29,6 +29,7 @@ struct mlx5e_dcbx { bool manual_buffer; u32 cable_len; u32 xoff; + u16 port_buff_cell_sz; }; #define MLX5E_MAX_DSCP (64) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/port_buffer.c b/drivers/net/ethernet/mellanox/mlx5/core/en/port_buffer.c index ae99fac08b53..673f1c82d381 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en/port_buffer.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/port_buffer.c @@ -34,6 +34,7 @@ int mlx5e_port_query_buffer(struct mlx5e_priv *priv, struct mlx5e_port_buffer *port_buffer) { + u16 port_buff_cell_sz = priv->dcbx.port_buff_cell_sz; struct mlx5_core_dev *mdev = priv->mdev; int sz = MLX5_ST_SZ_BYTES(pbmc_reg); u32 total_used = 0; @@ -57,11 +58,11 @@ int mlx5e_port_query_buffer(struct mlx5e_priv *priv, port_buffer->buffer[i].epsb = MLX5_GET(bufferx_reg, buffer, epsb); port_buffer->buffer[i].size = - MLX5_GET(bufferx_reg, buffer, size) << MLX5E_BUFFER_CELL_SHIFT; + MLX5_GET(bufferx_reg, buffer, size) * port_buff_cell_sz; port_buffer->buffer[i].xon = - MLX5_GET(bufferx_reg, buffer, xon_threshold) << MLX5E_BUFFER_CELL_SHIFT; + MLX5_GET(bufferx_reg, buffer, xon_threshold) * port_buff_cell_sz; port_buffer->buffer[i].xoff = - MLX5_GET(bufferx_reg, buffer, xoff_threshold) << MLX5E_BUFFER_CELL_SHIFT; + MLX5_GET(bufferx_reg, buffer, xoff_threshold) * port_buff_cell_sz; total_used += port_buffer->buffer[i].size; mlx5e_dbg(HW, priv, "buffer %d: size=%d, xon=%d, xoff=%d, epsb=%d, lossy=%d\n", i, @@ -73,7 +74,7 @@ int mlx5e_port_query_buffer(struct mlx5e_priv *priv, } port_buffer->port_buffer_size = - MLX5_GET(pbmc_reg, out, port_buffer_size) << MLX5E_BUFFER_CELL_SHIFT; + MLX5_GET(pbmc_reg, out, port_buffer_size) * port_buff_cell_sz; port_buffer->spare_buffer_size = port_buffer->port_buffer_size - total_used; @@ -88,9 +89,9 @@ int mlx5e_port_query_buffer(struct mlx5e_priv *priv, static int port_set_buffer(struct mlx5e_priv *priv, struct mlx5e_port_buffer *port_buffer) { + u16 port_buff_cell_sz = priv->dcbx.port_buff_cell_sz; struct mlx5_core_dev *mdev = priv->mdev; int sz = MLX5_ST_SZ_BYTES(pbmc_reg); - void *buffer; void *in; int err; int i; @@ -104,16 +105,18 @@ static int port_set_buffer(struct mlx5e_priv *priv, goto out; for (i = 0; i < MLX5E_MAX_BUFFER; i++) { - buffer = MLX5_ADDR_OF(pbmc_reg, in, buffer[i]); + void *buffer = MLX5_ADDR_OF(pbmc_reg, in, buffer[i]); + u64 size = port_buffer->buffer[i].size; + u64 xoff = port_buffer->buffer[i].xoff; + u64 xon = port_buffer->buffer[i].xon; - MLX5_SET(bufferx_reg, buffer, size, - port_buffer->buffer[i].size >> MLX5E_BUFFER_CELL_SHIFT); - MLX5_SET(bufferx_reg, buffer, lossy, - port_buffer->buffer[i].lossy); - MLX5_SET(bufferx_reg, buffer, xoff_threshold, - port_buffer->buffer[i].xoff >> MLX5E_BUFFER_CELL_SHIFT); - MLX5_SET(bufferx_reg, buffer, xon_threshold, - port_buffer->buffer[i].xon >> MLX5E_BUFFER_CELL_SHIFT); + do_div(size, port_buff_cell_sz); + do_div(xoff, port_buff_cell_sz); + do_div(xon, port_buff_cell_sz); + MLX5_SET(bufferx_reg, buffer, size, size); + MLX5_SET(bufferx_reg, buffer, lossy, port_buffer->buffer[i].lossy); + MLX5_SET(bufferx_reg, buffer, xoff_threshold, xoff); + MLX5_SET(bufferx_reg, buffer, xon_threshold, xon); } err = mlx5e_port_set_pbmc(mdev, in); @@ -143,7 +146,7 @@ static u32 calculate_xoff(struct mlx5e_priv *priv, unsigned int mtu) } static int update_xoff_threshold(struct mlx5e_port_buffer *port_buffer, - u32 xoff, unsigned int max_mtu) + u32 xoff, unsigned int max_mtu, u16 port_buff_cell_sz) { int i; @@ -155,7 +158,7 @@ static int update_xoff_threshold(struct mlx5e_port_buffer *port_buffer, } if (port_buffer->buffer[i].size < - (xoff + max_mtu + (1 << MLX5E_BUFFER_CELL_SHIFT))) { + (xoff + max_mtu + port_buff_cell_sz)) { pr_err("buffer_size[%d]=%d is not enough for lossless buffer\n", i, port_buffer->buffer[i].size); return -ENOMEM; @@ -175,6 +178,7 @@ static int update_xoff_threshold(struct mlx5e_port_buffer *port_buffer, * @pfc_en: current pfc configuration * @buffer: current prio to buffer mapping * @xoff: xoff value + * @port_buff_cell_sz: port buffer cell_size * @port_buffer: port receive buffer configuration * @change: * @@ -189,7 +193,7 @@ static int update_xoff_threshold(struct mlx5e_port_buffer *port_buffer, * sets change to true if buffer configuration was modified. */ static int update_buffer_lossy(unsigned int max_mtu, - u8 pfc_en, u8 *buffer, u32 xoff, + u8 pfc_en, u8 *buffer, u32 xoff, u16 port_buff_cell_sz, struct mlx5e_port_buffer *port_buffer, bool *change) { @@ -225,7 +229,7 @@ static int update_buffer_lossy(unsigned int max_mtu, } if (changed) { - err = update_xoff_threshold(port_buffer, xoff, max_mtu); + err = update_xoff_threshold(port_buffer, xoff, max_mtu, port_buff_cell_sz); if (err) return err; @@ -262,6 +266,7 @@ int mlx5e_port_manual_buffer_config(struct mlx5e_priv *priv, u32 *buffer_size, u8 *prio2buffer) { + u16 port_buff_cell_sz = priv->dcbx.port_buff_cell_sz; struct mlx5e_port_buffer port_buffer; u32 xoff = calculate_xoff(priv, mtu); bool update_prio2buffer = false; @@ -282,7 +287,7 @@ int mlx5e_port_manual_buffer_config(struct mlx5e_priv *priv, if (change & MLX5E_PORT_BUFFER_CABLE_LEN) { update_buffer = true; - err = update_xoff_threshold(&port_buffer, xoff, max_mtu); + err = update_xoff_threshold(&port_buffer, xoff, max_mtu, port_buff_cell_sz); if (err) return err; } @@ -292,7 +297,7 @@ int mlx5e_port_manual_buffer_config(struct mlx5e_priv *priv, if (err) return err; - err = update_buffer_lossy(max_mtu, pfc->pfc_en, buffer, xoff, + err = update_buffer_lossy(max_mtu, pfc->pfc_en, buffer, xoff, port_buff_cell_sz, &port_buffer, &update_buffer); if (err) return err; @@ -304,7 +309,7 @@ int mlx5e_port_manual_buffer_config(struct mlx5e_priv *priv, if (err) return err; - err = update_buffer_lossy(max_mtu, curr_pfc_en, prio2buffer, + err = update_buffer_lossy(max_mtu, curr_pfc_en, prio2buffer, port_buff_cell_sz, xoff, &port_buffer, &update_buffer); if (err) return err; @@ -329,7 +334,7 @@ int mlx5e_port_manual_buffer_config(struct mlx5e_priv *priv, return -EINVAL; update_buffer = true; - err = update_xoff_threshold(&port_buffer, xoff, max_mtu); + err = update_xoff_threshold(&port_buffer, xoff, max_mtu, port_buff_cell_sz); if (err) return err; } @@ -337,7 +342,7 @@ int mlx5e_port_manual_buffer_config(struct mlx5e_priv *priv, /* Need to update buffer configuration if xoff value is changed */ if (!update_buffer && xoff != priv->dcbx.xoff) { update_buffer = true; - err = update_xoff_threshold(&port_buffer, xoff, max_mtu); + err = update_xoff_threshold(&port_buffer, xoff, max_mtu, port_buff_cell_sz); if (err) return err; } diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/port_buffer.h b/drivers/net/ethernet/mellanox/mlx5/core/en/port_buffer.h index 34f55b81a0de..80af7a5ac604 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en/port_buffer.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/port_buffer.h @@ -36,7 +36,6 @@ #include "port.h" #define MLX5E_MAX_BUFFER 8 -#define MLX5E_BUFFER_CELL_SHIFT 7 #define MLX5E_DEFAULT_CABLE_LEN 7 /* 7 meters */ #define MLX5_BUFFER_SUPPORTED(mdev) (MLX5_CAP_GEN(mdev, pcam_reg) && \ diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_dcbnl.c b/drivers/net/ethernet/mellanox/mlx5/core/en_dcbnl.c index bc102d094bbd..d20243d6a032 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_dcbnl.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_dcbnl.c @@ -1217,6 +1217,24 @@ static int mlx5e_trust_initialize(struct mlx5e_priv *priv) return 0; } +#define MLX5E_BUFFER_CELL_SHIFT 7 + +static u16 mlx5e_query_port_buffers_cell_size(struct mlx5e_priv *priv) +{ + struct mlx5_core_dev *mdev = priv->mdev; + u32 out[MLX5_ST_SZ_DW(sbcam_reg)] = {}; + u32 in[MLX5_ST_SZ_DW(sbcam_reg)] = {}; + + if (!MLX5_CAP_GEN(mdev, sbcam_reg)) + return (1 << MLX5E_BUFFER_CELL_SHIFT); + + if (mlx5_core_access_reg(mdev, in, sizeof(in), out, sizeof(out), + MLX5_REG_SBCAM, 0, 0)) + return (1 << MLX5E_BUFFER_CELL_SHIFT); + + return MLX5_GET(sbcam_reg, out, cap_cell_size); +} + void mlx5e_dcbnl_initialize(struct mlx5e_priv *priv) { struct mlx5e_dcbx *dcbx = &priv->dcbx; @@ -1234,6 +1252,7 @@ void mlx5e_dcbnl_initialize(struct mlx5e_priv *priv) if (priv->dcbx.mode == MLX5E_DCBX_PARAM_VER_OPER_HOST) priv->dcbx.cap |= DCB_CAP_DCBX_HOST; + priv->dcbx.port_buff_cell_sz = mlx5e_query_port_buffers_cell_size(priv); priv->dcbx.manual_buffer = false; priv->dcbx.cable_len = MLX5E_DEFAULT_CABLE_LEN; diff --git a/include/linux/mlx5/driver.h b/include/linux/mlx5/driver.h index 13c0e4556eda..1e6ca716635a 100644 --- a/include/linux/mlx5/driver.h +++ b/include/linux/mlx5/driver.h @@ -147,6 +147,7 @@ enum { MLX5_REG_MCDA = 0x9063, MLX5_REG_MCAM = 0x907f, MLX5_REG_MIRC = 0x9162, + MLX5_REG_SBCAM = 0xB01F, MLX5_REG_RESOURCE_DUMP = 0xC000, }; diff --git a/include/linux/mlx5/mlx5_ifc.h b/include/linux/mlx5/mlx5_ifc.h index ca1887dd0423..073b79eacc99 100644 --- a/include/linux/mlx5/mlx5_ifc.h +++ b/include/linux/mlx5/mlx5_ifc.h @@ -9960,6 +9960,34 @@ struct mlx5_ifc_pptb_reg_bits { u8 untagged_buff[0x4]; }; +struct mlx5_ifc_sbcam_reg_bits { + u8 reserved_at_0[0x8]; + u8 feature_group[0x8]; + u8 reserved_at_10[0x8]; + u8 access_reg_group[0x8]; + + u8 reserved_at_20[0x20]; + + u8 sb_access_reg_cap_mask[4][0x20]; + + u8 reserved_at_c0[0x80]; + + u8 sb_feature_cap_mask[4][0x20]; + + u8 reserved_at_1c0[0x40]; + + u8 cap_total_buffer_size[0x20]; + + u8 cap_cell_size[0x10]; + u8 cap_max_pg_buffers[0x8]; + u8 cap_num_pool_supported[0x8]; + + u8 reserved_at_240[0x8]; + u8 cap_sbsr_stat_size[0x8]; + u8 cap_max_tclass_data[0x8]; + u8 cap_max_cpu_ingress_tclass_sb[0x8]; +}; + struct mlx5_ifc_pbmc_reg_bits { u8 reserved_at_0[0x8]; u8 local_port[0x8]; From eb32b3f53d283e8d68b6d86c3a6ed859b24dacae Mon Sep 17 00:00:00 2001 From: Eli Britstein Date: Sun, 28 Jun 2020 15:42:26 +0300 Subject: [PATCH 318/343] net/mlx5e: CT: Fix memory leak in cleanup CT entries are deleted via a workqueue from netfilter. If removing the module before that, the rules are cleaned by the driver itself, but the memory entries for them are not freed. Fix that. Fixes: ac991b48d43c ("net/mlx5e: CT: Offload established flows") Signed-off-by: Eli Britstein Reviewed-by: Roi Dayan Signed-off-by: Saeed Mahameed --- drivers/net/ethernet/mellanox/mlx5/core/en/tc_ct.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/tc_ct.c b/drivers/net/ethernet/mellanox/mlx5/core/en/tc_ct.c index 430025550fad..aad1c29b23db 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en/tc_ct.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/tc_ct.c @@ -1097,6 +1097,7 @@ mlx5_tc_ct_flush_ft_entry(void *ptr, void *arg) struct mlx5_ct_entry *entry = ptr; mlx5_tc_ct_entry_del_rules(ct_priv, entry); + kfree(entry); } static void From b2f9f1535bb93ee5fa2ea30ac1c26fa0d676154c Mon Sep 17 00:00:00 2001 From: Jakub Bogusz Date: Thu, 9 Jul 2020 15:57:23 -0700 Subject: [PATCH 319/343] libbpf: Fix libbpf hashmap on (I)LP32 architectures On ILP32, 64-bit result was shifted by value calculated for 32-bit long type and returned value was much outside hashmap capacity. As advised by Andrii Nakryiko, this patch uses different hashing variant for architectures with size_t shorter than long long. Fixes: e3b924224028 ("libbpf: add resizable non-thread safe internal hashmap") Signed-off-by: Jakub Bogusz Signed-off-by: Andrii Nakryiko Signed-off-by: Alexei Starovoitov Link: https://lore.kernel.org/bpf/20200709225723.1069937-1-andriin@fb.com --- tools/lib/bpf/hashmap.h | 12 ++++++++---- 1 file changed, 8 insertions(+), 4 deletions(-) diff --git a/tools/lib/bpf/hashmap.h b/tools/lib/bpf/hashmap.h index df59fd4fc95b..e0af36b0e5d8 100644 --- a/tools/lib/bpf/hashmap.h +++ b/tools/lib/bpf/hashmap.h @@ -11,14 +11,18 @@ #include #include #include -#ifndef __WORDSIZE -#define __WORDSIZE (__SIZEOF_LONG__ * 8) -#endif static inline size_t hash_bits(size_t h, int bits) { /* shuffle bits and return requested number of upper bits */ - return (h * 11400714819323198485llu) >> (__WORDSIZE - bits); +#if (__SIZEOF_SIZE_T__ == __SIZEOF_LONG_LONG__) + /* LP64 case */ + return (h * 11400714819323198485llu) >> (__SIZEOF_LONG_LONG__ * 8 - bits); +#elif (__SIZEOF_SIZE_T__ <= __SIZEOF_LONG__) + return (h * 2654435769lu) >> (__SIZEOF_LONG__ * 8 - bits); +#else +# error "Unsupported size_t size" +#endif } typedef size_t (*hashmap_hash_fn)(const void *key, void *ctx); From 8c080d3a974ad471d8324825851044284f1886c9 Mon Sep 17 00:00:00 2001 From: Vincent Chen Date: Tue, 23 Jun 2020 13:36:42 +0800 Subject: [PATCH 320/343] kgdb: enable arch to support XML packet. The XML packet could be supported by required architecture if the architecture defines CONFIG_HAVE_ARCH_KGDB_QXFER_PKT and implement its own kgdb_arch_handle_qxfer_pkt(). Except for the kgdb_arch_handle_qxfer_pkt(), the architecture also needs to record the feature supported by gdb stub into the kgdb_arch_gdb_stub_feature, and these features will be reported to host gdb when gdb stub receives the qSupported packet. Signed-off-by: Vincent Chen Acked-by: Daniel Thompson Signed-off-by: Palmer Dabbelt --- include/linux/kgdb.h | 11 +++++++++++ kernel/debug/gdbstub.c | 13 +++++++++++++ lib/Kconfig.kgdb | 5 +++++ 3 files changed, 29 insertions(+) diff --git a/include/linux/kgdb.h b/include/linux/kgdb.h index 529116b0cabe..0e4e3a80d58c 100644 --- a/include/linux/kgdb.h +++ b/include/linux/kgdb.h @@ -176,6 +176,17 @@ kgdb_arch_handle_exception(int vector, int signo, int err_code, char *remcom_out_buffer, struct pt_regs *regs); +/** + * kgdb_arch_handle_qxfer_pkt - Handle architecture specific GDB XML + * packets. + * @remcom_in_buffer: The buffer of the packet we have read. + * @remcom_out_buffer: The buffer of %BUFMAX bytes to write a packet into. + */ + +extern void +kgdb_arch_handle_qxfer_pkt(char *remcom_in_buffer, + char *remcom_out_buffer); + /** * kgdb_call_nmi_hook - Call kgdb_nmicallback() on the current CPU * @ignored: This parameter is only here to match the prototype. diff --git a/kernel/debug/gdbstub.c b/kernel/debug/gdbstub.c index 61774aec46b4..a790026e42d0 100644 --- a/kernel/debug/gdbstub.c +++ b/kernel/debug/gdbstub.c @@ -792,6 +792,19 @@ static void gdb_cmd_query(struct kgdb_state *ks) } break; #endif +#ifdef CONFIG_HAVE_ARCH_KGDB_QXFER_PKT + case 'S': + if (!strncmp(remcom_in_buffer, "qSupported:", 11)) + strcpy(remcom_out_buffer, kgdb_arch_gdb_stub_feature); + break; + case 'X': + if (!strncmp(remcom_in_buffer, "qXfer:", 6)) + kgdb_arch_handle_qxfer_pkt(remcom_in_buffer, + remcom_out_buffer); + break; +#endif + default: + break; } } diff --git a/lib/Kconfig.kgdb b/lib/Kconfig.kgdb index ffa7a76de086..256f2486f9bd 100644 --- a/lib/Kconfig.kgdb +++ b/lib/Kconfig.kgdb @@ -3,6 +3,11 @@ config HAVE_ARCH_KGDB bool +# set if architecture has the its kgdb_arch_handle_qxfer_pkt +# function to enable gdb stub to address XML packet sent from GDB. +config HAVE_ARCH_KGDB_QXFER_PKT + bool + menuconfig KGDB bool "KGDB: kernel debugger" depends on HAVE_ARCH_KGDB From fc0c769ffd926312848912a7c2296e1c503898c3 Mon Sep 17 00:00:00 2001 From: Vincent Chen Date: Tue, 23 Jun 2020 13:36:59 +0800 Subject: [PATCH 321/343] riscv: enable the Kconfig prompt of STRICT_KERNEL_RWX Due to lack of hardware breakpoint support, the kernel option CONFIG_STRICT_KERNEL_RWX should be disabled when using KGDB. However, CONFIG_STRICT_KERNEL_RWX is always enabled now. Therefore, select ARCH_OPTIONAL_KERNEL_RWX_DEFAULT to enable CONFIG_STRICT_KERNEL_RWX by default, and then select ARCH_OPTIONAL_KERNEL_RWX to enable the Kconfig prompt of CONFIG_STRICT_KERNEL_RWX so that users can turn it off. Signed-off-by: Vincent Chen Signed-off-by: Palmer Dabbelt --- arch/riscv/Kconfig | 2 ++ 1 file changed, 2 insertions(+) diff --git a/arch/riscv/Kconfig b/arch/riscv/Kconfig index 128192e14ff2..3230c1d48562 100644 --- a/arch/riscv/Kconfig +++ b/arch/riscv/Kconfig @@ -23,6 +23,8 @@ config RISCV select ARCH_HAS_SET_DIRECT_MAP select ARCH_HAS_SET_MEMORY select ARCH_HAS_STRICT_KERNEL_RWX if MMU + select ARCH_OPTIONAL_KERNEL_RWX if ARCH_HAS_STRICT_KERNEL_RWX + select ARCH_OPTIONAL_KERNEL_RWX_DEFAULT select ARCH_WANT_DEFAULT_TOPDOWN_MMAP_LAYOUT if MMU select ARCH_WANT_FRAME_POINTERS select ARCH_WANT_HUGE_PMD_SHARE if 64BIT From f7fc752815f8e2337548497b3afb4aef791db4ef Mon Sep 17 00:00:00 2001 From: Vincent Chen Date: Tue, 23 Jun 2020 13:37:10 +0800 Subject: [PATCH 322/343] riscv: Fix "no previous prototype" compile warning in kgdb.c file Some functions are only used in the kgdb.c file. Add static properities to these functions to avoid "no previous prototype" compile warnings Signed-off-by: Vincent Chen Signed-off-by: Palmer Dabbelt --- arch/riscv/kernel/kgdb.c | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/arch/riscv/kernel/kgdb.c b/arch/riscv/kernel/kgdb.c index c3275f42d1ac..963ed7edcff2 100644 --- a/arch/riscv/kernel/kgdb.c +++ b/arch/riscv/kernel/kgdb.c @@ -44,18 +44,18 @@ DECLARE_INSN(c_beqz, MATCH_C_BEQZ, MASK_C_BEQZ) DECLARE_INSN(c_bnez, MATCH_C_BNEZ, MASK_C_BNEZ) DECLARE_INSN(sret, MATCH_SRET, MASK_SRET) -int decode_register_index(unsigned long opcode, int offset) +static int decode_register_index(unsigned long opcode, int offset) { return (opcode >> offset) & 0x1F; } -int decode_register_index_short(unsigned long opcode, int offset) +static int decode_register_index_short(unsigned long opcode, int offset) { return ((opcode >> offset) & 0x7) + 8; } /* Calculate the new address for after a step */ -int get_step_address(struct pt_regs *regs, unsigned long *next_addr) +static int get_step_address(struct pt_regs *regs, unsigned long *next_addr) { unsigned long pc = regs->epc; unsigned long *regs_ptr = (unsigned long *)regs; @@ -136,7 +136,7 @@ int get_step_address(struct pt_regs *regs, unsigned long *next_addr) return 0; } -int do_single_step(struct pt_regs *regs) +static int do_single_step(struct pt_regs *regs) { /* Determine where the target instruction will send us to */ unsigned long addr = 0; @@ -320,7 +320,7 @@ int kgdb_arch_handle_exception(int vector, int signo, int err_code, return err; } -int kgdb_riscv_kgdbbreak(unsigned long addr) +static int kgdb_riscv_kgdbbreak(unsigned long addr) { if (stepped_address == addr) return KGDB_SW_SINGLE_STEP; From def0aa218e6d42231540329e6f5741fdec9e7da4 Mon Sep 17 00:00:00 2001 From: Vincent Chen Date: Tue, 23 Jun 2020 13:37:25 +0800 Subject: [PATCH 323/343] kgdb: Move the extern declaration kgdb_has_hit_break() to generic kgdb.h Currently, only riscv kgdb.c uses the kgdb_has_hit_break() to identify the kgdb breakpoint. It causes other architectures will encounter the "no previous prototype" warnings if the compile option has W=1. Moving the declaration of extern kgdb_has_hit_break() from risc-v kgdb.h to generic kgdb.h to avoid generating these warnings. Signed-off-by: Vincent Chen Acked-by: Daniel Thompson Signed-off-by: Palmer Dabbelt --- arch/riscv/include/asm/kgdb.h | 1 - include/linux/kgdb.h | 1 + 2 files changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/riscv/include/asm/kgdb.h b/arch/riscv/include/asm/kgdb.h index 8177a457caff..f45889bbb965 100644 --- a/arch/riscv/include/asm/kgdb.h +++ b/arch/riscv/include/asm/kgdb.h @@ -19,7 +19,6 @@ #ifndef __ASSEMBLY__ -extern int kgdb_has_hit_break(unsigned long addr); extern unsigned long kgdb_compiled_break; static inline void arch_kgdb_breakpoint(void) diff --git a/include/linux/kgdb.h b/include/linux/kgdb.h index 0e4e3a80d58c..477b8b7c908f 100644 --- a/include/linux/kgdb.h +++ b/include/linux/kgdb.h @@ -325,6 +325,7 @@ extern int kgdb_hex2mem(char *buf, char *mem, int count); extern int kgdb_isremovedbreak(unsigned long addr); extern void kgdb_schedule_breakpoint(void); +extern int kgdb_has_hit_break(unsigned long addr); extern int kgdb_handle_exception(int ex_vector, int signo, int err_code, From 70ee5731a40b1f07f151e52c3c4ed27d70d4f9fe Mon Sep 17 00:00:00 2001 From: Vincent Chen Date: Tue, 23 Jun 2020 13:37:35 +0800 Subject: [PATCH 324/343] riscv: Avoid kgdb.h including gdb_xml.h to solve unused-const-variable warning The constant arrays in gdb_xml.h are only used in arch/riscv/kernel/kgdb.c, but other c files may include the gdb_xml.h indirectly via including the kgdb.h. Hence, It will cause many unused-const-variable warnings. This patch makes the kgdb.h not to include the gdb_xml.h to solve this problem. Signed-off-by: Vincent Chen Signed-off-by: Palmer Dabbelt --- arch/riscv/include/asm/gdb_xml.h | 3 +-- arch/riscv/include/asm/kgdb.h | 4 +++- 2 files changed, 4 insertions(+), 3 deletions(-) diff --git a/arch/riscv/include/asm/gdb_xml.h b/arch/riscv/include/asm/gdb_xml.h index 041b45f5b997..09342111f227 100644 --- a/arch/riscv/include/asm/gdb_xml.h +++ b/arch/riscv/include/asm/gdb_xml.h @@ -3,8 +3,7 @@ #ifndef __ASM_GDB_XML_H_ #define __ASM_GDB_XML_H_ -#define kgdb_arch_gdb_stub_feature riscv_gdb_stub_feature -static const char riscv_gdb_stub_feature[64] = +const char riscv_gdb_stub_feature[64] = "PacketSize=800;qXfer:features:read+;"; static const char gdb_xfer_read_target[31] = "qXfer:features:read:target.xml:"; diff --git a/arch/riscv/include/asm/kgdb.h b/arch/riscv/include/asm/kgdb.h index f45889bbb965..46677daf708b 100644 --- a/arch/riscv/include/asm/kgdb.h +++ b/arch/riscv/include/asm/kgdb.h @@ -105,7 +105,9 @@ static inline void arch_kgdb_breakpoint(void) #define DBG_REG_BADADDR_OFF 34 #define DBG_REG_CAUSE_OFF 35 -#include +extern const char riscv_gdb_stub_feature[64]; + +#define kgdb_arch_gdb_stub_feature riscv_gdb_stub_feature #endif #endif From 83d31e5271ac74aad14b5a1a2ed26923e1446329 Mon Sep 17 00:00:00 2001 From: Paolo Bonzini Date: Thu, 9 Jul 2020 13:12:09 -0400 Subject: [PATCH 325/343] KVM: nVMX: fixes for preemption timer migration Commit 850448f35aaf ("KVM: nVMX: Fix VMX preemption timer migration", 2020-06-01) accidentally broke nVMX live migration from older version by changing the userspace ABI. Restore it and, while at it, ensure that vmx->nested.has_preemption_timer_deadline is always initialized according to the KVM_STATE_VMX_PREEMPTION_TIMER_DEADLINE flag. Cc: Makarand Sonare Fixes: 850448f35aaf ("KVM: nVMX: Fix VMX preemption timer migration") Reviewed-by: Jim Mattson Signed-off-by: Paolo Bonzini --- Documentation/virt/kvm/api.rst | 5 +++-- arch/x86/include/uapi/asm/kvm.h | 5 +++-- arch/x86/kvm/vmx/nested.c | 1 + 3 files changed, 7 insertions(+), 4 deletions(-) diff --git a/Documentation/virt/kvm/api.rst b/Documentation/virt/kvm/api.rst index 426f94582b7a..320788f81a05 100644 --- a/Documentation/virt/kvm/api.rst +++ b/Documentation/virt/kvm/api.rst @@ -4339,14 +4339,15 @@ Errors: #define KVM_STATE_VMX_PREEMPTION_TIMER_DEADLINE 0x00000001 struct kvm_vmx_nested_state_hdr { - __u32 flags; __u64 vmxon_pa; __u64 vmcs12_pa; - __u64 preemption_timer_deadline; struct { __u16 flags; } smm; + + __u32 flags; + __u64 preemption_timer_deadline; }; struct kvm_vmx_nested_state_data { diff --git a/arch/x86/include/uapi/asm/kvm.h b/arch/x86/include/uapi/asm/kvm.h index 17c5a038f42d..0780f97c1850 100644 --- a/arch/x86/include/uapi/asm/kvm.h +++ b/arch/x86/include/uapi/asm/kvm.h @@ -408,14 +408,15 @@ struct kvm_vmx_nested_state_data { }; struct kvm_vmx_nested_state_hdr { - __u32 flags; __u64 vmxon_pa; __u64 vmcs12_pa; - __u64 preemption_timer_deadline; struct { __u16 flags; } smm; + + __u32 flags; + __u64 preemption_timer_deadline; }; struct kvm_svm_nested_state_data { diff --git a/arch/x86/kvm/vmx/nested.c b/arch/x86/kvm/vmx/nested.c index b26655104d4a..d4a4cec034d0 100644 --- a/arch/x86/kvm/vmx/nested.c +++ b/arch/x86/kvm/vmx/nested.c @@ -6176,6 +6176,7 @@ static int vmx_set_nested_state(struct kvm_vcpu *vcpu, goto error_guest_mode; } + vmx->nested.has_preemption_timer_deadline = false; if (kvm_state->hdr.vmx.flags & KVM_STATE_VMX_PREEMPTION_TIMER_DEADLINE) { vmx->nested.has_preemption_timer_deadline = true; vmx->nested.preemption_timer_deadline = From 3d9fdc252b52023260de1d12399cb3157ed28c07 Mon Sep 17 00:00:00 2001 From: Huacai Chen Date: Fri, 10 Jul 2020 15:23:17 +0800 Subject: [PATCH 326/343] KVM: MIPS: Fix build errors for 32bit kernel Commit dc6d95b153e78ed70b1b2c04a ("KVM: MIPS: Add more MMIO load/store instructions emulation") introduced some 64bit load/store instructions emulation which are unavailable on 32bit platform, and it causes build errors: arch/mips/kvm/emulate.c: In function 'kvm_mips_emulate_store': arch/mips/kvm/emulate.c:1734:6: error: right shift count >= width of type [-Werror] ((vcpu->arch.gprs[rt] >> 56) & 0xff); ^ arch/mips/kvm/emulate.c:1738:6: error: right shift count >= width of type [-Werror] ((vcpu->arch.gprs[rt] >> 48) & 0xffff); ^ arch/mips/kvm/emulate.c:1742:6: error: right shift count >= width of type [-Werror] ((vcpu->arch.gprs[rt] >> 40) & 0xffffff); ^ arch/mips/kvm/emulate.c:1746:6: error: right shift count >= width of type [-Werror] ((vcpu->arch.gprs[rt] >> 32) & 0xffffffff); ^ arch/mips/kvm/emulate.c:1796:6: error: left shift count >= width of type [-Werror] (vcpu->arch.gprs[rt] << 32); ^ arch/mips/kvm/emulate.c:1800:6: error: left shift count >= width of type [-Werror] (vcpu->arch.gprs[rt] << 40); ^ arch/mips/kvm/emulate.c:1804:6: error: left shift count >= width of type [-Werror] (vcpu->arch.gprs[rt] << 48); ^ arch/mips/kvm/emulate.c:1808:6: error: left shift count >= width of type [-Werror] (vcpu->arch.gprs[rt] << 56); ^ cc1: all warnings being treated as errors make[3]: *** [arch/mips/kvm/emulate.o] Error 1 So, use #if defined(CONFIG_64BIT) && defined(CONFIG_KVM_MIPS_VZ) to guard the 64bit load/store instructions emulation. Reported-by: kernel test robot Fixes: dc6d95b153e78ed70b1b2c04a ("KVM: MIPS: Add more MMIO load/store instructions emulation") Signed-off-by: Huacai Chen Message-Id: <1594365797-536-1-git-send-email-chenhc@lemote.com> Signed-off-by: Paolo Bonzini --- arch/mips/kvm/emulate.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/arch/mips/kvm/emulate.c b/arch/mips/kvm/emulate.c index 5ae82d925197..d242300cacc0 100644 --- a/arch/mips/kvm/emulate.c +++ b/arch/mips/kvm/emulate.c @@ -1722,6 +1722,7 @@ enum emulation_result kvm_mips_emulate_store(union mips_instruction inst, vcpu->arch.gprs[rt], *(u32 *)data); break; +#if defined(CONFIG_64BIT) && defined(CONFIG_KVM_MIPS_VZ) case sdl_op: run->mmio.phys_addr = kvm_mips_callbacks->gva_to_gpa( vcpu->arch.host_cp0_badvaddr) & (~0x7); @@ -1815,6 +1816,7 @@ enum emulation_result kvm_mips_emulate_store(union mips_instruction inst, vcpu->arch.pc, vcpu->arch.host_cp0_badvaddr, vcpu->arch.gprs[rt], *(u64 *)data); break; +#endif #ifdef CONFIG_CPU_LOONGSON64 case sdc2_op: @@ -2002,6 +2004,7 @@ enum emulation_result kvm_mips_emulate_load(union mips_instruction inst, } break; +#if defined(CONFIG_64BIT) && defined(CONFIG_KVM_MIPS_VZ) case ldl_op: run->mmio.phys_addr = kvm_mips_callbacks->gva_to_gpa( vcpu->arch.host_cp0_badvaddr) & (~0x7); @@ -2073,6 +2076,7 @@ enum emulation_result kvm_mips_emulate_load(union mips_instruction inst, break; } break; +#endif #ifdef CONFIG_CPU_LOONGSON64 case ldc2_op: From ba8c423488974f02b538e9dc1730f0334f9b85aa Mon Sep 17 00:00:00 2001 From: Dan Carpenter Date: Fri, 10 Jul 2020 14:36:10 +0300 Subject: [PATCH 327/343] xen/xenbus: Fix a double free in xenbus_map_ring_pv() When there is an error the caller frees "info->node" so the free here will result in a double free. We should just delete first kfree(). Fixes: 3848e4e0a32a ("xen/xenbus: avoid large structs and arrays on the stack") Signed-off-by: Dan Carpenter Link: https://lore.kernel.org/r/20200710113610.GA92345@mwanda Reviewed-by: Juergen Gross Signed-off-by: Boris Ostrovsky --- drivers/xen/xenbus/xenbus_client.c | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/drivers/xen/xenbus/xenbus_client.c b/drivers/xen/xenbus/xenbus_client.c index 4f168b46fbca..786fbb7d8be0 100644 --- a/drivers/xen/xenbus/xenbus_client.c +++ b/drivers/xen/xenbus/xenbus_client.c @@ -693,10 +693,8 @@ static int xenbus_map_ring_pv(struct xenbus_device *dev, bool leaked; area = alloc_vm_area(XEN_PAGE_SIZE * nr_grefs, info->ptes); - if (!area) { - kfree(node); + if (!area) return -ENOMEM; - } for (i = 0; i < nr_grefs; i++) info->phys_addrs[i] = From 2c08f65fd71cc2c96042c14fd3847a79262e1757 Mon Sep 17 00:00:00 2001 From: Heiko Carstens Date: Thu, 9 Jul 2020 10:37:54 +0200 Subject: [PATCH 328/343] MAINTAINERS: update email address for Heiko Carstens Signed-off-by: Heiko Carstens --- .mailmap | 2 ++ MAINTAINERS | 4 ++-- 2 files changed, 4 insertions(+), 2 deletions(-) diff --git a/.mailmap b/.mailmap index c69d9c734fb5..ef310eeed7b5 100644 --- a/.mailmap +++ b/.mailmap @@ -95,6 +95,8 @@ Greg Kroah-Hartman Greg Kroah-Hartman Gregory CLEMENT Hanjun Guo +Heiko Carstens +Heiko Carstens Henk Vergonet Henrik Kretzschmar Henrik Rydberg diff --git a/MAINTAINERS b/MAINTAINERS index 1d4aa7f942de..76d6324b80ce 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -3306,7 +3306,7 @@ X: arch/riscv/net/bpf_jit_comp32.c BPF JIT for S390 M: Ilya Leoshkevich -M: Heiko Carstens +M: Heiko Carstens M: Vasily Gorbik L: netdev@vger.kernel.org L: bpf@vger.kernel.org @@ -14831,7 +14831,7 @@ S: Maintained F: drivers/video/fbdev/savage/ S390 -M: Heiko Carstens +M: Heiko Carstens M: Vasily Gorbik M: Christian Borntraeger L: linux-s390@vger.kernel.org From dd9ce2d6eeaebbdd342cbe095b7a195d569f18d3 Mon Sep 17 00:00:00 2001 From: Gerald Schaefer Date: Fri, 10 Jul 2020 13:36:26 +0200 Subject: [PATCH 329/343] MAINTAINERS: update email address for Gerald Schaefer Signed-off-by: Gerald Schaefer Signed-off-by: Heiko Carstens --- .mailmap | 3 +++ MAINTAINERS | 4 ++-- 2 files changed, 5 insertions(+), 2 deletions(-) diff --git a/.mailmap b/.mailmap index ef310eeed7b5..6da12dfd10dc 100644 --- a/.mailmap +++ b/.mailmap @@ -90,6 +90,9 @@ Frank Rowand Frank Zago Gao Xiang Gao Xiang +Gerald Schaefer +Gerald Schaefer +Gerald Schaefer Greg Kroah-Hartman Greg Kroah-Hartman Greg Kroah-Hartman diff --git a/MAINTAINERS b/MAINTAINERS index 76d6324b80ce..06f61751353c 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -14862,7 +14862,7 @@ F: drivers/s390/block/dasd* F: include/linux/dasd_mod.h S390 IOMMU (PCI) -M: Gerald Schaefer +M: Gerald Schaefer L: linux-s390@vger.kernel.org S: Supported W: http://www.ibm.com/developerworks/linux/linux390/ @@ -14890,7 +14890,7 @@ F: drivers/s390/net/ S390 PCI SUBSYSTEM M: Niklas Schnelle -M: Gerald Schaefer +M: Gerald Schaefer L: linux-s390@vger.kernel.org S: Supported W: http://www.ibm.com/developerworks/linux/linux390/ From 667e57da358f61b6966e12e925a69e42d912e8bb Mon Sep 17 00:00:00 2001 From: Yang Yingliang Date: Fri, 10 Jul 2020 14:14:20 +0000 Subject: [PATCH 330/343] io_uring: fix memleak in io_sqe_files_register() I got a memleak report when doing some fuzz test: BUG: memory leak unreferenced object 0x607eeac06e78 (size 8): comm "test", pid 295, jiffies 4294735835 (age 31.745s) hex dump (first 8 bytes): 00 00 00 00 00 00 00 00 ........ backtrace: [<00000000932632e6>] percpu_ref_init+0x2a/0x1b0 [<0000000092ddb796>] __io_uring_register+0x111d/0x22a0 [<00000000eadd6c77>] __x64_sys_io_uring_register+0x17b/0x480 [<00000000591b89a6>] do_syscall_64+0x56/0xa0 [<00000000864a281d>] entry_SYSCALL_64_after_hwframe+0x44/0xa9 Call percpu_ref_exit() on error path to avoid refcount memleak. Fixes: 05f3fb3c5397 ("io_uring: avoid ring quiesce for fixed file set unregister and update") Cc: stable@vger.kernel.org Reported-by: Hulk Robot Signed-off-by: Yang Yingliang Signed-off-by: Jens Axboe --- fs/io_uring.c | 1 + 1 file changed, 1 insertion(+) diff --git a/fs/io_uring.c b/fs/io_uring.c index a9ce2e6f03dd..fc07baf4392a 100644 --- a/fs/io_uring.c +++ b/fs/io_uring.c @@ -6699,6 +6699,7 @@ static int io_sqe_files_register(struct io_ring_ctx *ctx, void __user *arg, for (i = 0; i < nr_tables; i++) kfree(ctx->file_data->table[i].files); + percpu_ref_exit(&ctx->file_data->refs); kfree(ctx->file_data->table); kfree(ctx->file_data); ctx->file_data = NULL; From 309fc03a3284af62eb6082fb60327045a1dabf57 Mon Sep 17 00:00:00 2001 From: Jens Axboe Date: Fri, 10 Jul 2020 09:13:34 -0600 Subject: [PATCH 331/343] io_uring: account user memory freed when exit has been queued We currently account the memory after the exit work has been run, but that leaves a gap where a process has closed its ring and until the memory has been accounted as freed. If the memlocked ulimit is borderline, then that can introduce spurious setup errors returning -ENOMEM because the free work hasn't been run yet. Account this as freed when we close the ring, as not to expose a tiny gap where setting up a new ring can fail. Fixes: 85faa7b8346e ("io_uring: punt final io_ring_ctx wait-and-free to workqueue") Cc: stable@vger.kernel.org # v5.7 Signed-off-by: Jens Axboe --- fs/io_uring.c | 13 ++++++++++--- 1 file changed, 10 insertions(+), 3 deletions(-) diff --git a/fs/io_uring.c b/fs/io_uring.c index fc07baf4392a..ca8abde48b6c 100644 --- a/fs/io_uring.c +++ b/fs/io_uring.c @@ -7351,9 +7351,6 @@ static void io_ring_ctx_free(struct io_ring_ctx *ctx) io_mem_free(ctx->sq_sqes); percpu_ref_exit(&ctx->refs); - if (ctx->account_mem) - io_unaccount_mem(ctx->user, - ring_pages(ctx->sq_entries, ctx->cq_entries)); free_uid(ctx->user); put_cred(ctx->creds); kfree(ctx->cancel_hash); @@ -7438,6 +7435,16 @@ static void io_ring_ctx_wait_and_kill(struct io_ring_ctx *ctx) if (ctx->rings) io_cqring_overflow_flush(ctx, true); idr_for_each(&ctx->personality_idr, io_remove_personalities, ctx); + + /* + * Do this upfront, so we won't have a grace period where the ring + * is closed but resources aren't reaped yet. This can cause + * spurious failure in setting up a new ring. + */ + if (ctx->account_mem) + io_unaccount_mem(ctx->user, + ring_pages(ctx->sq_entries, ctx->cq_entries)); + INIT_WORK(&ctx->exit_work, io_ring_exit_work); queue_work(system_wq, &ctx->exit_work); } From c8b1d7436045d3599bae56aef1682813ecccaad7 Mon Sep 17 00:00:00 2001 From: Davide Caratti Date: Fri, 10 Jul 2020 12:55:08 +0200 Subject: [PATCH 332/343] bnxt_en: fix NULL dereference in case SR-IOV configuration fails we need to set 'active_vfs' back to 0, if something goes wrong during the allocation of SR-IOV resources: otherwise, further VF configurations will wrongly assume that bp->pf.vf[x] are valid memory locations, and commands like the ones in the following sequence: # echo 2 >/sys/bus/pci/devices/${ADDR}/sriov_numvfs # ip link set dev ens1f0np0 up # ip link set dev ens1f0np0 vf 0 trust on will cause a kernel crash similar to this: bnxt_en 0000:3b:00.0: not enough MMIO resources for SR-IOV BUG: kernel NULL pointer dereference, address: 0000000000000014 #PF: supervisor read access in kernel mode #PF: error_code(0x0000) - not-present page PGD 0 P4D 0 Oops: 0000 [#1] SMP PTI CPU: 43 PID: 2059 Comm: ip Tainted: G I 5.8.0-rc2.upstream+ #871 Hardware name: Dell Inc. PowerEdge R740/08D89F, BIOS 2.2.11 06/13/2019 RIP: 0010:bnxt_set_vf_trust+0x5b/0x110 [bnxt_en] Code: 44 24 58 31 c0 e8 f5 fb ff ff 85 c0 0f 85 b6 00 00 00 48 8d 1c 5b 41 89 c6 b9 0b 00 00 00 48 c1 e3 04 49 03 9c 24 f0 0e 00 00 <8b> 43 14 89 c2 83 c8 10 83 e2 ef 45 84 ed 49 89 e5 0f 44 c2 4c 89 RSP: 0018:ffffac6246a1f570 EFLAGS: 00010246 RAX: 0000000000000000 RBX: 0000000000000000 RCX: 000000000000000b RDX: 0000000000000001 RSI: 0000000000000000 RDI: ffff98b28f538900 RBP: ffff98b28f538900 R08: 0000000000000000 R09: 0000000000000008 R10: ffffffffb9515be0 R11: ffffac6246a1f678 R12: ffff98b28f538000 R13: 0000000000000001 R14: 0000000000000000 R15: ffffffffc05451e0 FS: 00007fde0f688800(0000) GS:ffff98baffd40000(0000) knlGS:0000000000000000 CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 CR2: 0000000000000014 CR3: 000000104bb0a003 CR4: 00000000007606e0 DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000 DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400 PKRU: 55555554 Call Trace: do_setlink+0x994/0xfe0 __rtnl_newlink+0x544/0x8d0 rtnl_newlink+0x47/0x70 rtnetlink_rcv_msg+0x29f/0x350 netlink_rcv_skb+0x4a/0x110 netlink_unicast+0x21d/0x300 netlink_sendmsg+0x329/0x450 sock_sendmsg+0x5b/0x60 ____sys_sendmsg+0x204/0x280 ___sys_sendmsg+0x88/0xd0 __sys_sendmsg+0x5e/0xa0 do_syscall_64+0x47/0x80 entry_SYSCALL_64_after_hwframe+0x44/0xa9 Fixes: c0c050c58d840 ("bnxt_en: New Broadcom ethernet driver.") Reported-by: Fei Liu CC: Jonathan Toppins CC: Michael Chan Signed-off-by: Davide Caratti Reviewed-by: Michael Chan Acked-by: Jonathan Toppins Signed-off-by: David S. Miller --- drivers/net/ethernet/broadcom/bnxt/bnxt_sriov.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt_sriov.c b/drivers/net/ethernet/broadcom/bnxt/bnxt_sriov.c index 3a9a51f7063a..392e32c7122a 100644 --- a/drivers/net/ethernet/broadcom/bnxt/bnxt_sriov.c +++ b/drivers/net/ethernet/broadcom/bnxt/bnxt_sriov.c @@ -396,6 +396,7 @@ static void bnxt_free_vf_resources(struct bnxt *bp) } } + bp->pf.active_vfs = 0; kfree(bp->pf.vf); bp->pf.vf = NULL; } @@ -835,7 +836,6 @@ void bnxt_sriov_disable(struct bnxt *bp) bnxt_free_vf_resources(bp); - bp->pf.active_vfs = 0; /* Reclaim all resources for the PF. */ rtnl_lock(); bnxt_restore_pf_fw_resources(bp); From 515a10a701d570e26dfbe6ee373f77c8bf11053f Mon Sep 17 00:00:00 2001 From: Nicolas Ferre Date: Fri, 10 Jul 2020 14:46:41 +0200 Subject: [PATCH 333/343] net: macb: fix wakeup test in runtime suspend/resume routines Use the proper struct device pointer to check if the wakeup flag and wakeup source are positioned. Use the one passed by function call which is equivalent to &bp->dev->dev.parent. It's preventing the trigger of a spurious interrupt in case the Wake-on-Lan feature is used. Fixes: d54f89af6cc4 ("net: macb: Add pm runtime support") Cc: Claudiu Beznea Cc: Harini Katakam Reviewed-by: Florian Fainelli Signed-off-by: Nicolas Ferre Signed-off-by: David S. Miller --- drivers/net/ethernet/cadence/macb_main.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/net/ethernet/cadence/macb_main.c b/drivers/net/ethernet/cadence/macb_main.c index 52582e8ed90e..55e680f35022 100644 --- a/drivers/net/ethernet/cadence/macb_main.c +++ b/drivers/net/ethernet/cadence/macb_main.c @@ -4654,7 +4654,7 @@ static int __maybe_unused macb_runtime_suspend(struct device *dev) struct net_device *netdev = dev_get_drvdata(dev); struct macb *bp = netdev_priv(netdev); - if (!(device_may_wakeup(&bp->dev->dev))) { + if (!(device_may_wakeup(dev))) { clk_disable_unprepare(bp->tx_clk); clk_disable_unprepare(bp->hclk); clk_disable_unprepare(bp->pclk); @@ -4670,7 +4670,7 @@ static int __maybe_unused macb_runtime_resume(struct device *dev) struct net_device *netdev = dev_get_drvdata(dev); struct macb *bp = netdev_priv(netdev); - if (!(device_may_wakeup(&bp->dev->dev))) { + if (!(device_may_wakeup(dev))) { clk_prepare_enable(bp->pclk); clk_prepare_enable(bp->hclk); clk_prepare_enable(bp->tx_clk); From ced4799d06375929e013eea04ba6908207afabbe Mon Sep 17 00:00:00 2001 From: Nicolas Ferre Date: Fri, 10 Jul 2020 14:46:42 +0200 Subject: [PATCH 334/343] net: macb: mark device wake capable when "magic-packet" property present Change the way the "magic-packet" DT property is handled in the macb_probe() function, matching DT binding documentation. Now we mark the device as "wakeup capable" instead of calling the device_init_wakeup() function that would enable the wakeup source. For Ethernet WoL, enabling the wakeup_source is done by using ethtool and associated macb_set_wol() function that already calls device_set_wakeup_enable() for this purpose. That would reduce power consumption by cutting more clocks if "magic-packet" property is set but WoL is not configured by ethtool. Fixes: 3e2a5e153906 ("net: macb: add wake-on-lan support via magic packet") Cc: Claudiu Beznea Cc: Harini Katakam Cc: Sergio Prado Reviewed-by: Florian Fainelli Signed-off-by: Nicolas Ferre Signed-off-by: David S. Miller --- drivers/net/ethernet/cadence/macb_main.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/ethernet/cadence/macb_main.c b/drivers/net/ethernet/cadence/macb_main.c index 55e680f35022..4cafe343c0a2 100644 --- a/drivers/net/ethernet/cadence/macb_main.c +++ b/drivers/net/ethernet/cadence/macb_main.c @@ -4422,7 +4422,7 @@ static int macb_probe(struct platform_device *pdev) bp->wol = 0; if (of_get_property(np, "magic-packet", NULL)) bp->wol |= MACB_WOL_HAS_MAGIC_PACKET; - device_init_wakeup(&pdev->dev, bp->wol & MACB_WOL_HAS_MAGIC_PACKET); + device_set_wakeup_capable(&pdev->dev, bp->wol & MACB_WOL_HAS_MAGIC_PACKET); spin_lock_init(&bp->lock); From 253fe09435045ab9346a8e364299d971185ae031 Mon Sep 17 00:00:00 2001 From: Nicolas Ferre Date: Fri, 10 Jul 2020 14:46:43 +0200 Subject: [PATCH 335/343] net: macb: fix macb_get/set_wol() when moving to phylink Keep previous function goals and integrate phylink actions to them. phylink_ethtool_get_wol() is not enough to figure out if Ethernet driver supports Wake-on-Lan. Initialization of "supported" and "wolopts" members is done in phylink function, no need to keep them in calling function. phylink_ethtool_set_wol() return value is considered and determines if the MAC has to handle WoL or not. The case where the PHY doesn't implement WoL leads to the MAC configuring it to provide this feature. Fixes: 7897b071ac3b ("net: macb: convert to phylink") Cc: Claudiu Beznea Cc: Harini Katakam Cc: Antoine Tenart Cc: Florian Fainelli Signed-off-by: Nicolas Ferre Signed-off-by: David S. Miller --- drivers/net/ethernet/cadence/macb_main.c | 18 ++++++++++++------ 1 file changed, 12 insertions(+), 6 deletions(-) diff --git a/drivers/net/ethernet/cadence/macb_main.c b/drivers/net/ethernet/cadence/macb_main.c index 4cafe343c0a2..79c2fe054303 100644 --- a/drivers/net/ethernet/cadence/macb_main.c +++ b/drivers/net/ethernet/cadence/macb_main.c @@ -2821,11 +2821,13 @@ static void macb_get_wol(struct net_device *netdev, struct ethtool_wolinfo *wol) { struct macb *bp = netdev_priv(netdev); - wol->supported = 0; - wol->wolopts = 0; - - if (bp->wol & MACB_WOL_HAS_MAGIC_PACKET) + if (bp->wol & MACB_WOL_HAS_MAGIC_PACKET) { phylink_ethtool_get_wol(bp->phylink, wol); + wol->supported |= WAKE_MAGIC; + + if (bp->wol & MACB_WOL_ENABLED) + wol->wolopts |= WAKE_MAGIC; + } } static int macb_set_wol(struct net_device *netdev, struct ethtool_wolinfo *wol) @@ -2833,9 +2835,13 @@ static int macb_set_wol(struct net_device *netdev, struct ethtool_wolinfo *wol) struct macb *bp = netdev_priv(netdev); int ret; + /* Pass the order to phylink layer */ ret = phylink_ethtool_set_wol(bp->phylink, wol); - if (!ret) - return 0; + /* Don't manage WoL on MAC if handled by the PHY + * or if there's a failure in talking to the PHY + */ + if (!ret || ret != -EOPNOTSUPP) + return ret; if (!(bp->wol & MACB_WOL_HAS_MAGIC_PACKET) || (wol->wolopts & ~WAKE_MAGIC)) From 64febc5e56c9a748162f206dcc5be1a44436087a Mon Sep 17 00:00:00 2001 From: Nicolas Ferre Date: Fri, 10 Jul 2020 14:46:44 +0200 Subject: [PATCH 336/343] net: macb: fix macb_suspend() by removing call to netif_carrier_off() As we now use the phylink call to phylink_stop() in the non-WoL path, there is no need for this call to netif_carrier_off() anymore. It can disturb the underlying phylink FSM. Fixes: 7897b071ac3b ("net: macb: convert to phylink") Cc: Claudiu Beznea Cc: Harini Katakam Cc: Antoine Tenart Reviewed-by: Florian Fainelli Signed-off-by: Nicolas Ferre Signed-off-by: David S. Miller --- drivers/net/ethernet/cadence/macb_main.c | 1 - 1 file changed, 1 deletion(-) diff --git a/drivers/net/ethernet/cadence/macb_main.c b/drivers/net/ethernet/cadence/macb_main.c index 79c2fe054303..548815255e22 100644 --- a/drivers/net/ethernet/cadence/macb_main.c +++ b/drivers/net/ethernet/cadence/macb_main.c @@ -4604,7 +4604,6 @@ static int __maybe_unused macb_suspend(struct device *dev) bp->pm_data.scrt2 = gem_readl_n(bp, ETHT, SCRT2_ETHT); } - netif_carrier_off(netdev); if (bp->ptp_info) bp->ptp_info->ptp_remove(netdev); pm_runtime_force_suspend(dev); From 6c8f85cac98a4c6b767c4c4f6af7283724c32b47 Mon Sep 17 00:00:00 2001 From: Nicolas Ferre Date: Fri, 10 Jul 2020 14:46:45 +0200 Subject: [PATCH 337/343] net: macb: fix call to pm_runtime in the suspend/resume functions The calls to pm_runtime_force_suspend/resume() functions are only relevant if the device is not configured to act as a WoL wakeup source. Add the device_may_wakeup() test before calling them. Fixes: 3e2a5e153906 ("net: macb: add wake-on-lan support via magic packet") Cc: Claudiu Beznea Cc: Harini Katakam Cc: Sergio Prado Reviewed-by: Florian Fainelli Signed-off-by: Nicolas Ferre Signed-off-by: David S. Miller --- drivers/net/ethernet/cadence/macb_main.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/drivers/net/ethernet/cadence/macb_main.c b/drivers/net/ethernet/cadence/macb_main.c index 548815255e22..f1f0976e7669 100644 --- a/drivers/net/ethernet/cadence/macb_main.c +++ b/drivers/net/ethernet/cadence/macb_main.c @@ -4606,7 +4606,8 @@ static int __maybe_unused macb_suspend(struct device *dev) if (bp->ptp_info) bp->ptp_info->ptp_remove(netdev); - pm_runtime_force_suspend(dev); + if (!device_may_wakeup(dev)) + pm_runtime_force_suspend(dev); return 0; } @@ -4621,7 +4622,8 @@ static int __maybe_unused macb_resume(struct device *dev) if (!netif_running(netdev)) return 0; - pm_runtime_force_resume(dev); + if (!device_may_wakeup(dev)) + pm_runtime_force_resume(dev); if (bp->wol & MACB_WOL_ENABLED) { macb_writel(bp, IDR, MACB_BIT(WOL)); From d9d5420273997664a1c09151ca86ac993f2f89c1 Mon Sep 17 00:00:00 2001 From: Ido Schimmel Date: Fri, 10 Jul 2020 16:41:38 +0300 Subject: [PATCH 338/343] mlxsw: spectrum_router: Remove inappropriate usage of WARN_ON() We should not trigger a warning when a memory allocation fails. Remove the WARN_ON(). The warning is constantly triggered by syzkaller when it is injecting faults: [ 2230.758664] FAULT_INJECTION: forcing a failure. [ 2230.758664] name failslab, interval 1, probability 0, space 0, times 0 [ 2230.762329] CPU: 3 PID: 1407 Comm: syz-executor.0 Not tainted 5.8.0-rc2+ #28 ... [ 2230.898175] WARNING: CPU: 3 PID: 1407 at drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c:6265 mlxsw_sp_router_fib_event+0xfad/0x13e0 [ 2230.898179] Kernel panic - not syncing: panic_on_warn set ... [ 2230.898183] CPU: 3 PID: 1407 Comm: syz-executor.0 Not tainted 5.8.0-rc2+ #28 [ 2230.898190] Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS rel-1.12.1-0-ga5cab58e9a3f-prebuilt.qemu.org 04/01/2014 Fixes: 3057224e014c ("mlxsw: spectrum_router: Implement FIB offload in deferred work") Signed-off-by: Ido Schimmel Reviewed-by: Jiri Pirko Signed-off-by: David S. Miller --- drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c index 770de0222e7b..019ed503aadf 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c +++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c @@ -6262,7 +6262,7 @@ static int mlxsw_sp_router_fib_event(struct notifier_block *nb, } fib_work = kzalloc(sizeof(*fib_work), GFP_ATOMIC); - if (WARN_ON(!fib_work)) + if (!fib_work) return NOTIFY_BAD; fib_work->mlxsw_sp = router->mlxsw_sp; From c4317b11675b99af6641662ebcbd3c6010600e64 Mon Sep 17 00:00:00 2001 From: Ido Schimmel Date: Fri, 10 Jul 2020 16:41:39 +0300 Subject: [PATCH 339/343] mlxsw: pci: Fix use-after-free in case of failed devlink reload In case devlink reload failed, it is possible to trigger a use-after-free when querying the kernel for device info via 'devlink dev info' [1]. This happens because as part of the reload error path the PCI command interface is de-initialized and its mailboxes are freed. When the devlink '->info_get()' callback is invoked the device is queried via the command interface and the freed mailboxes are accessed. Fix this by initializing the command interface once during probe and not during every reload. This is consistent with the other bus used by mlxsw (i.e., 'mlxsw_i2c') and also allows user space to query the running firmware version (for example) from the device after a failed reload. [1] BUG: KASAN: use-after-free in memcpy include/linux/string.h:406 [inline] BUG: KASAN: use-after-free in mlxsw_pci_cmd_exec+0x177/0xa60 drivers/net/ethernet/mellanox/mlxsw/pci.c:1675 Write of size 4096 at addr ffff88810ae32000 by task syz-executor.1/2355 CPU: 1 PID: 2355 Comm: syz-executor.1 Not tainted 5.8.0-rc2+ #29 Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS rel-1.12.1-0-ga5cab58e9a3f-prebuilt.qemu.org 04/01/2014 Call Trace: __dump_stack lib/dump_stack.c:77 [inline] dump_stack+0xf6/0x16e lib/dump_stack.c:118 print_address_description.constprop.0+0x1c/0x250 mm/kasan/report.c:383 __kasan_report mm/kasan/report.c:513 [inline] kasan_report.cold+0x1f/0x37 mm/kasan/report.c:530 check_memory_region_inline mm/kasan/generic.c:186 [inline] check_memory_region+0x14e/0x1b0 mm/kasan/generic.c:192 memcpy+0x39/0x60 mm/kasan/common.c:106 memcpy include/linux/string.h:406 [inline] mlxsw_pci_cmd_exec+0x177/0xa60 drivers/net/ethernet/mellanox/mlxsw/pci.c:1675 mlxsw_cmd_exec+0x249/0x550 drivers/net/ethernet/mellanox/mlxsw/core.c:2335 mlxsw_cmd_access_reg drivers/net/ethernet/mellanox/mlxsw/cmd.h:859 [inline] mlxsw_core_reg_access_cmd drivers/net/ethernet/mellanox/mlxsw/core.c:1938 [inline] mlxsw_core_reg_access+0x2f6/0x540 drivers/net/ethernet/mellanox/mlxsw/core.c:1985 mlxsw_reg_query drivers/net/ethernet/mellanox/mlxsw/core.c:2000 [inline] mlxsw_devlink_info_get+0x17f/0x6e0 drivers/net/ethernet/mellanox/mlxsw/core.c:1090 devlink_nl_info_fill.constprop.0+0x13c/0x2d0 net/core/devlink.c:4588 devlink_nl_cmd_info_get_dumpit+0x246/0x460 net/core/devlink.c:4648 genl_lock_dumpit+0x85/0xc0 net/netlink/genetlink.c:575 netlink_dump+0x515/0xe50 net/netlink/af_netlink.c:2245 __netlink_dump_start+0x53d/0x830 net/netlink/af_netlink.c:2353 genl_family_rcv_msg_dumpit.isra.0+0x296/0x300 net/netlink/genetlink.c:638 genl_family_rcv_msg net/netlink/genetlink.c:733 [inline] genl_rcv_msg+0x78d/0x9d0 net/netlink/genetlink.c:753 netlink_rcv_skb+0x152/0x440 net/netlink/af_netlink.c:2469 genl_rcv+0x24/0x40 net/netlink/genetlink.c:764 netlink_unicast_kernel net/netlink/af_netlink.c:1303 [inline] netlink_unicast+0x53a/0x750 net/netlink/af_netlink.c:1329 netlink_sendmsg+0x850/0xd90 net/netlink/af_netlink.c:1918 sock_sendmsg_nosec net/socket.c:652 [inline] sock_sendmsg+0x150/0x190 net/socket.c:672 ____sys_sendmsg+0x6d8/0x840 net/socket.c:2363 ___sys_sendmsg+0xff/0x170 net/socket.c:2417 __sys_sendmsg+0xe5/0x1b0 net/socket.c:2450 do_syscall_64+0x56/0xa0 arch/x86/entry/common.c:359 entry_SYSCALL_64_after_hwframe+0x44/0xa9 Fixes: a9c8336f6544 ("mlxsw: core: Add support for devlink info command") Signed-off-by: Ido Schimmel Reviewed-by: Jiri Pirko Signed-off-by: David S. Miller --- drivers/net/ethernet/mellanox/mlxsw/pci.c | 54 ++++++++++++++++------- 1 file changed, 38 insertions(+), 16 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlxsw/pci.c b/drivers/net/ethernet/mellanox/mlxsw/pci.c index fd0e97de44e7..c04ec1a92826 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/pci.c +++ b/drivers/net/ethernet/mellanox/mlxsw/pci.c @@ -1414,23 +1414,12 @@ static int mlxsw_pci_init(void *bus_priv, struct mlxsw_core *mlxsw_core, u16 num_pages; int err; - mutex_init(&mlxsw_pci->cmd.lock); - init_waitqueue_head(&mlxsw_pci->cmd.wait); - mlxsw_pci->core = mlxsw_core; mbox = mlxsw_cmd_mbox_alloc(); if (!mbox) return -ENOMEM; - err = mlxsw_pci_mbox_alloc(mlxsw_pci, &mlxsw_pci->cmd.in_mbox); - if (err) - goto mbox_put; - - err = mlxsw_pci_mbox_alloc(mlxsw_pci, &mlxsw_pci->cmd.out_mbox); - if (err) - goto err_out_mbox_alloc; - err = mlxsw_pci_sw_reset(mlxsw_pci, mlxsw_pci->id); if (err) goto err_sw_reset; @@ -1537,9 +1526,6 @@ static int mlxsw_pci_init(void *bus_priv, struct mlxsw_core *mlxsw_core, mlxsw_pci_free_irq_vectors(mlxsw_pci); err_alloc_irq: err_sw_reset: - mlxsw_pci_mbox_free(mlxsw_pci, &mlxsw_pci->cmd.out_mbox); -err_out_mbox_alloc: - mlxsw_pci_mbox_free(mlxsw_pci, &mlxsw_pci->cmd.in_mbox); mbox_put: mlxsw_cmd_mbox_free(mbox); return err; @@ -1553,8 +1539,6 @@ static void mlxsw_pci_fini(void *bus_priv) mlxsw_pci_aqs_fini(mlxsw_pci); mlxsw_pci_fw_area_fini(mlxsw_pci); mlxsw_pci_free_irq_vectors(mlxsw_pci); - mlxsw_pci_mbox_free(mlxsw_pci, &mlxsw_pci->cmd.out_mbox); - mlxsw_pci_mbox_free(mlxsw_pci, &mlxsw_pci->cmd.in_mbox); } static struct mlxsw_pci_queue * @@ -1776,6 +1760,37 @@ static const struct mlxsw_bus mlxsw_pci_bus = { .features = MLXSW_BUS_F_TXRX | MLXSW_BUS_F_RESET, }; +static int mlxsw_pci_cmd_init(struct mlxsw_pci *mlxsw_pci) +{ + int err; + + mutex_init(&mlxsw_pci->cmd.lock); + init_waitqueue_head(&mlxsw_pci->cmd.wait); + + err = mlxsw_pci_mbox_alloc(mlxsw_pci, &mlxsw_pci->cmd.in_mbox); + if (err) + goto err_in_mbox_alloc; + + err = mlxsw_pci_mbox_alloc(mlxsw_pci, &mlxsw_pci->cmd.out_mbox); + if (err) + goto err_out_mbox_alloc; + + return 0; + +err_out_mbox_alloc: + mlxsw_pci_mbox_free(mlxsw_pci, &mlxsw_pci->cmd.in_mbox); +err_in_mbox_alloc: + mutex_destroy(&mlxsw_pci->cmd.lock); + return err; +} + +static void mlxsw_pci_cmd_fini(struct mlxsw_pci *mlxsw_pci) +{ + mlxsw_pci_mbox_free(mlxsw_pci, &mlxsw_pci->cmd.out_mbox); + mlxsw_pci_mbox_free(mlxsw_pci, &mlxsw_pci->cmd.in_mbox); + mutex_destroy(&mlxsw_pci->cmd.lock); +} + static int mlxsw_pci_probe(struct pci_dev *pdev, const struct pci_device_id *id) { const char *driver_name = pdev->driver->name; @@ -1831,6 +1846,10 @@ static int mlxsw_pci_probe(struct pci_dev *pdev, const struct pci_device_id *id) mlxsw_pci->pdev = pdev; pci_set_drvdata(pdev, mlxsw_pci); + err = mlxsw_pci_cmd_init(mlxsw_pci); + if (err) + goto err_pci_cmd_init; + mlxsw_pci->bus_info.device_kind = driver_name; mlxsw_pci->bus_info.device_name = pci_name(mlxsw_pci->pdev); mlxsw_pci->bus_info.dev = &pdev->dev; @@ -1848,6 +1867,8 @@ static int mlxsw_pci_probe(struct pci_dev *pdev, const struct pci_device_id *id) return 0; err_bus_device_register: + mlxsw_pci_cmd_fini(mlxsw_pci); +err_pci_cmd_init: iounmap(mlxsw_pci->hw_addr); err_ioremap: err_pci_resource_len_check: @@ -1865,6 +1886,7 @@ static void mlxsw_pci_remove(struct pci_dev *pdev) struct mlxsw_pci *mlxsw_pci = pci_get_drvdata(pdev); mlxsw_core_bus_device_unregister(mlxsw_pci->core, false); + mlxsw_pci_cmd_fini(mlxsw_pci); iounmap(mlxsw_pci->hw_addr); pci_release_regions(mlxsw_pci->pdev); pci_disable_device(mlxsw_pci->pdev); From 9321f1aaf63e74ec3884347490e4ebb039f01b6e Mon Sep 17 00:00:00 2001 From: Nathan Chancellor Date: Fri, 10 Jul 2020 15:34:41 -0700 Subject: [PATCH 340/343] mips: Remove compiler check in unroll macro CONFIG_CC_IS_GCC is undefined when Clang is used, which breaks the build (see our Travis link below). Clang 8 was chosen as a minimum version for this check because there were some improvements around __builtin_constant_p in that release. In reality, MIPS was not even buildable until clang 9 so that check was not technically necessary. Just remove all compiler checks and just assume that we have a working compiler. Fixes: d4e60453266b ("Restore gcc check in mips asm/unroll.h") Link: https://travis-ci.com/github/ClangBuiltLinux/continuous-integration/jobs/359642821 Signed-off-by: Nathan Chancellor Signed-off-by: Linus Torvalds --- arch/mips/include/asm/unroll.h | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/arch/mips/include/asm/unroll.h b/arch/mips/include/asm/unroll.h index 49009319ac2c..7dd4a80e05d6 100644 --- a/arch/mips/include/asm/unroll.h +++ b/arch/mips/include/asm/unroll.h @@ -25,9 +25,7 @@ * generate reasonable code for the switch statement, \ * so we skip the sanity check for those compilers. \ */ \ - BUILD_BUG_ON((CONFIG_CC_IS_GCC || \ - CONFIG_CLANG_VERSION >= 80000) && \ - !__builtin_constant_p(times)); \ + BUILD_BUG_ON(!__builtin_constant_p(times)); \ \ switch (times) { \ case 32: fn(__VA_ARGS__); /* fall through */ \ From dd821e0c95a64b5923a0c57f07d3f7563553e756 Mon Sep 17 00:00:00 2001 From: Pavel Begunkov Date: Sun, 12 Jul 2020 13:23:08 +0300 Subject: [PATCH 341/343] io_uring: fix missing msg_name assignment Ensure to set msg.msg_name for the async portion of send/recvmsg, as the header copy will copy to/from it. Cc: stable@vger.kernel.org # v5.5+ Signed-off-by: Pavel Begunkov Signed-off-by: Jens Axboe --- fs/io_uring.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/fs/io_uring.c b/fs/io_uring.c index ca8abde48b6c..5570d6aeaff8 100644 --- a/fs/io_uring.c +++ b/fs/io_uring.c @@ -3553,6 +3553,7 @@ static int io_sendmsg_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe) if (req->flags & REQ_F_NEED_CLEANUP) return 0; + io->msg.msg.msg_name = &io->msg.addr; io->msg.iov = io->msg.fast_iov; ret = sendmsg_copy_msghdr(&io->msg.msg, sr->msg, sr->msg_flags, &io->msg.iov); @@ -3734,6 +3735,7 @@ static int __io_compat_recvmsg_copy_hdr(struct io_kiocb *req, static int io_recvmsg_copy_hdr(struct io_kiocb *req, struct io_async_ctx *io) { + io->msg.msg.msg_name = &io->msg.addr; io->msg.iov = io->msg.fast_iov; #ifdef CONFIG_COMPAT From 16d598030a37853a7a6b4384cad19c9c0af2f021 Mon Sep 17 00:00:00 2001 From: Pavel Begunkov Date: Sun, 12 Jul 2020 16:16:47 +0300 Subject: [PATCH 342/343] io_uring: fix not initialised work->flags 59960b9deb535 ("io_uring: fix lazy work init") tried to fix missing io_req_init_async(), but left out work.flags and hash. Do it earlier. Fixes: 7cdaf587de7c ("io_uring: avoid whole io_wq_work copy for requests completed inline") Signed-off-by: Pavel Begunkov Signed-off-by: Jens Axboe --- fs/io_uring.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/fs/io_uring.c b/fs/io_uring.c index 5570d6aeaff8..9fd7e69696c3 100644 --- a/fs/io_uring.c +++ b/fs/io_uring.c @@ -1096,6 +1096,8 @@ static inline void io_prep_async_work(struct io_kiocb *req, { const struct io_op_def *def = &io_op_defs[req->opcode]; + io_req_init_async(req); + if (req->flags & REQ_F_ISREG) { if (def->hash_reg_file) io_wq_hash_work(&req->work, file_inode(req->file)); @@ -1104,7 +1106,6 @@ static inline void io_prep_async_work(struct io_kiocb *req, req->work.flags |= IO_WQ_WORK_UNBOUND; } - io_req_init_async(req); io_req_work_grab_env(req, def); *link = io_prep_linked_timeout(req); From 11ba468877bb23f28956a35e896356252d63c983 Mon Sep 17 00:00:00 2001 From: Linus Torvalds Date: Sun, 12 Jul 2020 16:34:50 -0700 Subject: [PATCH 343/343] Linux 5.8-rc5 --- Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Makefile b/Makefile index fe0164a654c7..0b5f8538bde5 100644 --- a/Makefile +++ b/Makefile @@ -2,7 +2,7 @@ VERSION = 5 PATCHLEVEL = 8 SUBLEVEL = 0 -EXTRAVERSION = -rc4 +EXTRAVERSION = -rc5 NAME = Kleptomaniac Octopus # *DOCUMENTATION*