From f80531c8217c5b348058a0280f3be05835abd737 Mon Sep 17 00:00:00 2001 From: Jarkko Nikula Date: Thu, 11 Jun 2020 14:05:34 +0300 Subject: [PATCH 001/122] i2c: Use separate MODULE_AUTHOR() statements for multiple authors Modules with multiple authors should use multiple MODULE_AUTHOR() statements according to comment in include/linux/module.h. Split the i2c modules with multiple authors to use multiple MODULE_AUTHOR() statements. Signed-off-by: Jarkko Nikula Reviewed-by: Jean Delvare Signed-off-by: Wolfram Sang --- drivers/i2c/algos/i2c-algo-pca.c | 4 ++-- drivers/i2c/busses/i2c-ali1535.c | 8 ++++---- drivers/i2c/busses/i2c-ali15x3.c | 6 +++--- drivers/i2c/busses/i2c-emev2.c | 3 ++- drivers/i2c/busses/i2c-i801.c | 3 ++- drivers/i2c/busses/i2c-nomadik.c | 3 ++- drivers/i2c/busses/i2c-piix4.c | 4 ++-- drivers/i2c/busses/i2c-pnx.c | 3 ++- drivers/i2c/busses/i2c-sh_mobile.c | 3 ++- drivers/i2c/busses/i2c-sibyte.c | 3 ++- drivers/i2c/busses/i2c-sirf.c | 4 ++-- drivers/i2c/busses/i2c-viapro.c | 6 +++--- drivers/i2c/i2c-dev.c | 4 ++-- 13 files changed, 30 insertions(+), 24 deletions(-) diff --git a/drivers/i2c/algos/i2c-algo-pca.c b/drivers/i2c/algos/i2c-algo-pca.c index 7f10312d1b88..8620963e442c 100644 --- a/drivers/i2c/algos/i2c-algo-pca.c +++ b/drivers/i2c/algos/i2c-algo-pca.c @@ -541,8 +541,8 @@ int i2c_pca_add_numbered_bus(struct i2c_adapter *adap) } EXPORT_SYMBOL(i2c_pca_add_numbered_bus); -MODULE_AUTHOR("Ian Campbell , " - "Wolfram Sang "); +MODULE_AUTHOR("Ian Campbell "); +MODULE_AUTHOR("Wolfram Sang "); MODULE_DESCRIPTION("I2C-Bus PCA9564/PCA9665 algorithm"); MODULE_LICENSE("GPL"); diff --git a/drivers/i2c/busses/i2c-ali1535.c b/drivers/i2c/busses/i2c-ali1535.c index a43deea390f5..fb93152845f4 100644 --- a/drivers/i2c/busses/i2c-ali1535.c +++ b/drivers/i2c/busses/i2c-ali1535.c @@ -519,9 +519,9 @@ static struct pci_driver ali1535_driver = { module_pci_driver(ali1535_driver); -MODULE_AUTHOR("Frodo Looijaard , " - "Philip Edelbrock , " - "Mark D. Studebaker " - "and Dan Eaton "); +MODULE_AUTHOR("Frodo Looijaard "); +MODULE_AUTHOR("Philip Edelbrock "); +MODULE_AUTHOR("Mark D. Studebaker "); +MODULE_AUTHOR("Dan Eaton "); MODULE_DESCRIPTION("ALI1535 SMBus driver"); MODULE_LICENSE("GPL"); diff --git a/drivers/i2c/busses/i2c-ali15x3.c b/drivers/i2c/busses/i2c-ali15x3.c index 02185a1cfa77..cc58feacd082 100644 --- a/drivers/i2c/busses/i2c-ali15x3.c +++ b/drivers/i2c/busses/i2c-ali15x3.c @@ -502,8 +502,8 @@ static struct pci_driver ali15x3_driver = { module_pci_driver(ali15x3_driver); -MODULE_AUTHOR ("Frodo Looijaard , " - "Philip Edelbrock , " - "and Mark D. Studebaker "); +MODULE_AUTHOR("Frodo Looijaard "); +MODULE_AUTHOR("Philip Edelbrock "); +MODULE_AUTHOR("Mark D. Studebaker "); MODULE_DESCRIPTION("ALI15X3 SMBus driver"); MODULE_LICENSE("GPL"); diff --git a/drivers/i2c/busses/i2c-emev2.c b/drivers/i2c/busses/i2c-emev2.c index 1a319352e51b..a08554c1a570 100644 --- a/drivers/i2c/busses/i2c-emev2.c +++ b/drivers/i2c/busses/i2c-emev2.c @@ -442,6 +442,7 @@ static struct platform_driver em_i2c_driver = { module_platform_driver(em_i2c_driver); MODULE_DESCRIPTION("EMEV2 I2C bus driver"); -MODULE_AUTHOR("Ian Molton and Wolfram Sang "); +MODULE_AUTHOR("Ian Molton"); +MODULE_AUTHOR("Wolfram Sang "); MODULE_LICENSE("GPL v2"); MODULE_DEVICE_TABLE(of, em_i2c_ids); diff --git a/drivers/i2c/busses/i2c-i801.c b/drivers/i2c/busses/i2c-i801.c index fea644921a76..1fc7ae77753d 100644 --- a/drivers/i2c/busses/i2c-i801.c +++ b/drivers/i2c/busses/i2c-i801.c @@ -1986,7 +1986,8 @@ static void __exit i2c_i801_exit(void) pci_unregister_driver(&i801_driver); } -MODULE_AUTHOR("Mark D. Studebaker , Jean Delvare "); +MODULE_AUTHOR("Mark D. Studebaker "); +MODULE_AUTHOR("Jean Delvare "); MODULE_DESCRIPTION("I801 SMBus driver"); MODULE_LICENSE("GPL"); diff --git a/drivers/i2c/busses/i2c-nomadik.c b/drivers/i2c/busses/i2c-nomadik.c index e1e8d4ef9aa7..d4b1b0865f67 100644 --- a/drivers/i2c/busses/i2c-nomadik.c +++ b/drivers/i2c/busses/i2c-nomadik.c @@ -1122,6 +1122,7 @@ static void __exit nmk_i2c_exit(void) subsys_initcall(nmk_i2c_init); module_exit(nmk_i2c_exit); -MODULE_AUTHOR("Sachin Verma, Srinidhi KASAGAR"); +MODULE_AUTHOR("Sachin Verma"); +MODULE_AUTHOR("Srinidhi KASAGAR"); MODULE_DESCRIPTION("Nomadik/Ux500 I2C driver"); MODULE_LICENSE("GPL"); diff --git a/drivers/i2c/busses/i2c-piix4.c b/drivers/i2c/busses/i2c-piix4.c index 69740a4ff1db..8c1b31ed0c42 100644 --- a/drivers/i2c/busses/i2c-piix4.c +++ b/drivers/i2c/busses/i2c-piix4.c @@ -1032,7 +1032,7 @@ static struct pci_driver piix4_driver = { module_pci_driver(piix4_driver); -MODULE_AUTHOR("Frodo Looijaard and " - "Philip Edelbrock "); +MODULE_AUTHOR("Frodo Looijaard "); +MODULE_AUTHOR("Philip Edelbrock "); MODULE_DESCRIPTION("PIIX4 SMBus driver"); MODULE_LICENSE("GPL"); diff --git a/drivers/i2c/busses/i2c-pnx.c b/drivers/i2c/busses/i2c-pnx.c index 5d7207c10f1d..8c4ec7f13f5a 100644 --- a/drivers/i2c/busses/i2c-pnx.c +++ b/drivers/i2c/busses/i2c-pnx.c @@ -781,7 +781,8 @@ static void __exit i2c_adap_pnx_exit(void) platform_driver_unregister(&i2c_pnx_driver); } -MODULE_AUTHOR("Vitaly Wool, Dennis Kovalev "); +MODULE_AUTHOR("Vitaly Wool"); +MODULE_AUTHOR("Dennis Kovalev "); MODULE_DESCRIPTION("I2C driver for Philips IP3204-based I2C busses"); MODULE_LICENSE("GPL"); MODULE_ALIAS("platform:pnx-i2c"); diff --git a/drivers/i2c/busses/i2c-sh_mobile.c b/drivers/i2c/busses/i2c-sh_mobile.c index 2cca1b21e26e..cab725559999 100644 --- a/drivers/i2c/busses/i2c-sh_mobile.c +++ b/drivers/i2c/busses/i2c-sh_mobile.c @@ -932,6 +932,7 @@ static void __exit sh_mobile_i2c_adap_exit(void) module_exit(sh_mobile_i2c_adap_exit); MODULE_DESCRIPTION("SuperH Mobile I2C Bus Controller driver"); -MODULE_AUTHOR("Magnus Damm and Wolfram Sang"); +MODULE_AUTHOR("Magnus Damm"); +MODULE_AUTHOR("Wolfram Sang"); MODULE_LICENSE("GPL v2"); MODULE_ALIAS("platform:i2c-sh_mobile"); diff --git a/drivers/i2c/busses/i2c-sibyte.c b/drivers/i2c/busses/i2c-sibyte.c index 9dcea2ba7168..8f71f01cb169 100644 --- a/drivers/i2c/busses/i2c-sibyte.c +++ b/drivers/i2c/busses/i2c-sibyte.c @@ -180,6 +180,7 @@ static void __exit i2c_sibyte_exit(void) module_init(i2c_sibyte_init); module_exit(i2c_sibyte_exit); -MODULE_AUTHOR("Kip Walker (Broadcom Corp.), Steven J. Hill "); +MODULE_AUTHOR("Kip Walker (Broadcom Corp.)"); +MODULE_AUTHOR("Steven J. Hill "); MODULE_DESCRIPTION("SMBus adapter routines for SiByte boards"); MODULE_LICENSE("GPL"); diff --git a/drivers/i2c/busses/i2c-sirf.c b/drivers/i2c/busses/i2c-sirf.c index d7f72ec331e8..30db8fafe078 100644 --- a/drivers/i2c/busses/i2c-sirf.c +++ b/drivers/i2c/busses/i2c-sirf.c @@ -470,6 +470,6 @@ static struct platform_driver i2c_sirfsoc_driver = { module_platform_driver(i2c_sirfsoc_driver); MODULE_DESCRIPTION("SiRF SoC I2C master controller driver"); -MODULE_AUTHOR("Zhiwu Song , " - "Xiangzhen Ye "); +MODULE_AUTHOR("Zhiwu Song "); +MODULE_AUTHOR("Xiangzhen Ye "); MODULE_LICENSE("GPL v2"); diff --git a/drivers/i2c/busses/i2c-viapro.c b/drivers/i2c/busses/i2c-viapro.c index 4abc7771af06..05aa92a3fbe0 100644 --- a/drivers/i2c/busses/i2c-viapro.c +++ b/drivers/i2c/busses/i2c-viapro.c @@ -489,9 +489,9 @@ static void __exit i2c_vt596_exit(void) } } -MODULE_AUTHOR("Kyosti Malkki , " - "Mark D. Studebaker and " - "Jean Delvare "); +MODULE_AUTHOR("Kyosti Malkki "); +MODULE_AUTHOR("Mark D. Studebaker "); +MODULE_AUTHOR("Jean Delvare "); MODULE_DESCRIPTION("vt82c596 SMBus driver"); MODULE_LICENSE("GPL"); diff --git a/drivers/i2c/i2c-dev.c b/drivers/i2c/i2c-dev.c index da020acc9bbd..6ceb11cc4be1 100644 --- a/drivers/i2c/i2c-dev.c +++ b/drivers/i2c/i2c-dev.c @@ -761,8 +761,8 @@ static void __exit i2c_dev_exit(void) unregister_chrdev_region(MKDEV(I2C_MAJOR, 0), I2C_MINORS); } -MODULE_AUTHOR("Frodo Looijaard and " - "Simon G. Vogl "); +MODULE_AUTHOR("Frodo Looijaard "); +MODULE_AUTHOR("Simon G. Vogl "); MODULE_DESCRIPTION("I2C /dev entries driver"); MODULE_LICENSE("GPL"); From f3e2bd713730e6d69e8e021d3ff574f01c39e7e3 Mon Sep 17 00:00:00 2001 From: John Keeping Date: Tue, 23 Jun 2020 13:06:46 +0100 Subject: [PATCH 002/122] i2c: rk3x: support master_xfer_atomic Enable i2c transactions in irq disabled contexts like poweroff where the PMIC is connected via i2c. Signed-off-by: John Keeping Tested-by: Heiko Stuebner Reviewed-by: Heiko Stuebner Signed-off-by: Wolfram Sang --- drivers/i2c/busses/i2c-rk3x.c | 39 +++++++++++++++++++++++++++++++---- 1 file changed, 35 insertions(+), 4 deletions(-) diff --git a/drivers/i2c/busses/i2c-rk3x.c b/drivers/i2c/busses/i2c-rk3x.c index bc698240c4aa..b67bb54caf27 100644 --- a/drivers/i2c/busses/i2c-rk3x.c +++ b/drivers/i2c/busses/i2c-rk3x.c @@ -10,6 +10,7 @@ #include #include #include +#include #include #include #include @@ -1040,8 +1041,21 @@ static int rk3x_i2c_setup(struct rk3x_i2c *i2c, struct i2c_msg *msgs, int num) return ret; } -static int rk3x_i2c_xfer(struct i2c_adapter *adap, - struct i2c_msg *msgs, int num) +static int rk3x_i2c_wait_xfer_poll(struct rk3x_i2c *i2c) +{ + ktime_t timeout = ktime_add_ms(ktime_get(), WAIT_TIMEOUT); + + while (READ_ONCE(i2c->busy) && + ktime_compare(ktime_get(), timeout) < 0) { + udelay(5); + rk3x_i2c_irq(0, i2c); + } + + return !i2c->busy; +} + +static int rk3x_i2c_xfer_common(struct i2c_adapter *adap, + struct i2c_msg *msgs, int num, bool polling) { struct rk3x_i2c *i2c = (struct rk3x_i2c *)adap->algo_data; unsigned long timeout, flags; @@ -1075,8 +1089,12 @@ static int rk3x_i2c_xfer(struct i2c_adapter *adap, rk3x_i2c_start(i2c); - timeout = wait_event_timeout(i2c->wait, !i2c->busy, - msecs_to_jiffies(WAIT_TIMEOUT)); + if (!polling) { + timeout = wait_event_timeout(i2c->wait, !i2c->busy, + msecs_to_jiffies(WAIT_TIMEOUT)); + } else { + timeout = rk3x_i2c_wait_xfer_poll(i2c); + } spin_lock_irqsave(&i2c->lock, flags); @@ -1110,6 +1128,18 @@ static int rk3x_i2c_xfer(struct i2c_adapter *adap, return ret < 0 ? ret : num; } +static int rk3x_i2c_xfer(struct i2c_adapter *adap, + struct i2c_msg *msgs, int num) +{ + return rk3x_i2c_xfer_common(adap, msgs, num, false); +} + +static int rk3x_i2c_xfer_polling(struct i2c_adapter *adap, + struct i2c_msg *msgs, int num) +{ + return rk3x_i2c_xfer_common(adap, msgs, num, true); +} + static __maybe_unused int rk3x_i2c_resume(struct device *dev) { struct rk3x_i2c *i2c = dev_get_drvdata(dev); @@ -1126,6 +1156,7 @@ static u32 rk3x_i2c_func(struct i2c_adapter *adap) static const struct i2c_algorithm rk3x_i2c_algorithm = { .master_xfer = rk3x_i2c_xfer, + .master_xfer_atomic = rk3x_i2c_xfer_polling, .functionality = rk3x_i2c_func, }; From 3f11011573f9bab446b635763c5b6eb92ffdc1e2 Mon Sep 17 00:00:00 2001 From: Yangtao Li Date: Tue, 14 Jul 2020 15:18:39 +0800 Subject: [PATCH 003/122] dt-bindings: i2c: mv64xxx: Add compatible for the A100 i2c node. Allwinner A100 have a mv64xxx i2c interface available to be used. Signed-off-by: Yangtao Li Reviewed-by: Rob Herring Signed-off-by: Wolfram Sang --- Documentation/devicetree/bindings/i2c/marvell,mv64xxx-i2c.yaml | 3 +++ 1 file changed, 3 insertions(+) diff --git a/Documentation/devicetree/bindings/i2c/marvell,mv64xxx-i2c.yaml b/Documentation/devicetree/bindings/i2c/marvell,mv64xxx-i2c.yaml index 2ceb05ba2df5..5b5ae402f97a 100644 --- a/Documentation/devicetree/bindings/i2c/marvell,mv64xxx-i2c.yaml +++ b/Documentation/devicetree/bindings/i2c/marvell,mv64xxx-i2c.yaml @@ -26,6 +26,9 @@ properties: - items: - const: allwinner,sun50i-a64-i2c - const: allwinner,sun6i-a31-i2c + - items: + - const: allwinner,sun50i-a100-i2c + - const: allwinner,sun6i-a31-i2c - items: - const: allwinner,sun50i-h6-i2c - const: allwinner,sun6i-a31-i2c From 4db7e1786db505eee86e6301cd42967f4da43be8 Mon Sep 17 00:00:00 2001 From: "Gustavo A. R. Silva" Date: Tue, 21 Jul 2020 18:05:10 -0500 Subject: [PATCH 004/122] i2c: busses: Use fallthrough pseudo-keyword Replace the existing /* fall through */ comments and its variants with the new pseudo-keyword macro fallthrough[1]. [1] https://www.kernel.org/doc/html/v5.7/process/deprecated.html?highlight=fallthrough#implicit-switch-case-fall-through Acked-by: Baruch Siach Reviewed-by: Jean Delvare Reviewed-by: Brendan Higgins Reviewed-by: Gregory CLEMENT Reviewed-by: Andy Shevchenko Signed-off-by: Gustavo A. R. Silva Signed-off-by: Wolfram Sang --- drivers/i2c/busses/i2c-amd8111.c | 2 +- drivers/i2c/busses/i2c-aspeed.c | 4 ++-- drivers/i2c/busses/i2c-designware-pcidrv.c | 2 +- drivers/i2c/busses/i2c-digicolor.c | 2 +- drivers/i2c/busses/i2c-i801.c | 8 ++++---- drivers/i2c/busses/i2c-mv64xxx.c | 9 ++++----- drivers/i2c/busses/i2c-synquacer.c | 3 +-- drivers/i2c/busses/i2c-viapro.c | 2 +- drivers/i2c/busses/scx200_acb.c | 2 +- drivers/i2c/i2c-slave-eeprom.c | 2 +- 10 files changed, 17 insertions(+), 19 deletions(-) diff --git a/drivers/i2c/busses/i2c-amd8111.c b/drivers/i2c/busses/i2c-amd8111.c index 2b14fef5bf26..34862ad3423e 100644 --- a/drivers/i2c/busses/i2c-amd8111.c +++ b/drivers/i2c/busses/i2c-amd8111.c @@ -381,7 +381,7 @@ static s32 amd8111_access(struct i2c_adapter * adap, u16 addr, if (status) return status; len = min_t(u8, len, I2C_SMBUS_BLOCK_MAX); - /* fall through */ + fallthrough; case I2C_SMBUS_I2C_BLOCK_DATA: for (i = 0; i < len; i++) { status = amd_ec_read(smbus, AMD_SMB_DATA + i, diff --git a/drivers/i2c/busses/i2c-aspeed.c b/drivers/i2c/busses/i2c-aspeed.c index f51702d86a90..31268074c422 100644 --- a/drivers/i2c/busses/i2c-aspeed.c +++ b/drivers/i2c/busses/i2c-aspeed.c @@ -504,7 +504,7 @@ static u32 aspeed_i2c_master_irq(struct aspeed_i2c_bus *bus, u32 irq_status) goto error_and_stop; } irq_handled |= ASPEED_I2CD_INTR_TX_ACK; - /* fall through */ + fallthrough; case ASPEED_I2C_MASTER_TX_FIRST: if (bus->buf_index < msg->len) { bus->master_state = ASPEED_I2C_MASTER_TX; @@ -520,7 +520,7 @@ static u32 aspeed_i2c_master_irq(struct aspeed_i2c_bus *bus, u32 irq_status) /* RX may not have completed yet (only address cycle) */ if (!(irq_status & ASPEED_I2CD_INTR_RX_DONE)) goto out_no_complete; - /* fall through */ + fallthrough; case ASPEED_I2C_MASTER_RX: if (unlikely(!(irq_status & ASPEED_I2CD_INTR_RX_DONE))) { dev_err(bus->dev, "master failed to RX\n"); diff --git a/drivers/i2c/busses/i2c-designware-pcidrv.c b/drivers/i2c/busses/i2c-designware-pcidrv.c index 8522134f9ea9..55c83a7a24f3 100644 --- a/drivers/i2c/busses/i2c-designware-pcidrv.c +++ b/drivers/i2c/busses/i2c-designware-pcidrv.c @@ -90,7 +90,7 @@ static int mfld_setup(struct pci_dev *pdev, struct dw_pci_controller *c) switch (pdev->device) { case 0x0817: dev->timings.bus_freq_hz = I2C_MAX_STANDARD_MODE_FREQ; - /* fall through */ + fallthrough; case 0x0818: case 0x0819: c->bus_num = pdev->device - 0x817 + 3; diff --git a/drivers/i2c/busses/i2c-digicolor.c b/drivers/i2c/busses/i2c-digicolor.c index 332f00437479..f67639dc74b7 100644 --- a/drivers/i2c/busses/i2c-digicolor.c +++ b/drivers/i2c/busses/i2c-digicolor.c @@ -187,7 +187,7 @@ static irqreturn_t dc_i2c_irq(int irq, void *dev_id) break; } i2c->state = STATE_WRITE; - /* fall through */ + fallthrough; case STATE_WRITE: if (i2c->msgbuf_ptr < i2c->msg->len) dc_i2c_write_buf(i2c); diff --git a/drivers/i2c/busses/i2c-i801.c b/drivers/i2c/busses/i2c-i801.c index 1fc7ae77753d..638e7f7c66cc 100644 --- a/drivers/i2c/busses/i2c-i801.c +++ b/drivers/i2c/busses/i2c-i801.c @@ -1765,19 +1765,19 @@ static int i801_probe(struct pci_dev *dev, const struct pci_device_id *id) case PCI_DEVICE_ID_INTEL_WELLSBURG_SMBUS_MS1: case PCI_DEVICE_ID_INTEL_WELLSBURG_SMBUS_MS2: priv->features |= FEATURE_IDF; - /* fall through */ + fallthrough; default: priv->features |= FEATURE_BLOCK_PROC; priv->features |= FEATURE_I2C_BLOCK_READ; priv->features |= FEATURE_IRQ; - /* fall through */ + fallthrough; case PCI_DEVICE_ID_INTEL_82801DB_3: priv->features |= FEATURE_SMBUS_PEC; priv->features |= FEATURE_BLOCK_BUFFER; - /* fall through */ + fallthrough; case PCI_DEVICE_ID_INTEL_82801CA_3: priv->features |= FEATURE_HOST_NOTIFY; - /* fall through */ + fallthrough; case PCI_DEVICE_ID_INTEL_82801BA_2: case PCI_DEVICE_ID_INTEL_82801AB_3: case PCI_DEVICE_ID_INTEL_82801AA_3: diff --git a/drivers/i2c/busses/i2c-mv64xxx.c b/drivers/i2c/busses/i2c-mv64xxx.c index 829b8c98ae51..8d9d4ffdcd24 100644 --- a/drivers/i2c/busses/i2c-mv64xxx.c +++ b/drivers/i2c/busses/i2c-mv64xxx.c @@ -251,7 +251,7 @@ mv64xxx_i2c_fsm(struct mv64xxx_i2c_data *drv_data, u32 status) MV64XXX_I2C_STATE_WAITING_FOR_ADDR_2_ACK; break; } - /* FALLTHRU */ + fallthrough; case MV64XXX_I2C_STATUS_MAST_WR_ADDR_2_ACK: /* 0xd0 */ case MV64XXX_I2C_STATUS_MAST_WR_ACK: /* 0x28 */ if ((drv_data->bytes_left == 0) @@ -282,14 +282,14 @@ mv64xxx_i2c_fsm(struct mv64xxx_i2c_data *drv_data, u32 status) MV64XXX_I2C_STATE_WAITING_FOR_ADDR_2_ACK; break; } - /* FALLTHRU */ + fallthrough; case MV64XXX_I2C_STATUS_MAST_RD_ADDR_2_ACK: /* 0xe0 */ if (drv_data->bytes_left == 0) { drv_data->action = MV64XXX_I2C_ACTION_SEND_STOP; drv_data->state = MV64XXX_I2C_STATE_IDLE; break; } - /* FALLTHRU */ + fallthrough; case MV64XXX_I2C_STATUS_MAST_RD_DATA_ACK: /* 0x50 */ if (status != MV64XXX_I2C_STATUS_MAST_RD_DATA_ACK) drv_data->action = MV64XXX_I2C_ACTION_CONTINUE; @@ -417,8 +417,7 @@ mv64xxx_i2c_do_action(struct mv64xxx_i2c_data *drv_data) "mv64xxx_i2c_do_action: Invalid action: %d\n", drv_data->action); drv_data->rc = -EIO; - - /* FALLTHRU */ + fallthrough; case MV64XXX_I2C_ACTION_SEND_STOP: drv_data->cntl_bits &= ~MV64XXX_I2C_REG_CONTROL_INTEN; writel(drv_data->cntl_bits | MV64XXX_I2C_REG_CONTROL_STOP, diff --git a/drivers/i2c/busses/i2c-synquacer.c b/drivers/i2c/busses/i2c-synquacer.c index c9a3dba6a75d..31be1811d5e6 100644 --- a/drivers/i2c/busses/i2c-synquacer.c +++ b/drivers/i2c/busses/i2c-synquacer.c @@ -398,8 +398,7 @@ static irqreturn_t synquacer_i2c_isr(int irq, void *dev_id) if (i2c->state == STATE_READ) goto prepare_read; - - /* fall through */ + fallthrough; case STATE_WRITE: if (bsr & SYNQUACER_I2C_BSR_LRB) { diff --git a/drivers/i2c/busses/i2c-viapro.c b/drivers/i2c/busses/i2c-viapro.c index 05aa92a3fbe0..970ccdcbb889 100644 --- a/drivers/i2c/busses/i2c-viapro.c +++ b/drivers/i2c/busses/i2c-viapro.c @@ -228,7 +228,7 @@ static s32 vt596_access(struct i2c_adapter *adap, u16 addr, goto exit_unsupported; if (read_write == I2C_SMBUS_READ) outb_p(data->block[0], SMBHSTDAT0); - /* Fall through */ + fallthrough; case I2C_SMBUS_BLOCK_DATA: outb_p(command, SMBHSTCMD); if (read_write == I2C_SMBUS_WRITE) { diff --git a/drivers/i2c/busses/scx200_acb.c b/drivers/i2c/busses/scx200_acb.c index bd9afa383d12..7b42a18bd05c 100644 --- a/drivers/i2c/busses/scx200_acb.c +++ b/drivers/i2c/busses/scx200_acb.c @@ -151,7 +151,7 @@ static void scx200_acb_machine(struct scx200_acb_iface *iface, u8 status) case state_repeat_start: outb(inb(ACBCTL1) | ACBCTL1_START, ACBCTL1); - /* fallthrough */ + fallthrough; case state_quick: if (iface->address_byte & 1) { diff --git a/drivers/i2c/i2c-slave-eeprom.c b/drivers/i2c/i2c-slave-eeprom.c index 593f2fd39d17..5c7ae421cacf 100644 --- a/drivers/i2c/i2c-slave-eeprom.c +++ b/drivers/i2c/i2c-slave-eeprom.c @@ -66,7 +66,7 @@ static int i2c_slave_eeprom_slave_cb(struct i2c_client *client, case I2C_SLAVE_READ_PROCESSED: /* The previous byte made it to the bus, get next one */ eeprom->buffer_idx++; - /* fallthrough */ + fallthrough; case I2C_SLAVE_READ_REQUESTED: spin_lock(&eeprom->buffer_lock); *val = eeprom->buffer[eeprom->buffer_idx & eeprom->address_mask]; From 58031a26bf2b0c458511df146143c78ab569f4da Mon Sep 17 00:00:00 2001 From: Eddie James Date: Tue, 9 Jun 2020 15:15:55 -0500 Subject: [PATCH 005/122] i2c: fsi: Prevent adding adapters for ports without dts nodes Ports should be defined in the devicetree if they are to be enabled on the system. Signed-off-by: Eddie James Signed-off-by: Joel Stanley Signed-off-by: Wolfram Sang --- drivers/i2c/busses/i2c-fsi.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/i2c/busses/i2c-fsi.c b/drivers/i2c/busses/i2c-fsi.c index 977d6f524649..10332693edf0 100644 --- a/drivers/i2c/busses/i2c-fsi.c +++ b/drivers/i2c/busses/i2c-fsi.c @@ -703,7 +703,7 @@ static int fsi_i2c_probe(struct device *dev) for (port_no = 0; port_no < ports; port_no++) { np = fsi_i2c_find_port_of_node(dev->of_node, port_no); - if (np && !of_device_is_available(np)) + if (!of_device_is_available(np)) continue; port = kzalloc(sizeof(*port), GFP_KERNEL); From 0c2a34937f7e4c4776bb261114c475392da2355c Mon Sep 17 00:00:00 2001 From: Wolfram Sang Date: Tue, 30 Jun 2020 18:24:40 +0200 Subject: [PATCH 006/122] i2c: revert "i2c: core: Allow drivers to disable i2c-core irq mapping" This manually reverts commit d1d84bb95364ed604015c2b788caaf3dbca0262f. The only user has gone two years ago with commit 589edb56b424 ("ACPI / scan: Create platform device for INT33FE ACPI nodes") and no new user has showed up. Remove and hope we will never need it again. Signed-off-by: Wolfram Sang Acked-by: Hans de Goede Signed-off-by: Wolfram Sang --- drivers/i2c/i2c-core-base.c | 6 +++--- include/linux/i2c.h | 3 --- 2 files changed, 3 insertions(+), 6 deletions(-) diff --git a/drivers/i2c/i2c-core-base.c b/drivers/i2c/i2c-core-base.c index 26f03a14a478..dc43242a85ba 100644 --- a/drivers/i2c/i2c-core-base.c +++ b/drivers/i2c/i2c-core-base.c @@ -319,11 +319,9 @@ static int i2c_device_probe(struct device *dev) if (!client) return 0; - driver = to_i2c_driver(dev->driver); - client->irq = client->init_irq; - if (!client->irq && !driver->disable_i2c_core_irq_mapping) { + if (!client->irq) { int irq = -ENOENT; if (client->flags & I2C_CLIENT_HOST_NOTIFY) { @@ -349,6 +347,8 @@ static int i2c_device_probe(struct device *dev) client->irq = irq; } + driver = to_i2c_driver(dev->driver); + /* * An I2C ID table is not mandatory, if and only if, a suitable OF * or ACPI ID table is supplied for the probing device. diff --git a/include/linux/i2c.h b/include/linux/i2c.h index b8b8963f8bb9..098405df431f 100644 --- a/include/linux/i2c.h +++ b/include/linux/i2c.h @@ -231,7 +231,6 @@ enum i2c_alert_protocol { * @detect: Callback for device detection * @address_list: The I2C addresses to probe (for detect) * @clients: List of detected clients we created (for i2c-core use only) - * @disable_i2c_core_irq_mapping: Tell the i2c-core to not do irq-mapping * * The driver.owner field should be set to the module owner of this driver. * The driver.name field should be set to the name of this driver. @@ -290,8 +289,6 @@ struct i2c_driver { int (*detect)(struct i2c_client *client, struct i2c_board_info *info); const unsigned short *address_list; struct list_head clients; - - bool disable_i2c_core_irq_mapping; }; #define to_i2c_driver(d) container_of(d, struct i2c_driver, driver) From 314139f9f0abdba61ed9a8463bbcb0bf900ac5a2 Mon Sep 17 00:00:00 2001 From: Wolfram Sang Date: Mon, 29 Jun 2020 17:38:07 +0200 Subject: [PATCH 007/122] i2c: rcar: slave: only send STOP event when we have been addressed When the SSR interrupt is activated, it will detect every STOP condition on the bus, not only the ones after we have been addressed. So, enable this interrupt only after we have been addressed, and disable it otherwise. Fixes: de20d1857dd6 ("i2c: rcar: add slave support") Signed-off-by: Wolfram Sang Signed-off-by: Wolfram Sang --- drivers/i2c/busses/i2c-rcar.c | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/drivers/i2c/busses/i2c-rcar.c b/drivers/i2c/busses/i2c-rcar.c index a45c4bf1ec01..c2921e1d8c4c 100644 --- a/drivers/i2c/busses/i2c-rcar.c +++ b/drivers/i2c/busses/i2c-rcar.c @@ -583,13 +583,14 @@ static bool rcar_i2c_slave_irq(struct rcar_i2c_priv *priv) rcar_i2c_write(priv, ICSIER, SDR | SSR | SAR); } - rcar_i2c_write(priv, ICSSR, ~SAR & 0xff); + /* Clear SSR, too, because of old STOPs to other clients than us */ + rcar_i2c_write(priv, ICSSR, ~(SAR | SSR) & 0xff); } /* master sent stop */ if (ssr_filtered & SSR) { i2c_slave_event(priv->slave, I2C_SLAVE_STOP, &value); - rcar_i2c_write(priv, ICSIER, SAR | SSR); + rcar_i2c_write(priv, ICSIER, SAR); rcar_i2c_write(priv, ICSSR, ~SSR & 0xff); } @@ -853,7 +854,7 @@ static int rcar_reg_slave(struct i2c_client *slave) priv->slave = slave; rcar_i2c_write(priv, ICSAR, slave->addr); rcar_i2c_write(priv, ICSSR, 0); - rcar_i2c_write(priv, ICSIER, SAR | SSR); + rcar_i2c_write(priv, ICSIER, SAR); rcar_i2c_write(priv, ICSCR, SIE | SDBS); return 0; From 471fb8c55c24a1b50a07d7708aa2d7c202ec896e Mon Sep 17 00:00:00 2001 From: Lad Prabhakar Date: Wed, 15 Jul 2020 12:09:01 +0100 Subject: [PATCH 008/122] dt-bindings: i2c: renesas,i2c: Document r8a774e1 support Document i2c controller for RZ/G2H (R8A774E1) SoC, which is compatible with R-Car Gen3 SoC family. Signed-off-by: Lad Prabhakar Reviewed-by: Marian-Cristian Rotariu Reviewed-by: Geert Uytterhoeven Acked-by: Rob Herring Signed-off-by: Wolfram Sang --- Documentation/devicetree/bindings/i2c/renesas,i2c.txt | 1 + 1 file changed, 1 insertion(+) diff --git a/Documentation/devicetree/bindings/i2c/renesas,i2c.txt b/Documentation/devicetree/bindings/i2c/renesas,i2c.txt index a03f9f5cb378..96d869ac3839 100644 --- a/Documentation/devicetree/bindings/i2c/renesas,i2c.txt +++ b/Documentation/devicetree/bindings/i2c/renesas,i2c.txt @@ -10,6 +10,7 @@ Required properties: "renesas,i2c-r8a774a1" if the device is a part of a R8A774A1 SoC. "renesas,i2c-r8a774b1" if the device is a part of a R8A774B1 SoC. "renesas,i2c-r8a774c0" if the device is a part of a R8A774C0 SoC. + "renesas,i2c-r8a774e1" if the device is a part of a R8A774E1 SoC. "renesas,i2c-r8a7778" if the device is a part of a R8A7778 SoC. "renesas,i2c-r8a7779" if the device is a part of a R8A7779 SoC. "renesas,i2c-r8a7790" if the device is a part of a R8A7790 SoC. From 99f0975d760b6320dd5490bd0bc3a31900284757 Mon Sep 17 00:00:00 2001 From: Lad Prabhakar Date: Wed, 15 Jul 2020 12:09:02 +0100 Subject: [PATCH 009/122] dt-bindings: i2c: renesas,iic: Document r8a774e1 support Document IIC controller for RZ/G2H (R8A774E1) SoC, which is compatible with R-Car Gen3 SoC family. Signed-off-by: Lad Prabhakar Reviewed-by: Marian-Cristian Rotariu Reviewed-by: Geert Uytterhoeven Acked-by: Rob Herring Signed-off-by: Wolfram Sang --- Documentation/devicetree/bindings/i2c/renesas,iic.txt | 1 + 1 file changed, 1 insertion(+) diff --git a/Documentation/devicetree/bindings/i2c/renesas,iic.txt b/Documentation/devicetree/bindings/i2c/renesas,iic.txt index 89facb09337a..93d412832e66 100644 --- a/Documentation/devicetree/bindings/i2c/renesas,iic.txt +++ b/Documentation/devicetree/bindings/i2c/renesas,iic.txt @@ -11,6 +11,7 @@ Required properties: - "renesas,iic-r8a774a1" (RZ/G2M) - "renesas,iic-r8a774b1" (RZ/G2N) - "renesas,iic-r8a774c0" (RZ/G2E) + - "renesas,iic-r8a774e1" (RZ/G2H) - "renesas,iic-r8a7790" (R-Car H2) - "renesas,iic-r8a7791" (R-Car M2-W) - "renesas,iic-r8a7792" (R-Car V2H) From 8682dc1245099cccd7e75f72ffb911341bf45ba7 Mon Sep 17 00:00:00 2001 From: Wolfram Sang Date: Wed, 1 Jul 2020 10:23:18 +0200 Subject: [PATCH 010/122] i2c: core: do not use logical device when creating irq domain Let's rather use its physical parent device to give proper namings and connections in debugfs. Signed-off-by: Wolfram Sang Reviewed-by: Benjamin Tissoires Signed-off-by: Wolfram Sang --- drivers/i2c/i2c-core-base.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/i2c/i2c-core-base.c b/drivers/i2c/i2c-core-base.c index dc43242a85ba..69217d2193da 100644 --- a/drivers/i2c/i2c-core-base.c +++ b/drivers/i2c/i2c-core-base.c @@ -1227,7 +1227,7 @@ static int i2c_setup_host_notify_irq_domain(struct i2c_adapter *adap) if (!i2c_check_functionality(adap, I2C_FUNC_SMBUS_HOST_NOTIFY)) return 0; - domain = irq_domain_create_linear(adap->dev.fwnode, + domain = irq_domain_create_linear(adap->dev.parent->fwnode, I2C_ADDR_7BITS_COUNT, &i2c_host_notify_irq_ops, adap); if (!domain) From 9e29420ddb1331de1f86fe544a460d8f2c6d11c6 Mon Sep 17 00:00:00 2001 From: Sowjanya Komatineni Date: Mon, 27 Jul 2020 13:57:19 -0700 Subject: [PATCH 011/122] i2c: tegra: Don't mark VI I2C as IRQ safe runtime PM Tegra VI I2C is part of VE power domain and typically used for camera usecases. VE power domain is not always on and is non-IRQ safe. So, IRQ safe device cannot be attached to a non-IRQ safe domain as it prevents powering off the PM domain and generic power domain driver will warn. Current driver marks all I2C devices as IRQ safe and VI I2C device does not require IRQ safe as it will not be used for atomic transfers. This patch has fix to make VI I2C as non-IRQ safe. Reviewed-by: Dmitry Osipenko Signed-off-by: Sowjanya Komatineni Signed-off-by: Wolfram Sang --- drivers/i2c/busses/i2c-tegra.c | 10 +++++++++- 1 file changed, 9 insertions(+), 1 deletion(-) diff --git a/drivers/i2c/busses/i2c-tegra.c b/drivers/i2c/busses/i2c-tegra.c index 15772964a05f..3be101824e80 100644 --- a/drivers/i2c/busses/i2c-tegra.c +++ b/drivers/i2c/busses/i2c-tegra.c @@ -1750,7 +1750,15 @@ static int tegra_i2c_probe(struct platform_device *pdev) goto unprepare_slow_clk; } - pm_runtime_irq_safe(&pdev->dev); + /* + * VI I2C is in VE power domain which is not always on and not + * an IRQ safe. So, IRQ safe device can't be attached to a non-IRQ + * safe domain as it prevents powering off the PM domain. + * Also, VI I2C device don't need to use runtime IRQ safe as it will + * not be used for atomic transfers. + */ + if (!i2c_dev->is_vi) + pm_runtime_irq_safe(&pdev->dev); pm_runtime_enable(&pdev->dev); if (!pm_runtime_enabled(&pdev->dev)) { ret = tegra_i2c_runtime_resume(&pdev->dev); From 7232f53e73839bec4a2747d0c01d29994f9f39b9 Mon Sep 17 00:00:00 2001 From: Sowjanya Komatineni Date: Mon, 27 Jul 2020 13:57:20 -0700 Subject: [PATCH 012/122] i2c: tegra: Remove NULL pointer check before clk_enable/disable/prepare/unprepare clk_enable, clk_disable, clk_prepare, and clk_unprepare APIs have implementation for checking clk pointer not NULL and clock consumers can safely call these APIs without NULL pointer check. So, this patch cleans up Tegra i2c driver to remove explicit checks before these APIs. Reviewed-by: Dmitry Osipenko Signed-off-by: Sowjanya Komatineni Signed-off-by: Wolfram Sang --- drivers/i2c/busses/i2c-tegra.c | 64 ++++++++++++---------------------- 1 file changed, 23 insertions(+), 41 deletions(-) diff --git a/drivers/i2c/busses/i2c-tegra.c b/drivers/i2c/busses/i2c-tegra.c index 3be101824e80..c91307b9e13d 100644 --- a/drivers/i2c/busses/i2c-tegra.c +++ b/drivers/i2c/busses/i2c-tegra.c @@ -655,21 +655,17 @@ static int __maybe_unused tegra_i2c_runtime_resume(struct device *dev) if (ret) return ret; - if (!i2c_dev->hw->has_single_clk_source) { - ret = clk_enable(i2c_dev->fast_clk); - if (ret < 0) { - dev_err(i2c_dev->dev, - "Enabling fast clk failed, err %d\n", ret); - return ret; - } + ret = clk_enable(i2c_dev->fast_clk); + if (ret < 0) { + dev_err(i2c_dev->dev, + "Enabling fast clk failed, err %d\n", ret); + return ret; } - if (i2c_dev->slow_clk) { - ret = clk_enable(i2c_dev->slow_clk); - if (ret < 0) { - dev_err(dev, "failed to enable slow clock: %d\n", ret); - return ret; - } + ret = clk_enable(i2c_dev->slow_clk); + if (ret < 0) { + dev_err(dev, "failed to enable slow clock: %d\n", ret); + return ret; } ret = clk_enable(i2c_dev->div_clk); @@ -688,12 +684,8 @@ static int __maybe_unused tegra_i2c_runtime_suspend(struct device *dev) struct tegra_i2c_dev *i2c_dev = dev_get_drvdata(dev); clk_disable(i2c_dev->div_clk); - - if (i2c_dev->slow_clk) - clk_disable(i2c_dev->slow_clk); - - if (!i2c_dev->hw->has_single_clk_source) - clk_disable(i2c_dev->fast_clk); + clk_disable(i2c_dev->slow_clk); + clk_disable(i2c_dev->fast_clk); return pinctrl_pm_select_idle_state(i2c_dev->dev); } @@ -1716,20 +1708,16 @@ static int tegra_i2c_probe(struct platform_device *pdev) platform_set_drvdata(pdev, i2c_dev); - if (!i2c_dev->hw->has_single_clk_source) { - ret = clk_prepare(i2c_dev->fast_clk); - if (ret < 0) { - dev_err(i2c_dev->dev, "Clock prepare failed %d\n", ret); - return ret; - } + ret = clk_prepare(i2c_dev->fast_clk); + if (ret < 0) { + dev_err(i2c_dev->dev, "Clock prepare failed %d\n", ret); + return ret; } - if (i2c_dev->slow_clk) { - ret = clk_prepare(i2c_dev->slow_clk); - if (ret < 0) { - dev_err(dev, "failed to prepare slow clock: %d\n", ret); - goto unprepare_fast_clk; - } + ret = clk_prepare(i2c_dev->slow_clk); + if (ret < 0) { + dev_err(dev, "failed to prepare slow clock: %d\n", ret); + goto unprepare_fast_clk; } if (i2c_dev->bus_clk_rate > I2C_MAX_FAST_MODE_FREQ && @@ -1843,12 +1831,10 @@ static int tegra_i2c_probe(struct platform_device *pdev) clk_unprepare(i2c_dev->div_clk); unprepare_slow_clk: - if (i2c_dev->is_vi) - clk_unprepare(i2c_dev->slow_clk); + clk_unprepare(i2c_dev->slow_clk); unprepare_fast_clk: - if (!i2c_dev->hw->has_single_clk_source) - clk_unprepare(i2c_dev->fast_clk); + clk_unprepare(i2c_dev->fast_clk); return ret; } @@ -1867,12 +1853,8 @@ static int tegra_i2c_remove(struct platform_device *pdev) tegra_i2c_runtime_suspend(&pdev->dev); clk_unprepare(i2c_dev->div_clk); - - if (i2c_dev->slow_clk) - clk_unprepare(i2c_dev->slow_clk); - - if (!i2c_dev->hw->has_single_clk_source) - clk_unprepare(i2c_dev->fast_clk); + clk_unprepare(i2c_dev->slow_clk); + clk_unprepare(i2c_dev->fast_clk); tegra_i2c_release_dma(i2c_dev); return 0; From 42aa38b54e35216351e52bb03382a38edc6f1bb8 Mon Sep 17 00:00:00 2001 From: Sowjanya Komatineni Date: Mon, 27 Jul 2020 13:57:21 -0700 Subject: [PATCH 013/122] i2c: tegra: Fix the error path in tegra_i2c_runtime_resume tegra_i2c_runtime_resume does not disable prior enabled clocks properly. This patch fixes it. Reviewed-by: Dmitry Osipenko Signed-off-by: Sowjanya Komatineni Signed-off-by: Wolfram Sang --- drivers/i2c/busses/i2c-tegra.c | 11 ++++++++--- 1 file changed, 8 insertions(+), 3 deletions(-) diff --git a/drivers/i2c/busses/i2c-tegra.c b/drivers/i2c/busses/i2c-tegra.c index c91307b9e13d..7b93c454b4c7 100644 --- a/drivers/i2c/busses/i2c-tegra.c +++ b/drivers/i2c/busses/i2c-tegra.c @@ -665,18 +665,23 @@ static int __maybe_unused tegra_i2c_runtime_resume(struct device *dev) ret = clk_enable(i2c_dev->slow_clk); if (ret < 0) { dev_err(dev, "failed to enable slow clock: %d\n", ret); - return ret; + goto disable_fast_clk; } ret = clk_enable(i2c_dev->div_clk); if (ret < 0) { dev_err(i2c_dev->dev, "Enabling div clk failed, err %d\n", ret); - clk_disable(i2c_dev->fast_clk); - return ret; + goto disable_slow_clk; } return 0; + +disable_slow_clk: + clk_disable(i2c_dev->slow_clk); +disable_fast_clk: + clk_disable(i2c_dev->fast_clk); + return ret; } static int __maybe_unused tegra_i2c_runtime_suspend(struct device *dev) From 0d722620986722208c6bc2a67f5d68aa73b7cd5f Mon Sep 17 00:00:00 2001 From: Sowjanya Komatineni Date: Mon, 27 Jul 2020 13:57:22 -0700 Subject: [PATCH 014/122] i2c: tegra: Fix runtime resume to re-init VI I2C VI I2C is on host1x bus and is part of VE power domain. During suspend/resume VE power domain goes through power off/on. So, controller reset followed by i2c re-initialization is required after the domain power up. This patch fixes it. Reviewed-by: Dmitry Osipenko Signed-off-by: Sowjanya Komatineni Signed-off-by: Wolfram Sang --- drivers/i2c/busses/i2c-tegra.c | 16 ++++++++++++++++ 1 file changed, 16 insertions(+) diff --git a/drivers/i2c/busses/i2c-tegra.c b/drivers/i2c/busses/i2c-tegra.c index 7b93c454b4c7..1bf36669ca67 100644 --- a/drivers/i2c/busses/i2c-tegra.c +++ b/drivers/i2c/busses/i2c-tegra.c @@ -293,6 +293,8 @@ struct tegra_i2c_dev { bool is_curr_atomic_xfer; }; +static int tegra_i2c_init(struct tegra_i2c_dev *i2c_dev, bool clk_reinit); + static void dvc_writel(struct tegra_i2c_dev *i2c_dev, u32 val, unsigned long reg) { @@ -675,8 +677,22 @@ static int __maybe_unused tegra_i2c_runtime_resume(struct device *dev) goto disable_slow_clk; } + /* + * VI I2C device is attached to VE power domain which goes through + * power ON/OFF during PM runtime resume/suspend. So, controller + * should go through reset and need to re-initialize after power + * domain ON. + */ + if (i2c_dev->is_vi) { + ret = tegra_i2c_init(i2c_dev, true); + if (ret) + goto disable_div_clk; + } + return 0; +disable_div_clk: + clk_disable(i2c_dev->div_clk); disable_slow_clk: clk_disable(i2c_dev->slow_clk); disable_fast_clk: From afca861bc6a3141c858d08279eb9afca76584fa6 Mon Sep 17 00:00:00 2001 From: Sowjanya Komatineni Date: Mon, 27 Jul 2020 13:57:23 -0700 Subject: [PATCH 015/122] i2c: tegra: Avoid tegra_i2c_init_dma() for Tegra210 vi i2c VI I2C is on host1x bus so APB DMA can't be used for Tegra210 VI I2C and there are no tx and rx dma channels for VI I2C. So, avoid attempt of requesting DMA channels. Reviewed-by: Dmitry Osipenko Signed-off-by: Sowjanya Komatineni Signed-off-by: Wolfram Sang --- drivers/i2c/busses/i2c-tegra.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/i2c/busses/i2c-tegra.c b/drivers/i2c/busses/i2c-tegra.c index 1bf36669ca67..00d3e4d7a01e 100644 --- a/drivers/i2c/busses/i2c-tegra.c +++ b/drivers/i2c/busses/i2c-tegra.c @@ -421,7 +421,7 @@ static int tegra_i2c_init_dma(struct tegra_i2c_dev *i2c_dev) dma_addr_t dma_phys; int err; - if (!i2c_dev->hw->has_apb_dma) + if (!i2c_dev->hw->has_apb_dma || i2c_dev->is_vi) return 0; if (!IS_ENABLED(CONFIG_TEGRA20_APB_DMA)) { From c7c9e914f9a0478fba4dc6f227cfd69cf84a4063 Mon Sep 17 00:00:00 2001 From: Wolfram Sang Date: Sun, 26 Jul 2020 18:16:06 +0200 Subject: [PATCH 016/122] i2c: rcar: avoid race when unregistering slave MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Due to the lockless design of the driver, it is theoretically possible to access a NULL pointer, if a slave interrupt was running while we were unregistering the slave. To make this rock solid, disable the interrupt for a short time while we are clearing the interrupt_enable register. This patch is purely based on code inspection. The OOPS is super-hard to trigger because clearing SAR (the address) makes interrupts even more unlikely to happen as well. While here, reinit SCR to SDBS because this bit should always be set according to documentation. There is no effect, though, because the interface is disabled. Fixes: 7b814d852af6 ("i2c: rcar: avoid race when unregistering slave client") Signed-off-by: Wolfram Sang Reviewed-by: Niklas Söderlund Signed-off-by: Wolfram Sang --- drivers/i2c/busses/i2c-rcar.c | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/drivers/i2c/busses/i2c-rcar.c b/drivers/i2c/busses/i2c-rcar.c index 76c615be5aca..9e883474db8c 100644 --- a/drivers/i2c/busses/i2c-rcar.c +++ b/drivers/i2c/busses/i2c-rcar.c @@ -866,12 +866,14 @@ static int rcar_unreg_slave(struct i2c_client *slave) WARN_ON(!priv->slave); - /* disable irqs and ensure none is running before clearing ptr */ + /* ensure no irq is running before clearing ptr */ + disable_irq(priv->irq); rcar_i2c_write(priv, ICSIER, 0); - rcar_i2c_write(priv, ICSCR, 0); + rcar_i2c_write(priv, ICSSR, 0); + enable_irq(priv->irq); + rcar_i2c_write(priv, ICSCR, SDBS); rcar_i2c_write(priv, ICSAR, 0); /* Gen2: must be 0 if not using slave */ - synchronize_irq(priv->irq); priv->slave = NULL; pm_runtime_put(rcar_i2c_priv_to_dev(priv)); From db36e827d8762eaa6aacd2c918a55ac487aa2501 Mon Sep 17 00:00:00 2001 From: Codrin Ciubotariu Date: Tue, 4 Aug 2020 12:59:23 +0300 Subject: [PATCH 017/122] dt-bindings: i2c: add generic properties for GPIO bus recovery The I2C GPIO bus recovery properties consist of two GPIOS and one extra pinctrl state ("gpio" or "recovery"). "recovery" pinctrl state is considered deprecated and "gpio" should be used instead. Not all are mandatory for recovery. Signed-off-by: Codrin Ciubotariu Reviewed-by: Rob Herring [wsa: kept sorting, minor whitespace change] Signed-off-by: Wolfram Sang --- Documentation/devicetree/bindings/i2c/i2c.txt | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/Documentation/devicetree/bindings/i2c/i2c.txt b/Documentation/devicetree/bindings/i2c/i2c.txt index 438ae123107e..a21c359b9f02 100644 --- a/Documentation/devicetree/bindings/i2c/i2c.txt +++ b/Documentation/devicetree/bindings/i2c/i2c.txt @@ -72,6 +72,16 @@ wants to support one of the below features, it should adapt these bindings. this information to adapt power management to keep the arbitration awake all the time, for example. Can not be combined with 'single-master'. +- pinctrl + add extra pinctrl to configure SCL/SDA pins to GPIO function for bus + recovery, call it "gpio" or "recovery" (deprecated) state + +- scl-gpios + specify the gpio related to SCL pin. Used for GPIO bus recovery. + +- sda-gpios + specify the gpio related to SDA pin. Optional for GPIO bus recovery. + - single-master states that there is no other master active on this bus. The OS can use this information to detect a stalled bus more reliably, for example. From 75820314de26b00aaea0d0e79269c0d17914c5c4 Mon Sep 17 00:00:00 2001 From: Codrin Ciubotariu Date: Tue, 4 Aug 2020 12:59:24 +0300 Subject: [PATCH 018/122] i2c: core: add generic I2C GPIO recovery Multiple I2C bus drivers use similar bindings to obtain information needed for I2C recovery. For example, for platforms using device-tree, the properties look something like this: &i2c { ... pinctrl-names = "default", "gpio"; pinctrl-0 = <&pinctrl_i2c_default>; pinctrl-1 = <&pinctrl_i2c_gpio>; sda-gpios = <&pio 0 GPIO_ACTIVE_HIGH>; scl-gpios = <&pio 1 (GPIO_ACTIVE_HIGH | GPIO_OPEN_DRAIN)>; ... } For this reason, we can add this common initialization in the core. This way, other I2C bus drivers will be able to support GPIO recovery just by providing a pointer to platform's pinctrl and calling i2c_recover_bus() when SDA is stuck low. Signed-off-by: Codrin Ciubotariu [wsa: inverted one logic for better readability, minor update to kdoc] Signed-off-by: Wolfram Sang --- drivers/i2c/i2c-core-base.c | 126 ++++++++++++++++++++++++++++++++++++ include/linux/i2c.h | 11 ++++ 2 files changed, 137 insertions(+) diff --git a/drivers/i2c/i2c-core-base.c b/drivers/i2c/i2c-core-base.c index 69217d2193da..ece25560eae4 100644 --- a/drivers/i2c/i2c-core-base.c +++ b/drivers/i2c/i2c-core-base.c @@ -32,6 +32,7 @@ #include #include #include +#include #include #include #include @@ -181,6 +182,8 @@ int i2c_generic_scl_recovery(struct i2c_adapter *adap) if (bri->prepare_recovery) bri->prepare_recovery(adap); + if (bri->pinctrl) + pinctrl_select_state(bri->pinctrl, bri->pins_gpio); /* * If we can set SDA, we will always create a STOP to ensure additional @@ -236,6 +239,8 @@ int i2c_generic_scl_recovery(struct i2c_adapter *adap) if (bri->unprepare_recovery) bri->unprepare_recovery(adap); + if (bri->pinctrl) + pinctrl_select_state(bri->pinctrl, bri->pins_default); return ret; } @@ -251,6 +256,125 @@ int i2c_recover_bus(struct i2c_adapter *adap) } EXPORT_SYMBOL_GPL(i2c_recover_bus); +static void i2c_gpio_init_pinctrl_recovery(struct i2c_adapter *adap) +{ + struct i2c_bus_recovery_info *bri = adap->bus_recovery_info; + struct device *dev = &adap->dev; + struct pinctrl *p = bri->pinctrl; + + /* + * we can't change states without pinctrl, so remove the states if + * populated + */ + if (!p) { + bri->pins_default = NULL; + bri->pins_gpio = NULL; + return; + } + + if (!bri->pins_default) { + bri->pins_default = pinctrl_lookup_state(p, + PINCTRL_STATE_DEFAULT); + if (IS_ERR(bri->pins_default)) { + dev_dbg(dev, PINCTRL_STATE_DEFAULT " state not found for GPIO recovery\n"); + bri->pins_default = NULL; + } + } + if (!bri->pins_gpio) { + bri->pins_gpio = pinctrl_lookup_state(p, "gpio"); + if (IS_ERR(bri->pins_gpio)) + bri->pins_gpio = pinctrl_lookup_state(p, "recovery"); + + if (IS_ERR(bri->pins_gpio)) { + dev_dbg(dev, "no gpio or recovery state found for GPIO recovery\n"); + bri->pins_gpio = NULL; + } + } + + /* for pinctrl state changes, we need all the information */ + if (bri->pins_default && bri->pins_gpio) { + dev_info(dev, "using pinctrl states for GPIO recovery"); + } else { + bri->pinctrl = NULL; + bri->pins_default = NULL; + bri->pins_gpio = NULL; + } +} + +static int i2c_gpio_init_generic_recovery(struct i2c_adapter *adap) +{ + struct i2c_bus_recovery_info *bri = adap->bus_recovery_info; + struct device *dev = &adap->dev; + struct gpio_desc *gpiod; + int ret = 0; + + /* + * don't touch the recovery information if the driver is not using + * generic SCL recovery + */ + if (bri->recover_bus && bri->recover_bus != i2c_generic_scl_recovery) + return 0; + + /* + * pins might be taken as GPIO, so we should inform pinctrl about + * this and move the state to GPIO + */ + if (bri->pinctrl) + pinctrl_select_state(bri->pinctrl, bri->pins_gpio); + + /* + * if there is incomplete or no recovery information, see if generic + * GPIO recovery is available + */ + if (!bri->scl_gpiod) { + gpiod = devm_gpiod_get(dev, "scl", GPIOD_OUT_HIGH_OPEN_DRAIN); + if (PTR_ERR(gpiod) == -EPROBE_DEFER) { + ret = -EPROBE_DEFER; + goto cleanup_pinctrl_state; + } + if (!IS_ERR(gpiod)) { + bri->scl_gpiod = gpiod; + bri->recover_bus = i2c_generic_scl_recovery; + dev_info(dev, "using generic GPIOs for recovery\n"); + } + } + + /* SDA GPIOD line is optional, so we care about DEFER only */ + if (!bri->sda_gpiod) { + /* + * We have SCL. Pull SCL low and wait a bit so that SDA glitches + * have no effect. + */ + gpiod_direction_output(bri->scl_gpiod, 0); + udelay(10); + gpiod = devm_gpiod_get(dev, "sda", GPIOD_IN); + + /* Wait a bit in case of a SDA glitch, and then release SCL. */ + udelay(10); + gpiod_direction_output(bri->scl_gpiod, 1); + + if (PTR_ERR(gpiod) == -EPROBE_DEFER) { + ret = -EPROBE_DEFER; + goto cleanup_pinctrl_state; + } + if (!IS_ERR(gpiod)) + bri->sda_gpiod = gpiod; + } + +cleanup_pinctrl_state: + /* change the state of the pins back to their default state */ + if (bri->pinctrl) + pinctrl_select_state(bri->pinctrl, bri->pins_default); + + return ret; +} + +static int i2c_gpio_init_recovery(struct i2c_adapter *adap) +{ + i2c_gpio_init_pinctrl_recovery(adap); + return i2c_gpio_init_generic_recovery(adap); +} + static void i2c_init_recovery(struct i2c_adapter *adap) { struct i2c_bus_recovery_info *bri = adap->bus_recovery_info; @@ -259,6 +383,8 @@ static void i2c_init_recovery(struct i2c_adapter *adap) if (!bri) return; + i2c_gpio_init_recovery(adap); + if (!bri->recover_bus) { err_str = "no recover_bus() found"; goto err; diff --git a/include/linux/i2c.h b/include/linux/i2c.h index 8ea9c3f86dba..d387d3786429 100644 --- a/include/linux/i2c.h +++ b/include/linux/i2c.h @@ -606,6 +606,14 @@ struct i2c_timings { * may configure padmux here for SDA/SCL line or something else they want. * @scl_gpiod: gpiod of the SCL line. Only required for GPIO recovery. * @sda_gpiod: gpiod of the SDA line. Only required for GPIO recovery. + * @pinctrl: pinctrl used by GPIO recovery to change the state of the I2C pins. + * Optional. + * @pins_default: default pinctrl state of SCL/SDA lines, when they are assigned + * to the I2C bus. Optional. Populated internally for GPIO recovery, if + * state with the name PINCTRL_STATE_DEFAULT is found and pinctrl is valid. + * @pins_gpio: recovery pinctrl state of SCL/SDA lines, when they are used as + * GPIOs. Optional. Populated internally for GPIO recovery, if this state + * is called "gpio" or "recovery" and pinctrl is valid. */ struct i2c_bus_recovery_info { int (*recover_bus)(struct i2c_adapter *adap); @@ -622,6 +630,9 @@ struct i2c_bus_recovery_info { /* gpio recovery */ struct gpio_desc *scl_gpiod; struct gpio_desc *sda_gpiod; + struct pinctrl *pinctrl; + struct pinctrl_state *pins_default; + struct pinctrl_state *pins_gpio; }; int i2c_recover_bus(struct i2c_adapter *adap); From 23a698fe65eca15fbdf925e8d70c5bf20ebf989e Mon Sep 17 00:00:00 2001 From: Codrin Ciubotariu Date: Tue, 4 Aug 2020 12:59:25 +0300 Subject: [PATCH 019/122] i2c: core: treat EPROBE_DEFER when acquiring SCL/SDA GPIOs Even if I2C bus GPIO recovery is optional, devm_gpiod_get() can return -EPROBE_DEFER, so we should at least treat that. This ends up with i2c_register_adapter() to be able to return -EPROBE_DEFER. Signed-off-by: Codrin Ciubotariu Signed-off-by: Wolfram Sang --- drivers/i2c/i2c-core-base.c | 26 ++++++++++++++------------ 1 file changed, 14 insertions(+), 12 deletions(-) diff --git a/drivers/i2c/i2c-core-base.c b/drivers/i2c/i2c-core-base.c index ece25560eae4..34a9609f256d 100644 --- a/drivers/i2c/i2c-core-base.c +++ b/drivers/i2c/i2c-core-base.c @@ -375,15 +375,16 @@ static int i2c_gpio_init_recovery(struct i2c_adapter *adap) return i2c_gpio_init_generic_recovery(adap); } -static void i2c_init_recovery(struct i2c_adapter *adap) +static int i2c_init_recovery(struct i2c_adapter *adap) { struct i2c_bus_recovery_info *bri = adap->bus_recovery_info; char *err_str; if (!bri) - return; + return 0; - i2c_gpio_init_recovery(adap); + if (i2c_gpio_init_recovery(adap) == -EPROBE_DEFER) + return -EPROBE_DEFER; if (!bri->recover_bus) { err_str = "no recover_bus() found"; @@ -399,10 +400,7 @@ static void i2c_init_recovery(struct i2c_adapter *adap) if (gpiod_get_direction(bri->sda_gpiod) == 0) bri->set_sda = set_sda_gpio_value; } - return; - } - - if (bri->recover_bus == i2c_generic_scl_recovery) { + } else if (bri->recover_bus == i2c_generic_scl_recovery) { /* Generic SCL recovery */ if (!bri->set_scl || !bri->get_scl) { err_str = "no {get|set}_scl() found"; @@ -414,10 +412,12 @@ static void i2c_init_recovery(struct i2c_adapter *adap) } } - return; + return 0; err: dev_err(&adap->dev, "Not using recovery: %s\n", err_str); adap->bus_recovery_info = NULL; + + return -EINVAL; } static int i2c_smbus_host_notify_to_irq(const struct i2c_client *client) @@ -1444,12 +1444,16 @@ static int i2c_register_adapter(struct i2c_adapter *adap) if (res) goto out_reg; - dev_dbg(&adap->dev, "adapter [%s] registered\n", adap->name); - pm_runtime_no_callbacks(&adap->dev); pm_suspend_ignore_children(&adap->dev, true); pm_runtime_enable(&adap->dev); + res = i2c_init_recovery(adap); + if (res == -EPROBE_DEFER) + goto out_reg; + + dev_dbg(&adap->dev, "adapter [%s] registered\n", adap->name); + #ifdef CONFIG_I2C_COMPAT res = class_compat_create_link(i2c_adapter_compat_class, &adap->dev, adap->dev.parent); @@ -1458,8 +1462,6 @@ static int i2c_register_adapter(struct i2c_adapter *adap) "Failed to create compatibility class link\n"); #endif - i2c_init_recovery(adap); - /* create pre-declared device nodes */ of_i2c_register_devices(adap); i2c_acpi_register_devices(adap); From 543aa2c4da8b2421633ec225cd2d4110115415f2 Mon Sep 17 00:00:00 2001 From: Codrin Ciubotariu Date: Tue, 4 Aug 2020 12:59:26 +0300 Subject: [PATCH 020/122] i2c: at91: Move to generic GPIO bus recovery Make the Microchip at91 driver the first to use the generic GPIO bus recovery support from the I2C core and discard the driver implementation. Signed-off-by: Codrin Ciubotariu Signed-off-by: Wolfram Sang --- drivers/i2c/busses/i2c-at91-master.c | 69 ++-------------------------- drivers/i2c/busses/i2c-at91.h | 3 -- 2 files changed, 3 insertions(+), 69 deletions(-) diff --git a/drivers/i2c/busses/i2c-at91-master.c b/drivers/i2c/busses/i2c-at91-master.c index 363d540a8345..66864f9cf7ac 100644 --- a/drivers/i2c/busses/i2c-at91-master.c +++ b/drivers/i2c/busses/i2c-at91-master.c @@ -816,79 +816,16 @@ static int at91_twi_configure_dma(struct at91_twi_dev *dev, u32 phy_addr) return ret; } -static void at91_prepare_twi_recovery(struct i2c_adapter *adap) -{ - struct at91_twi_dev *dev = i2c_get_adapdata(adap); - - pinctrl_select_state(dev->pinctrl, dev->pinctrl_pins_gpio); -} - -static void at91_unprepare_twi_recovery(struct i2c_adapter *adap) -{ - struct at91_twi_dev *dev = i2c_get_adapdata(adap); - - pinctrl_select_state(dev->pinctrl, dev->pinctrl_pins_default); -} - static int at91_init_twi_recovery_gpio(struct platform_device *pdev, struct at91_twi_dev *dev) { struct i2c_bus_recovery_info *rinfo = &dev->rinfo; - dev->pinctrl = devm_pinctrl_get(&pdev->dev); - if (!dev->pinctrl || IS_ERR(dev->pinctrl)) { + rinfo->pinctrl = devm_pinctrl_get(&pdev->dev); + if (!rinfo->pinctrl || IS_ERR(rinfo->pinctrl)) { dev_info(dev->dev, "can't get pinctrl, bus recovery not supported\n"); - return PTR_ERR(dev->pinctrl); + return PTR_ERR(rinfo->pinctrl); } - - dev->pinctrl_pins_default = pinctrl_lookup_state(dev->pinctrl, - PINCTRL_STATE_DEFAULT); - dev->pinctrl_pins_gpio = pinctrl_lookup_state(dev->pinctrl, - "gpio"); - if (IS_ERR(dev->pinctrl_pins_default) || - IS_ERR(dev->pinctrl_pins_gpio)) { - dev_info(&pdev->dev, "pinctrl states incomplete for recovery\n"); - return -EINVAL; - } - - /* - * pins will be taken as GPIO, so we might as well inform pinctrl about - * this and move the state to GPIO - */ - pinctrl_select_state(dev->pinctrl, dev->pinctrl_pins_gpio); - - rinfo->sda_gpiod = devm_gpiod_get(&pdev->dev, "sda", GPIOD_IN); - if (PTR_ERR(rinfo->sda_gpiod) == -EPROBE_DEFER) - return -EPROBE_DEFER; - - rinfo->scl_gpiod = devm_gpiod_get(&pdev->dev, "scl", - GPIOD_OUT_HIGH_OPEN_DRAIN); - if (PTR_ERR(rinfo->scl_gpiod) == -EPROBE_DEFER) - return -EPROBE_DEFER; - - if (IS_ERR(rinfo->sda_gpiod) || - IS_ERR(rinfo->scl_gpiod)) { - dev_info(&pdev->dev, "recovery information incomplete\n"); - if (!IS_ERR(rinfo->sda_gpiod)) { - gpiod_put(rinfo->sda_gpiod); - rinfo->sda_gpiod = NULL; - } - if (!IS_ERR(rinfo->scl_gpiod)) { - gpiod_put(rinfo->scl_gpiod); - rinfo->scl_gpiod = NULL; - } - pinctrl_select_state(dev->pinctrl, dev->pinctrl_pins_default); - return -EINVAL; - } - - /* change the state of the pins back to their default state */ - pinctrl_select_state(dev->pinctrl, dev->pinctrl_pins_default); - - dev_info(&pdev->dev, "using scl, sda for recovery\n"); - - rinfo->prepare_recovery = at91_prepare_twi_recovery; - rinfo->unprepare_recovery = at91_unprepare_twi_recovery; - rinfo->recover_bus = i2c_generic_scl_recovery; dev->adapter.bus_recovery_info = rinfo; return 0; diff --git a/drivers/i2c/busses/i2c-at91.h b/drivers/i2c/busses/i2c-at91.h index 7e7b4955ca7f..eae673ae786c 100644 --- a/drivers/i2c/busses/i2c-at91.h +++ b/drivers/i2c/busses/i2c-at91.h @@ -157,9 +157,6 @@ struct at91_twi_dev { struct at91_twi_dma dma; bool slave_detected; struct i2c_bus_recovery_info rinfo; - struct pinctrl *pinctrl; - struct pinctrl_state *pinctrl_pins_default; - struct pinctrl_state *pinctrl_pins_gpio; #ifdef CONFIG_I2C_AT91_SLAVE_EXPERIMENTAL unsigned smr; struct i2c_client *slave; From cee807cf9e5beb3e7dbdd6804773f5b8749baa25 Mon Sep 17 00:00:00 2001 From: Wolfram Sang Date: Sun, 2 Aug 2020 10:21:22 +0200 Subject: [PATCH 021/122] Documentation: i2c: dev: 'block process call' is supported And it has been for a while (since 2012 at least), only it was not documented. Add it. Signed-off-by: Wolfram Sang Signed-off-by: Wolfram Sang --- Documentation/i2c/dev-interface.rst | 2 ++ 1 file changed, 2 insertions(+) diff --git a/Documentation/i2c/dev-interface.rst b/Documentation/i2c/dev-interface.rst index bdb247f2f11a..73ad34849f99 100644 --- a/Documentation/i2c/dev-interface.rst +++ b/Documentation/i2c/dev-interface.rst @@ -159,6 +159,8 @@ for details) through the following functions:: __s32 i2c_smbus_read_word_data(int file, __u8 command); __s32 i2c_smbus_write_word_data(int file, __u8 command, __u16 value); __s32 i2c_smbus_process_call(int file, __u8 command, __u16 value); + __s32 i2c_smbus_block_process_call(int file, __u8 command, __u8 length, + __u8 *values); __s32 i2c_smbus_read_block_data(int file, __u8 command, __u8 *values); __s32 i2c_smbus_write_block_data(int file, __u8 command, __u8 length, __u8 *values); From db4694e66286b691bf67cb2d1280e42d3604ea79 Mon Sep 17 00:00:00 2001 From: "Alexander A. Klimov" Date: Fri, 17 Jul 2020 20:52:47 +0200 Subject: [PATCH 022/122] i2c: bcm2835: Replace HTTP links with HTTPS ones Rationale: Reduces attack surface on kernel devs opening the links for MITM as HTTPS traffic is much harder to manipulate. Signed-off-by: Alexander A. Klimov [wsa: shortened commit message] Signed-off-by: Wolfram Sang --- drivers/i2c/busses/i2c-bcm2835.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/i2c/busses/i2c-bcm2835.c b/drivers/i2c/busses/i2c-bcm2835.c index d9b86fcc3825..5dc519516292 100644 --- a/drivers/i2c/busses/i2c-bcm2835.c +++ b/drivers/i2c/busses/i2c-bcm2835.c @@ -392,7 +392,7 @@ static const struct i2c_algorithm bcm2835_i2c_algo = { /* * The BCM2835 was reported to have problems with clock stretching: - * http://www.advamation.com/knowhow/raspberrypi/rpi-i2c-bug.html + * https://www.advamation.com/knowhow/raspberrypi/rpi-i2c-bug.html * https://www.raspberrypi.org/forums/viewtopic.php?p=146272 */ static const struct i2c_adapter_quirks bcm2835_i2c_quirks = { From 12745b071e2b6b43e623e6cce521a1cb3c4c28dc Mon Sep 17 00:00:00 2001 From: Mika Westerberg Date: Thu, 18 Jun 2020 16:42:39 +0300 Subject: [PATCH 023/122] i2c: i801: Add support for Intel Emmitsburg PCH Add support for SMBus controller on Intel Emmitsburg PCH. This is the same IP as used in Cannon Lake and derivatives. Signed-off-by: Mika Westerberg Signed-off-by: Jarkko Nikula Reviewed-by: Jean Delvare Signed-off-by: Wolfram Sang --- Documentation/i2c/busses/i2c-i801.rst | 1 + drivers/i2c/busses/Kconfig | 1 + drivers/i2c/busses/i2c-i801.c | 4 ++++ 3 files changed, 6 insertions(+) diff --git a/Documentation/i2c/busses/i2c-i801.rst b/Documentation/i2c/busses/i2c-i801.rst index b83da0e94184..faf32330c335 100644 --- a/Documentation/i2c/busses/i2c-i801.rst +++ b/Documentation/i2c/busses/i2c-i801.rst @@ -43,6 +43,7 @@ Supported adapters: * Intel Elkhart Lake (PCH) * Intel Tiger Lake (PCH) * Intel Jasper Lake (SOC) + * Intel Emmitsburg (PCH) Datasheets: Publicly available at the Intel website diff --git a/drivers/i2c/busses/Kconfig b/drivers/i2c/busses/Kconfig index 735bf31a3fdf..085ca9b009ed 100644 --- a/drivers/i2c/busses/Kconfig +++ b/drivers/i2c/busses/Kconfig @@ -146,6 +146,7 @@ config I2C_I801 Elkhart Lake (PCH) Tiger Lake (PCH) Jasper Lake (SOC) + Emmitsburg (PCH) This driver can also be built as a module. If so, the module will be called i2c-i801. diff --git a/drivers/i2c/busses/i2c-i801.c b/drivers/i2c/busses/i2c-i801.c index 638e7f7c66cc..63b0c9d054ff 100644 --- a/drivers/i2c/busses/i2c-i801.c +++ b/drivers/i2c/busses/i2c-i801.c @@ -54,6 +54,7 @@ * Sunrise Point-H (PCH) 0xa123 32 hard yes yes yes * Sunrise Point-LP (PCH) 0x9d23 32 hard yes yes yes * DNV (SOC) 0x19df 32 hard yes yes yes + * Emmitsburg (PCH) 0x1bc9 32 hard yes yes yes * Broxton (SOC) 0x5ad4 32 hard yes yes yes * Lewisburg (PCH) 0xa1a3 32 hard yes yes yes * Lewisburg Supersku (PCH) 0xa223 32 hard yes yes yes @@ -207,6 +208,7 @@ #define PCI_DEVICE_ID_INTEL_BAYTRAIL_SMBUS 0x0f12 #define PCI_DEVICE_ID_INTEL_CDF_SMBUS 0x18df #define PCI_DEVICE_ID_INTEL_DNV_SMBUS 0x19df +#define PCI_DEVICE_ID_INTEL_EBG_SMBUS 0x1bc9 #define PCI_DEVICE_ID_INTEL_COUGARPOINT_SMBUS 0x1c22 #define PCI_DEVICE_ID_INTEL_PATSBURG_SMBUS 0x1d22 /* Patsburg also has three 'Integrated Device Function' SMBus controllers */ @@ -1062,6 +1064,7 @@ static const struct pci_device_id i801_ids[] = { { PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_SUNRISEPOINT_LP_SMBUS) }, { PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_CDF_SMBUS) }, { PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_DNV_SMBUS) }, + { PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_EBG_SMBUS) }, { PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_BROXTON_SMBUS) }, { PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_LEWISBURG_SMBUS) }, { PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_LEWISBURG_SSKU_SMBUS) }, @@ -1749,6 +1752,7 @@ static int i801_probe(struct pci_dev *dev, const struct pci_device_id *id) case PCI_DEVICE_ID_INTEL_ELKHART_LAKE_SMBUS: case PCI_DEVICE_ID_INTEL_TIGERLAKE_LP_SMBUS: case PCI_DEVICE_ID_INTEL_JASPER_LAKE_SMBUS: + case PCI_DEVICE_ID_INTEL_EBG_SMBUS: priv->features |= FEATURE_BLOCK_PROC; priv->features |= FEATURE_I2C_BLOCK_READ; priv->features |= FEATURE_IRQ; From f46efbcad97bfb2caded0397eccce7c71402868c Mon Sep 17 00:00:00 2001 From: Jarkko Nikula Date: Thu, 18 Jun 2020 16:42:40 +0300 Subject: [PATCH 024/122] i2c: i801: Add support for Intel Tiger Lake PCH-H Add SMBus PCI ID on Intel Tiger Lake PCH-H. Signed-off-by: Jarkko Nikula Reviewed-by: Jean Delvare Signed-off-by: Wolfram Sang --- drivers/i2c/busses/i2c-i801.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/drivers/i2c/busses/i2c-i801.c b/drivers/i2c/busses/i2c-i801.c index 63b0c9d054ff..e32ef3f01fe8 100644 --- a/drivers/i2c/busses/i2c-i801.c +++ b/drivers/i2c/busses/i2c-i801.c @@ -68,6 +68,7 @@ * Comet Lake-H (PCH) 0x06a3 32 hard yes yes yes * Elkhart Lake (PCH) 0x4b23 32 hard yes yes yes * Tiger Lake-LP (PCH) 0xa0a3 32 hard yes yes yes + * Tiger Lake-H (PCH) 0x43a3 32 hard yes yes yes * Jasper Lake (SOC) 0x4da3 32 hard yes yes yes * Comet Lake-V (PCH) 0xa3a3 32 hard yes yes yes * @@ -223,6 +224,7 @@ #define PCI_DEVICE_ID_INTEL_GEMINILAKE_SMBUS 0x31d4 #define PCI_DEVICE_ID_INTEL_ICELAKE_LP_SMBUS 0x34a3 #define PCI_DEVICE_ID_INTEL_5_3400_SERIES_SMBUS 0x3b30 +#define PCI_DEVICE_ID_INTEL_TIGERLAKE_H_SMBUS 0x43a3 #define PCI_DEVICE_ID_INTEL_ELKHART_LAKE_SMBUS 0x4b23 #define PCI_DEVICE_ID_INTEL_JASPER_LAKE_SMBUS 0x4da3 #define PCI_DEVICE_ID_INTEL_BROXTON_SMBUS 0x5ad4 @@ -1077,6 +1079,7 @@ static const struct pci_device_id i801_ids[] = { { PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_COMETLAKE_V_SMBUS) }, { PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_ELKHART_LAKE_SMBUS) }, { PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_TIGERLAKE_LP_SMBUS) }, + { PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_TIGERLAKE_H_SMBUS) }, { PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_JASPER_LAKE_SMBUS) }, { 0, } }; @@ -1751,6 +1754,7 @@ static int i801_probe(struct pci_dev *dev, const struct pci_device_id *id) case PCI_DEVICE_ID_INTEL_COMETLAKE_H_SMBUS: case PCI_DEVICE_ID_INTEL_ELKHART_LAKE_SMBUS: case PCI_DEVICE_ID_INTEL_TIGERLAKE_LP_SMBUS: + case PCI_DEVICE_ID_INTEL_TIGERLAKE_H_SMBUS: case PCI_DEVICE_ID_INTEL_JASPER_LAKE_SMBUS: case PCI_DEVICE_ID_INTEL_EBG_SMBUS: priv->features |= FEATURE_BLOCK_PROC; From b63da6c8dfa9b2ab3554e8c59ef294d1f28bb9bd Mon Sep 17 00:00:00 2001 From: Randy Dunlap Date: Wed, 5 Aug 2020 08:49:58 -0700 Subject: [PATCH 025/122] xfs: delete duplicated words + other fixes Delete repeated words in fs/xfs/. {we, that, the, a, to, fork} Change "it it" to "it is" in one location. Signed-off-by: Randy Dunlap To: linux-fsdevel@vger.kernel.org Cc: Darrick J. Wong Cc: linux-xfs@vger.kernel.org Reviewed-by: Darrick J. Wong Signed-off-by: Darrick J. Wong --- fs/xfs/libxfs/xfs_sb.c | 2 +- fs/xfs/xfs_attr_list.c | 2 +- fs/xfs/xfs_buf_item.c | 2 +- fs/xfs/xfs_buf_item_recover.c | 2 +- fs/xfs/xfs_dquot.c | 2 +- fs/xfs/xfs_export.c | 2 +- fs/xfs/xfs_inode.c | 4 ++-- fs/xfs/xfs_inode_item.c | 4 ++-- fs/xfs/xfs_iomap.c | 2 +- fs/xfs/xfs_log_cil.c | 2 +- fs/xfs/xfs_log_recover.c | 2 +- fs/xfs/xfs_refcount_item.c | 2 +- fs/xfs/xfs_reflink.c | 2 +- fs/xfs/xfs_trans_ail.c | 4 ++-- 14 files changed, 17 insertions(+), 17 deletions(-) diff --git a/fs/xfs/libxfs/xfs_sb.c b/fs/xfs/libxfs/xfs_sb.c index 4df87546bd40..ae9aaf1f34bf 100644 --- a/fs/xfs/libxfs/xfs_sb.c +++ b/fs/xfs/libxfs/xfs_sb.c @@ -600,7 +600,7 @@ xfs_sb_quota_to_disk( * disk. If neither are active, we should NULL the inode. * * In all cases, the separate pquotino must remain 0 because it - * it beyond the "end" of the valid non-pquotino superblock. + * is beyond the "end" of the valid non-pquotino superblock. */ if (from->sb_qflags & XFS_GQUOTA_ACCT) to->sb_gquotino = cpu_to_be64(from->sb_gquotino); diff --git a/fs/xfs/xfs_attr_list.c b/fs/xfs/xfs_attr_list.c index e380bd1a9bfc..50f922cad91a 100644 --- a/fs/xfs/xfs_attr_list.c +++ b/fs/xfs/xfs_attr_list.c @@ -44,7 +44,7 @@ xfs_attr_shortform_compare(const void *a, const void *b) /* * Copy out entries of shortform attribute lists for attr_list(). * Shortform attribute lists are not stored in hashval sorted order. - * If the output buffer is not large enough to hold them all, then we + * If the output buffer is not large enough to hold them all, then * we have to calculate each entries' hashvalue and sort them before * we can begin returning them to the user. */ diff --git a/fs/xfs/xfs_buf_item.c b/fs/xfs/xfs_buf_item.c index 5bb6f22cc11a..408d1b572d3f 100644 --- a/fs/xfs/xfs_buf_item.c +++ b/fs/xfs/xfs_buf_item.c @@ -127,7 +127,7 @@ xfs_buf_item_size_segment( * stretch of non-contiguous chunks to be logged. Contiguous chunks are logged * in a single iovec. * - * Discontiguous buffers need a format structure per region that that is being + * Discontiguous buffers need a format structure per region that is being * logged. This makes the changes in the buffer appear to log recovery as though * they came from separate buffers, just like would occur if multiple buffers * were used instead of a single discontiguous buffer. This enables diff --git a/fs/xfs/xfs_buf_item_recover.c b/fs/xfs/xfs_buf_item_recover.c index d480f11e6b00..8f0457d67d77 100644 --- a/fs/xfs/xfs_buf_item_recover.c +++ b/fs/xfs/xfs_buf_item_recover.c @@ -948,7 +948,7 @@ xlog_recover_buf_commit_pass2( * or inode_cluster_size bytes, whichever is bigger. The inode * buffers in the log can be a different size if the log was generated * by an older kernel using unclustered inode buffers or a newer kernel - * running with a different inode cluster size. Regardless, if the + * running with a different inode cluster size. Regardless, if * the inode buffer size isn't max(blocksize, inode_cluster_size) * for *our* value of inode_cluster_size, then we need to keep * the buffer out of the buffer cache so that the buffer won't diff --git a/fs/xfs/xfs_dquot.c b/fs/xfs/xfs_dquot.c index 04dc2be19c3a..bcd73b9c2994 100644 --- a/fs/xfs/xfs_dquot.c +++ b/fs/xfs/xfs_dquot.c @@ -807,7 +807,7 @@ xfs_qm_dqget_checks( } /* - * Given the file system, id, and type (UDQUOT/GDQUOT), return a a locked + * Given the file system, id, and type (UDQUOT/GDQUOT), return a locked * dquot, doing an allocation (if requested) as needed. */ int diff --git a/fs/xfs/xfs_export.c b/fs/xfs/xfs_export.c index 5a4b0119143a..465fd9e048d4 100644 --- a/fs/xfs/xfs_export.c +++ b/fs/xfs/xfs_export.c @@ -56,7 +56,7 @@ xfs_fs_encode_fh( fileid_type = FILEID_INO32_GEN_PARENT; /* - * If the the filesystem may contain 64bit inode numbers, we need + * If the filesystem may contain 64bit inode numbers, we need * to use larger file handles that can represent them. * * While we only allocate inodes that do not fit into 32 bits any diff --git a/fs/xfs/xfs_inode.c b/fs/xfs/xfs_inode.c index 407d6299606d..c06129cffba9 100644 --- a/fs/xfs/xfs_inode.c +++ b/fs/xfs/xfs_inode.c @@ -451,7 +451,7 @@ xfs_lock_inodes( /* * Currently supports between 2 and 5 inodes with exclusive locking. We * support an arbitrary depth of locking here, but absolute limits on - * inodes depend on the the type of locking and the limits placed by + * inodes depend on the type of locking and the limits placed by * lockdep annotations in xfs_lock_inumorder. These are all checked by * the asserts. */ @@ -3105,7 +3105,7 @@ xfs_cross_rename( /* * xfs_rename_alloc_whiteout() * - * Return a referenced, unlinked, unlocked inode that that can be used as a + * Return a referenced, unlinked, unlocked inode that can be used as a * whiteout in a rename transaction. We use a tmpfile inode here so that if we * crash between allocating the inode and linking it into the rename transaction * recovery will free the inode and we won't leak it. diff --git a/fs/xfs/xfs_inode_item.c b/fs/xfs/xfs_inode_item.c index 895f61b2b4f0..6c65938cee1c 100644 --- a/fs/xfs/xfs_inode_item.c +++ b/fs/xfs/xfs_inode_item.c @@ -191,7 +191,7 @@ xfs_inode_item_format_data_fork( ip->i_df.if_bytes > 0) { /* * Round i_bytes up to a word boundary. - * The underlying memory is guaranteed to + * The underlying memory is guaranteed * to be there by xfs_idata_realloc(). */ data_bytes = roundup(ip->i_df.if_bytes, 4); @@ -275,7 +275,7 @@ xfs_inode_item_format_attr_fork( ip->i_afp->if_bytes > 0) { /* * Round i_bytes up to a word boundary. - * The underlying memory is guaranteed to + * The underlying memory is guaranteed * to be there by xfs_idata_realloc(). */ data_bytes = roundup(ip->i_afp->if_bytes, 4); diff --git a/fs/xfs/xfs_iomap.c b/fs/xfs/xfs_iomap.c index 0e3f62cde375..3abb8b9d6f4c 100644 --- a/fs/xfs/xfs_iomap.c +++ b/fs/xfs/xfs_iomap.c @@ -865,7 +865,7 @@ xfs_buffered_write_iomap_begin( } /* - * Search the data fork fork first to look up our source mapping. We + * Search the data fork first to look up our source mapping. We * always need the data fork map, as we have to return it to the * iomap code so that the higher level write code can read data in to * perform read-modify-write cycles for unaligned writes. diff --git a/fs/xfs/xfs_log_cil.c b/fs/xfs/xfs_log_cil.c index 56c32eecffea..b0ef071b3cb5 100644 --- a/fs/xfs/xfs_log_cil.c +++ b/fs/xfs/xfs_log_cil.c @@ -239,7 +239,7 @@ xfs_cil_prepare_item( * this CIL context and so we need to pin it. If we are replacing the * old_lv, then remove the space it accounts for and make it the shadow * buffer for later freeing. In both cases we are now switching to the - * shadow buffer, so update the the pointer to it appropriately. + * shadow buffer, so update the pointer to it appropriately. */ if (!old_lv) { if (lv->lv_item->li_ops->iop_pin) diff --git a/fs/xfs/xfs_log_recover.c b/fs/xfs/xfs_log_recover.c index 52a65a74208f..e2ec91b2d0f4 100644 --- a/fs/xfs/xfs_log_recover.c +++ b/fs/xfs/xfs_log_recover.c @@ -1100,7 +1100,7 @@ xlog_verify_head( * * Note that xlog_find_tail() clears the blocks at the new head * (i.e., the records with invalid CRC) if the cycle number - * matches the the current cycle. + * matches the current cycle. */ found = xlog_rseek_logrec_hdr(log, first_bad, *tail_blk, 1, buffer, rhead_blk, rhead, wrapped); diff --git a/fs/xfs/xfs_refcount_item.c b/fs/xfs/xfs_refcount_item.c index 7b2c72bc2858..ca93b6488377 100644 --- a/fs/xfs/xfs_refcount_item.c +++ b/fs/xfs/xfs_refcount_item.c @@ -485,7 +485,7 @@ xfs_cui_item_recover( * transaction. Normally, any work that needs to be deferred * gets attached to the same defer_ops that scheduled the * refcount update. However, we're in log recovery here, so we - * we use the passed in defer_ops and to finish up any work that + * use the passed in defer_ops and to finish up any work that * doesn't fit. We need to reserve enough blocks to handle a * full btree split on either end of the refcount range. */ diff --git a/fs/xfs/xfs_reflink.c b/fs/xfs/xfs_reflink.c index aac83f9d6107..16098dc42add 100644 --- a/fs/xfs/xfs_reflink.c +++ b/fs/xfs/xfs_reflink.c @@ -721,7 +721,7 @@ xfs_reflink_end_cow( * repeatedly cycles the ILOCK to allocate one transaction per remapped * extent. * - * If we're being called by writeback then the the pages will still + * If we're being called by writeback then the pages will still * have PageWriteback set, which prevents races with reflink remapping * and truncate. Reflink remapping prevents races with writeback by * taking the iolock and mmaplock before flushing the pages and diff --git a/fs/xfs/xfs_trans_ail.c b/fs/xfs/xfs_trans_ail.c index 0c783d339675..dbb69b4bf3ed 100644 --- a/fs/xfs/xfs_trans_ail.c +++ b/fs/xfs/xfs_trans_ail.c @@ -480,7 +480,7 @@ xfsaild_push( * inode buffer is locked because we already pushed the * updates to it as part of inode clustering. * - * We do not want to to stop flushing just because lots + * We do not want to stop flushing just because lots * of items are already being flushed, but we need to * re-try the flushing relatively soon if most of the * AIL is being flushed. @@ -515,7 +515,7 @@ xfsaild_push( /* * Are there too many items we can't do anything with? * - * If we we are skipping too many items because we can't flush + * If we are skipping too many items because we can't flush * them or they are already being flushed, we back off and * given them time to complete whatever operation is being * done. i.e. remove pressure from the AIL while we can't make From 8426fe70cfa4e4545c6a3709918638a7f613d528 Mon Sep 17 00:00:00 2001 From: Qii Wang Date: Wed, 5 Aug 2020 18:52:19 +0800 Subject: [PATCH 026/122] i2c: mediatek: Add apdma sync in i2c driver With the apdma remove hand-shake signal, it need to keep i2c and apdma in sync manually. Reviewed-by: Yingjoe Chen Reviewed-by: Matthias Brugger Signed-off-by: Qii Wang Signed-off-by: Wolfram Sang --- drivers/i2c/busses/i2c-mt65xx.c | 23 ++++++++++++++++++++--- 1 file changed, 20 insertions(+), 3 deletions(-) diff --git a/drivers/i2c/busses/i2c-mt65xx.c b/drivers/i2c/busses/i2c-mt65xx.c index deef69e56906..e6b984aebe74 100644 --- a/drivers/i2c/busses/i2c-mt65xx.c +++ b/drivers/i2c/busses/i2c-mt65xx.c @@ -48,6 +48,9 @@ #define I2C_DMA_CON_TX 0x0000 #define I2C_DMA_CON_RX 0x0001 +#define I2C_DMA_ASYNC_MODE 0x0004 +#define I2C_DMA_SKIP_CONFIG 0x0010 +#define I2C_DMA_DIR_CHANGE 0x0200 #define I2C_DMA_START_EN 0x0001 #define I2C_DMA_INT_FLAG_NONE 0x0000 #define I2C_DMA_CLR_FLAG 0x0000 @@ -205,6 +208,7 @@ struct mtk_i2c_compatible { unsigned char timing_adjust: 1; unsigned char dma_sync: 1; unsigned char ltiming_adjust: 1; + unsigned char apdma_sync: 1; }; struct mtk_i2c_ac_timing { @@ -311,6 +315,7 @@ static const struct mtk_i2c_compatible mt2712_compat = { .timing_adjust = 1, .dma_sync = 0, .ltiming_adjust = 0, + .apdma_sync = 0, }; static const struct mtk_i2c_compatible mt6577_compat = { @@ -324,6 +329,7 @@ static const struct mtk_i2c_compatible mt6577_compat = { .timing_adjust = 0, .dma_sync = 0, .ltiming_adjust = 0, + .apdma_sync = 0, }; static const struct mtk_i2c_compatible mt6589_compat = { @@ -337,6 +343,7 @@ static const struct mtk_i2c_compatible mt6589_compat = { .timing_adjust = 0, .dma_sync = 0, .ltiming_adjust = 0, + .apdma_sync = 0, }; static const struct mtk_i2c_compatible mt7622_compat = { @@ -350,6 +357,7 @@ static const struct mtk_i2c_compatible mt7622_compat = { .timing_adjust = 0, .dma_sync = 0, .ltiming_adjust = 0, + .apdma_sync = 0, }; static const struct mtk_i2c_compatible mt8173_compat = { @@ -362,6 +370,7 @@ static const struct mtk_i2c_compatible mt8173_compat = { .timing_adjust = 0, .dma_sync = 0, .ltiming_adjust = 0, + .apdma_sync = 0, }; static const struct mtk_i2c_compatible mt8183_compat = { @@ -375,6 +384,7 @@ static const struct mtk_i2c_compatible mt8183_compat = { .timing_adjust = 1, .dma_sync = 1, .ltiming_adjust = 1, + .apdma_sync = 0, }; static const struct of_device_id mtk_i2c_of_match[] = { @@ -798,6 +808,7 @@ static int mtk_i2c_do_transfer(struct mtk_i2c *i2c, struct i2c_msg *msgs, u16 start_reg; u16 control_reg; u16 restart_flag = 0; + u16 dma_sync = 0; u32 reg_4g_mode; u8 *dma_rd_buf = NULL; u8 *dma_wr_buf = NULL; @@ -851,10 +862,16 @@ static int mtk_i2c_do_transfer(struct mtk_i2c *i2c, struct i2c_msg *msgs, mtk_i2c_writew(i2c, num, OFFSET_TRANSAC_LEN); } + if (i2c->dev_comp->apdma_sync) { + dma_sync = I2C_DMA_SKIP_CONFIG | I2C_DMA_ASYNC_MODE; + if (i2c->op == I2C_MASTER_WRRD) + dma_sync |= I2C_DMA_DIR_CHANGE; + } + /* Prepare buffer data to start transfer */ if (i2c->op == I2C_MASTER_RD) { writel(I2C_DMA_INT_FLAG_NONE, i2c->pdmabase + OFFSET_INT_FLAG); - writel(I2C_DMA_CON_RX, i2c->pdmabase + OFFSET_CON); + writel(I2C_DMA_CON_RX | dma_sync, i2c->pdmabase + OFFSET_CON); dma_rd_buf = i2c_get_dma_safe_msg_buf(msgs, 1); if (!dma_rd_buf) @@ -877,7 +894,7 @@ static int mtk_i2c_do_transfer(struct mtk_i2c *i2c, struct i2c_msg *msgs, writel(msgs->len, i2c->pdmabase + OFFSET_RX_LEN); } else if (i2c->op == I2C_MASTER_WR) { writel(I2C_DMA_INT_FLAG_NONE, i2c->pdmabase + OFFSET_INT_FLAG); - writel(I2C_DMA_CON_TX, i2c->pdmabase + OFFSET_CON); + writel(I2C_DMA_CON_TX | dma_sync, i2c->pdmabase + OFFSET_CON); dma_wr_buf = i2c_get_dma_safe_msg_buf(msgs, 1); if (!dma_wr_buf) @@ -900,7 +917,7 @@ static int mtk_i2c_do_transfer(struct mtk_i2c *i2c, struct i2c_msg *msgs, writel(msgs->len, i2c->pdmabase + OFFSET_TX_LEN); } else { writel(I2C_DMA_CLR_FLAG, i2c->pdmabase + OFFSET_INT_FLAG); - writel(I2C_DMA_CLR_FLAG, i2c->pdmabase + OFFSET_CON); + writel(I2C_DMA_CLR_FLAG | dma_sync, i2c->pdmabase + OFFSET_CON); dma_wr_buf = i2c_get_dma_safe_msg_buf(msgs, 1); if (!dma_wr_buf) From 908d984336daeb0cad65ba015ab0ade132f2e315 Mon Sep 17 00:00:00 2001 From: Qii Wang Date: Wed, 5 Aug 2020 18:52:20 +0800 Subject: [PATCH 027/122] i2c: mediatek: Add access to more than 8GB dram in i2c driver Newer MTK chip support more than 8GB of dram. Replace support_33bits with more general dma_max_support and remove mtk_i2c_set_4g_mode. Reviewed-by: Yingjoe Chen Signed-off-by: Qii Wang Signed-off-by: Wolfram Sang --- drivers/i2c/busses/i2c-mt65xx.c | 39 ++++++++++++++------------------- 1 file changed, 17 insertions(+), 22 deletions(-) diff --git a/drivers/i2c/busses/i2c-mt65xx.c b/drivers/i2c/busses/i2c-mt65xx.c index e6b984aebe74..463860e1d54b 100644 --- a/drivers/i2c/busses/i2c-mt65xx.c +++ b/drivers/i2c/busses/i2c-mt65xx.c @@ -55,7 +55,6 @@ #define I2C_DMA_INT_FLAG_NONE 0x0000 #define I2C_DMA_CLR_FLAG 0x0000 #define I2C_DMA_HARD_RST 0x0002 -#define I2C_DMA_4G_MODE 0x0001 #define MAX_SAMPLE_CNT_DIV 8 #define MAX_STEP_CNT_DIV 64 @@ -204,11 +203,11 @@ struct mtk_i2c_compatible { unsigned char dcm: 1; unsigned char auto_restart: 1; unsigned char aux_len_reg: 1; - unsigned char support_33bits: 1; unsigned char timing_adjust: 1; unsigned char dma_sync: 1; unsigned char ltiming_adjust: 1; unsigned char apdma_sync: 1; + unsigned char max_dma_support; }; struct mtk_i2c_ac_timing { @@ -311,11 +310,11 @@ static const struct mtk_i2c_compatible mt2712_compat = { .dcm = 1, .auto_restart = 1, .aux_len_reg = 1, - .support_33bits = 1, .timing_adjust = 1, .dma_sync = 0, .ltiming_adjust = 0, .apdma_sync = 0, + .max_dma_support = 33, }; static const struct mtk_i2c_compatible mt6577_compat = { @@ -325,11 +324,11 @@ static const struct mtk_i2c_compatible mt6577_compat = { .dcm = 1, .auto_restart = 0, .aux_len_reg = 0, - .support_33bits = 0, .timing_adjust = 0, .dma_sync = 0, .ltiming_adjust = 0, .apdma_sync = 0, + .max_dma_support = 32, }; static const struct mtk_i2c_compatible mt6589_compat = { @@ -339,11 +338,11 @@ static const struct mtk_i2c_compatible mt6589_compat = { .dcm = 0, .auto_restart = 0, .aux_len_reg = 0, - .support_33bits = 0, .timing_adjust = 0, .dma_sync = 0, .ltiming_adjust = 0, .apdma_sync = 0, + .max_dma_support = 32, }; static const struct mtk_i2c_compatible mt7622_compat = { @@ -353,11 +352,11 @@ static const struct mtk_i2c_compatible mt7622_compat = { .dcm = 1, .auto_restart = 1, .aux_len_reg = 1, - .support_33bits = 0, .timing_adjust = 0, .dma_sync = 0, .ltiming_adjust = 0, .apdma_sync = 0, + .max_dma_support = 32, }; static const struct mtk_i2c_compatible mt8173_compat = { @@ -366,11 +365,11 @@ static const struct mtk_i2c_compatible mt8173_compat = { .dcm = 1, .auto_restart = 1, .aux_len_reg = 1, - .support_33bits = 1, .timing_adjust = 0, .dma_sync = 0, .ltiming_adjust = 0, .apdma_sync = 0, + .max_dma_support = 33, }; static const struct mtk_i2c_compatible mt8183_compat = { @@ -380,11 +379,11 @@ static const struct mtk_i2c_compatible mt8183_compat = { .dcm = 0, .auto_restart = 1, .aux_len_reg = 1, - .support_33bits = 1, .timing_adjust = 1, .dma_sync = 1, .ltiming_adjust = 1, .apdma_sync = 0, + .max_dma_support = 33, }; static const struct of_device_id mtk_i2c_of_match[] = { @@ -796,11 +795,6 @@ static int mtk_i2c_set_speed(struct mtk_i2c *i2c, unsigned int parent_clk) return 0; } -static inline u32 mtk_i2c_set_4g_mode(dma_addr_t addr) -{ - return (addr & BIT_ULL(32)) ? I2C_DMA_4G_MODE : I2C_DMA_CLR_FLAG; -} - static int mtk_i2c_do_transfer(struct mtk_i2c *i2c, struct i2c_msg *msgs, int num, int left_num) { @@ -885,8 +879,8 @@ static int mtk_i2c_do_transfer(struct mtk_i2c *i2c, struct i2c_msg *msgs, return -ENOMEM; } - if (i2c->dev_comp->support_33bits) { - reg_4g_mode = mtk_i2c_set_4g_mode(rpaddr); + if (i2c->dev_comp->max_dma_support > 32) { + reg_4g_mode = upper_32_bits(rpaddr); writel(reg_4g_mode, i2c->pdmabase + OFFSET_RX_4G_MODE); } @@ -908,8 +902,8 @@ static int mtk_i2c_do_transfer(struct mtk_i2c *i2c, struct i2c_msg *msgs, return -ENOMEM; } - if (i2c->dev_comp->support_33bits) { - reg_4g_mode = mtk_i2c_set_4g_mode(wpaddr); + if (i2c->dev_comp->max_dma_support > 32) { + reg_4g_mode = upper_32_bits(wpaddr); writel(reg_4g_mode, i2c->pdmabase + OFFSET_TX_4G_MODE); } @@ -954,11 +948,11 @@ static int mtk_i2c_do_transfer(struct mtk_i2c *i2c, struct i2c_msg *msgs, return -ENOMEM; } - if (i2c->dev_comp->support_33bits) { - reg_4g_mode = mtk_i2c_set_4g_mode(wpaddr); + if (i2c->dev_comp->max_dma_support > 32) { + reg_4g_mode = upper_32_bits(wpaddr); writel(reg_4g_mode, i2c->pdmabase + OFFSET_TX_4G_MODE); - reg_4g_mode = mtk_i2c_set_4g_mode(rpaddr); + reg_4g_mode = upper_32_bits(rpaddr); writel(reg_4g_mode, i2c->pdmabase + OFFSET_RX_4G_MODE); } @@ -1232,8 +1226,9 @@ static int mtk_i2c_probe(struct platform_device *pdev) return -EINVAL; } - if (i2c->dev_comp->support_33bits) { - ret = dma_set_mask(&pdev->dev, DMA_BIT_MASK(33)); + if (i2c->dev_comp->max_dma_support > 32) { + ret = dma_set_mask(&pdev->dev, + DMA_BIT_MASK(i2c->dev_comp->max_dma_support)); if (ret) { dev_err(&pdev->dev, "dma_set_mask return error.\n"); return ret; From 311df9d580306cbfd98b6f7c860175a2471e2130 Mon Sep 17 00:00:00 2001 From: Qii Wang Date: Wed, 5 Aug 2020 18:52:21 +0800 Subject: [PATCH 028/122] dt-bindings: i2c: update bindings for MT8192 SoC Add a DT binding documentation for the MT8192 soc. Acked-by: Rob Herring Signed-off-by: Qii Wang Signed-off-by: Wolfram Sang --- Documentation/devicetree/bindings/i2c/i2c-mt65xx.txt | 1 + 1 file changed, 1 insertion(+) diff --git a/Documentation/devicetree/bindings/i2c/i2c-mt65xx.txt b/Documentation/devicetree/bindings/i2c/i2c-mt65xx.txt index 88b71c1b32c9..7f0194fdd0cc 100644 --- a/Documentation/devicetree/bindings/i2c/i2c-mt65xx.txt +++ b/Documentation/devicetree/bindings/i2c/i2c-mt65xx.txt @@ -14,6 +14,7 @@ Required properties: "mediatek,mt7629-i2c", "mediatek,mt2712-i2c": for MediaTek MT7629 "mediatek,mt8173-i2c": for MediaTek MT8173 "mediatek,mt8183-i2c": for MediaTek MT8183 + "mediatek,mt8192-i2c": for MediaTek MT8192 "mediatek,mt8516-i2c", "mediatek,mt2712-i2c": for MediaTek MT8516 - reg: physical base address of the controller and dma base, length of memory mapped region. From 789e67ba5454c0fa78471a6cc5ae143794fb8033 Mon Sep 17 00:00:00 2001 From: Qii Wang Date: Wed, 5 Aug 2020 18:52:22 +0800 Subject: [PATCH 029/122] i2c: mediatek: Add i2c compatible for MediaTek MT8192 Add i2c compatible for MT8192. Compare to MT8183 i2c controller, MT8192 support more then 8GB DMA mode. Signed-off-by: Qii Wang Signed-off-by: Wolfram Sang --- drivers/i2c/busses/i2c-mt65xx.c | 15 +++++++++++++++ 1 file changed, 15 insertions(+) diff --git a/drivers/i2c/busses/i2c-mt65xx.c b/drivers/i2c/busses/i2c-mt65xx.c index 463860e1d54b..e889f74703e4 100644 --- a/drivers/i2c/busses/i2c-mt65xx.c +++ b/drivers/i2c/busses/i2c-mt65xx.c @@ -386,6 +386,20 @@ static const struct mtk_i2c_compatible mt8183_compat = { .max_dma_support = 33, }; +static const struct mtk_i2c_compatible mt8192_compat = { + .quirks = &mt8183_i2c_quirks, + .regs = mt_i2c_regs_v2, + .pmic_i2c = 0, + .dcm = 0, + .auto_restart = 1, + .aux_len_reg = 1, + .timing_adjust = 1, + .dma_sync = 1, + .ltiming_adjust = 1, + .apdma_sync = 1, + .max_dma_support = 36, +}; + static const struct of_device_id mtk_i2c_of_match[] = { { .compatible = "mediatek,mt2712-i2c", .data = &mt2712_compat }, { .compatible = "mediatek,mt6577-i2c", .data = &mt6577_compat }, @@ -393,6 +407,7 @@ static const struct of_device_id mtk_i2c_of_match[] = { { .compatible = "mediatek,mt7622-i2c", .data = &mt7622_compat }, { .compatible = "mediatek,mt8173-i2c", .data = &mt8173_compat }, { .compatible = "mediatek,mt8183-i2c", .data = &mt8183_compat }, + { .compatible = "mediatek,mt8192-i2c", .data = &mt8192_compat }, {} }; MODULE_DEVICE_TABLE(of, mtk_i2c_of_match); From 8912fd6a61d7474ea9b43be93f136034d28868d5 Mon Sep 17 00:00:00 2001 From: Colin Ian King Date: Thu, 6 Aug 2020 12:42:56 +0100 Subject: [PATCH 030/122] net: hns3: fix spelling mistake "could'nt" -> "couldn't" There is a spelling mistake in a dev_err message. Fix it. Signed-off-by: Colin Ian King Signed-off-by: David S. Miller --- drivers/net/ethernet/hisilicon/hns3/hns3vf/hclgevf_main.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3vf/hclgevf_main.c b/drivers/net/ethernet/hisilicon/hns3/hns3vf/hclgevf_main.c index 9162856de1b1..e972138a14ad 100644 --- a/drivers/net/ethernet/hisilicon/hns3/hns3vf/hclgevf_main.c +++ b/drivers/net/ethernet/hisilicon/hns3/hns3vf/hclgevf_main.c @@ -1728,7 +1728,7 @@ static int hclgevf_reset_wait(struct hclgevf_dev *hdev) /* hardware completion status should be available by this time */ if (ret) { dev_err(&hdev->pdev->dev, - "could'nt get reset done status from h/w, timeout!\n"); + "couldn't get reset done status from h/w, timeout!\n"); return ret; } From 6bcaf41f9613278cd5897fc80ab93033bda8efaa Mon Sep 17 00:00:00 2001 From: Andrii Nakryiko Date: Tue, 4 Aug 2020 17:47:57 -0700 Subject: [PATCH 031/122] selftests/bpf: Prevent runqslower from racing on building bpftool runqslower's Makefile is building/installing bpftool into $(OUTPUT)/sbin/bpftool, which coincides with $(DEFAULT_BPFTOOL). In practice this means that often when building selftests from scratch (after `make clean`), selftests are racing with runqslower to simultaneously build bpftool and one of the two processes fail due to file being busy. Prevent this race by explicitly order-depending on $(BPFTOOL_DEFAULT). Fixes: a2c9652f751e ("selftests: Refactor build to remove tools/lib/bpf from include path") Signed-off-by: Andrii Nakryiko Signed-off-by: Alexei Starovoitov Acked-by: John Fastabend Link: https://lore.kernel.org/bpf/20200805004757.2960750-1-andriin@fb.com --- tools/testing/selftests/bpf/Makefile | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/tools/testing/selftests/bpf/Makefile b/tools/testing/selftests/bpf/Makefile index e7a8cf83ba48..48425f9251b5 100644 --- a/tools/testing/selftests/bpf/Makefile +++ b/tools/testing/selftests/bpf/Makefile @@ -142,7 +142,9 @@ VMLINUX_BTF_PATHS ?= $(if $(O),$(O)/vmlinux) \ /boot/vmlinux-$(shell uname -r) VMLINUX_BTF ?= $(abspath $(firstword $(wildcard $(VMLINUX_BTF_PATHS)))) -$(OUTPUT)/runqslower: $(BPFOBJ) +DEFAULT_BPFTOOL := $(SCRATCH_DIR)/sbin/bpftool + +$(OUTPUT)/runqslower: $(BPFOBJ) | $(DEFAULT_BPFTOOL) $(Q)$(MAKE) $(submake_extras) -C $(TOOLSDIR)/bpf/runqslower \ OUTPUT=$(SCRATCH_DIR)/ VMLINUX_BTF=$(VMLINUX_BTF) \ BPFOBJ=$(BPFOBJ) BPF_INCLUDE=$(INCLUDE_DIR) && \ @@ -164,7 +166,6 @@ $(OUTPUT)/test_netcnt: cgroup_helpers.c $(OUTPUT)/test_sock_fields: cgroup_helpers.c $(OUTPUT)/test_sysctl: cgroup_helpers.c -DEFAULT_BPFTOOL := $(SCRATCH_DIR)/sbin/bpftool BPFTOOL ?= $(DEFAULT_BPFTOOL) $(DEFAULT_BPFTOOL): $(wildcard $(BPFTOOLDIR)/*.[ch] $(BPFTOOLDIR)/Makefile) \ $(BPFOBJ) | $(BUILD_DIR)/bpftool From 5e7b30205cef80f6bb922e61834437ca7bff5837 Mon Sep 17 00:00:00 2001 From: Yonghong Song Date: Tue, 4 Aug 2020 22:50:56 -0700 Subject: [PATCH 032/122] bpf: Change uapi for bpf iterator map elements Commit a5cbe05a6673 ("bpf: Implement bpf iterator for map elements") added bpf iterator support for map elements. The map element bpf iterator requires info to identify a particular map. In the above commit, the attr->link_create.target_fd is used to carry map_fd and an enum bpf_iter_link_info is added to uapi to specify the target_fd actually representing a map_fd: enum bpf_iter_link_info { BPF_ITER_LINK_UNSPEC = 0, BPF_ITER_LINK_MAP_FD = 1, MAX_BPF_ITER_LINK_INFO, }; This is an extensible approach as we can grow enumerator for pid, cgroup_id, etc. and we can unionize target_fd for pid, cgroup_id, etc. But in the future, there are chances that more complex customization may happen, e.g., for tasks, it could be filtered based on both cgroup_id and user_id. This patch changed the uapi to have fields __aligned_u64 iter_info; __u32 iter_info_len; for additional iter_info for link_create. The iter_info is defined as union bpf_iter_link_info { struct { __u32 map_fd; } map; }; So future extension for additional customization will be easier. The bpf_iter_link_info will be passed to target callback to validate and generic bpf_iter framework does not need to deal it any more. Note that map_fd = 0 will be considered invalid and -EBADF will be returned to user space. Fixes: a5cbe05a6673 ("bpf: Implement bpf iterator for map elements") Signed-off-by: Yonghong Song Signed-off-by: Alexei Starovoitov Acked-by: Andrii Nakryiko Acked-by: John Fastabend Link: https://lore.kernel.org/bpf/20200805055056.1457463-1-yhs@fb.com --- include/linux/bpf.h | 10 ++++--- include/uapi/linux/bpf.h | 15 +++++----- kernel/bpf/bpf_iter.c | 58 +++++++++++++++++++-------------------- kernel/bpf/map_iter.c | 37 +++++++++++++++++++------ kernel/bpf/syscall.c | 2 +- net/core/bpf_sk_storage.c | 37 +++++++++++++++++++------ 6 files changed, 102 insertions(+), 57 deletions(-) diff --git a/include/linux/bpf.h b/include/linux/bpf.h index cef4ef0d2b4e..55f694b63164 100644 --- a/include/linux/bpf.h +++ b/include/linux/bpf.h @@ -1214,15 +1214,17 @@ struct bpf_iter_aux_info { struct bpf_map *map; }; -typedef int (*bpf_iter_check_target_t)(struct bpf_prog *prog, - struct bpf_iter_aux_info *aux); +typedef int (*bpf_iter_attach_target_t)(struct bpf_prog *prog, + union bpf_iter_link_info *linfo, + struct bpf_iter_aux_info *aux); +typedef void (*bpf_iter_detach_target_t)(struct bpf_iter_aux_info *aux); #define BPF_ITER_CTX_ARG_MAX 2 struct bpf_iter_reg { const char *target; - bpf_iter_check_target_t check_target; + bpf_iter_attach_target_t attach_target; + bpf_iter_detach_target_t detach_target; u32 ctx_arg_info_size; - enum bpf_iter_link_info req_linfo; struct bpf_ctx_arg_aux ctx_arg_info[BPF_ITER_CTX_ARG_MAX]; const struct bpf_iter_seq_info *seq_info; }; diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h index b134e679e9db..0480f893facd 100644 --- a/include/uapi/linux/bpf.h +++ b/include/uapi/linux/bpf.h @@ -81,6 +81,12 @@ struct bpf_cgroup_storage_key { __u32 attach_type; /* program attach type */ }; +union bpf_iter_link_info { + struct { + __u32 map_fd; + } map; +}; + /* BPF syscall commands, see bpf(2) man-page for details. */ enum bpf_cmd { BPF_MAP_CREATE, @@ -249,13 +255,6 @@ enum bpf_link_type { MAX_BPF_LINK_TYPE, }; -enum bpf_iter_link_info { - BPF_ITER_LINK_UNSPEC = 0, - BPF_ITER_LINK_MAP_FD = 1, - - MAX_BPF_ITER_LINK_INFO, -}; - /* cgroup-bpf attach flags used in BPF_PROG_ATTACH command * * NONE(default): No further bpf programs allowed in the subtree. @@ -623,6 +622,8 @@ union bpf_attr { }; __u32 attach_type; /* attach type */ __u32 flags; /* extra flags */ + __aligned_u64 iter_info; /* extra bpf_iter_link_info */ + __u32 iter_info_len; /* iter_info length */ } link_create; struct { /* struct used by BPF_LINK_UPDATE command */ diff --git a/kernel/bpf/bpf_iter.c b/kernel/bpf/bpf_iter.c index 363b9cafc2d8..b6715964b685 100644 --- a/kernel/bpf/bpf_iter.c +++ b/kernel/bpf/bpf_iter.c @@ -338,8 +338,8 @@ static void bpf_iter_link_release(struct bpf_link *link) struct bpf_iter_link *iter_link = container_of(link, struct bpf_iter_link, link); - if (iter_link->aux.map) - bpf_map_put_with_uref(iter_link->aux.map); + if (iter_link->tinfo->reg_info->detach_target) + iter_link->tinfo->reg_info->detach_target(&iter_link->aux); } static void bpf_iter_link_dealloc(struct bpf_link *link) @@ -390,15 +390,35 @@ bool bpf_link_is_iter(struct bpf_link *link) int bpf_iter_link_attach(const union bpf_attr *attr, struct bpf_prog *prog) { + union bpf_iter_link_info __user *ulinfo; struct bpf_link_primer link_primer; struct bpf_iter_target_info *tinfo; - struct bpf_iter_aux_info aux = {}; + union bpf_iter_link_info linfo; struct bpf_iter_link *link; - u32 prog_btf_id, target_fd; + u32 prog_btf_id, linfo_len; bool existed = false; - struct bpf_map *map; int err; + if (attr->link_create.target_fd || attr->link_create.flags) + return -EINVAL; + + memset(&linfo, 0, sizeof(union bpf_iter_link_info)); + + ulinfo = u64_to_user_ptr(attr->link_create.iter_info); + linfo_len = attr->link_create.iter_info_len; + if (!ulinfo ^ !linfo_len) + return -EINVAL; + + if (ulinfo) { + err = bpf_check_uarg_tail_zero(ulinfo, sizeof(linfo), + linfo_len); + if (err) + return err; + linfo_len = min_t(u32, linfo_len, sizeof(linfo)); + if (copy_from_user(&linfo, ulinfo, linfo_len)) + return -EFAULT; + } + prog_btf_id = prog->aux->attach_btf_id; mutex_lock(&targets_mutex); list_for_each_entry(tinfo, &targets, list) { @@ -411,13 +431,6 @@ int bpf_iter_link_attach(const union bpf_attr *attr, struct bpf_prog *prog) if (!existed) return -ENOENT; - /* Make sure user supplied flags are target expected. */ - target_fd = attr->link_create.target_fd; - if (attr->link_create.flags != tinfo->reg_info->req_linfo) - return -EINVAL; - if (!attr->link_create.flags && target_fd) - return -EINVAL; - link = kzalloc(sizeof(*link), GFP_USER | __GFP_NOWARN); if (!link) return -ENOMEM; @@ -431,28 +444,15 @@ int bpf_iter_link_attach(const union bpf_attr *attr, struct bpf_prog *prog) return err; } - if (tinfo->reg_info->req_linfo == BPF_ITER_LINK_MAP_FD) { - map = bpf_map_get_with_uref(target_fd); - if (IS_ERR(map)) { - err = PTR_ERR(map); - goto cleanup_link; - } - - aux.map = map; - err = tinfo->reg_info->check_target(prog, &aux); + if (tinfo->reg_info->attach_target) { + err = tinfo->reg_info->attach_target(prog, &linfo, &link->aux); if (err) { - bpf_map_put_with_uref(map); - goto cleanup_link; + bpf_link_cleanup(&link_primer); + return err; } - - link->aux.map = map; } return bpf_link_settle(&link_primer); - -cleanup_link: - bpf_link_cleanup(&link_primer); - return err; } static void init_seq_meta(struct bpf_iter_priv_data *priv_data, diff --git a/kernel/bpf/map_iter.c b/kernel/bpf/map_iter.c index fbe1f557cb88..af86048e5afd 100644 --- a/kernel/bpf/map_iter.c +++ b/kernel/bpf/map_iter.c @@ -98,12 +98,21 @@ static struct bpf_iter_reg bpf_map_reg_info = { .seq_info = &bpf_map_seq_info, }; -static int bpf_iter_check_map(struct bpf_prog *prog, - struct bpf_iter_aux_info *aux) +static int bpf_iter_attach_map(struct bpf_prog *prog, + union bpf_iter_link_info *linfo, + struct bpf_iter_aux_info *aux) { u32 key_acc_size, value_acc_size, key_size, value_size; - struct bpf_map *map = aux->map; + struct bpf_map *map; bool is_percpu = false; + int err = -EINVAL; + + if (!linfo->map.map_fd) + return -EBADF; + + map = bpf_map_get_with_uref(linfo->map.map_fd); + if (IS_ERR(map)) + return PTR_ERR(map); if (map->map_type == BPF_MAP_TYPE_PERCPU_HASH || map->map_type == BPF_MAP_TYPE_LRU_PERCPU_HASH || @@ -112,7 +121,7 @@ static int bpf_iter_check_map(struct bpf_prog *prog, else if (map->map_type != BPF_MAP_TYPE_HASH && map->map_type != BPF_MAP_TYPE_LRU_HASH && map->map_type != BPF_MAP_TYPE_ARRAY) - return -EINVAL; + goto put_map; key_acc_size = prog->aux->max_rdonly_access; value_acc_size = prog->aux->max_rdwr_access; @@ -122,10 +131,22 @@ static int bpf_iter_check_map(struct bpf_prog *prog, else value_size = round_up(map->value_size, 8) * num_possible_cpus(); - if (key_acc_size > key_size || value_acc_size > value_size) - return -EACCES; + if (key_acc_size > key_size || value_acc_size > value_size) { + err = -EACCES; + goto put_map; + } + aux->map = map; return 0; + +put_map: + bpf_map_put_with_uref(map); + return err; +} + +static void bpf_iter_detach_map(struct bpf_iter_aux_info *aux) +{ + bpf_map_put_with_uref(aux->map); } DEFINE_BPF_ITER_FUNC(bpf_map_elem, struct bpf_iter_meta *meta, @@ -133,8 +154,8 @@ DEFINE_BPF_ITER_FUNC(bpf_map_elem, struct bpf_iter_meta *meta, static const struct bpf_iter_reg bpf_map_elem_reg_info = { .target = "bpf_map_elem", - .check_target = bpf_iter_check_map, - .req_linfo = BPF_ITER_LINK_MAP_FD, + .attach_target = bpf_iter_attach_map, + .detach_target = bpf_iter_detach_map, .ctx_arg_info_size = 2, .ctx_arg_info = { { offsetof(struct bpf_iter__bpf_map_elem, key), diff --git a/kernel/bpf/syscall.c b/kernel/bpf/syscall.c index 2f343ce15747..86299a292214 100644 --- a/kernel/bpf/syscall.c +++ b/kernel/bpf/syscall.c @@ -3883,7 +3883,7 @@ static int tracing_bpf_link_attach(const union bpf_attr *attr, struct bpf_prog * return -EINVAL; } -#define BPF_LINK_CREATE_LAST_FIELD link_create.flags +#define BPF_LINK_CREATE_LAST_FIELD link_create.iter_info_len static int link_create(union bpf_attr *attr) { enum bpf_prog_type ptype; diff --git a/net/core/bpf_sk_storage.c b/net/core/bpf_sk_storage.c index d3377c90a291..b988f48153a4 100644 --- a/net/core/bpf_sk_storage.c +++ b/net/core/bpf_sk_storage.c @@ -1384,18 +1384,39 @@ static int bpf_iter_init_sk_storage_map(void *priv_data, return 0; } -static int bpf_iter_check_map(struct bpf_prog *prog, - struct bpf_iter_aux_info *aux) +static int bpf_iter_attach_map(struct bpf_prog *prog, + union bpf_iter_link_info *linfo, + struct bpf_iter_aux_info *aux) { - struct bpf_map *map = aux->map; + struct bpf_map *map; + int err = -EINVAL; + + if (!linfo->map.map_fd) + return -EBADF; + + map = bpf_map_get_with_uref(linfo->map.map_fd); + if (IS_ERR(map)) + return PTR_ERR(map); if (map->map_type != BPF_MAP_TYPE_SK_STORAGE) - return -EINVAL; + goto put_map; - if (prog->aux->max_rdonly_access > map->value_size) - return -EACCES; + if (prog->aux->max_rdonly_access > map->value_size) { + err = -EACCES; + goto put_map; + } + aux->map = map; return 0; + +put_map: + bpf_map_put_with_uref(map); + return err; +} + +static void bpf_iter_detach_map(struct bpf_iter_aux_info *aux) +{ + bpf_map_put_with_uref(aux->map); } static const struct seq_operations bpf_sk_storage_map_seq_ops = { @@ -1414,8 +1435,8 @@ static const struct bpf_iter_seq_info iter_seq_info = { static struct bpf_iter_reg bpf_sk_storage_map_reg_info = { .target = "bpf_sk_storage_map", - .check_target = bpf_iter_check_map, - .req_linfo = BPF_ITER_LINK_MAP_FD, + .attach_target = bpf_iter_attach_map, + .detach_target = bpf_iter_detach_map, .ctx_arg_info_size = 2, .ctx_arg_info = { { offsetof(struct bpf_iter__bpf_sk_storage_map, sk), From 74fc097de327b37e8fe3ff580ce7ffaa7c1740dd Mon Sep 17 00:00:00 2001 From: Yonghong Song Date: Tue, 4 Aug 2020 22:50:58 -0700 Subject: [PATCH 033/122] tools/bpf: Support new uapi for map element bpf iterator Previous commit adjusted kernel uapi for map element bpf iterator. This patch adjusted libbpf API due to uapi change. bpftool and bpf_iter selftests are also changed accordingly. Signed-off-by: Yonghong Song Signed-off-by: Alexei Starovoitov Acked-by: Andrii Nakryiko Acked-by: John Fastabend Link: https://lore.kernel.org/bpf/20200805055058.1457623-1-yhs@fb.com --- tools/bpf/bpftool/iter.c | 9 +++-- tools/include/uapi/linux/bpf.h | 15 +++---- tools/lib/bpf/bpf.c | 3 ++ tools/lib/bpf/bpf.h | 5 ++- tools/lib/bpf/libbpf.c | 6 +-- tools/lib/bpf/libbpf.h | 5 ++- .../selftests/bpf/prog_tests/bpf_iter.c | 40 +++++++++++++++---- 7 files changed, 58 insertions(+), 25 deletions(-) diff --git a/tools/bpf/bpftool/iter.c b/tools/bpf/bpftool/iter.c index c9dba7543dba..3b1aad7535dd 100644 --- a/tools/bpf/bpftool/iter.c +++ b/tools/bpf/bpftool/iter.c @@ -11,6 +11,7 @@ static int do_pin(int argc, char **argv) { DECLARE_LIBBPF_OPTS(bpf_iter_attach_opts, iter_opts); + union bpf_iter_link_info linfo; const char *objfile, *path; struct bpf_program *prog; struct bpf_object *obj; @@ -36,6 +37,11 @@ static int do_pin(int argc, char **argv) map_fd = map_parse_fd(&argc, &argv); if (map_fd < 0) return -1; + + memset(&linfo, 0, sizeof(linfo)); + linfo.map.map_fd = map_fd; + iter_opts.link_info = &linfo; + iter_opts.link_info_len = sizeof(linfo); } } @@ -57,9 +63,6 @@ static int do_pin(int argc, char **argv) goto close_obj; } - if (map_fd >= 0) - iter_opts.map_fd = map_fd; - link = bpf_program__attach_iter(prog, &iter_opts); if (IS_ERR(link)) { err = PTR_ERR(link); diff --git a/tools/include/uapi/linux/bpf.h b/tools/include/uapi/linux/bpf.h index b134e679e9db..0480f893facd 100644 --- a/tools/include/uapi/linux/bpf.h +++ b/tools/include/uapi/linux/bpf.h @@ -81,6 +81,12 @@ struct bpf_cgroup_storage_key { __u32 attach_type; /* program attach type */ }; +union bpf_iter_link_info { + struct { + __u32 map_fd; + } map; +}; + /* BPF syscall commands, see bpf(2) man-page for details. */ enum bpf_cmd { BPF_MAP_CREATE, @@ -249,13 +255,6 @@ enum bpf_link_type { MAX_BPF_LINK_TYPE, }; -enum bpf_iter_link_info { - BPF_ITER_LINK_UNSPEC = 0, - BPF_ITER_LINK_MAP_FD = 1, - - MAX_BPF_ITER_LINK_INFO, -}; - /* cgroup-bpf attach flags used in BPF_PROG_ATTACH command * * NONE(default): No further bpf programs allowed in the subtree. @@ -623,6 +622,8 @@ union bpf_attr { }; __u32 attach_type; /* attach type */ __u32 flags; /* extra flags */ + __aligned_u64 iter_info; /* extra bpf_iter_link_info */ + __u32 iter_info_len; /* iter_info length */ } link_create; struct { /* struct used by BPF_LINK_UPDATE command */ diff --git a/tools/lib/bpf/bpf.c b/tools/lib/bpf/bpf.c index eab14c97c15d..0750681057c2 100644 --- a/tools/lib/bpf/bpf.c +++ b/tools/lib/bpf/bpf.c @@ -599,6 +599,9 @@ int bpf_link_create(int prog_fd, int target_fd, attr.link_create.target_fd = target_fd; attr.link_create.attach_type = attach_type; attr.link_create.flags = OPTS_GET(opts, flags, 0); + attr.link_create.iter_info = + ptr_to_u64(OPTS_GET(opts, iter_info, (void *)0)); + attr.link_create.iter_info_len = OPTS_GET(opts, iter_info_len, 0); return sys_bpf(BPF_LINK_CREATE, &attr, sizeof(attr)); } diff --git a/tools/lib/bpf/bpf.h b/tools/lib/bpf/bpf.h index 28855fd5b5f4..015d13f25fcc 100644 --- a/tools/lib/bpf/bpf.h +++ b/tools/lib/bpf/bpf.h @@ -168,11 +168,14 @@ LIBBPF_API int bpf_prog_detach(int attachable_fd, enum bpf_attach_type type); LIBBPF_API int bpf_prog_detach2(int prog_fd, int attachable_fd, enum bpf_attach_type type); +union bpf_iter_link_info; /* defined in up-to-date linux/bpf.h */ struct bpf_link_create_opts { size_t sz; /* size of this struct for forward/backward compatibility */ __u32 flags; + union bpf_iter_link_info *iter_info; + __u32 iter_info_len; }; -#define bpf_link_create_opts__last_field flags +#define bpf_link_create_opts__last_field iter_info_len LIBBPF_API int bpf_link_create(int prog_fd, int target_fd, enum bpf_attach_type attach_type, diff --git a/tools/lib/bpf/libbpf.c b/tools/lib/bpf/libbpf.c index 7be04e45d29c..0a06124f7999 100644 --- a/tools/lib/bpf/libbpf.c +++ b/tools/lib/bpf/libbpf.c @@ -8306,10 +8306,8 @@ bpf_program__attach_iter(struct bpf_program *prog, if (!OPTS_VALID(opts, bpf_iter_attach_opts)) return ERR_PTR(-EINVAL); - if (OPTS_HAS(opts, map_fd)) { - target_fd = opts->map_fd; - link_create_opts.flags = BPF_ITER_LINK_MAP_FD; - } + link_create_opts.iter_info = OPTS_GET(opts, link_info, (void *)0); + link_create_opts.iter_info_len = OPTS_GET(opts, link_info_len, 0); prog_fd = bpf_program__fd(prog); if (prog_fd < 0) { diff --git a/tools/lib/bpf/libbpf.h b/tools/lib/bpf/libbpf.h index 3ed1399bfbbc..5ecb4069a9f0 100644 --- a/tools/lib/bpf/libbpf.h +++ b/tools/lib/bpf/libbpf.h @@ -267,9 +267,10 @@ LIBBPF_API struct bpf_link *bpf_map__attach_struct_ops(struct bpf_map *map); struct bpf_iter_attach_opts { size_t sz; /* size of this struct for forward/backward compatibility */ - __u32 map_fd; + union bpf_iter_link_info *link_info; + __u32 link_info_len; }; -#define bpf_iter_attach_opts__last_field map_fd +#define bpf_iter_attach_opts__last_field link_info_len LIBBPF_API struct bpf_link * bpf_program__attach_iter(struct bpf_program *prog, diff --git a/tools/testing/selftests/bpf/prog_tests/bpf_iter.c b/tools/testing/selftests/bpf/prog_tests/bpf_iter.c index 4ffefdc1130f..7375d9a6d242 100644 --- a/tools/testing/selftests/bpf/prog_tests/bpf_iter.c +++ b/tools/testing/selftests/bpf/prog_tests/bpf_iter.c @@ -468,6 +468,7 @@ static void test_bpf_hash_map(void) DECLARE_LIBBPF_OPTS(bpf_iter_attach_opts, opts); struct bpf_iter_bpf_hash_map *skel; int err, i, len, map_fd, iter_fd; + union bpf_iter_link_info linfo; __u64 val, expected_val = 0; struct bpf_link *link; struct key_t { @@ -490,13 +491,16 @@ static void test_bpf_hash_map(void) goto out; /* iterator with hashmap2 and hashmap3 should fail */ - opts.map_fd = bpf_map__fd(skel->maps.hashmap2); + memset(&linfo, 0, sizeof(linfo)); + linfo.map.map_fd = bpf_map__fd(skel->maps.hashmap2); + opts.link_info = &linfo; + opts.link_info_len = sizeof(linfo); link = bpf_program__attach_iter(skel->progs.dump_bpf_hash_map, &opts); if (CHECK(!IS_ERR(link), "attach_iter", "attach_iter for hashmap2 unexpected succeeded\n")) goto out; - opts.map_fd = bpf_map__fd(skel->maps.hashmap3); + linfo.map.map_fd = bpf_map__fd(skel->maps.hashmap3); link = bpf_program__attach_iter(skel->progs.dump_bpf_hash_map, &opts); if (CHECK(!IS_ERR(link), "attach_iter", "attach_iter for hashmap3 unexpected succeeded\n")) @@ -519,7 +523,7 @@ static void test_bpf_hash_map(void) goto out; } - opts.map_fd = map_fd; + linfo.map.map_fd = map_fd; link = bpf_program__attach_iter(skel->progs.dump_bpf_hash_map, &opts); if (CHECK(IS_ERR(link), "attach_iter", "attach_iter failed\n")) goto out; @@ -562,6 +566,7 @@ static void test_bpf_percpu_hash_map(void) DECLARE_LIBBPF_OPTS(bpf_iter_attach_opts, opts); struct bpf_iter_bpf_percpu_hash_map *skel; int err, i, j, len, map_fd, iter_fd; + union bpf_iter_link_info linfo; __u32 expected_val = 0; struct bpf_link *link; struct key_t { @@ -606,7 +611,10 @@ static void test_bpf_percpu_hash_map(void) goto out; } - opts.map_fd = map_fd; + memset(&linfo, 0, sizeof(linfo)); + linfo.map.map_fd = map_fd; + opts.link_info = &linfo; + opts.link_info_len = sizeof(linfo); link = bpf_program__attach_iter(skel->progs.dump_bpf_percpu_hash_map, &opts); if (CHECK(IS_ERR(link), "attach_iter", "attach_iter failed\n")) goto out; @@ -649,6 +657,7 @@ static void test_bpf_array_map(void) DECLARE_LIBBPF_OPTS(bpf_iter_attach_opts, opts); __u32 expected_key = 0, res_first_key; struct bpf_iter_bpf_array_map *skel; + union bpf_iter_link_info linfo; int err, i, map_fd, iter_fd; struct bpf_link *link; char buf[64] = {}; @@ -673,7 +682,10 @@ static void test_bpf_array_map(void) goto out; } - opts.map_fd = map_fd; + memset(&linfo, 0, sizeof(linfo)); + linfo.map.map_fd = map_fd; + opts.link_info = &linfo; + opts.link_info_len = sizeof(linfo); link = bpf_program__attach_iter(skel->progs.dump_bpf_array_map, &opts); if (CHECK(IS_ERR(link), "attach_iter", "attach_iter failed\n")) goto out; @@ -730,6 +742,7 @@ static void test_bpf_percpu_array_map(void) DECLARE_LIBBPF_OPTS(bpf_iter_attach_opts, opts); struct bpf_iter_bpf_percpu_array_map *skel; __u32 expected_key = 0, expected_val = 0; + union bpf_iter_link_info linfo; int err, i, j, map_fd, iter_fd; struct bpf_link *link; char buf[64]; @@ -765,7 +778,10 @@ static void test_bpf_percpu_array_map(void) goto out; } - opts.map_fd = map_fd; + memset(&linfo, 0, sizeof(linfo)); + linfo.map.map_fd = map_fd; + opts.link_info = &linfo; + opts.link_info_len = sizeof(linfo); link = bpf_program__attach_iter(skel->progs.dump_bpf_percpu_array_map, &opts); if (CHECK(IS_ERR(link), "attach_iter", "attach_iter failed\n")) goto out; @@ -803,6 +819,7 @@ static void test_bpf_sk_storage_map(void) DECLARE_LIBBPF_OPTS(bpf_iter_attach_opts, opts); int err, i, len, map_fd, iter_fd, num_sockets; struct bpf_iter_bpf_sk_storage_map *skel; + union bpf_iter_link_info linfo; int sock_fd[3] = {-1, -1, -1}; __u32 val, expected_val = 0; struct bpf_link *link; @@ -829,7 +846,10 @@ static void test_bpf_sk_storage_map(void) goto out; } - opts.map_fd = map_fd; + memset(&linfo, 0, sizeof(linfo)); + linfo.map.map_fd = map_fd; + opts.link_info = &linfo; + opts.link_info_len = sizeof(linfo); link = bpf_program__attach_iter(skel->progs.dump_bpf_sk_storage_map, &opts); if (CHECK(IS_ERR(link), "attach_iter", "attach_iter failed\n")) goto out; @@ -871,6 +891,7 @@ static void test_rdonly_buf_out_of_bound(void) { DECLARE_LIBBPF_OPTS(bpf_iter_attach_opts, opts); struct bpf_iter_test_kern5 *skel; + union bpf_iter_link_info linfo; struct bpf_link *link; skel = bpf_iter_test_kern5__open_and_load(); @@ -878,7 +899,10 @@ static void test_rdonly_buf_out_of_bound(void) "skeleton open_and_load failed\n")) return; - opts.map_fd = bpf_map__fd(skel->maps.hashmap1); + memset(&linfo, 0, sizeof(linfo)); + linfo.map.map_fd = bpf_map__fd(skel->maps.hashmap1); + opts.link_info = &linfo; + opts.link_info_len = sizeof(linfo); link = bpf_program__attach_iter(skel->progs.dump_bpf_hash_map, &opts); if (CHECK(!IS_ERR(link), "attach_iter", "unexpected success\n")) bpf_link__destroy(link); From 932ac54a3e59335a847f7682b5124a788ab3c798 Mon Sep 17 00:00:00 2001 From: "Daniel T. Lee" Date: Thu, 6 Aug 2020 07:33:59 +0900 Subject: [PATCH 034/122] libbf: Fix uninitialized pointer at btf__parse_raw() Recently, from commit 94a1fedd63ed ("libbpf: Add btf__parse_raw() and generic btf__parse() APIs"), new API has been added to libbpf that allows to parse BTF from raw data file (btf__parse_raw()). The commit derives build failure of samples/bpf due to improper access of uninitialized pointer at btf_parse_raw(). btf.c: In function btf__parse_raw: btf.c:625:28: error: btf may be used uninitialized in this function 625 | return err ? ERR_PTR(err) : btf; | ~~~~~~~~~~~~~~~~~~~^~~~~ This commit fixes the build failure of samples/bpf by adding code of initializing btf pointer as NULL. Fixes: 94a1fedd63ed ("libbpf: Add btf__parse_raw() and generic btf__parse() APIs") Signed-off-by: Daniel T. Lee Signed-off-by: Alexei Starovoitov Acked-by: John Fastabend Link: https://lore.kernel.org/bpf/20200805223359.32109-1-danieltimlee@gmail.com --- tools/lib/bpf/btf.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tools/lib/bpf/btf.c b/tools/lib/bpf/btf.c index 856b09a04563..4843e44916f7 100644 --- a/tools/lib/bpf/btf.c +++ b/tools/lib/bpf/btf.c @@ -564,8 +564,8 @@ struct btf *btf__parse_elf(const char *path, struct btf_ext **btf_ext) struct btf *btf__parse_raw(const char *path) { + struct btf *btf = NULL; void *data = NULL; - struct btf *btf; FILE *f = NULL; __u16 magic; int err = 0; From d48556f45608921fa07cb882f9dd15a8a1203427 Mon Sep 17 00:00:00 2001 From: Stanislav Fomichev Date: Thu, 6 Aug 2020 08:52:25 -0700 Subject: [PATCH 035/122] bpf: Add missing return to resolve_btfids int sets_patch(struct object *obj) doesn't have a 'return 0' at the end. Fixes: fbbb68de80a4 ("bpf: Add resolve_btfids tool to resolve BTF IDs in ELF object") Signed-off-by: Stanislav Fomichev Signed-off-by: Alexei Starovoitov Link: https://lore.kernel.org/bpf/20200806155225.637202-1-sdf@google.com --- tools/bpf/resolve_btfids/main.c | 1 + 1 file changed, 1 insertion(+) diff --git a/tools/bpf/resolve_btfids/main.c b/tools/bpf/resolve_btfids/main.c index 52d883325a23..4d9ecb975862 100644 --- a/tools/bpf/resolve_btfids/main.c +++ b/tools/bpf/resolve_btfids/main.c @@ -566,6 +566,7 @@ static int sets_patch(struct object *obj) next = rb_next(next); } + return 0; } static int symbols_patch(struct object *obj) From 0d360d64b01231cdb36e1936de889f308fd9317f Mon Sep 17 00:00:00 2001 From: Stanislav Fomichev Date: Thu, 6 Aug 2020 11:26:12 -0700 Subject: [PATCH 036/122] bpf: Remove inline from bpf_do_trace_printk I get the following error during compilation on my side: kernel/trace/bpf_trace.c: In function 'bpf_do_trace_printk': kernel/trace/bpf_trace.c:386:34: error: function 'bpf_do_trace_printk' can never be inlined because it uses variable argument lists static inline __printf(1, 0) int bpf_do_trace_printk(const char *fmt, ...) ^ Fixes: ac5a72ea5c89 ("bpf: Use dedicated bpf_trace_printk event instead of trace_printk()") Signed-off-by: Stanislav Fomichev Signed-off-by: Alexei Starovoitov Link: https://lore.kernel.org/bpf/20200806182612.1390883-1-sdf@google.com --- kernel/trace/bpf_trace.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/kernel/trace/bpf_trace.c b/kernel/trace/bpf_trace.c index cb91ef902cc4..a8d4f253ed77 100644 --- a/kernel/trace/bpf_trace.c +++ b/kernel/trace/bpf_trace.c @@ -383,7 +383,7 @@ static DEFINE_RAW_SPINLOCK(trace_printk_lock); #define BPF_TRACE_PRINTK_SIZE 1024 -static inline __printf(1, 0) int bpf_do_trace_printk(const char *fmt, ...) +static __printf(1, 0) int bpf_do_trace_printk(const char *fmt, ...) { static char buf[BPF_TRACE_PRINTK_SIZE]; unsigned long flags; From 6fc5916cc256b8e92cc7ad3b34cc4d2a7efa1d5c Mon Sep 17 00:00:00 2001 From: Jiri Benc Date: Thu, 6 Aug 2020 20:39:59 +0200 Subject: [PATCH 037/122] selftests: bpf: Switch off timeout Several bpf tests are interrupted by the default timeout of 45 seconds added by commit 852c8cbf34d3 ("selftests/kselftest/runner.sh: Add 45 second timeout per test"). In my case it was test_progs, test_tunnel.sh, test_lwt_ip_encap.sh and test_xdping.sh. There's not much value in having a timeout for bpf tests, switch it off. Signed-off-by: Jiri Benc Signed-off-by: Alexei Starovoitov Link: https://lore.kernel.org/bpf/7a9198ed10917f4ecab4a3dd74bcda1200791efd.1596739059.git.jbenc@redhat.com --- tools/testing/selftests/bpf/settings | 1 + 1 file changed, 1 insertion(+) create mode 100644 tools/testing/selftests/bpf/settings diff --git a/tools/testing/selftests/bpf/settings b/tools/testing/selftests/bpf/settings new file mode 100644 index 000000000000..e7b9417537fb --- /dev/null +++ b/tools/testing/selftests/bpf/settings @@ -0,0 +1 @@ +timeout=0 From 929e54a989680c6f134b02293732030b897475dc Mon Sep 17 00:00:00 2001 From: Jianlin Lv Date: Thu, 6 Aug 2020 18:42:24 +0800 Subject: [PATCH 038/122] bpf: Fix compilation warning of selftests MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Clang compiler version: 12.0.0 The following warning appears during the selftests/bpf compilation: prog_tests/send_signal.c:51:3: warning: ignoring return value of ‘write’, declared with attribute warn_unused_result [-Wunused-result] 51 | write(pipe_c2p[1], buf, 1); | ^~~~~~~~~~~~~~~~~~~~~~~~~~ prog_tests/send_signal.c:54:3: warning: ignoring return value of ‘read’, declared with attribute warn_unused_result [-Wunused-result] 54 | read(pipe_p2c[0], buf, 1); | ^~~~~~~~~~~~~~~~~~~~~~~~~ ...... prog_tests/stacktrace_build_id_nmi.c:13:2: warning: ignoring return value of ‘fscanf’,declared with attribute warn_unused_result [-Wunused-resul] 13 | fscanf(f, "%llu", &sample_freq); | ^~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ test_tcpnotify_user.c:133:2: warning:ignoring return value of ‘system’, declared with attribute warn_unused_result [-Wunused-result] 133 | system(test_script); | ^~~~~~~~~~~~~~~~~~~ test_tcpnotify_user.c:138:2: warning:ignoring return value of ‘system’, declared with attribute warn_unused_result [-Wunused-result] 138 | system(test_script); | ^~~~~~~~~~~~~~~~~~~ test_tcpnotify_user.c:143:2: warning:ignoring return value of ‘system’, declared with attribute warn_unused_result [-Wunused-result] 143 | system(test_script); | ^~~~~~~~~~~~~~~~~~~ Add code that fix compilation warning about ignoring return value and handles any errors; Check return value of library`s API make the code more secure. Signed-off-by: Jianlin Lv Signed-off-by: Alexei Starovoitov Link: https://lore.kernel.org/bpf/20200806104224.95306-1-Jianlin.Lv@arm.com --- .../selftests/bpf/prog_tests/send_signal.c | 18 ++++++++---------- .../bpf/prog_tests/stacktrace_build_id_nmi.c | 4 +++- .../selftests/bpf/test_tcpnotify_user.c | 13 ++++++++++--- 3 files changed, 21 insertions(+), 14 deletions(-) diff --git a/tools/testing/selftests/bpf/prog_tests/send_signal.c b/tools/testing/selftests/bpf/prog_tests/send_signal.c index 504abb7bfb95..7043e6ded0e6 100644 --- a/tools/testing/selftests/bpf/prog_tests/send_signal.c +++ b/tools/testing/selftests/bpf/prog_tests/send_signal.c @@ -48,21 +48,19 @@ static void test_send_signal_common(struct perf_event_attr *attr, close(pipe_p2c[1]); /* close write */ /* notify parent signal handler is installed */ - write(pipe_c2p[1], buf, 1); + CHECK(write(pipe_c2p[1], buf, 1) != 1, "pipe_write", "err %d\n", -errno); /* make sure parent enabled bpf program to send_signal */ - read(pipe_p2c[0], buf, 1); + CHECK(read(pipe_p2c[0], buf, 1) != 1, "pipe_read", "err %d\n", -errno); /* wait a little for signal handler */ sleep(1); - if (sigusr1_received) - write(pipe_c2p[1], "2", 1); - else - write(pipe_c2p[1], "0", 1); + buf[0] = sigusr1_received ? '2' : '0'; + CHECK(write(pipe_c2p[1], buf, 1) != 1, "pipe_write", "err %d\n", -errno); /* wait for parent notification and exit */ - read(pipe_p2c[0], buf, 1); + CHECK(read(pipe_p2c[0], buf, 1) != 1, "pipe_read", "err %d\n", -errno); close(pipe_c2p[1]); close(pipe_p2c[0]); @@ -99,7 +97,7 @@ static void test_send_signal_common(struct perf_event_attr *attr, } /* wait until child signal handler installed */ - read(pipe_c2p[0], buf, 1); + CHECK(read(pipe_c2p[0], buf, 1) != 1, "pipe_read", "err %d\n", -errno); /* trigger the bpf send_signal */ skel->bss->pid = pid; @@ -107,7 +105,7 @@ static void test_send_signal_common(struct perf_event_attr *attr, skel->bss->signal_thread = signal_thread; /* notify child that bpf program can send_signal now */ - write(pipe_p2c[1], buf, 1); + CHECK(write(pipe_p2c[1], buf, 1) != 1, "pipe_write", "err %d\n", -errno); /* wait for result */ err = read(pipe_c2p[0], buf, 1); @@ -121,7 +119,7 @@ static void test_send_signal_common(struct perf_event_attr *attr, CHECK(buf[0] != '2', test_name, "incorrect result\n"); /* notify child safe to exit */ - write(pipe_p2c[1], buf, 1); + CHECK(write(pipe_p2c[1], buf, 1) != 1, "pipe_write", "err %d\n", -errno); disable_pmu: close(pmu_fd); diff --git a/tools/testing/selftests/bpf/prog_tests/stacktrace_build_id_nmi.c b/tools/testing/selftests/bpf/prog_tests/stacktrace_build_id_nmi.c index f002e3090d92..11a769e18f5d 100644 --- a/tools/testing/selftests/bpf/prog_tests/stacktrace_build_id_nmi.c +++ b/tools/testing/selftests/bpf/prog_tests/stacktrace_build_id_nmi.c @@ -6,11 +6,13 @@ static __u64 read_perf_max_sample_freq(void) { __u64 sample_freq = 5000; /* fallback to 5000 on error */ FILE *f; + __u32 duration = 0; f = fopen("/proc/sys/kernel/perf_event_max_sample_rate", "r"); if (f == NULL) return sample_freq; - fscanf(f, "%llu", &sample_freq); + CHECK(fscanf(f, "%llu", &sample_freq) != 1, "Get max sample rate", + "return default value: 5000,err %d\n", -errno); fclose(f); return sample_freq; } diff --git a/tools/testing/selftests/bpf/test_tcpnotify_user.c b/tools/testing/selftests/bpf/test_tcpnotify_user.c index 8549b31716ab..73da7fe8c152 100644 --- a/tools/testing/selftests/bpf/test_tcpnotify_user.c +++ b/tools/testing/selftests/bpf/test_tcpnotify_user.c @@ -124,17 +124,24 @@ int main(int argc, char **argv) sprintf(test_script, "iptables -A INPUT -p tcp --dport %d -j DROP", TESTPORT); - system(test_script); + if (system(test_script)) { + printf("FAILED: execute command: %s, err %d\n", test_script, -errno); + goto err; + } sprintf(test_script, "nc 127.0.0.1 %d < /etc/passwd > /dev/null 2>&1 ", TESTPORT); - system(test_script); + if (system(test_script)) + printf("execute command: %s, err %d\n", test_script, -errno); sprintf(test_script, "iptables -D INPUT -p tcp --dport %d -j DROP", TESTPORT); - system(test_script); + if (system(test_script)) { + printf("FAILED: execute command: %s, err %d\n", test_script, -errno); + goto err; + } rv = bpf_map_lookup_elem(bpf_map__fd(global_map), &key, &g); if (rv != 0) { From c7ca03c216acb14466a713fedf1b9f2c24994ef2 Mon Sep 17 00:00:00 2001 From: Xie He Date: Wed, 5 Aug 2020 18:50:40 -0700 Subject: [PATCH 039/122] drivers/net/wan/lapbether: Added needed_headroom and a skb->len check 1. Added a skb->len check This driver expects upper layers to include a pseudo header of 1 byte when passing down a skb for transmission. This driver will read this 1-byte header. This patch added a skb->len check before reading the header to make sure the header exists. 2. Changed to use needed_headroom instead of hard_header_len to request necessary headroom to be allocated In net/packet/af_packet.c, the function packet_snd first reserves a headroom of length (dev->hard_header_len + dev->needed_headroom). Then if the socket is a SOCK_DGRAM socket, it calls dev_hard_header, which calls dev->header_ops->create, to create the link layer header. If the socket is a SOCK_RAW socket, it "un-reserves" a headroom of length (dev->hard_header_len), and assumes the user to provide the appropriate link layer header. So according to the logic of af_packet.c, dev->hard_header_len should be the length of the header that would be created by dev->header_ops->create. However, this driver doesn't provide dev->header_ops, so logically dev->hard_header_len should be 0. So we should use dev->needed_headroom instead of dev->hard_header_len to request necessary headroom to be allocated. This change fixes kernel panic when this driver is used with AF_PACKET SOCK_RAW sockets. Call stack when panic: [ 168.399197] skbuff: skb_under_panic: text:ffffffff819d95fb len:20 put:14 head:ffff8882704c0a00 data:ffff8882704c09fd tail:0x11 end:0xc0 dev:veth0 ... [ 168.399255] Call Trace: [ 168.399259] skb_push.cold+0x14/0x24 [ 168.399262] eth_header+0x2b/0xc0 [ 168.399267] lapbeth_data_transmit+0x9a/0xb0 [lapbether] [ 168.399275] lapb_data_transmit+0x22/0x2c [lapb] [ 168.399277] lapb_transmit_buffer+0x71/0xb0 [lapb] [ 168.399279] lapb_kick+0xe3/0x1c0 [lapb] [ 168.399281] lapb_data_request+0x76/0xc0 [lapb] [ 168.399283] lapbeth_xmit+0x56/0x90 [lapbether] [ 168.399286] dev_hard_start_xmit+0x91/0x1f0 [ 168.399289] ? irq_init_percpu_irqstack+0xc0/0x100 [ 168.399291] __dev_queue_xmit+0x721/0x8e0 [ 168.399295] ? packet_parse_headers.isra.0+0xd2/0x110 [ 168.399297] dev_queue_xmit+0x10/0x20 [ 168.399298] packet_sendmsg+0xbf0/0x19b0 ...... Cc: Willem de Bruijn Cc: Martin Schiller Cc: Brian Norris Signed-off-by: Xie He Acked-by: Willem de Bruijn Signed-off-by: David S. Miller --- drivers/net/wan/lapbether.c | 10 +++++++++- 1 file changed, 9 insertions(+), 1 deletion(-) diff --git a/drivers/net/wan/lapbether.c b/drivers/net/wan/lapbether.c index b2868433718f..1ea15f2123ed 100644 --- a/drivers/net/wan/lapbether.c +++ b/drivers/net/wan/lapbether.c @@ -157,6 +157,12 @@ static netdev_tx_t lapbeth_xmit(struct sk_buff *skb, if (!netif_running(dev)) goto drop; + /* There should be a pseudo header of 1 byte added by upper layers. + * Check to make sure it is there before reading it. + */ + if (skb->len < 1) + goto drop; + switch (skb->data[0]) { case X25_IFACE_DATA: break; @@ -305,6 +311,7 @@ static void lapbeth_setup(struct net_device *dev) dev->netdev_ops = &lapbeth_netdev_ops; dev->needs_free_netdev = true; dev->type = ARPHRD_X25; + dev->hard_header_len = 0; dev->mtu = 1000; dev->addr_len = 0; } @@ -331,7 +338,8 @@ static int lapbeth_new_device(struct net_device *dev) * then this driver prepends a length field of 2 bytes, * then the underlying Ethernet device prepends its own header. */ - ndev->hard_header_len = -1 + 3 + 2 + dev->hard_header_len; + ndev->needed_headroom = -1 + 3 + 2 + dev->hard_header_len + + dev->needed_headroom; lapbeth = netdev_priv(ndev); lapbeth->axdev = ndev; From 7fb20f9e901e6df2f7dc032d88da5abff4117d37 Mon Sep 17 00:00:00 2001 From: Alan Maguire Date: Fri, 7 Aug 2020 12:50:29 +0100 Subject: [PATCH 040/122] bpf, doc: Remove references to warning message when using bpf_trace_printk() The BPF helper bpf_trace_printk() no longer uses trace_printk(); it is now triggers a dedicated trace event. Hence the described warning is no longer present, so remove the discussion of it as it may confuse people. Fixes: ac5a72ea5c89 ("bpf: Use dedicated bpf_trace_printk event instead of trace_printk()") Signed-off-by: Alan Maguire Signed-off-by: Daniel Borkmann Link: https://lore.kernel.org/bpf/1596801029-32395-1-git-send-email-alan.maguire@oracle.com --- Documentation/bpf/bpf_design_QA.rst | 11 ----------- 1 file changed, 11 deletions(-) diff --git a/Documentation/bpf/bpf_design_QA.rst b/Documentation/bpf/bpf_design_QA.rst index 12a246fcf6cb..2df7b067ab93 100644 --- a/Documentation/bpf/bpf_design_QA.rst +++ b/Documentation/bpf/bpf_design_QA.rst @@ -246,17 +246,6 @@ program is loaded the kernel will print warning message, so this helper is only useful for experiments and prototypes. Tracing BPF programs are root only. -Q: bpf_trace_printk() helper warning ------------------------------------- -Q: When bpf_trace_printk() helper is used the kernel prints nasty -warning message. Why is that? - -A: This is done to nudge program authors into better interfaces when -programs need to pass data to user space. Like bpf_perf_event_output() -can be used to efficiently stream data via perf ring buffer. -BPF maps can be used for asynchronous data sharing between kernel -and user space. bpf_trace_printk() should only be used for debugging. - Q: New functionality via kernel modules? ---------------------------------------- Q: Can BPF functionality such as new program or map types, new From d5ca590525cfbd87ca307dcf498a566e2e7c1767 Mon Sep 17 00:00:00 2001 From: Andrii Nakryiko Date: Thu, 6 Aug 2020 20:30:57 -0700 Subject: [PATCH 041/122] selftests/bpf: Fix silent Makefile output 99aacebecb75 ("selftests: do not use .ONESHELL") removed .ONESHELL, which changes how Makefile "silences" multi-command target recipes. selftests/bpf's Makefile relied (a somewhat unknowingly) on .ONESHELL behavior of silencing all commands within the recipe if the first command contains @ symbol. Removing .ONESHELL exposed this hack. This patch fixes the issue by explicitly silencing each command with $(Q). Also explicitly define fallback rule for building *.o from *.c, instead of relying on non-silent inherited rule. This was causing a non-silent output for bench.o object file. Fixes: 92f7440ecc93 ("selftests/bpf: More succinct Makefile output") Signed-off-by: Andrii Nakryiko Signed-off-by: Daniel Borkmann Link: https://lore.kernel.org/bpf/20200807033058.848677-1-andriin@fb.com --- tools/testing/selftests/bpf/Makefile | 48 +++++++++++++++------------- 1 file changed, 26 insertions(+), 22 deletions(-) diff --git a/tools/testing/selftests/bpf/Makefile b/tools/testing/selftests/bpf/Makefile index 48425f9251b5..a83b5827532f 100644 --- a/tools/testing/selftests/bpf/Makefile +++ b/tools/testing/selftests/bpf/Makefile @@ -102,7 +102,7 @@ endif OVERRIDE_TARGETS := 1 override define CLEAN $(call msg,CLEAN) - $(RM) -r $(TEST_GEN_PROGS) $(TEST_GEN_PROGS_EXTENDED) $(TEST_GEN_FILES) $(EXTRA_CLEAN) + $(Q)$(RM) -r $(TEST_GEN_PROGS) $(TEST_GEN_PROGS_EXTENDED) $(TEST_GEN_FILES) $(EXTRA_CLEAN) endef include ../lib.mk @@ -123,17 +123,21 @@ $(notdir $(TEST_GEN_PROGS) \ $(TEST_GEN_PROGS_EXTENDED) \ $(TEST_CUSTOM_PROGS)): %: $(OUTPUT)/% ; +$(OUTPUT)/%.o: %.c + $(call msg,CC,,$@) + $(Q)$(CC) $(CFLAGS) -c $(filter %.c,$^) $(LDLIBS) -o $@ + $(OUTPUT)/%:%.c $(call msg,BINARY,,$@) - $(LINK.c) $^ $(LDLIBS) -o $@ + $(Q)$(LINK.c) $^ $(LDLIBS) -o $@ $(OUTPUT)/urandom_read: urandom_read.c $(call msg,BINARY,,$@) - $(CC) $(LDFLAGS) -o $@ $< $(LDLIBS) -Wl,--build-id + $(Q)$(CC) $(LDFLAGS) -o $@ $< $(LDLIBS) -Wl,--build-id $(OUTPUT)/test_stub.o: test_stub.c $(BPFOBJ) $(call msg,CC,,$@) - $(CC) -c $(CFLAGS) -o $@ $< + $(Q)$(CC) -c $(CFLAGS) -o $@ $< VMLINUX_BTF_PATHS ?= $(if $(O),$(O)/vmlinux) \ $(if $(KBUILD_OUTPUT),$(KBUILD_OUTPUT)/vmlinux) \ @@ -181,15 +185,15 @@ $(BPFOBJ): $(wildcard $(BPFDIR)/*.[ch] $(BPFDIR)/Makefile) \ $(BUILD_DIR)/libbpf $(BUILD_DIR)/bpftool $(BUILD_DIR)/resolve_btfids $(INCLUDE_DIR): $(call msg,MKDIR,,$@) - mkdir -p $@ + $(Q)mkdir -p $@ $(INCLUDE_DIR)/vmlinux.h: $(VMLINUX_BTF) | $(BPFTOOL) $(INCLUDE_DIR) ifeq ($(VMLINUX_H),) $(call msg,GEN,,$@) - $(BPFTOOL) btf dump file $(VMLINUX_BTF) format c > $@ + $(Q)$(BPFTOOL) btf dump file $(VMLINUX_BTF) format c > $@ else $(call msg,CP,,$@) - cp "$(VMLINUX_H)" $@ + $(Q)cp "$(VMLINUX_H)" $@ endif $(RESOLVE_BTFIDS): $(BPFOBJ) | $(BUILD_DIR)/resolve_btfids \ @@ -238,28 +242,28 @@ $(OUTPUT)/flow_dissector_load.o: flow_dissector_load.h # $4 - LDFLAGS define CLANG_BPF_BUILD_RULE $(call msg,CLNG-LLC,$(TRUNNER_BINARY),$2) - ($(CLANG) $3 -O2 -target bpf -emit-llvm \ + $(Q)($(CLANG) $3 -O2 -target bpf -emit-llvm \ -c $1 -o - || echo "BPF obj compilation failed") | \ $(LLC) -mattr=dwarfris -march=bpf -mcpu=v3 $4 -filetype=obj -o $2 endef # Similar to CLANG_BPF_BUILD_RULE, but with disabled alu32 define CLANG_NOALU32_BPF_BUILD_RULE $(call msg,CLNG-LLC,$(TRUNNER_BINARY),$2) - ($(CLANG) $3 -O2 -target bpf -emit-llvm \ + $(Q)($(CLANG) $3 -O2 -target bpf -emit-llvm \ -c $1 -o - || echo "BPF obj compilation failed") | \ $(LLC) -march=bpf -mcpu=v2 $4 -filetype=obj -o $2 endef # Similar to CLANG_BPF_BUILD_RULE, but using native Clang and bpf LLC define CLANG_NATIVE_BPF_BUILD_RULE $(call msg,CLNG-BPF,$(TRUNNER_BINARY),$2) - ($(CLANG) $3 -O2 -emit-llvm \ + $(Q)($(CLANG) $3 -O2 -emit-llvm \ -c $1 -o - || echo "BPF obj compilation failed") | \ $(LLC) -march=bpf -mcpu=v3 $4 -filetype=obj -o $2 endef # Build BPF object using GCC define GCC_BPF_BUILD_RULE $(call msg,GCC-BPF,$(TRUNNER_BINARY),$2) - $(BPF_GCC) $3 $4 -O2 -c $1 -o $2 + $(Q)$(BPF_GCC) $3 $4 -O2 -c $1 -o $2 endef SKEL_BLACKLIST := btf__% test_pinning_invalid.c test_sk_assign.c @@ -301,7 +305,7 @@ ifeq ($($(TRUNNER_OUTPUT)-dir),) $(TRUNNER_OUTPUT)-dir := y $(TRUNNER_OUTPUT): $$(call msg,MKDIR,,$$@) - mkdir -p $$@ + $(Q)mkdir -p $$@ endif # ensure we set up BPF objects generation rule just once for a given @@ -321,7 +325,7 @@ $(TRUNNER_BPF_SKELS): $(TRUNNER_OUTPUT)/%.skel.h: \ $(TRUNNER_OUTPUT)/%.o \ | $(BPFTOOL) $(TRUNNER_OUTPUT) $$(call msg,GEN-SKEL,$(TRUNNER_BINARY),$$@) - $$(BPFTOOL) gen skeleton $$< > $$@ + $(Q)$$(BPFTOOL) gen skeleton $$< > $$@ endif # ensure we set up tests.h header generation rule just once @@ -345,7 +349,7 @@ $(TRUNNER_TEST_OBJS): $(TRUNNER_OUTPUT)/%.test.o: \ $(TRUNNER_BPF_SKELS) \ $$(BPFOBJ) | $(TRUNNER_OUTPUT) $$(call msg,TEST-OBJ,$(TRUNNER_BINARY),$$@) - cd $$(@D) && $$(CC) -I. $$(CFLAGS) -c $(CURDIR)/$$< $$(LDLIBS) -o $$(@F) + $(Q)cd $$(@D) && $$(CC) -I. $$(CFLAGS) -c $(CURDIR)/$$< $$(LDLIBS) -o $$(@F) $(TRUNNER_EXTRA_OBJS): $(TRUNNER_OUTPUT)/%.o: \ %.c \ @@ -353,13 +357,13 @@ $(TRUNNER_EXTRA_OBJS): $(TRUNNER_OUTPUT)/%.o: \ $(TRUNNER_TESTS_HDR) \ $$(BPFOBJ) | $(TRUNNER_OUTPUT) $$(call msg,EXT-OBJ,$(TRUNNER_BINARY),$$@) - $$(CC) $$(CFLAGS) -c $$< $$(LDLIBS) -o $$@ + $(Q)$$(CC) $$(CFLAGS) -c $$< $$(LDLIBS) -o $$@ # only copy extra resources if in flavored build $(TRUNNER_BINARY)-extras: $(TRUNNER_EXTRA_FILES) | $(TRUNNER_OUTPUT) ifneq ($2,) $$(call msg,EXT-COPY,$(TRUNNER_BINARY),$(TRUNNER_EXTRA_FILES)) - cp -a $$^ $(TRUNNER_OUTPUT)/ + $(Q)cp -a $$^ $(TRUNNER_OUTPUT)/ endif $(OUTPUT)/$(TRUNNER_BINARY): $(TRUNNER_TEST_OBJS) \ @@ -367,8 +371,8 @@ $(OUTPUT)/$(TRUNNER_BINARY): $(TRUNNER_TEST_OBJS) \ $(RESOLVE_BTFIDS) \ | $(TRUNNER_BINARY)-extras $$(call msg,BINARY,,$$@) - $$(CC) $$(CFLAGS) $$(filter %.a %.o,$$^) $$(LDLIBS) -o $$@ - $(RESOLVE_BTFIDS) --no-fail --btf btf_data.o $$@ + $(Q)$$(CC) $$(CFLAGS) $$(filter %.a %.o,$$^) $$(LDLIBS) -o $$@ + $(Q)$(RESOLVE_BTFIDS) --no-fail --btf btf_data.o $$@ endef @@ -421,17 +425,17 @@ verifier/tests.h: verifier/*.c ) > verifier/tests.h) $(OUTPUT)/test_verifier: test_verifier.c verifier/tests.h $(BPFOBJ) | $(OUTPUT) $(call msg,BINARY,,$@) - $(CC) $(CFLAGS) $(filter %.a %.o %.c,$^) $(LDLIBS) -o $@ + $(Q)$(CC) $(CFLAGS) $(filter %.a %.o %.c,$^) $(LDLIBS) -o $@ # Make sure we are able to include and link libbpf against c++. $(OUTPUT)/test_cpp: test_cpp.cpp $(OUTPUT)/test_core_extern.skel.h $(BPFOBJ) $(call msg,CXX,,$@) - $(CXX) $(CFLAGS) $^ $(LDLIBS) -o $@ + $(Q)$(CXX) $(CFLAGS) $^ $(LDLIBS) -o $@ # Benchmark runner $(OUTPUT)/bench_%.o: benchs/bench_%.c bench.h $(call msg,CC,,$@) - $(CC) $(CFLAGS) -c $(filter %.c,$^) $(LDLIBS) -o $@ + $(Q)$(CC) $(CFLAGS) -c $(filter %.c,$^) $(LDLIBS) -o $@ $(OUTPUT)/bench_rename.o: $(OUTPUT)/test_overhead.skel.h $(OUTPUT)/bench_trigger.o: $(OUTPUT)/trigger_bench.skel.h $(OUTPUT)/bench_ringbufs.o: $(OUTPUT)/ringbuf_bench.skel.h \ @@ -444,7 +448,7 @@ $(OUTPUT)/bench: $(OUTPUT)/bench.o $(OUTPUT)/testing_helpers.o \ $(OUTPUT)/bench_trigger.o \ $(OUTPUT)/bench_ringbufs.o $(call msg,BINARY,,$@) - $(CC) $(LDFLAGS) -o $@ $(filter %.a %.o,$^) $(LDLIBS) + $(Q)$(CC) $(LDFLAGS) -o $@ $(filter %.a %.o,$^) $(LDLIBS) EXTRA_CLEAN := $(TEST_CUSTOM_PROGS) $(SCRATCH_DIR) \ prog_tests/tests.h map_tests/tests.h verifier/tests.h \ From b8c1a3090741f349322ad855d2b66d6e9752a60d Mon Sep 17 00:00:00 2001 From: Randy Dunlap Date: Thu, 6 Aug 2020 20:31:41 -0700 Subject: [PATCH 042/122] bpf: Delete repeated words in comments Drop repeated words in kernel/bpf/: {has, the} Signed-off-by: Randy Dunlap Signed-off-by: Daniel Borkmann Link: https://lore.kernel.org/bpf/20200807033141.10437-1-rdunlap@infradead.org --- kernel/bpf/core.c | 2 +- kernel/bpf/verifier.c | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/kernel/bpf/core.c b/kernel/bpf/core.c index bde93344164d..ed0b3578867c 100644 --- a/kernel/bpf/core.c +++ b/kernel/bpf/core.c @@ -1966,7 +1966,7 @@ void bpf_prog_array_delete_safe(struct bpf_prog_array *array, * @index: the index of the program to replace * * Skips over dummy programs, by not counting them, when calculating - * the the position of the program to replace. + * the position of the program to replace. * * Return: * * 0 - Success diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c index b6ccfce3bf4c..ef938f17b944 100644 --- a/kernel/bpf/verifier.c +++ b/kernel/bpf/verifier.c @@ -8294,7 +8294,7 @@ static bool stacksafe(struct bpf_func_state *old, if (old->stack[spi].slot_type[i % BPF_REG_SIZE] != cur->stack[spi].slot_type[i % BPF_REG_SIZE]) /* Ex: old explored (safe) state has STACK_SPILL in - * this stack slot, but current has has STACK_MISC -> + * this stack slot, but current has STACK_MISC -> * this verifier states are not equivalent, * return false to continue verification of this path */ From 96cf2a2c75567ff56195fe3126d497a2e7e4379f Mon Sep 17 00:00:00 2001 From: Eiichi Tsukata Date: Thu, 6 Aug 2020 15:18:48 -0700 Subject: [PATCH 043/122] xfs: Fix UBSAN null-ptr-deref in xfs_sysfs_init If xfs_sysfs_init is called with parent_kobj == NULL, UBSAN shows the following warning: UBSAN: null-ptr-deref in ./fs/xfs/xfs_sysfs.h:37:23 member access within null pointer of type 'struct xfs_kobj' Call Trace: dump_stack+0x10e/0x195 ubsan_type_mismatch_common+0x241/0x280 __ubsan_handle_type_mismatch_v1+0x32/0x40 init_xfs_fs+0x12b/0x28f do_one_initcall+0xdd/0x1d0 do_initcall_level+0x151/0x1b6 do_initcalls+0x50/0x8f do_basic_setup+0x29/0x2b kernel_init_freeable+0x19f/0x20b kernel_init+0x11/0x1e0 ret_from_fork+0x22/0x30 Fix it by checking parent_kobj before the code accesses its member. Signed-off-by: Eiichi Tsukata Reviewed-by: Darrick J. Wong [darrick: minor whitespace edits] Signed-off-by: Darrick J. Wong --- fs/xfs/xfs_sysfs.h | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/fs/xfs/xfs_sysfs.h b/fs/xfs/xfs_sysfs.h index e9f810fc6731..43585850f154 100644 --- a/fs/xfs/xfs_sysfs.h +++ b/fs/xfs/xfs_sysfs.h @@ -32,9 +32,11 @@ xfs_sysfs_init( struct xfs_kobj *parent_kobj, const char *name) { + struct kobject *parent; + + parent = parent_kobj ? &parent_kobj->kobject : NULL; init_completion(&kobj->complete); - return kobject_init_and_add(&kobj->kobject, ktype, - &parent_kobj->kobject, "%s", name); + return kobject_init_and_add(&kobj->kobject, ktype, parent, "%s", name); } static inline void From b9b40ee4db6cb186341b97bca4f0d7aa2a042a66 Mon Sep 17 00:00:00 2001 From: Thierry Reding Date: Fri, 7 Aug 2020 09:36:32 +0200 Subject: [PATCH 044/122] r8152: Use MAC address from correct device tree node Query the USB device's device tree node when looking for a MAC address. The struct device embedded into the struct net_device does not have a device tree node attached at all. The reason why this went unnoticed is because the system where this was tested was one of the few development units that had its OTP programmed, as opposed to production systems where the MAC address is stored in a separate EEPROM and is passed via device tree by the firmware. Reported-by: EJ Hsu Fixes: acb6d3771a03 ("r8152: Use MAC address from device tree if available") Signed-off-by: Thierry Reding Reviewed-by: EJ Hsu Signed-off-by: David S. Miller --- drivers/net/usb/r8152.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/usb/r8152.c b/drivers/net/usb/r8152.c index 7d39f998535d..2b02fefd094d 100644 --- a/drivers/net/usb/r8152.c +++ b/drivers/net/usb/r8152.c @@ -1504,7 +1504,7 @@ static int determine_ethernet_addr(struct r8152 *tp, struct sockaddr *sa) sa->sa_family = dev->type; - ret = eth_platform_get_mac_address(&dev->dev, sa->sa_data); + ret = eth_platform_get_mac_address(&tp->udev->dev, sa->sa_data); if (ret < 0) { if (tp->version == RTL_VER_01) { ret = pla_ocp_read(tp, PLA_IDR, 8, sa->sa_data); From 158b47a65aa6b1e70424c3c9c0eda8577b831ea8 Mon Sep 17 00:00:00 2001 From: Paolo Abeni Date: Fri, 7 Aug 2020 11:32:04 +0200 Subject: [PATCH 045/122] selftests: mptcp: fix dependecies Since commit df62f2ec3df6 ("selftests/mptcp: add diag interface tests") the MPTCP selftests relies on the MPTCP diag interface which is enabled by a specific kconfig knob: be sure to include it. Fixes: df62f2ec3df6 ("selftests/mptcp: add diag interface tests") Signed-off-by: Paolo Abeni Reviewed-by: Matthieu Baerts Signed-off-by: David S. Miller --- tools/testing/selftests/net/mptcp/config | 2 ++ 1 file changed, 2 insertions(+) diff --git a/tools/testing/selftests/net/mptcp/config b/tools/testing/selftests/net/mptcp/config index 2499824d9e1c..8df5cb8f71ff 100644 --- a/tools/testing/selftests/net/mptcp/config +++ b/tools/testing/selftests/net/mptcp/config @@ -1,4 +1,6 @@ CONFIG_MPTCP=y CONFIG_MPTCP_IPV6=y +CONFIG_INET_DIAG=m +CONFIG_INET_MPTCP_DIAG=m CONFIG_VETH=y CONFIG_NET_SCH_NETEM=m From 6bdb6211a61eaf91a22e9160b16795ee6c40f90d Mon Sep 17 00:00:00 2001 From: Paolo Abeni Date: Fri, 7 Aug 2020 18:31:00 +0200 Subject: [PATCH 046/122] mptcp: more stable diag self-tests During diag self-tests we introduce long wait in the mptcp test program to give the script enough time to access the sockets dump. Such wait is introduced after shutting down one sockets end. Since commit 43b54c6ee382 ("mptcp: Use full MPTCP-level disconnect state machine") if both sides shutdown the socket is correctly transitioned into CLOSED status. As a side effect some sockets are not dumped via the diag interface, because the socket state (CLOSED) does not match the default filter, and this cause self-tests instability. Address the issue moving the above mentioned wait before shutting down the socket. Closes: https://github.com/multipath-tcp/mptcp_net-next/issues/68 Fixes: df62f2ec3df6 ("selftests/mptcp: add diag interface tests") Tested-and-acked-by: Matthieu Baerts Signed-off-by: Paolo Abeni Signed-off-by: David S. Miller --- tools/testing/selftests/net/mptcp/mptcp_connect.c | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/tools/testing/selftests/net/mptcp/mptcp_connect.c b/tools/testing/selftests/net/mptcp/mptcp_connect.c index cad6f73a5fd0..090620c3e10c 100644 --- a/tools/testing/selftests/net/mptcp/mptcp_connect.c +++ b/tools/testing/selftests/net/mptcp/mptcp_connect.c @@ -406,10 +406,11 @@ static int copyfd_io_poll(int infd, int peerfd, int outfd) /* ... but we still receive. * Close our write side, ev. give some time - * for address notification + * for address notification and/or checking + * the current status */ - if (cfg_join) - usleep(400000); + if (cfg_wait) + usleep(cfg_wait); shutdown(peerfd, SHUT_WR); } else { if (errno == EINTR) @@ -427,7 +428,7 @@ static int copyfd_io_poll(int infd, int peerfd, int outfd) } /* leave some time for late join/announce */ - if (cfg_wait) + if (cfg_join) usleep(cfg_wait); close(peerfd); From 7ee2492635d862fac39bc5ef234ffd3d8e9f833b Mon Sep 17 00:00:00 2001 From: Paolo Abeni Date: Fri, 7 Aug 2020 19:03:53 +0200 Subject: [PATCH 047/122] mptcp: fix warn at shutdown time for unaccepted msk sockets With commit b93df08ccda3 ("mptcp: explicitly track the fully established status"), the status of unaccepted mptcp closed in mptcp_sock_destruct() changes from TCP_SYN_RECV to TCP_ESTABLISHED. As a result mptcp_sock_destruct() does not perform the proper cleanup and inet_sock_destruct() will later emit a warn. Address the issue updating the condition tested in mptcp_sock_destruct(). Also update the related comment. Closes: https://github.com/multipath-tcp/mptcp_net-next/issues/66 Reported-and-tested-by: Christoph Paasch Fixes: b93df08ccda3 ("mptcp: explicitly track the fully established status") Signed-off-by: Paolo Abeni Reviewed-by: Matthieu Baerts Signed-off-by: David S. Miller --- net/mptcp/subflow.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/net/mptcp/subflow.c b/net/mptcp/subflow.c index 96f4f2fe50ad..e8cac2655c82 100644 --- a/net/mptcp/subflow.c +++ b/net/mptcp/subflow.c @@ -423,12 +423,12 @@ static void mptcp_sock_destruct(struct sock *sk) * also remove the mptcp socket, via * sock_put(ctx->conn). * - * Problem is that the mptcp socket will not be in - * SYN_RECV state and doesn't have SOCK_DEAD flag. + * Problem is that the mptcp socket will be in + * ESTABLISHED state and will not have the SOCK_DEAD flag. * Both result in warnings from inet_sock_destruct. */ - if (sk->sk_state == TCP_SYN_RECV) { + if (sk->sk_state == TCP_ESTABLISHED) { sk->sk_state = TCP_CLOSE; WARN_ON_ONCE(sk->sk_socket); sock_orphan(sk); From 1c3b63f155f637594268cd1add8335461691b314 Mon Sep 17 00:00:00 2001 From: Rouven Czerwinski Date: Thu, 6 Aug 2020 08:49:06 +0200 Subject: [PATCH 048/122] net/tls: allow MSG_CMSG_COMPAT in sendmsg MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Trying to use ktls on a system with 32-bit userspace and 64-bit kernel results in a EOPNOTSUPP message during sendmsg: setsockopt(3, SOL_TLS, TLS_TX, …, 40) = 0 sendmsg(3, …, msg_flags=0}, 0) = -1 EOPNOTSUPP (Operation not supported) The tls_sw implementation does strict flag checking and does not allow the MSG_CMSG_COMPAT flag, which is set if the message comes in through the compat syscall. This patch adds MSG_CMSG_COMPAT to the flag check to allow the usage of the TLS SW implementation on systems using the compat syscall path. Note that the same check is present in the sendmsg path for the TLS device implementation, however the flag hasn't been added there for lack of testing hardware. Signed-off-by: Rouven Czerwinski Signed-off-by: David S. Miller --- net/tls/tls_sw.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/net/tls/tls_sw.c b/net/tls/tls_sw.c index 710bd44eaa49..9a3d9fedd7aa 100644 --- a/net/tls/tls_sw.c +++ b/net/tls/tls_sw.c @@ -935,7 +935,8 @@ int tls_sw_sendmsg(struct sock *sk, struct msghdr *msg, size_t size) int ret = 0; int pending; - if (msg->msg_flags & ~(MSG_MORE | MSG_DONTWAIT | MSG_NOSIGNAL)) + if (msg->msg_flags & ~(MSG_MORE | MSG_DONTWAIT | MSG_NOSIGNAL | + MSG_CMSG_COMPAT)) return -EOPNOTSUPP; mutex_lock(&tls_ctx->tx_lock); From d02cbc46136105cf86f84ac355e16f04696f538d Mon Sep 17 00:00:00 2001 From: Johan Hovold Date: Thu, 6 Aug 2020 17:37:53 +0200 Subject: [PATCH 049/122] net: phy: fix memory leak in device-create error path A recent commit introduced a late error path in phy_device_create() which fails to release the device name allocated by dev_set_name(). Fixes: 13d0ab6750b2 ("net: phy: check return code when requesting PHY driver module") Cc: Heiner Kallweit Signed-off-by: Johan Hovold Signed-off-by: David S. Miller --- drivers/net/phy/phy_device.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/drivers/net/phy/phy_device.c b/drivers/net/phy/phy_device.c index 1b9523595839..57d44648c8dd 100644 --- a/drivers/net/phy/phy_device.c +++ b/drivers/net/phy/phy_device.c @@ -615,7 +615,9 @@ struct phy_device *phy_device_create(struct mii_bus *bus, int addr, u32 phy_id, if (c45_ids) dev->c45_ids = *c45_ids; dev->irq = bus->irq[addr]; + dev_set_name(&mdiodev->dev, PHY_ID_FMT, bus->id, addr); + device_initialize(&mdiodev->dev); dev->state = PHY_DOWN; @@ -649,10 +651,8 @@ struct phy_device *phy_device_create(struct mii_bus *bus, int addr, u32 phy_id, ret = phy_request_driver_module(dev, phy_id); } - if (!ret) { - device_initialize(&mdiodev->dev); - } else { - kfree(dev); + if (ret) { + put_device(&mdiodev->dev); dev = ERR_PTR(ret); } From 6b07edebe6d31529346e553a7b309bc5251da712 Mon Sep 17 00:00:00 2001 From: Miaohe Lin Date: Thu, 6 Aug 2020 19:52:24 +0800 Subject: [PATCH 050/122] net: Use helper function fdput() Use helper function fdput() to fput() the file iff FDPUT_FPUT is set. Signed-off-by: Miaohe Lin Signed-off-by: David S. Miller --- net/socket.c | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/net/socket.c b/net/socket.c index aff52e81653c..3c3d6abe4c1e 100644 --- a/net/socket.c +++ b/net/socket.c @@ -1804,8 +1804,7 @@ int __sys_accept4(int fd, struct sockaddr __user *upeer_sockaddr, ret = __sys_accept4_file(f.file, 0, upeer_sockaddr, upeer_addrlen, flags, rlimit(RLIMIT_NOFILE)); - if (f.flags) - fput(f.file); + fdput(f); } return ret; @@ -1868,8 +1867,7 @@ int __sys_connect(int fd, struct sockaddr __user *uservaddr, int addrlen) ret = move_addr_to_kernel(uservaddr, addrlen, &address); if (!ret) ret = __sys_connect_file(f.file, &address, addrlen, 0); - if (f.flags) - fput(f.file); + fdput(f); } return ret; From ce787a5a074a86f76f5d3fd804fa78e01bfb9e89 Mon Sep 17 00:00:00 2001 From: Miaohe Lin Date: Thu, 6 Aug 2020 19:53:16 +0800 Subject: [PATCH 051/122] net: Set fput_needed iff FDPUT_FPUT is set We should fput() file iff FDPUT_FPUT is set. So we should set fput_needed accordingly. Fixes: 00e188ef6a7e ("sockfd_lookup_light(): switch to fdget^W^Waway from fget_light") Signed-off-by: Miaohe Lin Signed-off-by: David S. Miller --- net/socket.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/socket.c b/net/socket.c index 3c3d6abe4c1e..e08415b4f939 100644 --- a/net/socket.c +++ b/net/socket.c @@ -500,7 +500,7 @@ static struct socket *sockfd_lookup_light(int fd, int *err, int *fput_needed) if (f.file) { sock = sock_from_file(f.file, err); if (likely(sock)) { - *fput_needed = f.flags; + *fput_needed = f.flags & FDPUT_FPUT; return sock; } fdput(f); From 47260ba937822cd1a57ee8b520b8789bdda5964e Mon Sep 17 00:00:00 2001 From: Miaohe Lin Date: Thu, 6 Aug 2020 19:54:19 +0800 Subject: [PATCH 052/122] net: Remove meaningless jump label out_fs The out_fs jump label has nothing to do but goto out. Signed-off-by: Miaohe Lin Signed-off-by: David S. Miller --- net/socket.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/net/socket.c b/net/socket.c index e08415b4f939..c61f036d24f5 100644 --- a/net/socket.c +++ b/net/socket.c @@ -3063,7 +3063,7 @@ static int __init sock_init(void) err = register_filesystem(&sock_fs_type); if (err) - goto out_fs; + goto out; sock_mnt = kern_mount(&sock_fs_type); if (IS_ERR(sock_mnt)) { err = PTR_ERR(sock_mnt); @@ -3086,7 +3086,6 @@ static int __init sock_init(void) out_mount: unregister_filesystem(&sock_fs_type); -out_fs: goto out; } From 11f920d2aa9d4c2c2d53ccd79f4b5512f2169623 Mon Sep 17 00:00:00 2001 From: Miaohe Lin Date: Thu, 6 Aug 2020 19:57:18 +0800 Subject: [PATCH 053/122] net: Use helper function ip_is_fragment() Use helper function ip_is_fragment() to check ip fragment. Signed-off-by: Miaohe Lin Signed-off-by: David S. Miller --- net/core/skbuff.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/core/skbuff.c b/net/core/skbuff.c index 2828f6d5ba89..7e2e502ef519 100644 --- a/net/core/skbuff.c +++ b/net/core/skbuff.c @@ -4853,7 +4853,7 @@ static int skb_checksum_setup_ipv4(struct sk_buff *skb, bool recalculate) if (err < 0) goto out; - if (ip_hdr(skb)->frag_off & htons(IP_OFFSET | IP_MF)) + if (ip_is_fragment(ip_hdr(skb))) fragment = true; off = ip_hdrlen(skb); From 7c7ab580db49cc7befe5f4b91bb1920cd6b07575 Mon Sep 17 00:00:00 2001 From: Miaohe Lin Date: Sat, 8 Aug 2020 16:23:10 +0800 Subject: [PATCH 054/122] net: Convert to use the fallthrough macro Convert the uses of fallthrough comments to fallthrough macro. Signed-off-by: Miaohe Lin Signed-off-by: David S. Miller --- net/socket.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/net/socket.c b/net/socket.c index c61f036d24f5..f4d5998bdcba 100644 --- a/net/socket.c +++ b/net/socket.c @@ -1325,7 +1325,7 @@ int sock_wake_async(struct socket_wq *wq, int how, int band) case SOCK_WAKE_SPACE: if (!test_and_clear_bit(SOCKWQ_ASYNC_NOSPACE, &wq->flags)) break; - /* fall through */ + fallthrough; case SOCK_WAKE_IO: call_kill: kill_fasync(&wq->fasync_list, SIGIO, band); @@ -3158,13 +3158,13 @@ static int ethtool_ioctl(struct net *net, struct compat_ifreq __user *ifr32) if (rule_cnt > KMALLOC_MAX_SIZE / sizeof(u32)) return -ENOMEM; buf_size += rule_cnt * sizeof(u32); - /* fall through */ + fallthrough; case ETHTOOL_GRXRINGS: case ETHTOOL_GRXCLSRLCNT: case ETHTOOL_GRXCLSRULE: case ETHTOOL_SRXCLSRLINS: convert_out = true; - /* fall through */ + fallthrough; case ETHTOOL_SRXCLSRLDEL: buf_size += sizeof(struct ethtool_rxnfc); convert_in = true; From b0102a890f17e965cc43283cb04218c03f85c6ff Mon Sep 17 00:00:00 2001 From: Matthias Brugger Date: Fri, 7 Aug 2020 15:19:04 +0200 Subject: [PATCH 055/122] i2c: mediatek: Fix i2c_spec_values description The struct i2c_spec_values have it's members documented but is missing the starting '@', which leads to warings like: drivers/i2c/busses/i2c-mt65xx.c:267: warning: Function parameter or member 'min_low_ns' not described in 'i2c_spec_values' We also delete min_high_ns member as it is not used in the code. Signed-off-by: Matthias Brugger Reviewed-by: Qii Wang Signed-off-by: Wolfram Sang --- drivers/i2c/busses/i2c-mt65xx.c | 9 ++++----- 1 file changed, 4 insertions(+), 5 deletions(-) diff --git a/drivers/i2c/busses/i2c-mt65xx.c b/drivers/i2c/busses/i2c-mt65xx.c index e889f74703e4..efc14041d45b 100644 --- a/drivers/i2c/busses/i2c-mt65xx.c +++ b/drivers/i2c/busses/i2c-mt65xx.c @@ -253,14 +253,13 @@ struct mtk_i2c { /** * struct i2c_spec_values: - * min_low_ns: min LOW period of the SCL clock - * min_su_sta_ns: min set-up time for a repeated START condition - * max_hd_dat_ns: max data hold time - * min_su_dat_ns: min data set-up time + * @min_low_ns: min LOW period of the SCL clock + * @min_su_sta_ns: min set-up time for a repeated START condition + * @max_hd_dat_ns: max data hold time + * @min_su_dat_ns: min data set-up time */ struct i2c_spec_values { unsigned int min_low_ns; - unsigned int min_high_ns; unsigned int min_su_sta_ns; unsigned int max_hd_dat_ns; unsigned int min_su_dat_ns; From 536e785f533f5a65ce5238e202d527d70fd4ab17 Mon Sep 17 00:00:00 2001 From: Vaibhav Gupta Date: Thu, 6 Aug 2020 01:06:15 +0530 Subject: [PATCH 056/122] i2c: eg20t: Drop PCI wakeup calls from .suspend/.resume The driver calls pci_enable_wake(...., false) in pch_i2c_suspend() as well as pch_i2c_resume(). Either it should enable-wake the device in .suspend() or should not invoke pci_enable_wake() at all. Concluding that this driver doesn't support enable-wake and PCI core calls pci_enable_wake(pci_dev, PCI_D0, false) during resume, drop it from .suspend() and .resume(). Reported-by: Bjorn Helgaas Signed-off-by: Vaibhav Gupta Reviewed-by: Bjorn Helgaas Signed-off-by: Wolfram Sang --- drivers/i2c/busses/i2c-eg20t.c | 3 --- 1 file changed, 3 deletions(-) diff --git a/drivers/i2c/busses/i2c-eg20t.c b/drivers/i2c/busses/i2c-eg20t.c index 73f139690e4e..eb41de22d461 100644 --- a/drivers/i2c/busses/i2c-eg20t.c +++ b/drivers/i2c/busses/i2c-eg20t.c @@ -879,7 +879,6 @@ static int pch_i2c_suspend(struct pci_dev *pdev, pm_message_t state) return ret; } - pci_enable_wake(pdev, PCI_D3hot, 0); pci_disable_device(pdev); pci_set_power_state(pdev, pci_choose_state(pdev, state)); @@ -899,8 +898,6 @@ static int pch_i2c_resume(struct pci_dev *pdev) return -EIO; } - pci_enable_wake(pdev, PCI_D3hot, 0); - for (i = 0; i < adap_info->ch_num; i++) pch_i2c_init(&adap_info->pch_data[i]); From 82c8eb22095fa747517845e7d5748ae2e4967359 Mon Sep 17 00:00:00 2001 From: Vaibhav Gupta Date: Thu, 6 Aug 2020 01:06:16 +0530 Subject: [PATCH 057/122] i2c: eg20t: use generic power management Drivers using legacy power management .suspen()/.resume() callbacks have to manage PCI states and device's PM states themselves. They also need to take care of standard configuration registers. Switch to generic power management framework using a single "struct dev_pm_ops" variable to take the unnecessary load from the driver. This also avoids the need for the driver to directly call most of the PCI helper functions and device power state control functions, as through the generic framework PCI Core takes care of the necessary operations, and drivers are required to do only device-specific jobs. Signed-off-by: Vaibhav Gupta Reviewed-by: Bjorn Helgaas Signed-off-by: Wolfram Sang --- drivers/i2c/busses/i2c-eg20t.c | 36 +++++++--------------------------- 1 file changed, 7 insertions(+), 29 deletions(-) diff --git a/drivers/i2c/busses/i2c-eg20t.c b/drivers/i2c/busses/i2c-eg20t.c index eb41de22d461..843b31a0f752 100644 --- a/drivers/i2c/busses/i2c-eg20t.c +++ b/drivers/i2c/busses/i2c-eg20t.c @@ -846,11 +846,10 @@ static void pch_i2c_remove(struct pci_dev *pdev) kfree(adap_info); } -#ifdef CONFIG_PM -static int pch_i2c_suspend(struct pci_dev *pdev, pm_message_t state) +static int __maybe_unused pch_i2c_suspend(struct device *dev) { - int ret; int i; + struct pci_dev *pdev = to_pci_dev(dev); struct adapter_info *adap_info = pci_get_drvdata(pdev); void __iomem *p = adap_info->pch_data[0].pch_base_address; @@ -872,31 +871,13 @@ static int pch_i2c_suspend(struct pci_dev *pdev, pm_message_t state) ioread32(p + PCH_I2CSR), ioread32(p + PCH_I2CBUFSTA), ioread32(p + PCH_I2CESRSTA)); - ret = pci_save_state(pdev); - - if (ret) { - pch_pci_err(pdev, "pci_save_state\n"); - return ret; - } - - pci_disable_device(pdev); - pci_set_power_state(pdev, pci_choose_state(pdev, state)); - return 0; } -static int pch_i2c_resume(struct pci_dev *pdev) +static int __maybe_unused pch_i2c_resume(struct device *dev) { int i; - struct adapter_info *adap_info = pci_get_drvdata(pdev); - - pci_set_power_state(pdev, PCI_D0); - pci_restore_state(pdev); - - if (pci_enable_device(pdev) < 0) { - pch_pci_err(pdev, "pch_i2c_resume:pci_enable_device FAILED\n"); - return -EIO; - } + struct adapter_info *adap_info = dev_get_drvdata(dev); for (i = 0; i < adap_info->ch_num; i++) pch_i2c_init(&adap_info->pch_data[i]); @@ -905,18 +886,15 @@ static int pch_i2c_resume(struct pci_dev *pdev) return 0; } -#else -#define pch_i2c_suspend NULL -#define pch_i2c_resume NULL -#endif + +static SIMPLE_DEV_PM_OPS(pch_i2c_pm_ops, pch_i2c_suspend, pch_i2c_resume); static struct pci_driver pch_pcidriver = { .name = KBUILD_MODNAME, .id_table = pch_pcidev_id, .probe = pch_i2c_probe, .remove = pch_i2c_remove, - .suspend = pch_i2c_suspend, - .resume = pch_i2c_resume + .driver.pm = &pch_i2c_pm_ops, }; module_pci_driver(pch_pcidriver); From 1e6e238c3002ea3611465ce5f32777ddd6a40126 Mon Sep 17 00:00:00 2001 From: Qu Wenruo Date: Tue, 28 Jul 2020 16:39:26 +0800 Subject: [PATCH 058/122] btrfs: inode: fix NULL pointer dereference if inode doesn't need compression [BUG] There is a bug report of NULL pointer dereference caused in compress_file_extent(): Oops: Kernel access of bad area, sig: 11 [#1] LE PAGE_SIZE=64K MMU=Hash SMP NR_CPUS=2048 NUMA pSeries Workqueue: btrfs-delalloc btrfs_delalloc_helper [btrfs] NIP [c008000006dd4d34] compress_file_range.constprop.41+0x75c/0x8a0 [btrfs] LR [c008000006dd4d1c] compress_file_range.constprop.41+0x744/0x8a0 [btrfs] Call Trace: [c000000c69093b00] [c008000006dd4d1c] compress_file_range.constprop.41+0x744/0x8a0 [btrfs] (unreliable) [c000000c69093bd0] [c008000006dd4ebc] async_cow_start+0x44/0xa0 [btrfs] [c000000c69093c10] [c008000006e14824] normal_work_helper+0xdc/0x598 [btrfs] [c000000c69093c80] [c0000000001608c0] process_one_work+0x2c0/0x5b0 [c000000c69093d10] [c000000000160c38] worker_thread+0x88/0x660 [c000000c69093db0] [c00000000016b55c] kthread+0x1ac/0x1c0 [c000000c69093e20] [c00000000000b660] ret_from_kernel_thread+0x5c/0x7c ---[ end trace f16954aa20d822f6 ]--- [CAUSE] For the following execution route of compress_file_range(), it's possible to hit NULL pointer dereference: compress_file_extent() |- pages = NULL; |- start = async_chunk->start = 0; |- end = async_chunk = 4095; |- nr_pages = 1; |- inode_need_compress() == false; <<< Possible, see later explanation | Now, we have nr_pages = 1, pages = NULL |- cont: |- ret = cow_file_range_inline(); |- if (ret <= 0) { |- for (i = 0; i < nr_pages; i++) { |- WARN_ON(pages[i]->mapping); <<< Crash To enter above call execution branch, we need the following race: Thread 1 (chattr) | Thread 2 (writeback) --------------------------+------------------------------ | btrfs_run_delalloc_range | |- inode_need_compress = true | |- cow_file_range_async() btrfs_ioctl_set_flag() | |- binode_flags |= | BTRFS_INODE_NOCOMPRESS | | compress_file_range() | |- inode_need_compress = false | |- nr_page = 1 while pages = NULL | | Then hit the crash [FIX] This patch will fix it by checking @pages before doing accessing it. This patch is only designed as a hot fix and easy to backport. More elegant fix may make btrfs only check inode_need_compress() once to avoid such race, but that would be another story. Reported-by: Luciano Chavez Fixes: 4d3a800ebb12 ("btrfs: merge nr_pages input and output parameter in compress_pages") CC: stable@vger.kernel.org # 4.14.x: cecc8d9038d16: btrfs: Move free_pages_out label in inline extent handling branch in compress_file_range CC: stable@vger.kernel.org # 4.14+ Signed-off-by: Qu Wenruo Signed-off-by: David Sterba --- fs/btrfs/inode.c | 16 +++++++++++----- 1 file changed, 11 insertions(+), 5 deletions(-) diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c index 611b3412fbfd..9988d754e465 100644 --- a/fs/btrfs/inode.c +++ b/fs/btrfs/inode.c @@ -653,12 +653,18 @@ static noinline int compress_file_range(struct async_chunk *async_chunk) page_error_op | PAGE_END_WRITEBACK); - for (i = 0; i < nr_pages; i++) { - WARN_ON(pages[i]->mapping); - put_page(pages[i]); + /* + * Ensure we only free the compressed pages if we have + * them allocated, as we can still reach here with + * inode_need_compress() == false. + */ + if (pages) { + for (i = 0; i < nr_pages; i++) { + WARN_ON(pages[i]->mapping); + put_page(pages[i]); + } + kfree(pages); } - kfree(pages); - return 0; } } From bf53d4687b8f3f6b752f091eb85f62369a515dfd Mon Sep 17 00:00:00 2001 From: Josef Bacik Date: Mon, 27 Jul 2020 10:28:05 -0400 Subject: [PATCH 059/122] btrfs: only search for left_info if there is no right_info in try_merge_free_space In try_to_merge_free_space we attempt to find entries to the left and right of the entry we are adding to see if they can be merged. We search for an entry past our current info (saved into right_info), and then if right_info exists and it has a rb_prev() we save the rb_prev() into left_info. However there's a slight problem in the case that we have a right_info, but no entry previous to that entry. At that point we will search for an entry just before the info we're attempting to insert. This will simply find right_info again, and assign it to left_info, making them both the same pointer. Now if right_info _can_ be merged with the range we're inserting, we'll add it to the info and free right_info. However further down we'll access left_info, which was right_info, and thus get a use-after-free. Fix this by only searching for the left entry if we don't find a right entry at all. The CVE referenced had a specially crafted file system that could trigger this use-after-free. However with the tree checker improvements we no longer trigger the conditions for the UAF. But the original conditions still apply, hence this fix. Reference: CVE-2019-19448 Fixes: 963030817060 ("Btrfs: use hybrid extents+bitmap rb tree for free space") CC: stable@vger.kernel.org # 4.4+ Signed-off-by: Josef Bacik Signed-off-by: David Sterba --- fs/btrfs/free-space-cache.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/fs/btrfs/free-space-cache.c b/fs/btrfs/free-space-cache.c index 6d961e11639e..ef0fd7afb0b1 100644 --- a/fs/btrfs/free-space-cache.c +++ b/fs/btrfs/free-space-cache.c @@ -2282,7 +2282,7 @@ static int insert_into_bitmap(struct btrfs_free_space_ctl *ctl, static bool try_merge_free_space(struct btrfs_free_space_ctl *ctl, struct btrfs_free_space *info, bool update_stat) { - struct btrfs_free_space *left_info; + struct btrfs_free_space *left_info = NULL; struct btrfs_free_space *right_info; bool merged = false; u64 offset = info->offset; @@ -2298,7 +2298,7 @@ static bool try_merge_free_space(struct btrfs_free_space_ctl *ctl, if (right_info && rb_prev(&right_info->offset_index)) left_info = rb_entry(rb_prev(&right_info->offset_index), struct btrfs_free_space, offset_index); - else + else if (!right_info) left_info = tree_search_offset(ctl, offset - 1, 0, 0); /* See try_merge_free_space() comment. */ From 27942c9971cc405c60432eca9395e514a2ae9f5e Mon Sep 17 00:00:00 2001 From: David Sterba Date: Thu, 23 Jul 2020 19:08:55 +0200 Subject: [PATCH 060/122] btrfs: fix messages after changing compression level by remount Reported by Forza on IRC that remounting with compression options does not reflect the change in level, or at least it does not appear to do so according to the messages: mount -o compress=zstd:1 /dev/sda /mnt mount -o remount,compress=zstd:15 /mnt does not print the change to the level to syslog: [ 41.366060] BTRFS info (device vda): use zstd compression, level 1 [ 41.368254] BTRFS info (device vda): disk space caching is enabled [ 41.390429] BTRFS info (device vda): disk space caching is enabled What really happens is that the message is lost but the level is actualy changed. There's another weird output, if compression is reset to 'no': [ 45.413776] BTRFS info (device vda): use no compression, level 4 To fix that, save the previous compression level and print the message in that case too and use separate message for 'no' compression. CC: stable@vger.kernel.org # 4.19+ Signed-off-by: David Sterba --- fs/btrfs/super.c | 14 +++++++++----- 1 file changed, 9 insertions(+), 5 deletions(-) diff --git a/fs/btrfs/super.c b/fs/btrfs/super.c index 5a9dc31d95c9..aa73422b0678 100644 --- a/fs/btrfs/super.c +++ b/fs/btrfs/super.c @@ -517,6 +517,7 @@ int btrfs_parse_options(struct btrfs_fs_info *info, char *options, char *compress_type; bool compress_force = false; enum btrfs_compression_type saved_compress_type; + int saved_compress_level; bool saved_compress_force; int no_compress = 0; @@ -598,6 +599,7 @@ int btrfs_parse_options(struct btrfs_fs_info *info, char *options, info->compress_type : BTRFS_COMPRESS_NONE; saved_compress_force = btrfs_test_opt(info, FORCE_COMPRESS); + saved_compress_level = info->compress_level; if (token == Opt_compress || token == Opt_compress_force || strncmp(args[0].from, "zlib", 4) == 0) { @@ -642,6 +644,8 @@ int btrfs_parse_options(struct btrfs_fs_info *info, char *options, no_compress = 0; } else if (strncmp(args[0].from, "no", 2) == 0) { compress_type = "no"; + info->compress_level = 0; + info->compress_type = 0; btrfs_clear_opt(info->mount_opt, COMPRESS); btrfs_clear_opt(info->mount_opt, FORCE_COMPRESS); compress_force = false; @@ -662,11 +666,11 @@ int btrfs_parse_options(struct btrfs_fs_info *info, char *options, */ btrfs_clear_opt(info->mount_opt, FORCE_COMPRESS); } - if ((btrfs_test_opt(info, COMPRESS) && - (info->compress_type != saved_compress_type || - compress_force != saved_compress_force)) || - (!btrfs_test_opt(info, COMPRESS) && - no_compress == 1)) { + if (no_compress == 1) { + btrfs_info(info, "use no compression"); + } else if ((info->compress_type != saved_compress_type) || + (compress_force != saved_compress_force) || + (info->compress_level != saved_compress_level)) { btrfs_info(info, "%s %s compression, level %d", (compress_force) ? "force" : "use", compress_type, info->compress_level); From 3ef3959b29c4a5bd65526ab310a1a18ae533172a Mon Sep 17 00:00:00 2001 From: Josef Bacik Date: Wed, 22 Jul 2020 11:12:46 -0400 Subject: [PATCH 061/122] btrfs: don't show full path of bind mounts in subvol= Chris Murphy reported a problem where rpm ostree will bind mount a bunch of things for whatever voodoo it's doing. But when it does this /proc/mounts shows something like /dev/sda /mnt/test btrfs rw,relatime,subvolid=256,subvol=/foo 0 0 /dev/sda /mnt/test/baz btrfs rw,relatime,subvolid=256,subvol=/foo/bar 0 0 Despite subvolid=256 being subvol=/foo. This is because we're just spitting out the dentry of the mount point, which in the case of bind mounts is the source path for the mountpoint. Instead we should spit out the path to the actual subvol. Fix this by looking up the name for the subvolid we have mounted. With this fix the same test looks like this /dev/sda /mnt/test btrfs rw,relatime,subvolid=256,subvol=/foo 0 0 /dev/sda /mnt/test/baz btrfs rw,relatime,subvolid=256,subvol=/foo 0 0 Reported-by: Chris Murphy CC: stable@vger.kernel.org # 4.4+ Signed-off-by: Josef Bacik Reviewed-by: David Sterba Signed-off-by: David Sterba --- fs/btrfs/super.c | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) diff --git a/fs/btrfs/super.c b/fs/btrfs/super.c index aa73422b0678..9b4e9c4c4673 100644 --- a/fs/btrfs/super.c +++ b/fs/btrfs/super.c @@ -1386,6 +1386,7 @@ static int btrfs_show_options(struct seq_file *seq, struct dentry *dentry) { struct btrfs_fs_info *info = btrfs_sb(dentry->d_sb); const char *compress_type; + const char *subvol_name; if (btrfs_test_opt(info, DEGRADED)) seq_puts(seq, ",degraded"); @@ -1472,8 +1473,13 @@ static int btrfs_show_options(struct seq_file *seq, struct dentry *dentry) seq_puts(seq, ",ref_verify"); seq_printf(seq, ",subvolid=%llu", BTRFS_I(d_inode(dentry))->root->root_key.objectid); - seq_puts(seq, ",subvol="); - seq_dentry(seq, dentry, " \t\n\\"); + subvol_name = btrfs_get_subvol_name_from_objectid(info, + BTRFS_I(d_inode(dentry))->root->root_key.objectid); + if (!IS_ERR(subvol_name)) { + seq_puts(seq, ",subvol="); + seq_escape(seq, subvol_name, " \t\n\\"); + kfree(subvol_name); + } return 0; } From 4f26433e9b3eb7a55ed70d8f882ae9cd48ba448b Mon Sep 17 00:00:00 2001 From: Filipe Manana Date: Wed, 29 Jul 2020 10:17:50 +0100 Subject: [PATCH 062/122] btrfs: fix memory leaks after failure to lookup checksums during inode logging While logging an inode, at copy_items(), if we fail to lookup the checksums for an extent we release the destination path, free the ins_data array and then return immediately. However a previous iteration of the for loop may have added checksums to the ordered_sums list, in which case we leak the memory used by them. So fix this by making sure we iterate the ordered_sums list and free all its checksums before returning. Fixes: 3650860b90cc2a ("Btrfs: remove almost all of the BUG()'s from tree-log.c") CC: stable@vger.kernel.org # 4.4+ Reviewed-by: Johannes Thumshirn Signed-off-by: Filipe Manana Reviewed-by: David Sterba Signed-off-by: David Sterba --- fs/btrfs/tree-log.c | 8 ++------ 1 file changed, 2 insertions(+), 6 deletions(-) diff --git a/fs/btrfs/tree-log.c b/fs/btrfs/tree-log.c index ea8136dcf71f..696dd861cc3c 100644 --- a/fs/btrfs/tree-log.c +++ b/fs/btrfs/tree-log.c @@ -4036,11 +4036,8 @@ static noinline int copy_items(struct btrfs_trans_handle *trans, fs_info->csum_root, ds + cs, ds + cs + cl - 1, &ordered_sums, 0); - if (ret) { - btrfs_release_path(dst_path); - kfree(ins_data); - return ret; - } + if (ret) + break; } } } @@ -4053,7 +4050,6 @@ static noinline int copy_items(struct btrfs_trans_handle *trans, * we have to do this after the loop above to avoid changing the * log tree while trying to change the log tree. */ - ret = 0; while (!list_empty(&ordered_sums)) { struct btrfs_ordered_sum *sums = list_entry(ordered_sums.next, struct btrfs_ordered_sum, From faa008899a4db21a2df99833cb4ff6fa67009a20 Mon Sep 17 00:00:00 2001 From: Josef Bacik Date: Thu, 30 Jul 2020 11:18:09 -0400 Subject: [PATCH 063/122] btrfs: make sure SB_I_VERSION doesn't get unset by remount There's some inconsistency around SB_I_VERSION handling with mount and remount. Since we don't really want it to be off ever just work around this by making sure we don't get the flag cleared on remount. There's a tiny cpu cost of setting the bit, otherwise all changes to i_version also change some of the times (ctime/mtime) so the inode needs to be synced. We wouldn't save anything by disabling it. Reported-by: Eric Sandeen CC: stable@vger.kernel.org # 5.4+ Signed-off-by: Josef Bacik Reviewed-by: David Sterba [ add perf impact analysis ] Signed-off-by: David Sterba --- fs/btrfs/super.c | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/fs/btrfs/super.c b/fs/btrfs/super.c index 9b4e9c4c4673..e529ddb35b87 100644 --- a/fs/btrfs/super.c +++ b/fs/btrfs/super.c @@ -1960,6 +1960,12 @@ static int btrfs_remount(struct super_block *sb, int *flags, char *data) set_bit(BTRFS_FS_OPEN, &fs_info->flags); } out: + /* + * We need to set SB_I_VERSION here otherwise it'll get cleared by VFS, + * since the absence of the flag means it can be toggled off by remount. + */ + *flags |= SB_I_VERSION; + wake_up_process(fs_info->transaction_kthread); btrfs_remount_cleanup(fs_info, old_opts); clear_bit(BTRFS_FS_STATE_REMOUNTING, &fs_info->fs_state); From c15c2ec07a26b251040943a1a9f90d3037f041e5 Mon Sep 17 00:00:00 2001 From: Boleyn Su Date: Thu, 6 Aug 2020 15:31:44 +0900 Subject: [PATCH 064/122] btrfs: check correct variable after allocation in btrfs_backref_iter_alloc The `if (!ret)` check will always be false and it may result in ret->path being dereferenced while it is a NULL pointer. Fixes: a37f232b7b65 ("btrfs: backref: introduce the skeleton of btrfs_backref_iter") CC: stable@vger.kernel.org # 5.8+ Reviewed-by: Nikolay Borisov Reviewed-by: Qu Wenruo Signed-off-by: Boleyn Su Reviewed-by: David Sterba Signed-off-by: David Sterba --- fs/btrfs/backref.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fs/btrfs/backref.c b/fs/btrfs/backref.c index ea10f7bc99ab..ea1c28ccb44f 100644 --- a/fs/btrfs/backref.c +++ b/fs/btrfs/backref.c @@ -2303,7 +2303,7 @@ struct btrfs_backref_iter *btrfs_backref_iter_alloc( return NULL; ret->path = btrfs_alloc_path(); - if (!ret) { + if (!ret->path) { kfree(ret); return NULL; } From 62ab2cc04ddc5dfab4e382ae167734fc111da5ed Mon Sep 17 00:00:00 2001 From: Qu Wenruo Date: Mon, 3 Aug 2020 14:20:11 +0800 Subject: [PATCH 065/122] btrfs: sysfs: fix NULL pointer dereference at btrfs_sysfs_del_qgroups() [BUG] Unmounting a btrfs filesystem with quota disabled will cause the following NULL pointer dereference: BTRFS info (device dm-5): has skinny extents BUG: kernel NULL pointer dereference, address: 0000000000000018 #PF: supervisor read access in kernel mode #PF: error_code(0x0000) - not-present page CPU: 7 PID: 637 Comm: umount Not tainted 5.8.0-rc7-next-20200731-custom #76 RIP: 0010:kobject_del+0x6/0x20 Call Trace: btrfs_sysfs_del_qgroups+0xac/0xf0 [btrfs] btrfs_free_qgroup_config+0x63/0x70 [btrfs] close_ctree+0x1f5/0x323 [btrfs] btrfs_put_super+0x15/0x17 [btrfs] generic_shutdown_super+0x72/0x110 kill_anon_super+0x18/0x30 btrfs_kill_super+0x17/0x30 [btrfs] deactivate_locked_super+0x3b/0xa0 deactivate_super+0x40/0x50 cleanup_mnt+0x135/0x190 __cleanup_mnt+0x12/0x20 task_work_run+0x64/0xb0 exit_to_user_mode_prepare+0x18a/0x190 syscall_exit_to_user_mode+0x4f/0x270 do_syscall_64+0x45/0x50 entry_SYSCALL_64_after_hwframe+0x44/0xa9 ---[ end trace 37b7adca5c1d5c5d ]--- [CAUSE] Commit 079ad2fb4bf9 ("kobject: Avoid premature parent object freeing in kobject_cleanup()") changed kobject_del() that it no longer accepts NULL pointer. Before that commit, kobject_del() and kobject_put() all accept NULL pointers and just ignore such NULL pointers. But that mentioned commit needs to access the parent node, killing the old NULL pointer behavior. Unfortunately btrfs is relying on that hidden feature thus we will trigger such NULL pointer dereference. [FIX] Instead of just saving several lines, do proper fs_info->qgroups_kobj check before calling kobject_del() and kobject_put(). Reviewed-by: Nikolay Borisov Signed-off-by: Qu Wenruo Reviewed-by: David Sterba Signed-off-by: David Sterba --- fs/btrfs/sysfs.c | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/fs/btrfs/sysfs.c b/fs/btrfs/sysfs.c index 104c80caaa74..c8df2edafd85 100644 --- a/fs/btrfs/sysfs.c +++ b/fs/btrfs/sysfs.c @@ -1565,9 +1565,11 @@ void btrfs_sysfs_del_qgroups(struct btrfs_fs_info *fs_info) rbtree_postorder_for_each_entry_safe(qgroup, next, &fs_info->qgroup_tree, node) btrfs_sysfs_del_one_qgroup(fs_info, qgroup); - kobject_del(fs_info->qgroups_kobj); - kobject_put(fs_info->qgroups_kobj); - fs_info->qgroups_kobj = NULL; + if (fs_info->qgroups_kobj) { + kobject_del(fs_info->qgroups_kobj); + kobject_put(fs_info->qgroups_kobj); + fs_info->qgroups_kobj = NULL; + } } /* Called when qgroups get initialized, thus there is no need for locking */ From 519a8a6cf91dda095be2d36216fc4ebc525270a1 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Mon, 10 Aug 2020 18:42:14 +0200 Subject: [PATCH 066/122] net: Revert "net: optimize the sockptr_t for unified kernel/user address spaces" This reverts commits 6d04fe15f78acdf8e32329e208552e226f7a8ae6 and a31edb2059ed4e498f9aa8230c734b59d0ad797a. It turns out the idea to share a single pointer for both kernel and user space address causes various kinds of problems. So use the slightly less optimal version that uses an extra bit, but which is guaranteed to be safe everywhere. Fixes: 6d04fe15f78a ("net: optimize the sockptr_t for unified kernel/user address spaces") Reported-by: Eric Dumazet Reported-by: John Stultz Signed-off-by: Christoph Hellwig Signed-off-by: David S. Miller --- include/linux/sockptr.h | 26 ++------------------------ net/ipv4/bpfilter/sockopt.c | 14 ++++++-------- net/socket.c | 6 +----- 3 files changed, 9 insertions(+), 37 deletions(-) diff --git a/include/linux/sockptr.h b/include/linux/sockptr.h index 96840def9d69..ea193414298b 100644 --- a/include/linux/sockptr.h +++ b/include/linux/sockptr.h @@ -8,26 +8,9 @@ #ifndef _LINUX_SOCKPTR_H #define _LINUX_SOCKPTR_H -#include #include #include -#ifdef CONFIG_ARCH_HAS_NON_OVERLAPPING_ADDRESS_SPACE -typedef union { - void *kernel; - void __user *user; -} sockptr_t; - -static inline bool sockptr_is_kernel(sockptr_t sockptr) -{ - return (unsigned long)sockptr.kernel >= TASK_SIZE; -} - -static inline sockptr_t KERNEL_SOCKPTR(void *p) -{ - return (sockptr_t) { .kernel = p }; -} -#else /* CONFIG_ARCH_HAS_NON_OVERLAPPING_ADDRESS_SPACE */ typedef struct { union { void *kernel; @@ -45,15 +28,10 @@ static inline sockptr_t KERNEL_SOCKPTR(void *p) { return (sockptr_t) { .kernel = p, .is_kernel = true }; } -#endif /* CONFIG_ARCH_HAS_NON_OVERLAPPING_ADDRESS_SPACE */ -static inline int __must_check init_user_sockptr(sockptr_t *sp, void __user *p, - size_t size) +static inline sockptr_t USER_SOCKPTR(void __user *p) { - if (!access_ok(p, size)) - return -EFAULT; - *sp = (sockptr_t) { .user = p }; - return 0; + return (sockptr_t) { .user = p }; } static inline bool sockptr_is_null(sockptr_t sockptr) diff --git a/net/ipv4/bpfilter/sockopt.c b/net/ipv4/bpfilter/sockopt.c index 545b2640f019..1b34cb9a7708 100644 --- a/net/ipv4/bpfilter/sockopt.c +++ b/net/ipv4/bpfilter/sockopt.c @@ -57,18 +57,16 @@ int bpfilter_ip_set_sockopt(struct sock *sk, int optname, sockptr_t optval, return bpfilter_mbox_request(sk, optname, optval, optlen, true); } -int bpfilter_ip_get_sockopt(struct sock *sk, int optname, - char __user *user_optval, int __user *optlen) +int bpfilter_ip_get_sockopt(struct sock *sk, int optname, char __user *optval, + int __user *optlen) { - sockptr_t optval; - int err, len; + int len; if (get_user(len, optlen)) return -EFAULT; - err = init_user_sockptr(&optval, user_optval, len); - if (err) - return err; - return bpfilter_mbox_request(sk, optname, optval, len, false); + + return bpfilter_mbox_request(sk, optname, USER_SOCKPTR(optval), len, + false); } static int __init bpfilter_sockopt_init(void) diff --git a/net/socket.c b/net/socket.c index f4d5998bdcba..dbbe8ea7d395 100644 --- a/net/socket.c +++ b/net/socket.c @@ -2095,7 +2095,7 @@ static bool sock_use_custom_sol_socket(const struct socket *sock) int __sys_setsockopt(int fd, int level, int optname, char __user *user_optval, int optlen) { - sockptr_t optval; + sockptr_t optval = USER_SOCKPTR(user_optval); char *kernel_optval = NULL; int err, fput_needed; struct socket *sock; @@ -2103,10 +2103,6 @@ int __sys_setsockopt(int fd, int level, int optname, char __user *user_optval, if (optlen < 0) return -EINVAL; - err = init_user_sockptr(&optval, user_optval, optlen); - if (err) - return err; - sock = sockfd_lookup_light(fd, &err, &fput_needed); if (!sock) return err; From 8a7f280f29a80f6e0798f5d6e07c5dd8726620fe Mon Sep 17 00:00:00 2001 From: Ronak Doshi Date: Mon, 10 Aug 2020 09:55:55 -0700 Subject: [PATCH 067/122] vmxnet3: use correct tcp hdr length when packet is encapsulated Commit dacce2be3312 ("vmxnet3: add geneve and vxlan tunnel offload support") added support for encapsulation offload. However, while calculating tcp hdr length, it does not take into account if the packet is encapsulated or not. This patch fixes this issue by using correct reference for inner tcp header. Fixes: dacce2be3312 ("vmxnet3: add geneve and vxlan tunnel offload support") Signed-off-by: Ronak Doshi Acked-by: Guolin Yang Signed-off-by: David S. Miller --- drivers/net/vmxnet3/vmxnet3_drv.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/net/vmxnet3/vmxnet3_drv.c b/drivers/net/vmxnet3/vmxnet3_drv.c index ca395f9679d0..2818015324b8 100644 --- a/drivers/net/vmxnet3/vmxnet3_drv.c +++ b/drivers/net/vmxnet3/vmxnet3_drv.c @@ -886,7 +886,8 @@ vmxnet3_parse_hdr(struct sk_buff *skb, struct vmxnet3_tx_queue *tq, switch (protocol) { case IPPROTO_TCP: - ctx->l4_hdr_size = tcp_hdrlen(skb); + ctx->l4_hdr_size = skb->encapsulation ? inner_tcp_hdrlen(skb) : + tcp_hdrlen(skb); break; case IPPROTO_UDP: ctx->l4_hdr_size = sizeof(struct udphdr); From 56e287b3daa20a95e0756e016362f2e96158e1a3 Mon Sep 17 00:00:00 2001 From: Jakub Kicinski Date: Mon, 10 Aug 2020 10:32:04 -0700 Subject: [PATCH 068/122] nfp: update maintainer I'm not doing much work on the NFP driver any more. Signed-off-by: Jakub Kicinski Signed-off-by: David S. Miller --- MAINTAINERS | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/MAINTAINERS b/MAINTAINERS index d2784b502da0..83ea07711518 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -11948,7 +11948,8 @@ F: include/uapi/linux/netrom.h F: net/netrom/ NETRONOME ETHERNET DRIVERS -M: Jakub Kicinski +M: Simon Horman +R: Jakub Kicinski L: oss-drivers@netronome.com S: Maintained F: drivers/net/ethernet/netronome/ From f19008e676366c44e9241af57f331b6c6edf9552 Mon Sep 17 00:00:00 2001 From: Jason Baron Date: Mon, 10 Aug 2020 13:38:39 -0400 Subject: [PATCH 069/122] tcp: correct read of TFO keys on big endian systems When TFO keys are read back on big endian systems either via the global sysctl interface or via getsockopt() using TCP_FASTOPEN_KEY, the values don't match what was written. For example, on s390x: # echo "1-2-3-4" > /proc/sys/net/ipv4/tcp_fastopen_key # cat /proc/sys/net/ipv4/tcp_fastopen_key 02000000-01000000-04000000-03000000 Instead of: # cat /proc/sys/net/ipv4/tcp_fastopen_key 00000001-00000002-00000003-00000004 Fix this by converting to the correct endianness on read. This was reported by Colin Ian King when running the 'tcp_fastopen_backup_key' net selftest on s390x, which depends on the read value matching what was written. I've confirmed that the test now passes on big and little endian systems. Signed-off-by: Jason Baron Fixes: 438ac88009bc ("net: fastopen: robustness and endianness fixes for SipHash") Cc: Ard Biesheuvel Cc: Eric Dumazet Reported-and-tested-by: Colin Ian King Signed-off-by: David S. Miller --- include/net/tcp.h | 2 ++ net/ipv4/sysctl_net_ipv4.c | 16 ++++------------ net/ipv4/tcp.c | 16 ++++------------ net/ipv4/tcp_fastopen.c | 23 +++++++++++++++++++++++ 4 files changed, 33 insertions(+), 24 deletions(-) diff --git a/include/net/tcp.h b/include/net/tcp.h index dbf5c791a6eb..eab6c7510b5b 100644 --- a/include/net/tcp.h +++ b/include/net/tcp.h @@ -1672,6 +1672,8 @@ void tcp_fastopen_destroy_cipher(struct sock *sk); void tcp_fastopen_ctx_destroy(struct net *net); int tcp_fastopen_reset_cipher(struct net *net, struct sock *sk, void *primary_key, void *backup_key); +int tcp_fastopen_get_cipher(struct net *net, struct inet_connection_sock *icsk, + u64 *key); void tcp_fastopen_add_skb(struct sock *sk, struct sk_buff *skb); struct sock *tcp_try_fastopen(struct sock *sk, struct sk_buff *skb, struct request_sock *req, diff --git a/net/ipv4/sysctl_net_ipv4.c b/net/ipv4/sysctl_net_ipv4.c index 5653e3b011bf..54023a46db04 100644 --- a/net/ipv4/sysctl_net_ipv4.c +++ b/net/ipv4/sysctl_net_ipv4.c @@ -301,24 +301,16 @@ static int proc_tcp_fastopen_key(struct ctl_table *table, int write, struct ctl_table tbl = { .maxlen = ((TCP_FASTOPEN_KEY_LENGTH * 2 * TCP_FASTOPEN_KEY_MAX) + (TCP_FASTOPEN_KEY_MAX * 5)) }; - struct tcp_fastopen_context *ctx; - u32 user_key[TCP_FASTOPEN_KEY_MAX * 4]; - __le32 key[TCP_FASTOPEN_KEY_MAX * 4]; + u32 user_key[TCP_FASTOPEN_KEY_BUF_LENGTH / sizeof(u32)]; + __le32 key[TCP_FASTOPEN_KEY_BUF_LENGTH / sizeof(__le32)]; char *backup_data; - int ret, i = 0, off = 0, n_keys = 0; + int ret, i = 0, off = 0, n_keys; tbl.data = kmalloc(tbl.maxlen, GFP_KERNEL); if (!tbl.data) return -ENOMEM; - rcu_read_lock(); - ctx = rcu_dereference(net->ipv4.tcp_fastopen_ctx); - if (ctx) { - n_keys = tcp_fastopen_context_len(ctx); - memcpy(&key[0], &ctx->key[0], TCP_FASTOPEN_KEY_LENGTH * n_keys); - } - rcu_read_unlock(); - + n_keys = tcp_fastopen_get_cipher(net, NULL, (u64 *)key); if (!n_keys) { memset(&key[0], 0, TCP_FASTOPEN_KEY_LENGTH); n_keys = 1; diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c index c06d2bfd2ec4..31f3b858db81 100644 --- a/net/ipv4/tcp.c +++ b/net/ipv4/tcp.c @@ -3685,22 +3685,14 @@ static int do_tcp_getsockopt(struct sock *sk, int level, return 0; case TCP_FASTOPEN_KEY: { - __u8 key[TCP_FASTOPEN_KEY_BUF_LENGTH]; - struct tcp_fastopen_context *ctx; - unsigned int key_len = 0; + u64 key[TCP_FASTOPEN_KEY_BUF_LENGTH / sizeof(u64)]; + unsigned int key_len; if (get_user(len, optlen)) return -EFAULT; - rcu_read_lock(); - ctx = rcu_dereference(icsk->icsk_accept_queue.fastopenq.ctx); - if (ctx) { - key_len = tcp_fastopen_context_len(ctx) * - TCP_FASTOPEN_KEY_LENGTH; - memcpy(&key[0], &ctx->key[0], key_len); - } - rcu_read_unlock(); - + key_len = tcp_fastopen_get_cipher(net, icsk, key) * + TCP_FASTOPEN_KEY_LENGTH; len = min_t(unsigned int, len, key_len); if (put_user(len, optlen)) return -EFAULT; diff --git a/net/ipv4/tcp_fastopen.c b/net/ipv4/tcp_fastopen.c index 19ad9586c720..1bb85821f1e6 100644 --- a/net/ipv4/tcp_fastopen.c +++ b/net/ipv4/tcp_fastopen.c @@ -108,6 +108,29 @@ int tcp_fastopen_reset_cipher(struct net *net, struct sock *sk, return err; } +int tcp_fastopen_get_cipher(struct net *net, struct inet_connection_sock *icsk, + u64 *key) +{ + struct tcp_fastopen_context *ctx; + int n_keys = 0, i; + + rcu_read_lock(); + if (icsk) + ctx = rcu_dereference(icsk->icsk_accept_queue.fastopenq.ctx); + else + ctx = rcu_dereference(net->ipv4.tcp_fastopen_ctx); + if (ctx) { + n_keys = tcp_fastopen_context_len(ctx); + for (i = 0; i < n_keys; i++) { + put_unaligned_le64(ctx->key[i].key[0], key + (i * 2)); + put_unaligned_le64(ctx->key[i].key[1], key + (i * 2) + 1); + } + } + rcu_read_unlock(); + + return n_keys; +} + static bool __tcp_fastopen_cookie_gen_cipher(struct request_sock *req, struct sk_buff *syn, const siphash_key_t *key, From 444da3f52407d74c9aa12187ac6b01f76ee47d62 Mon Sep 17 00:00:00 2001 From: Jakub Kicinski Date: Mon, 10 Aug 2020 11:21:11 -0700 Subject: [PATCH 070/122] bitfield.h: don't compile-time validate _val in FIELD_FIT When ur_load_imm_any() is inlined into jeq_imm(), it's possible for the compiler to deduce a case where _val can only have the value of -1 at compile time. Specifically, /* struct bpf_insn: _s32 imm */ u64 imm = insn->imm; /* sign extend */ if (imm >> 32) { /* non-zero only if insn->imm is negative */ /* inlined from ur_load_imm_any */ u32 __imm = imm >> 32; /* therefore, always 0xffffffff */ if (__builtin_constant_p(__imm) && __imm > 255) compiletime_assert_XXX() This can result in tripping a BUILD_BUG_ON() in __BF_FIELD_CHECK() that checks that a given value is representable in one byte (interpreted as unsigned). FIELD_FIT() should return true or false at runtime for whether a value can fit for not. Don't break the build over a value that's too large for the mask. We'd prefer to keep the inlining and compiler optimizations though we know this case will always return false. Cc: stable@vger.kernel.org Fixes: 1697599ee301a ("bitfield.h: add FIELD_FIT() helper") Link: https://lore.kernel.org/kernel-hardening/CAK7LNASvb0UDJ0U5wkYYRzTAdnEs64HjXpEUL7d=V0CXiAXcNw@mail.gmail.com/ Reported-by: Masahiro Yamada Debugged-by: Sami Tolvanen Signed-off-by: Jakub Kicinski Signed-off-by: Nick Desaulniers Signed-off-by: David S. Miller --- include/linux/bitfield.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/include/linux/bitfield.h b/include/linux/bitfield.h index 48ea093ff04c..4e035aca6f7e 100644 --- a/include/linux/bitfield.h +++ b/include/linux/bitfield.h @@ -77,7 +77,7 @@ */ #define FIELD_FIT(_mask, _val) \ ({ \ - __BF_FIELD_CHECK(_mask, 0ULL, _val, "FIELD_FIT: "); \ + __BF_FIELD_CHECK(_mask, 0ULL, 0ULL, "FIELD_FIT: "); \ !((((typeof(_mask))_val) << __bf_shf(_mask)) & ~(_mask)); \ }) From 881a3a11c2b858fe9b69ef79ac5ee9978a266dc9 Mon Sep 17 00:00:00 2001 From: Pavel Machek Date: Mon, 3 Aug 2020 11:35:06 +0200 Subject: [PATCH 071/122] btrfs: fix return value mixup in btrfs_get_extent btrfs_get_extent() sets variable ret, but out: error path expect error to be in variable err so the error code is lost. Fixes: 6bf9e4bd6a27 ("btrfs: inode: Verify inode mode to avoid NULL pointer dereference") CC: stable@vger.kernel.org # 5.4+ Reviewed-by: Nikolay Borisov Signed-off-by: Pavel Machek (CIP) Reviewed-by: David Sterba Signed-off-by: David Sterba --- fs/btrfs/inode.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c index 9988d754e465..16ce82039dcf 100644 --- a/fs/btrfs/inode.c +++ b/fs/btrfs/inode.c @@ -6627,7 +6627,7 @@ struct extent_map *btrfs_get_extent(struct btrfs_inode *inode, extent_type == BTRFS_FILE_EXTENT_PREALLOC) { /* Only regular file could have regular/prealloc extent */ if (!S_ISREG(inode->vfs_inode.i_mode)) { - ret = -EUCLEAN; + err = -EUCLEAN; btrfs_crit(fs_info, "regular/prealloc extent found for non-regular inode %llu", btrfs_ino(inode)); From f0cbd3b83ed47803df941865f720934c69abb803 Mon Sep 17 00:00:00 2001 From: Vasily Gorbik Date: Thu, 30 Jul 2020 16:02:28 +0200 Subject: [PATCH 072/122] s390/atomic: circumvent gcc 10 build regression Circumvent the following gcc 10 allyesconfig build regression: CC drivers/leds/trigger/ledtrig-cpu.o In file included from ./arch/s390/include/asm/bitops.h:39, from ./include/linux/bitops.h:29, from ./include/linux/kernel.h:12, from drivers/leds/trigger/ledtrig-cpu.c:18: ./arch/s390/include/asm/atomic_ops.h: In function 'ledtrig_cpu': ./arch/s390/include/asm/atomic_ops.h:46:2: warning: 'asm' operand 1 probably does not match constraints 46 | asm volatile( \ | ^~~ ./arch/s390/include/asm/atomic_ops.h:53:2: note: in expansion of macro '__ATOMIC_CONST_OP' 53 | __ATOMIC_CONST_OP(op_name, op_type, op_string, "\n") \ | ^~~~~~~~~~~~~~~~~ ./arch/s390/include/asm/atomic_ops.h:56:1: note: in expansion of macro '__ATOMIC_CONST_OPS' 56 | __ATOMIC_CONST_OPS(__atomic_add_const, int, "asi") | ^~~~~~~~~~~~~~~~~~ ./arch/s390/include/asm/atomic_ops.h:46:2: error: impossible constraint in 'asm' 46 | asm volatile( \ | ^~~ ./arch/s390/include/asm/atomic_ops.h:53:2: note: in expansion of macro '__ATOMIC_CONST_OP' 53 | __ATOMIC_CONST_OP(op_name, op_type, op_string, "\n") \ | ^~~~~~~~~~~~~~~~~ ./arch/s390/include/asm/atomic_ops.h:56:1: note: in expansion of macro '__ATOMIC_CONST_OPS' 56 | __ATOMIC_CONST_OPS(__atomic_add_const, int, "asi") | ^~~~~~~~~~~~~~~~~~ scripts/Makefile.build:280: recipe for target 'drivers/leds/trigger/ledtrig-cpu.o' failed By swapping conditions as proposed here: https://gcc.gnu.org/pipermail/gcc-patches/2020-July/549318.html Suggested-by: Ilya Leoshkevich Signed-off-by: Vasily Gorbik Signed-off-by: Heiko Carstens --- arch/s390/include/asm/atomic.h | 12 ++++++++++-- 1 file changed, 10 insertions(+), 2 deletions(-) diff --git a/arch/s390/include/asm/atomic.h b/arch/s390/include/asm/atomic.h index cae473a7b6f7..11c5952e1afa 100644 --- a/arch/s390/include/asm/atomic.h +++ b/arch/s390/include/asm/atomic.h @@ -45,7 +45,11 @@ static inline int atomic_fetch_add(int i, atomic_t *v) static inline void atomic_add(int i, atomic_t *v) { #ifdef CONFIG_HAVE_MARCH_Z196_FEATURES - if (__builtin_constant_p(i) && (i > -129) && (i < 128)) { + /* + * Order of conditions is important to circumvent gcc 10 bug: + * https://gcc.gnu.org/pipermail/gcc-patches/2020-July/549318.html + */ + if ((i > -129) && (i < 128) && __builtin_constant_p(i)) { __atomic_add_const(i, &v->counter); return; } @@ -112,7 +116,11 @@ static inline s64 atomic64_fetch_add(s64 i, atomic64_t *v) static inline void atomic64_add(s64 i, atomic64_t *v) { #ifdef CONFIG_HAVE_MARCH_Z196_FEATURES - if (__builtin_constant_p(i) && (i > -129) && (i < 128)) { + /* + * Order of conditions is important to circumvent gcc 10 bug: + * https://gcc.gnu.org/pipermail/gcc-patches/2020-July/549318.html + */ + if ((i > -129) && (i < 128) && __builtin_constant_p(i)) { __atomic64_add_const(i, (long *)&v->counter); return; } From ba925fa35057a062ac98c3e8138b013ce4ce351c Mon Sep 17 00:00:00 2001 From: Gerald Schaefer Date: Wed, 29 Jul 2020 22:22:34 +0200 Subject: [PATCH 073/122] s390/gmap: improve THP splitting During s390_enable_sie(), we need to take care of splitting all qemu user process THP mappings. This is currently done with follow_page(FOLL_SPLIT), by simply iterating over all vma ranges, with PAGE_SIZE increment. This logic is sub-optimal and can result in a lot of unnecessary overhead, especially when using qemu and ASAN with large shadow map. Ilya reported significant system slow-down with one CPU busy for a long time and overall unresponsiveness. Fix this by using walk_page_vma() and directly calling split_huge_pmd() only for present pmds, which greatly reduces overhead. Cc: # v5.4+ Reported-by: Ilya Leoshkevich Tested-by: Ilya Leoshkevich Acked-by: Christian Borntraeger Signed-off-by: Gerald Schaefer Signed-off-by: Heiko Carstens --- arch/s390/mm/gmap.c | 27 ++++++++++++++++++++------- 1 file changed, 20 insertions(+), 7 deletions(-) diff --git a/arch/s390/mm/gmap.c b/arch/s390/mm/gmap.c index 190357ff86b3..46c1bf2a3b4b 100644 --- a/arch/s390/mm/gmap.c +++ b/arch/s390/mm/gmap.c @@ -2485,23 +2485,36 @@ void gmap_sync_dirty_log_pmd(struct gmap *gmap, unsigned long bitmap[4], } EXPORT_SYMBOL_GPL(gmap_sync_dirty_log_pmd); +#ifdef CONFIG_TRANSPARENT_HUGEPAGE +static int thp_split_walk_pmd_entry(pmd_t *pmd, unsigned long addr, + unsigned long end, struct mm_walk *walk) +{ + struct vm_area_struct *vma = walk->vma; + + split_huge_pmd(vma, pmd, addr); + return 0; +} + +static const struct mm_walk_ops thp_split_walk_ops = { + .pmd_entry = thp_split_walk_pmd_entry, +}; + static inline void thp_split_mm(struct mm_struct *mm) { -#ifdef CONFIG_TRANSPARENT_HUGEPAGE struct vm_area_struct *vma; - unsigned long addr; for (vma = mm->mmap; vma != NULL; vma = vma->vm_next) { - for (addr = vma->vm_start; - addr < vma->vm_end; - addr += PAGE_SIZE) - follow_page(vma, addr, FOLL_SPLIT); vma->vm_flags &= ~VM_HUGEPAGE; vma->vm_flags |= VM_NOHUGEPAGE; + walk_page_vma(vma, &thp_split_walk_ops, NULL); } mm->def_flags |= VM_NOHUGEPAGE; -#endif } +#else +static inline void thp_split_mm(struct mm_struct *mm) +{ +} +#endif /* CONFIG_TRANSPARENT_HUGEPAGE */ /* * Remove all empty zero pages from the mapping for lazy refaulting From 75d3e7f4769d276a056efa1cc7f08de571fc9b4b Mon Sep 17 00:00:00 2001 From: Wang Hai Date: Thu, 30 Jul 2020 14:36:02 +0800 Subject: [PATCH 074/122] s390/test_unwind: fix possible memleak in test_unwind() test_unwind() misses to call kfree(bt) in an error path. Add the missed function call to fix it. Fixes: 0610154650f1 ("s390/test_unwind: print verbose unwinding results") Reported-by: Hulk Robot Signed-off-by: Wang Hai Acked-by: Ilya Leoshkevich Signed-off-by: Heiko Carstens --- arch/s390/lib/test_unwind.c | 1 + 1 file changed, 1 insertion(+) diff --git a/arch/s390/lib/test_unwind.c b/arch/s390/lib/test_unwind.c index eb382ceaa116..7c988994931f 100644 --- a/arch/s390/lib/test_unwind.c +++ b/arch/s390/lib/test_unwind.c @@ -64,6 +64,7 @@ static noinline int test_unwind(struct task_struct *task, struct pt_regs *regs, break; if (state.reliable && !addr) { pr_err("unwind state reliable but addr is 0\n"); + kfree(bt); return -EINVAL; } sprint_symbol(sym, addr); From 75be6b98eda46d27b4a218fdbfb034a5b0fb5b12 Mon Sep 17 00:00:00 2001 From: Tianjia Zhang Date: Sun, 2 Aug 2020 19:15:26 +0800 Subject: [PATCH 075/122] s390/pkey: remove redundant variable initialization In the first place, the initialization value of `rc` is wrong. It is unnecessary to initialize `rc` variables, so remove their initialization operation. Fixes: f2bbc96e7cfad ("s390/pkey: add CCA AES cipher key support") Signed-off-by: Tianjia Zhang Signed-off-by: Harald Freudenberger Signed-off-by: Heiko Carstens --- drivers/s390/crypto/pkey_api.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/s390/crypto/pkey_api.c b/drivers/s390/crypto/pkey_api.c index d5880f52dc2b..5896e5282a4e 100644 --- a/drivers/s390/crypto/pkey_api.c +++ b/drivers/s390/crypto/pkey_api.c @@ -818,7 +818,7 @@ static int pkey_keyblob2pkey2(const struct pkey_apqn *apqns, size_t nr_apqns, static int pkey_apqns4key(const u8 *key, size_t keylen, u32 flags, struct pkey_apqn *apqns, size_t *nr_apqns) { - int rc = EINVAL; + int rc; u32 _nr_apqns, *_apqns = NULL; struct keytoken_header *hdr = (struct keytoken_header *)key; @@ -886,7 +886,7 @@ static int pkey_apqns4keytype(enum pkey_key_type ktype, u8 cur_mkvp[32], u8 alt_mkvp[32], u32 flags, struct pkey_apqn *apqns, size_t *nr_apqns) { - int rc = -EINVAL; + int rc; u32 _nr_apqns, *_apqns = NULL; if (ktype == PKEY_TYPE_CCA_DATA || ktype == PKEY_TYPE_CCA_CIPHER) { From 535e4fc623fab2e09a0653fc3a3e17f382ad0251 Mon Sep 17 00:00:00 2001 From: Alexander Gordeev Date: Tue, 4 Aug 2020 20:35:49 +0200 Subject: [PATCH 076/122] s390/numa: set node distance to LOCAL_DISTANCE The node distance is hardcoded to 0, which causes a trouble for some user-level applications. In particular, "libnuma" expects the distance of a node to itself as LOCAL_DISTANCE. This update removes the offending node distance override. Cc: # 4.4 Fixes: 3a368f742da1 ("s390/numa: add core infrastructure") Signed-off-by: Alexander Gordeev Signed-off-by: Heiko Carstens --- arch/s390/include/asm/topology.h | 6 ------ 1 file changed, 6 deletions(-) diff --git a/arch/s390/include/asm/topology.h b/arch/s390/include/asm/topology.h index fbb507504a3b..3a0ac0c7a9a3 100644 --- a/arch/s390/include/asm/topology.h +++ b/arch/s390/include/asm/topology.h @@ -86,12 +86,6 @@ static inline const struct cpumask *cpumask_of_node(int node) #define pcibus_to_node(bus) __pcibus_to_node(bus) -#define node_distance(a, b) __node_distance(a, b) -static inline int __node_distance(int a, int b) -{ - return 0; -} - #else /* !CONFIG_NUMA */ #define numa_node_id numa_node_id From 929a343b858612100cb09443a8aaa20d4a4706d3 Mon Sep 17 00:00:00 2001 From: Krzysztof Kozlowski Date: Wed, 5 Aug 2020 17:50:53 +0200 Subject: [PATCH 077/122] s390/Kconfig: add missing ZCRYPT dependency to VFIO_AP The VFIO_AP uses ap_driver_register() (and deregister) functions implemented in ap_bus.c (compiled into ap.o). However the ap.o will be built only if CONFIG_ZCRYPT is selected. This was not visible before commit e93a1695d7fb ("iommu: Enable compile testing for some of drivers") because the CONFIG_VFIO_AP depends on CONFIG_S390_AP_IOMMU which depends on the missing CONFIG_ZCRYPT. After adding COMPILE_TEST, it is possible to select a configuration with VFIO_AP and S390_AP_IOMMU but without the ZCRYPT. Add proper dependency to the VFIO_AP to fix build errors: ERROR: modpost: "ap_driver_register" [drivers/s390/crypto/vfio_ap.ko] undefined! ERROR: modpost: "ap_driver_unregister" [drivers/s390/crypto/vfio_ap.ko] undefined! Reported-by: kernel test robot Fixes: e93a1695d7fb ("iommu: Enable compile testing for some of drivers") Signed-off-by: Krzysztof Kozlowski Signed-off-by: Heiko Carstens --- arch/s390/Kconfig | 1 + 1 file changed, 1 insertion(+) diff --git a/arch/s390/Kconfig b/arch/s390/Kconfig index 8c0b52940165..2a31a5e74e42 100644 --- a/arch/s390/Kconfig +++ b/arch/s390/Kconfig @@ -766,6 +766,7 @@ config VFIO_AP def_tristate n prompt "VFIO support for AP devices" depends on S390_AP_IOMMU && VFIO_MDEV_DEVICE && KVM + depends on ZCRYPT help This driver grants access to Adjunct Processor (AP) devices via the VFIO mediated device interface. From 0990d836cecb207071492f5679d5b07b26574205 Mon Sep 17 00:00:00 2001 From: Mikhail Zaslonko Date: Tue, 5 May 2020 10:34:52 +0200 Subject: [PATCH 078/122] s390/debug: debug feature version 3 Change __debug_entry structure in the following way: - remove redundant union - Field containing cpuid is expanded to 16 bits. 8-bit width was not enough since we already support up to 512 cpus. - Field containing the timestamp is expanded to 60 bits. The timestamp itself is now stored in the absolute Unix time format in microseconds taking the Epoch Index into acount. Adjust default header for debug entries by setting minimum width for cpuid to 4 digits. Reviewed-by: Heiko Carstens Signed-off-by: Mikhail Zaslonko Signed-off-by: Heiko Carstens --- arch/s390/include/asm/debug.h | 17 ++++++----------- arch/s390/kernel/debug.c | 32 ++++++++++++++++++-------------- 2 files changed, 24 insertions(+), 25 deletions(-) diff --git a/arch/s390/include/asm/debug.h b/arch/s390/include/asm/debug.h index 17a26261f288..c1b82bcc017c 100644 --- a/arch/s390/include/asm/debug.h +++ b/arch/s390/include/asm/debug.h @@ -2,7 +2,7 @@ /* * S/390 debug facility * - * Copyright IBM Corp. 1999, 2000 + * Copyright IBM Corp. 1999, 2020 */ #ifndef DEBUG_H #define DEBUG_H @@ -26,19 +26,14 @@ #define DEBUG_DATA(entry) (char *)(entry + 1) /* data is stored behind */ /* the entry information */ -#define __DEBUG_FEATURE_VERSION 2 /* version of debug feature */ +#define __DEBUG_FEATURE_VERSION 3 /* version of debug feature */ struct __debug_entry { - union { - struct { - unsigned long clock : 52; - unsigned long exception : 1; - unsigned long level : 3; - unsigned long cpuid : 8; - } fields; - unsigned long stck; - } id; + unsigned long clock : 60; + unsigned long exception : 1; + unsigned long level : 3; void *caller; + unsigned short cpu; } __packed; typedef struct __debug_entry debug_entry_t; diff --git a/arch/s390/kernel/debug.c b/arch/s390/kernel/debug.c index beb4b44a11d1..b6619ae9a3e0 100644 --- a/arch/s390/kernel/debug.c +++ b/arch/s390/kernel/debug.c @@ -2,7 +2,7 @@ /* * S/390 debug facility * - * Copyright IBM Corp. 1999, 2012 + * Copyright IBM Corp. 1999, 2020 * * Author(s): Michael Holzheu (holzheu@de.ibm.com), * Holger Smolinski (Holger.Smolinski@de.ibm.com) @@ -433,7 +433,7 @@ static int debug_format_entry(file_private_info_t *p_info) act_entry = (debug_entry_t *) ((char *)id_snap->areas[p_info->act_area] [p_info->act_page] + p_info->act_entry); - if (act_entry->id.stck == 0LL) + if (act_entry->clock == 0LL) goto out; /* empty entry */ if (view->header_proc) len += view->header_proc(id_snap, view, p_info->act_area, @@ -829,12 +829,17 @@ static inline debug_entry_t *get_active_entry(debug_info_t *id) static inline void debug_finish_entry(debug_info_t *id, debug_entry_t *active, int level, int exception) { - active->id.stck = get_tod_clock_fast() - - *(unsigned long long *) &tod_clock_base[1]; - active->id.fields.cpuid = smp_processor_id(); + unsigned char clk[STORE_CLOCK_EXT_SIZE]; + unsigned long timestamp; + + get_tod_clock_ext(clk); + timestamp = *(unsigned long *) &clk[0] >> 4; + timestamp -= TOD_UNIX_EPOCH >> 12; + active->clock = timestamp; + active->cpu = smp_processor_id(); active->caller = __builtin_return_address(0); - active->id.fields.exception = exception; - active->id.fields.level = level; + active->exception = exception; + active->level = level; proceed_active_entry(id); if (exception) proceed_active_area(id); @@ -1398,25 +1403,24 @@ static int debug_hex_ascii_format_fn(debug_info_t *id, struct debug_view *view, int debug_dflt_header_fn(debug_info_t *id, struct debug_view *view, int area, debug_entry_t *entry, char *out_buf) { - unsigned long base, sec, usec; + unsigned long sec, usec; unsigned long caller; unsigned int level; char *except_str; int rc = 0; - level = entry->id.fields.level; - base = (*(unsigned long *) &tod_clock_base[0]) >> 4; - sec = (entry->id.stck >> 12) + base - (TOD_UNIX_EPOCH >> 12); + level = entry->level; + sec = entry->clock; usec = do_div(sec, USEC_PER_SEC); - if (entry->id.fields.exception) + if (entry->exception) except_str = "*"; else except_str = "-"; caller = (unsigned long) entry->caller; - rc += sprintf(out_buf, "%02i %011ld:%06lu %1u %1s %02i %pK ", + rc += sprintf(out_buf, "%02i %011ld:%06lu %1u %1s %04u %pK ", area, sec, usec, level, except_str, - entry->id.fields.cpuid, (void *)caller); + entry->cpu, (void *)caller); return rc; } EXPORT_SYMBOL(debug_dflt_header_fn); From 12bbf0962afa903321f28c454d7b667199ae55da Mon Sep 17 00:00:00 2001 From: Heiko Carstens Date: Wed, 5 Aug 2020 14:50:41 +0200 Subject: [PATCH 079/122] s390/time: remove select CLOCKSOURCE_VALIDATE_LAST_CYCLE again Sven Schnelle reported that setting CLOCKSOURCE_VALIDATE_LAST_CYCLE doesn't make sense: even if our tod clock overflows delta calculation (now - last) with unsigned 64 bit values will still be correct. Therefore revert commit 555701a714f7 ("s390/time: select CLOCKSOURCE_VALIDATE_LAST_CYCLE"). Fixes: 555701a714f7 ("s390/time: select CLOCKSOURCE_VALIDATE_LAST_CYCLE") Reported-by: Sven Schnelle Signed-off-by: Heiko Carstens --- arch/s390/Kconfig | 1 - 1 file changed, 1 deletion(-) diff --git a/arch/s390/Kconfig b/arch/s390/Kconfig index 2a31a5e74e42..3d86e12e8e3c 100644 --- a/arch/s390/Kconfig +++ b/arch/s390/Kconfig @@ -126,7 +126,6 @@ config S390 select HAVE_ARCH_JUMP_LABEL_RELATIVE select HAVE_ARCH_KASAN select HAVE_ARCH_KASAN_VMALLOC - select CLOCKSOURCE_VALIDATE_LAST_CYCLE select CPU_NO_EFFICIENT_FFS if !HAVE_MARCH_Z9_109_FEATURES select HAVE_ARCH_SECCOMP_FILTER select HAVE_ARCH_SOFT_DIRTY From b450eeb0c973ed4125ea91e35613f029337fd28b Mon Sep 17 00:00:00 2001 From: Alexander Gordeev Date: Tue, 4 Aug 2020 20:35:50 +0200 Subject: [PATCH 080/122] s390/numa: move code to arch/s390/kernel Move all code from arch/s390/numa/ to arch/s390/kernel/ since numa.c is the only source file and all others were deleted with the fake NUMA support removal. Signed-off-by: Alexander Gordeev Signed-off-by: Heiko Carstens --- arch/s390/Kbuild | 1 - arch/s390/kernel/Makefile | 1 + arch/s390/{numa => kernel}/numa.c | 0 arch/s390/numa/Makefile | 2 -- 4 files changed, 1 insertion(+), 3 deletions(-) rename arch/s390/{numa => kernel}/numa.c (100%) delete mode 100644 arch/s390/numa/Makefile diff --git a/arch/s390/Kbuild b/arch/s390/Kbuild index e63940bb57cd..8b98c501142d 100644 --- a/arch/s390/Kbuild +++ b/arch/s390/Kbuild @@ -7,5 +7,4 @@ obj-$(CONFIG_S390_HYPFS_FS) += hypfs/ obj-$(CONFIG_APPLDATA_BASE) += appldata/ obj-y += net/ obj-$(CONFIG_PCI) += pci/ -obj-$(CONFIG_NUMA) += numa/ obj-$(CONFIG_ARCH_HAS_KEXEC_PURGATORY) += purgatory/ diff --git a/arch/s390/kernel/Makefile b/arch/s390/kernel/Makefile index a8f136943deb..efca70970761 100644 --- a/arch/s390/kernel/Makefile +++ b/arch/s390/kernel/Makefile @@ -49,6 +49,7 @@ CFLAGS_REMOVE_nospec-branch.o += $(CC_FLAGS_EXPOLINE) obj-$(CONFIG_MODULES) += module.o obj-$(CONFIG_SCHED_TOPOLOGY) += topology.o +obj-$(CONFIG_NUMA) += numa.o obj-$(CONFIG_AUDIT) += audit.o compat-obj-$(CONFIG_AUDIT) += compat_audit.o obj-$(CONFIG_COMPAT) += compat_linux.o compat_signal.o diff --git a/arch/s390/numa/numa.c b/arch/s390/kernel/numa.c similarity index 100% rename from arch/s390/numa/numa.c rename to arch/s390/kernel/numa.c diff --git a/arch/s390/numa/Makefile b/arch/s390/numa/Makefile deleted file mode 100644 index c89d26f4f77d..000000000000 --- a/arch/s390/numa/Makefile +++ /dev/null @@ -1,2 +0,0 @@ -# SPDX-License-Identifier: GPL-2.0 -obj-y += numa.o From 1fc0fd6739bcaf282f5ffb26ca082aea5a45a7b3 Mon Sep 17 00:00:00 2001 From: Bryan Brattlof Date: Tue, 11 Aug 2020 16:17:12 +0000 Subject: [PATCH 081/122] docs: trace: fix a typo emumerated -> enumerated Signed-off-by: Bryan Brattlof Link: https://lore.kernel.org/r/87lfili2d8.fsf@bryanbrattlof.com Signed-off-by: Jonathan Corbet --- Documentation/trace/intel_th.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Documentation/trace/intel_th.rst b/Documentation/trace/intel_th.rst index 70b7126eaeeb..b31818d5f6c5 100644 --- a/Documentation/trace/intel_th.rst +++ b/Documentation/trace/intel_th.rst @@ -58,7 +58,7 @@ Bus and Subdevices For each Intel TH device in the system a bus of its own is created and assigned an id number that reflects the order in which TH -devices were emumerated. All TH subdevices (devices on intel_th bus) +devices were enumerated. All TH subdevices (devices on intel_th bus) begin with this id: 0-gth, 0-msc0, 0-msc1, 0-pti, 0-sth, which is followed by device's name and an optional index. From 7caf3e3f1703650ea7d7221aba3ae4b3a798b89a Mon Sep 17 00:00:00 2001 From: Puranjay Mohan Date: Tue, 11 Aug 2020 00:18:28 +0530 Subject: [PATCH 082/122] Filesystems: Documentation: Replace deprecated :c:func: Usage Replace :c:func: with func() as the previous usage is deprecated. Signed-off-by: Puranjay Mohan Link: https://lore.kernel.org/r/20200810184828.29297-1-puranjay12@gmail.com Signed-off-by: Jonathan Corbet --- Documentation/filesystems/journalling.rst | 66 +++++++++++------------ 1 file changed, 33 insertions(+), 33 deletions(-) diff --git a/Documentation/filesystems/journalling.rst b/Documentation/filesystems/journalling.rst index 58ce6b395206..7e2be2faf653 100644 --- a/Documentation/filesystems/journalling.rst +++ b/Documentation/filesystems/journalling.rst @@ -10,27 +10,27 @@ Details The journalling layer is easy to use. You need to first of all create a journal_t data structure. There are two calls to do this dependent on how you decide to allocate the physical media on which the journal -resides. The :c:func:`jbd2_journal_init_inode` call is for journals stored in -filesystem inodes, or the :c:func:`jbd2_journal_init_dev` call can be used +resides. The jbd2_journal_init_inode() call is for journals stored in +filesystem inodes, or the jbd2_journal_init_dev() call can be used for journal stored on a raw device (in a continuous range of blocks). A journal_t is a typedef for a struct pointer, so when you are finally -finished make sure you call :c:func:`jbd2_journal_destroy` on it to free up +finished make sure you call jbd2_journal_destroy() on it to free up any used kernel memory. Once you have got your journal_t object you need to 'mount' or load the journal file. The journalling layer expects the space for the journal was already allocated and initialized properly by the userspace tools. -When loading the journal you must call :c:func:`jbd2_journal_load` to process +When loading the journal you must call jbd2_journal_load() to process journal contents. If the client file system detects the journal contents does not need to be processed (or even need not have valid contents), it -may call :c:func:`jbd2_journal_wipe` to clear the journal contents before -calling :c:func:`jbd2_journal_load`. +may call jbd2_journal_wipe() to clear the journal contents before +calling jbd2_journal_load(). Note that jbd2_journal_wipe(..,0) calls -:c:func:`jbd2_journal_skip_recovery` for you if it detects any outstanding -transactions in the journal and similarly :c:func:`jbd2_journal_load` will -call :c:func:`jbd2_journal_recover` if necessary. I would advise reading -:c:func:`ext4_load_journal` in fs/ext4/super.c for examples on this stage. +jbd2_journal_skip_recovery() for you if it detects any outstanding +transactions in the journal and similarly jbd2_journal_load() will +call jbd2_journal_recover() if necessary. I would advise reading +ext4_load_journal() in fs/ext4/super.c for examples on this stage. Now you can go ahead and start modifying the underlying filesystem. Almost. @@ -39,57 +39,57 @@ You still need to actually journal your filesystem changes, this is done by wrapping them into transactions. Additionally you also need to wrap the modification of each of the buffers with calls to the journal layer, so it knows what the modifications you are actually making are. To do -this use :c:func:`jbd2_journal_start` which returns a transaction handle. +this use jbd2_journal_start() which returns a transaction handle. -:c:func:`jbd2_journal_start` and its counterpart :c:func:`jbd2_journal_stop`, +jbd2_journal_start() and its counterpart jbd2_journal_stop(), which indicates the end of a transaction are nestable calls, so you can reenter a transaction if necessary, but remember you must call -:c:func:`jbd2_journal_stop` the same number of times as -:c:func:`jbd2_journal_start` before the transaction is completed (or more +jbd2_journal_stop() the same number of times as +jbd2_journal_start() before the transaction is completed (or more accurately leaves the update phase). Ext4/VFS makes use of this feature to simplify handling of inode dirtying, quota support, etc. Inside each transaction you need to wrap the modifications to the individual buffers (blocks). Before you start to modify a buffer you -need to call :c:func:`jbd2_journal_get_create_access()` / -:c:func:`jbd2_journal_get_write_access()` / -:c:func:`jbd2_journal_get_undo_access()` as appropriate, this allows the +need to call jbd2_journal_get_create_access() / +jbd2_journal_get_write_access() / +jbd2_journal_get_undo_access() as appropriate, this allows the journalling layer to copy the unmodified data if it needs to. After all the buffer may be part of a previously uncommitted transaction. At this point you are at last ready to modify a buffer, and once you are have done so you need to call -:c:func:`jbd2_journal_dirty_metadata`. Or if you've asked for access to a +jbd2_journal_dirty_metadata(). Or if you've asked for access to a buffer you now know is now longer required to be pushed back on the -device you can call :c:func:`jbd2_journal_forget` in much the same way as you -might have used :c:func:`bforget` in the past. +device you can call jbd2_journal_forget() in much the same way as you +might have used bforget() in the past. -A :c:func:`jbd2_journal_flush` may be called at any time to commit and +A jbd2_journal_flush() may be called at any time to commit and checkpoint all your transactions. -Then at umount time , in your :c:func:`put_super` you can then call -:c:func:`jbd2_journal_destroy` to clean up your in-core journal object. +Then at umount time , in your put_super() you can then call +jbd2_journal_destroy() to clean up your in-core journal object. Unfortunately there a couple of ways the journal layer can cause a deadlock. The first thing to note is that each task can only have a single outstanding transaction at any one time, remember nothing commits -until the outermost :c:func:`jbd2_journal_stop`. This means you must complete +until the outermost jbd2_journal_stop(). This means you must complete the transaction at the end of each file/inode/address etc. operation you perform, so that the journalling system isn't re-entered on another journal. Since transactions can't be nested/batched across differing journals, and another filesystem other than yours (say ext4) may be modified in a later syscall. -The second case to bear in mind is that :c:func:`jbd2_journal_start` can block +The second case to bear in mind is that jbd2_journal_start() can block if there isn't enough space in the journal for your transaction (based on the passed nblocks param) - when it blocks it merely(!) needs to wait for transactions to complete and be committed from other tasks, so -essentially we are waiting for :c:func:`jbd2_journal_stop`. So to avoid -deadlocks you must treat :c:func:`jbd2_journal_start` / -:c:func:`jbd2_journal_stop` as if they were semaphores and include them in +essentially we are waiting for jbd2_journal_stop(). So to avoid +deadlocks you must treat jbd2_journal_start() / +jbd2_journal_stop() as if they were semaphores and include them in your semaphore ordering rules to prevent -deadlocks. Note that :c:func:`jbd2_journal_extend` has similar blocking -behaviour to :c:func:`jbd2_journal_start` so you can deadlock here just as -easily as on :c:func:`jbd2_journal_start`. +deadlocks. Note that jbd2_journal_extend() has similar blocking +behaviour to jbd2_journal_start() so you can deadlock here just as +easily as on jbd2_journal_start(). Try to reserve the right number of blocks the first time. ;-). This will be the maximum number of blocks you are going to touch in this @@ -116,8 +116,8 @@ called after each transaction commit. You can also use that need processing when the transaction commits. JBD2 also provides a way to block all transaction updates via -:c:func:`jbd2_journal_lock_updates()` / -:c:func:`jbd2_journal_unlock_updates()`. Ext4 uses this when it wants a +jbd2_journal_lock_updates() / +jbd2_journal_unlock_updates(). Ext4 uses this when it wants a window with a clean and stable fs for a moment. E.g. :: From 2d88fc62d4b67cd7b3cf7ed89e4997122548ecdb Mon Sep 17 00:00:00 2001 From: Puranjay Mohan Date: Tue, 11 Aug 2020 00:06:13 +0530 Subject: [PATCH 083/122] Dev-tools: Documentation: Replace deprecated :c:func: Usage Replace :c:func: with func() as the previous usage is deprecated. Signed-off-by: Puranjay Mohan Link: https://lore.kernel.org/r/20200810183613.25643-1-puranjay12@gmail.com Signed-off-by: Jonathan Corbet --- Documentation/dev-tools/kgdb.rst | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/Documentation/dev-tools/kgdb.rst b/Documentation/dev-tools/kgdb.rst index 0e52e966a153..c908ef4d3f04 100644 --- a/Documentation/dev-tools/kgdb.rst +++ b/Documentation/dev-tools/kgdb.rst @@ -316,7 +316,7 @@ driver as a loadable kernel module kgdbwait will not do anything. Kernel parameter: ``kgdbcon`` ----------------------------- -The ``kgdbcon`` feature allows you to see :c:func:`printk` messages inside gdb +The ``kgdbcon`` feature allows you to see printk() messages inside gdb while gdb is connected to the kernel. Kdb does not make use of the kgdbcon feature. @@ -432,7 +432,7 @@ This is a quick example of how to use kdb. ``ps`` Displays only the active processes ``ps A`` Shows all the processes ``summary`` Shows kernel version info and memory usage - ``bt`` Get a backtrace of the current process using :c:func:`dump_stack` + ``bt`` Get a backtrace of the current process using dump_stack() ``dmesg`` View the kernel syslog buffer ``go`` Continue the system =========== ================================================================= @@ -724,7 +724,7 @@ The kernel debugger is organized into a number of components: The arch-specific portion implements: - contains an arch-specific trap catcher which invokes - :c:func:`kgdb_handle_exception` to start kgdb about doing its work + kgdb_handle_exception() to start kgdb about doing its work - translation to and from gdb specific packet format to :c:type:`pt_regs` @@ -769,7 +769,7 @@ The kernel debugger is organized into a number of components: config. Later run ``modprobe kdb_hello`` and the next time you enter the kdb shell, you can run the ``hello`` command. - - The implementation for :c:func:`kdb_printf` which emits messages directly + - The implementation for kdb_printf() which emits messages directly to I/O drivers, bypassing the kernel log. - SW / HW breakpoint management for the kdb shell @@ -875,7 +875,7 @@ kernel when ``CONFIG_KDB_KEYBOARD=y`` is set in the kernel configuration. The core polled keyboard driver for PS/2 type keyboards is in ``drivers/char/kdb_keyboard.c``. This driver is hooked into the debug core when kgdboc populates the callback in the array called -:c:type:`kdb_poll_funcs[]`. The :c:func:`kdb_get_kbd_char` is the top-level +:c:type:`kdb_poll_funcs[]`. The kdb_get_kbd_char() is the top-level function which polls hardware for single character input. kgdboc and kms @@ -887,10 +887,10 @@ that you have a video driver which has a frame buffer console and atomic kernel mode setting support. Every time the kernel debugger is entered it calls -:c:func:`kgdboc_pre_exp_handler` which in turn calls :c:func:`con_debug_enter` +kgdboc_pre_exp_handler() which in turn calls con_debug_enter() in the virtual console layer. On resuming kernel execution, the kernel -debugger calls :c:func:`kgdboc_post_exp_handler` which in turn calls -:c:func:`con_debug_leave`. +debugger calls kgdboc_post_exp_handler() which in turn calls +con_debug_leave(). Any video driver that wants to be compatible with the kernel debugger and the atomic kms callbacks must implement the ``mode_set_base_atomic``, From ec8213f8900573cbbf250b240f9aebd78366b251 Mon Sep 17 00:00:00 2001 From: Puranjay Mohan Date: Tue, 11 Aug 2020 00:00:19 +0530 Subject: [PATCH 084/122] Core-api: Documentation: Replace deprecated :c:func: Usage Replace :c:func: with func() as the previous usage is deprecated. Signed-off-by: Puranjay Mohan Reviewed-by: Matthew Wilcox (Oracle) Link: https://lore.kernel.org/r/20200810183019.22170-1-puranjay12@gmail.com Signed-off-by: Jonathan Corbet --- Documentation/core-api/idr.rst | 32 ++++++++++++++++---------------- 1 file changed, 16 insertions(+), 16 deletions(-) diff --git a/Documentation/core-api/idr.rst b/Documentation/core-api/idr.rst index a2738050c4f0..2eb5afdb9931 100644 --- a/Documentation/core-api/idr.rst +++ b/Documentation/core-api/idr.rst @@ -20,48 +20,48 @@ only ID allocation, and as a result is much more memory-efficient. IDR usage ========= -Start by initialising an IDR, either with :c:func:`DEFINE_IDR` -for statically allocated IDRs or :c:func:`idr_init` for dynamically +Start by initialising an IDR, either with DEFINE_IDR() +for statically allocated IDRs or idr_init() for dynamically allocated IDRs. -You can call :c:func:`idr_alloc` to allocate an unused ID. Look up -the pointer you associated with the ID by calling :c:func:`idr_find` -and free the ID by calling :c:func:`idr_remove`. +You can call idr_alloc() to allocate an unused ID. Look up +the pointer you associated with the ID by calling idr_find() +and free the ID by calling idr_remove(). If you need to change the pointer associated with an ID, you can call -:c:func:`idr_replace`. One common reason to do this is to reserve an +idr_replace(). One common reason to do this is to reserve an ID by passing a ``NULL`` pointer to the allocation function; initialise the object with the reserved ID and finally insert the initialised object into the IDR. Some users need to allocate IDs larger than ``INT_MAX``. So far all of these users have been content with a ``UINT_MAX`` limit, and they use -:c:func:`idr_alloc_u32`. If you need IDs that will not fit in a u32, +idr_alloc_u32(). If you need IDs that will not fit in a u32, we will work with you to address your needs. If you need to allocate IDs sequentially, you can use -:c:func:`idr_alloc_cyclic`. The IDR becomes less efficient when dealing +idr_alloc_cyclic(). The IDR becomes less efficient when dealing with larger IDs, so using this function comes at a slight cost. To perform an action on all pointers used by the IDR, you can -either use the callback-based :c:func:`idr_for_each` or the -iterator-style :c:func:`idr_for_each_entry`. You may need to use -:c:func:`idr_for_each_entry_continue` to continue an iteration. You can -also use :c:func:`idr_get_next` if the iterator doesn't fit your needs. +either use the callback-based idr_for_each() or the +iterator-style idr_for_each_entry(). You may need to use +idr_for_each_entry_continue() to continue an iteration. You can +also use idr_get_next() if the iterator doesn't fit your needs. -When you have finished using an IDR, you can call :c:func:`idr_destroy` +When you have finished using an IDR, you can call idr_destroy() to release the memory used by the IDR. This will not free the objects pointed to from the IDR; if you want to do that, use one of the iterators to do it. -You can use :c:func:`idr_is_empty` to find out whether there are any +You can use idr_is_empty() to find out whether there are any IDs currently allocated. If you need to take a lock while allocating a new ID from the IDR, you may need to pass a restrictive set of GFP flags, which can lead to the IDR being unable to allocate memory. To work around this, -you can call :c:func:`idr_preload` before taking the lock, and then -:c:func:`idr_preload_end` after the allocation. +you can call idr_preload() before taking the lock, and then +idr_preload_end() after the allocation. .. kernel-doc:: include/linux/idr.h :doc: idr sync From a5019c7f563154bec1cc4026e0883f86126fb2f4 Mon Sep 17 00:00:00 2001 From: Sumera Priyadarsini Date: Tue, 11 Aug 2020 05:53:50 +0530 Subject: [PATCH 085/122] documentation: coccinelle: Improve command example for make C={1,2} Modify coccinelle documentation to further clarify the usage of the makefile C variable by coccicheck. Signed-off-by: Sumera Priyadarsini Acked-by: Julia Lawall Link: https://lore.kernel.org/r/20200811002350.5553-1-sylphrenadin@gmail.com Signed-off-by: Jonathan Corbet --- Documentation/dev-tools/coccinelle.rst | 15 +++++++++++---- 1 file changed, 11 insertions(+), 4 deletions(-) diff --git a/Documentation/dev-tools/coccinelle.rst b/Documentation/dev-tools/coccinelle.rst index 6c791af1c859..74c5e6aeeff5 100644 --- a/Documentation/dev-tools/coccinelle.rst +++ b/Documentation/dev-tools/coccinelle.rst @@ -175,13 +175,20 @@ For example, to check drivers/net/wireless/ one may write:: make coccicheck M=drivers/net/wireless/ To apply Coccinelle on a file basis, instead of a directory basis, the -following command may be used:: +C variable is used by the makefile to select which files to work with. +This variable can be used to run scripts for the entire kernel, a +specific directory, or for a single file. - make C=1 CHECK="scripts/coccicheck" +For example, to check drivers/bluetooth/bfusb.c, the value 1 is +passed to the C variable to check files that make considers +need to be compiled.:: -To check only newly edited code, use the value 2 for the C flag, i.e.:: + make C=1 CHECK=scripts/coccicheck drivers/bluetooth/bfusb.o - make C=2 CHECK="scripts/coccicheck" +The value 2 is passed to the C variable to check files regardless of +whether they need to be compiled or not.:: + + make C=2 CHECK=scripts/coccicheck drivers/bluetooth/bfusb.o In these modes, which work on a file basis, there is no information about semantic patches displayed, and no commit message proposed. From f30c3ff3f0e8305d6c1a210df6d588a13333b8f7 Mon Sep 17 00:00:00 2001 From: Tobias Klauser Date: Mon, 10 Aug 2020 11:50:00 +0200 Subject: [PATCH 086/122] Documentation/features: refresh RISC-V arch support files Support for these was added by the following commits: f2c9699f6555 ("riscv: Add STACKPROTECTOR supported") 3c4697982982 ("riscv: Enable LOCKDEP_SUPPORT & fixup TRACE_IRQFLAGS_SUPPORT"). ed48b297fe21 ("riscv: Enable context tracking") cbb3d91d3bcf ("riscv: Add kmemleak support") Signed-off-by: Tobias Klauser Link: https://lore.kernel.org/r/20200810095000.32092-1-tklauser@distanz.ch Signed-off-by: Jonathan Corbet --- Documentation/features/debug/kmemleak/arch-support.txt | 2 +- Documentation/features/debug/stackprotector/arch-support.txt | 2 +- Documentation/features/locking/lockdep/arch-support.txt | 2 +- Documentation/features/time/context-tracking/arch-support.txt | 2 +- 4 files changed, 4 insertions(+), 4 deletions(-) diff --git a/Documentation/features/debug/kmemleak/arch-support.txt b/Documentation/features/debug/kmemleak/arch-support.txt index b7e4f3608838..2db76807ec6f 100644 --- a/Documentation/features/debug/kmemleak/arch-support.txt +++ b/Documentation/features/debug/kmemleak/arch-support.txt @@ -23,7 +23,7 @@ | openrisc: | TODO | | parisc: | TODO | | powerpc: | ok | - | riscv: | TODO | + | riscv: | ok | | s390: | ok | | sh: | ok | | sparc: | ok | diff --git a/Documentation/features/debug/stackprotector/arch-support.txt b/Documentation/features/debug/stackprotector/arch-support.txt index 12410f606edc..79233416c05f 100644 --- a/Documentation/features/debug/stackprotector/arch-support.txt +++ b/Documentation/features/debug/stackprotector/arch-support.txt @@ -23,7 +23,7 @@ | openrisc: | TODO | | parisc: | TODO | | powerpc: | ok | - | riscv: | TODO | + | riscv: | ok | | s390: | TODO | | sh: | ok | | sparc: | TODO | diff --git a/Documentation/features/locking/lockdep/arch-support.txt b/Documentation/features/locking/lockdep/arch-support.txt index 98cb9d85c55d..56c7cf1404da 100644 --- a/Documentation/features/locking/lockdep/arch-support.txt +++ b/Documentation/features/locking/lockdep/arch-support.txt @@ -23,7 +23,7 @@ | openrisc: | ok | | parisc: | TODO | | powerpc: | ok | - | riscv: | TODO | + | riscv: | ok | | s390: | ok | | sh: | ok | | sparc: | ok | diff --git a/Documentation/features/time/context-tracking/arch-support.txt b/Documentation/features/time/context-tracking/arch-support.txt index 048bfb6d3872..0d211d737c9c 100644 --- a/Documentation/features/time/context-tracking/arch-support.txt +++ b/Documentation/features/time/context-tracking/arch-support.txt @@ -23,7 +23,7 @@ | openrisc: | TODO | | parisc: | TODO | | powerpc: | ok | - | riscv: | TODO | + | riscv: | ok | | s390: | TODO | | sh: | TODO | | sparc: | ok | From be3a5b0e8b8762ef2007d1c6780f273cbd7252d4 Mon Sep 17 00:00:00 2001 From: Randy Dunlap Date: Sun, 9 Aug 2020 19:49:41 -0700 Subject: [PATCH 087/122] Doc: admin-guide: use correct legends in kernel-parameters.txt Documentation/admin-guide/kernel-parameters.rst includes a legend telling us what configurations or hardware platforms are relevant for certain boot options. For X86, it is spelled "X86" and for x86_64, it is spelled "X86-64", so make corrections for those. Signed-off-by: Randy Dunlap Cc: Jonathan Corbet Cc: linux-doc@vger.kernel.org Cc: Thomas Gleixner Cc: Ingo Molnar Cc: Borislav Petkov Cc: x86@kernel.org Link: https://lore.kernel.org/r/20200810024941.30231-1-rdunlap@infradead.org Signed-off-by: Jonathan Corbet --- .../admin-guide/kernel-parameters.txt | 36 +++++++++---------- 1 file changed, 18 insertions(+), 18 deletions(-) diff --git a/Documentation/admin-guide/kernel-parameters.txt b/Documentation/admin-guide/kernel-parameters.txt index d325726ff425..8ccf1985a2a3 100644 --- a/Documentation/admin-guide/kernel-parameters.txt +++ b/Documentation/admin-guide/kernel-parameters.txt @@ -719,7 +719,7 @@ memory region [offset, offset + size] for that kernel image. If '@offset' is omitted, then a suitable offset is selected automatically. - [KNL, x86_64] select a region under 4G first, and + [KNL, X86-64] Select a region under 4G first, and fall back to reserve region above 4G when '@offset' hasn't been specified. See Documentation/admin-guide/kdump/kdump.rst for further details. @@ -732,14 +732,14 @@ Documentation/admin-guide/kdump/kdump.rst for an example. crashkernel=size[KMG],high - [KNL, x86_64] range could be above 4G. Allow kernel + [KNL, X86-64] range could be above 4G. Allow kernel to allocate physical memory region from top, so could be above 4G if system have more than 4G ram installed. Otherwise memory region will be allocated below 4G, if available. It will be ignored if crashkernel=X is specified. crashkernel=size[KMG],low - [KNL, x86_64] range under 4G. When crashkernel=X,high + [KNL, X86-64] range under 4G. When crashkernel=X,high is passed, kernel could allocate physical memory region above 4G, that cause second kernel crash on system that require some amount of low memory, e.g. swiotlb @@ -1403,7 +1403,7 @@ gamma= [HW,DRM] - gart_fix_e820= [X86_64] disable the fix e820 for K8 GART + gart_fix_e820= [X86-64] disable the fix e820 for K8 GART Format: off | on default: on @@ -1790,7 +1790,7 @@ Format: 0 | 1 Default set by CONFIG_INIT_ON_FREE_DEFAULT_ON. - init_pkru= [x86] Specify the default memory protection keys rights + init_pkru= [X86] Specify the default memory protection keys rights register contents for all processes. 0x55555554 by default (disallow access to all but pkey 0). Can override in debugfs after boot. @@ -1798,7 +1798,7 @@ inport.irq= [HW] Inport (ATI XL and Microsoft) busmouse driver Format: - int_pln_enable [x86] Enable power limit notification interrupt + int_pln_enable [X86] Enable power limit notification interrupt integrity_audit=[IMA] Format: { "0" | "1" } @@ -1816,7 +1816,7 @@ bypassed by not enabling DMAR with this option. In this case, gfx device will use physical address for DMA. - forcedac [x86_64] + forcedac [X86-64] With this option iommu will not optimize to look for io virtual address below 32-bit forcing dual address cycle on pci bus for cards supporting greater @@ -1901,7 +1901,7 @@ strict regions from userspace. relaxed - iommu= [x86] + iommu= [X86] off force noforce @@ -1911,8 +1911,8 @@ merge nomerge soft - pt [x86] - nopt [x86] + pt [X86] + nopt [X86] nobypass [PPC/POWERNV] Disable IOMMU bypass, using IOMMU for PCI devices. @@ -2055,21 +2055,21 @@ iucv= [HW,NET] - ivrs_ioapic [HW,X86_64] + ivrs_ioapic [HW,X86-64] Provide an override to the IOAPIC-ID<->DEVICE-ID mapping provided in the IVRS ACPI table. For example, to map IOAPIC-ID decimal 10 to PCI device 00:14.0 write the parameter as: ivrs_ioapic[10]=00:14.0 - ivrs_hpet [HW,X86_64] + ivrs_hpet [HW,X86-64] Provide an override to the HPET-ID<->DEVICE-ID mapping provided in the IVRS ACPI table. For example, to map HPET-ID decimal 0 to PCI device 00:14.0 write the parameter as: ivrs_hpet[0]=00:14.0 - ivrs_acpihid [HW,X86_64] + ivrs_acpihid [HW,X86-64] Provide an override to the ACPI-HID:UID<->DEVICE-ID mapping provided in the IVRS ACPI table. For example, to map UART-HID:UID AMD0020:0 to @@ -2346,7 +2346,7 @@ lapic [X86-32,APIC] Enable the local APIC even if BIOS disabled it. - lapic= [x86,APIC] "notscdeadline" Do not use TSC deadline + lapic= [X86,APIC] "notscdeadline" Do not use TSC deadline value for LAPIC timer one-shot implementation. Default back to the programmable timer unit in the LAPIC. @@ -3162,12 +3162,12 @@ register save and restore. The kernel will only save legacy floating-point registers on task switch. - nohugeiomap [KNL,x86,PPC] Disable kernel huge I/O mappings. + nohugeiomap [KNL,X86,PPC] Disable kernel huge I/O mappings. nosmt [KNL,S390] Disable symmetric multithreading (SMT). Equivalent to smt=1. - [KNL,x86] Disable symmetric multithreading (SMT). + [KNL,X86] Disable symmetric multithreading (SMT). nosmt=force: Force disable SMT, cannot be undone via the sysfs control file. @@ -3929,7 +3929,7 @@ pt. [PARIDE] See Documentation/admin-guide/blockdev/paride.rst. - pti= [X86_64] Control Page Table Isolation of user and + pti= [X86-64] Control Page Table Isolation of user and kernel address spaces. Disabling this feature removes hardening, but improves performance of system calls and interrupts. @@ -3941,7 +3941,7 @@ Not specifying this option is equivalent to pti=auto. - nopti [X86_64] + nopti [X86-64] Equivalent to pti=off pty.legacy_count= From 502b675050a506fb966c6f273d2ce42e5d8ddb07 Mon Sep 17 00:00:00 2001 From: Remi Andruccioli Date: Sat, 8 Aug 2020 18:31:23 +0200 Subject: [PATCH 088/122] docs: cdrom: Fix a typo and rst markup "The capability fags" should be "The capability flags". In rst markup, a incorrect markup expression is causing bad rendering in Sphinx output. Replace the erroneous single quote by a backquote. Signed-off-by: Remi Andruccioli Link: https://lore.kernel.org/r/20200808163123.17643-1-remi.andruccioli@gmail.com Signed-off-by: Jonathan Corbet --- Documentation/cdrom/cdrom-standard.rst | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/Documentation/cdrom/cdrom-standard.rst b/Documentation/cdrom/cdrom-standard.rst index dde4f7f7fdbf..5a1305638be7 100644 --- a/Documentation/cdrom/cdrom-standard.rst +++ b/Documentation/cdrom/cdrom-standard.rst @@ -571,7 +571,7 @@ phase. Currently, the capabilities are any of:: CDC_DRIVE_STATUS /* driver implements drive status */ The capability flag is declared *const*, to prevent drivers from -accidentally tampering with the contents. The capability fags actually +accidentally tampering with the contents. The capability flags actually inform `cdrom.c` of what the driver can do. If the drive found by the driver does not have the capability, is can be masked out by the *cdrom_device_info* variable *mask*. For instance, the SCSI CD-ROM @@ -750,7 +750,7 @@ Description of routines in `cdrom.c` Only a few routines in `cdrom.c` are exported to the drivers. In this new section we will discuss these, as well as the functions that `take -over' the CD-ROM interface to the kernel. The header file belonging +over` the CD-ROM interface to the kernel. The header file belonging to `cdrom.c` is called `cdrom.h`. Formerly, some of the contents of this file were placed in the file `ucdrom.h`, but this file has now been merged back into `cdrom.h`. From 2824a3669fb28324a76bba1d2203f2f11b126124 Mon Sep 17 00:00:00 2001 From: Kees Cook Date: Sat, 8 Aug 2020 00:14:36 -0700 Subject: [PATCH 089/122] mailmap: Update comments for with format and more detalis Without having first read the git-shortlog man-page, the format of .mailmap may not be immediately obvious. Add comments with pointers to the man-page, along with other details. Signed-off-by: Kees Cook Link: https://lore.kernel.org/r/202008080013.58EBD83@keescook Signed-off-by: Jonathan Corbet --- .mailmap | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/.mailmap b/.mailmap index 9b0b9ec3e53e..787df6ec8908 100644 --- a/.mailmap +++ b/.mailmap @@ -2,11 +2,16 @@ # This list is used by git-shortlog to fix a few botched name translations # in the git archive, either because the author's full name was messed up # and/or not always written the same way, making contributions from the -# same person appearing not to be so or badly displayed. +# same person appearing not to be so or badly displayed. Also allows for +# old email addresses to map to new email addresses. # +# For format details, see "MAPPING AUTHORS" in "man git-shortlog". +# +# Please keep this list dictionary sorted. +# +# This comment is parsed by git-shortlog: # repo-abbrev: /pub/scm/linux/kernel/git/ # - Aaron Durbin Adam Oldham Adam Radford From cca73e4946c4a8d2ce3395207d8a0b5c565b530b Mon Sep 17 00:00:00 2001 From: Billy Wilson Date: Thu, 6 Aug 2020 17:17:54 -0600 Subject: [PATCH 090/122] docs: Correct the release date of 5.2 stable A table lists the 5.2 stable release date as September 15, but it was released on July 7. This may confuse a reader who is trying to understand the stable update release cycle. Signed-off-by: Billy Wilson Link: https://lore.kernel.org/r/20200806231754.7735-1-billy_wilson@byu.edu Signed-off-by: Jonathan Corbet --- Documentation/process/2.Process.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Documentation/process/2.Process.rst b/Documentation/process/2.Process.rst index 3588f48841eb..4ae1e0f600c1 100644 --- a/Documentation/process/2.Process.rst +++ b/Documentation/process/2.Process.rst @@ -113,7 +113,7 @@ than one development cycle past their initial release. So, for example, the 5.2 kernel's history looked like this (all dates in 2019): ============== =============================== - September 15 5.2 stable release + July 7 5.2 stable release July 14 5.2.1 July 21 5.2.2 July 26 5.2.3 From 7033a95ab411db94f41b7178e52ae0e434356cb8 Mon Sep 17 00:00:00 2001 From: Stephen Kitt Date: Thu, 6 Aug 2020 18:14:56 +0200 Subject: [PATCH 091/122] docs: remove the 2.6 "Upgrading I2C Drivers" guide All the drivers have long since been upgraded, and all the important information here is also included in the "Implementing I2C device drivers" guide. Signed-off-by: Stephen Kitt Reviewed-by: Wolfram Sang Link: https://lore.kernel.org/r/20200806161456.8680-1-steve@sk2.org Signed-off-by: Jonathan Corbet --- Documentation/i2c/index.rst | 1 - Documentation/i2c/upgrading-clients.rst | 285 ------------------------ 2 files changed, 286 deletions(-) delete mode 100644 Documentation/i2c/upgrading-clients.rst diff --git a/Documentation/i2c/index.rst b/Documentation/i2c/index.rst index fee4744475df..8a2ad3845191 100644 --- a/Documentation/i2c/index.rst +++ b/Documentation/i2c/index.rst @@ -62,7 +62,6 @@ Legacy documentation .. toctree:: :maxdepth: 1 - upgrading-clients old-module-parameters .. only:: subproject and html diff --git a/Documentation/i2c/upgrading-clients.rst b/Documentation/i2c/upgrading-clients.rst deleted file mode 100644 index 1708090d7b8f..000000000000 --- a/Documentation/i2c/upgrading-clients.rst +++ /dev/null @@ -1,285 +0,0 @@ -================================================= -Upgrading I2C Drivers to the new 2.6 Driver Model -================================================= - -Ben Dooks - -Introduction ------------- - -This guide outlines how to alter existing Linux 2.6 client drivers from -the old to the new binding methods. - - -Example old-style driver ------------------------- - -:: - - struct example_state { - struct i2c_client client; - .... - }; - - static struct i2c_driver example_driver; - - static unsigned short ignore[] = { I2C_CLIENT_END }; - static unsigned short normal_addr[] = { OUR_ADDR, I2C_CLIENT_END }; - - I2C_CLIENT_INSMOD; - - static int example_attach(struct i2c_adapter *adap, int addr, int kind) - { - struct example_state *state; - struct device *dev = &adap->dev; /* to use for dev_ reports */ - int ret; - - state = kzalloc(sizeof(struct example_state), GFP_KERNEL); - if (state == NULL) { - dev_err(dev, "failed to create our state\n"); - return -ENOMEM; - } - - example->client.addr = addr; - example->client.flags = 0; - example->client.adapter = adap; - - i2c_set_clientdata(&state->i2c_client, state); - strscpy(client->i2c_client.name, "example", sizeof(client->i2c_client.name)); - - ret = i2c_attach_client(&state->i2c_client); - if (ret < 0) { - dev_err(dev, "failed to attach client\n"); - kfree(state); - return ret; - } - - dev = &state->i2c_client.dev; - - /* rest of the initialisation goes here. */ - - dev_info(dev, "example client created\n"); - - return 0; - } - - static int example_detach(struct i2c_client *client) - { - struct example_state *state = i2c_get_clientdata(client); - - i2c_detach_client(client); - kfree(state); - return 0; - } - - static int example_attach_adapter(struct i2c_adapter *adap) - { - return i2c_probe(adap, &addr_data, example_attach); - } - - static struct i2c_driver example_driver = { - .driver = { - .owner = THIS_MODULE, - .name = "example", - .pm = &example_pm_ops, - }, - .attach_adapter = example_attach_adapter, - .detach_client = example_detach, - }; - - -Updating the client -------------------- - -The new style binding model will check against a list of supported -devices and their associated address supplied by the code registering -the busses. This means that the driver .attach_adapter and -.detach_client methods can be removed, along with the addr_data, -as follows:: - - - static struct i2c_driver example_driver; - - - static unsigned short ignore[] = { I2C_CLIENT_END }; - - static unsigned short normal_addr[] = { OUR_ADDR, I2C_CLIENT_END }; - - - I2C_CLIENT_INSMOD; - - - static int example_attach_adapter(struct i2c_adapter *adap) - - { - - return i2c_probe(adap, &addr_data, example_attach); - - } - - static struct i2c_driver example_driver = { - - .attach_adapter = example_attach_adapter, - - .detach_client = example_detach, - } - -Add the probe and remove methods to the i2c_driver, as so:: - - static struct i2c_driver example_driver = { - + .probe = example_probe, - + .remove = example_remove, - } - -Change the example_attach method to accept the new parameters -which include the i2c_client that it will be working with:: - - - static int example_attach(struct i2c_adapter *adap, int addr, int kind) - + static int example_probe(struct i2c_client *client, - + const struct i2c_device_id *id) - -Change the name of example_attach to example_probe to align it with the -i2c_driver entry names. The rest of the probe routine will now need to be -changed as the i2c_client has already been setup for use. - -The necessary client fields have already been setup before -the probe function is called, so the following client setup -can be removed:: - - - example->client.addr = addr; - - example->client.flags = 0; - - example->client.adapter = adap; - - - - strscpy(client->i2c_client.name, "example", sizeof(client->i2c_client.name)); - -The i2c_set_clientdata is now:: - - - i2c_set_clientdata(&state->client, state); - + i2c_set_clientdata(client, state); - -The call to i2c_attach_client is no longer needed, if the probe -routine exits successfully, then the driver will be automatically -attached by the core. Change the probe routine as so:: - - - ret = i2c_attach_client(&state->i2c_client); - - if (ret < 0) { - - dev_err(dev, "failed to attach client\n"); - - kfree(state); - - return ret; - - } - - -Remove the storage of 'struct i2c_client' from the 'struct example_state' -as we are provided with the i2c_client in our example_probe. Instead we -store a pointer to it for when it is needed. - -:: - - struct example_state { - - struct i2c_client client; - + struct i2c_client *client; - -the new i2c client as so:: - - - struct device *dev = &adap->dev; /* to use for dev_ reports */ - + struct device *dev = &i2c_client->dev; /* to use for dev_ reports */ - -And remove the change after our client is attached, as the driver no -longer needs to register a new client structure with the core:: - - - dev = &state->i2c_client.dev; - -In the probe routine, ensure that the new state has the client stored -in it:: - - static int example_probe(struct i2c_client *i2c_client, - const struct i2c_device_id *id) - { - struct example_state *state; - struct device *dev = &i2c_client->dev; - int ret; - - state = kzalloc(sizeof(struct example_state), GFP_KERNEL); - if (state == NULL) { - dev_err(dev, "failed to create our state\n"); - return -ENOMEM; - } - - + state->client = i2c_client; - -Update the detach method, by changing the name to _remove and -to delete the i2c_detach_client call. It is possible that you -can also remove the ret variable as it is not needed for any -of the core functions. - -:: - - - static int example_detach(struct i2c_client *client) - + static int example_remove(struct i2c_client *client) - { - struct example_state *state = i2c_get_clientdata(client); - - - i2c_detach_client(client); - -And finally ensure that we have the correct ID table for the i2c-core -and other utilities:: - - + struct i2c_device_id example_idtable[] = { - + { "example", 0 }, - + { } - +}; - + - +MODULE_DEVICE_TABLE(i2c, example_idtable); - - static struct i2c_driver example_driver = { - .driver = { - .owner = THIS_MODULE, - .name = "example", - }, - + .id_table = example_ids, - - -Our driver should now look like this:: - - struct example_state { - struct i2c_client *client; - .... - }; - - static int example_probe(struct i2c_client *client, - const struct i2c_device_id *id) - { - struct example_state *state; - struct device *dev = &client->dev; - - state = kzalloc(sizeof(struct example_state), GFP_KERNEL); - if (state == NULL) { - dev_err(dev, "failed to create our state\n"); - return -ENOMEM; - } - - state->client = client; - i2c_set_clientdata(client, state); - - /* rest of the initialisation goes here. */ - - dev_info(dev, "example client created\n"); - - return 0; - } - - static int example_remove(struct i2c_client *client) - { - struct example_state *state = i2c_get_clientdata(client); - - kfree(state); - return 0; - } - - static struct i2c_device_id example_idtable[] = { - { "example", 0 }, - { } - }; - - MODULE_DEVICE_TABLE(i2c, example_idtable); - - static struct i2c_driver example_driver = { - .driver = { - .owner = THIS_MODULE, - .name = "example", - .pm = &example_pm_ops, - }, - .id_table = example_idtable, - .probe = example_probe, - .remove = example_remove, - }; From ccfaed7bd443340d1cac7b6af7bb588766799964 Mon Sep 17 00:00:00 2001 From: Lukas Bulwahn Date: Sun, 2 Aug 2020 18:21:01 +0200 Subject: [PATCH 092/122] doc/zh_CN: fix title heading markup in admin-guide cpu-load Documentation generation warns: Documentation/translations/zh_CN/admin-guide/cpu-load.rst:1: WARNING: Title overline too short. Extend title heading markup by one. It was just off by one. Fixes: e210c66d567c ("doc/zh_CN: add cpu-load Chinese version") Signed-off-by: Lukas Bulwahn Acked-by: Tao Zhou Link: https://lore.kernel.org/r/20200802162101.18875-1-lukas.bulwahn@gmail.com Signed-off-by: Jonathan Corbet --- Documentation/translations/zh_CN/admin-guide/cpu-load.rst | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/Documentation/translations/zh_CN/admin-guide/cpu-load.rst b/Documentation/translations/zh_CN/admin-guide/cpu-load.rst index 0116d0477799..c972731c0e57 100644 --- a/Documentation/translations/zh_CN/admin-guide/cpu-load.rst +++ b/Documentation/translations/zh_CN/admin-guide/cpu-load.rst @@ -1,6 +1,6 @@ -======= +======== CPU 负载 -======= +======== Linux通过``/proc/stat``和``/proc/uptime``导出各种信息,用户空间工具 如top(1)使用这些信息计算系统花费在某个特定状态的平均时间。 From e176b7a3054eef44a22f6ca3d14168dcf9bad21e Mon Sep 17 00:00:00 2001 From: Lukas Bulwahn Date: Sun, 2 Aug 2020 18:19:56 +0200 Subject: [PATCH 093/122] doc/zh_CN: resolve undefined label warning in admin-guide index Documentation generation warns: Documentation/translations/zh_CN/admin-guide/index.rst:3: WARNING: undefined label: documentation/admin-guide/index.rst Use doc reference for .rst files to resolve the warning. Fixes: 37a607cf2318 ("doc/zh_CN: add admin-guide index") Signed-off-by: Lukas Bulwahn Link: https://lore.kernel.org/r/20200802161956.18268-1-lukas.bulwahn@gmail.com Signed-off-by: Jonathan Corbet --- Documentation/translations/zh_CN/admin-guide/index.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Documentation/translations/zh_CN/admin-guide/index.rst b/Documentation/translations/zh_CN/admin-guide/index.rst index 7d502fa5da64..ed5ab7e37f38 100644 --- a/Documentation/translations/zh_CN/admin-guide/index.rst +++ b/Documentation/translations/zh_CN/admin-guide/index.rst @@ -1,6 +1,6 @@ .. include:: ../disclaimer-zh_CN.rst -:Original: :ref:`Documentation/admin-guide/index.rst` +:Original: :doc:`../../../admin-guide/index` :Translator: Alex Shi From b06c19d9f827f6743122795570bfc0c72db482b0 Mon Sep 17 00:00:00 2001 From: Ira Weiny Date: Mon, 10 Aug 2020 17:02:58 -0700 Subject: [PATCH 094/122] net/tls: Fix kmap usage When MSG_OOB is specified to tls_device_sendpage() the mapped page is never unmapped. Hold off mapping the page until after the flags are checked and the page is actually needed. Fixes: e8f69799810c ("net/tls: Add generic NIC offload infrastructure") Signed-off-by: Ira Weiny Reviewed-by: Jakub Kicinski Signed-off-by: David S. Miller --- net/tls/tls_device.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/net/tls/tls_device.c b/net/tls/tls_device.c index 18fa6067bb7f..b74e2741f74f 100644 --- a/net/tls/tls_device.c +++ b/net/tls/tls_device.c @@ -561,7 +561,7 @@ int tls_device_sendpage(struct sock *sk, struct page *page, { struct tls_context *tls_ctx = tls_get_ctx(sk); struct iov_iter msg_iter; - char *kaddr = kmap(page); + char *kaddr; struct kvec iov; int rc; @@ -576,6 +576,7 @@ int tls_device_sendpage(struct sock *sk, struct page *page, goto out; } + kaddr = kmap(page); iov.iov_base = kaddr + offset; iov.iov_len = size; iov_iter_kvec(&msg_iter, WRITE, &iov, 1, size); From c79f428d6f14e754503fa7a3145d62039ccd05c9 Mon Sep 17 00:00:00 2001 From: Xie He Date: Sat, 8 Aug 2020 19:35:48 -0700 Subject: [PATCH 095/122] drivers/net/wan/x25_asy: Added needed_headroom and a skb->len check 1. Added a skb->len check This driver expects upper layers to include a pseudo header of 1 byte when passing down a skb for transmission. This driver will read this 1-byte header. This patch added a skb->len check before reading the header to make sure the header exists. 2. Added needed_headroom When this driver transmits data, first this driver will remove a pseudo header of 1 byte, then the lapb module will prepend the LAPB header of 2 or 3 bytes. So the value of needed_headroom in this driver should be 3 - 1. Cc: Willem de Bruijn Cc: Martin Schiller Signed-off-by: Xie He Acked-by: Willem de Bruijn Signed-off-by: David S. Miller --- drivers/net/wan/x25_asy.c | 14 ++++++++++++++ 1 file changed, 14 insertions(+) diff --git a/drivers/net/wan/x25_asy.c b/drivers/net/wan/x25_asy.c index 84640a0c13f3..de7984463595 100644 --- a/drivers/net/wan/x25_asy.c +++ b/drivers/net/wan/x25_asy.c @@ -307,6 +307,14 @@ static netdev_tx_t x25_asy_xmit(struct sk_buff *skb, return NETDEV_TX_OK; } + /* There should be a pseudo header of 1 byte added by upper layers. + * Check to make sure it is there before reading it. + */ + if (skb->len < 1) { + kfree_skb(skb); + return NETDEV_TX_OK; + } + switch (skb->data[0]) { case X25_IFACE_DATA: break; @@ -752,6 +760,12 @@ static void x25_asy_setup(struct net_device *dev) dev->type = ARPHRD_X25; dev->tx_queue_len = 10; + /* When transmitting data: + * first this driver removes a pseudo header of 1 byte, + * then the lapb module prepends an LAPB header of at most 3 bytes. + */ + dev->needed_headroom = 3 - 1; + /* New-style flags. */ dev->flags = IFF_NOARP; } From 1dab5877e8ebb7a00fbf0e7d797c869aa37830b9 Mon Sep 17 00:00:00 2001 From: Luo bin Date: Sun, 9 Aug 2020 11:53:49 +0800 Subject: [PATCH 096/122] hinic: fix strncpy output truncated compile warnings fix the compile warnings of 'strncpy' output truncated before terminating nul copying N bytes from a string of the same length Signed-off-by: Luo bin Reported-by: kernel test robot Signed-off-by: David S. Miller --- .../net/ethernet/huawei/hinic/hinic_devlink.c | 32 +++++++------------ .../net/ethernet/huawei/hinic/hinic_hw_dev.h | 2 -- 2 files changed, 12 insertions(+), 22 deletions(-) diff --git a/drivers/net/ethernet/huawei/hinic/hinic_devlink.c b/drivers/net/ethernet/huawei/hinic/hinic_devlink.c index c6adc776f3c8..16bda7381ba0 100644 --- a/drivers/net/ethernet/huawei/hinic/hinic_devlink.c +++ b/drivers/net/ethernet/huawei/hinic/hinic_devlink.c @@ -334,19 +334,14 @@ void hinic_devlink_unregister(struct hinic_devlink_priv *priv) static int chip_fault_show(struct devlink_fmsg *fmsg, struct hinic_fault_event *event) { - char fault_level[FAULT_TYPE_MAX][FAULT_SHOW_STR_LEN + 1] = { - "fatal", "reset", "flr", "general", "suggestion"}; - char level_str[FAULT_SHOW_STR_LEN + 1] = {0}; - u8 level; + const char * const level_str[FAULT_LEVEL_MAX + 1] = { + "fatal", "reset", "flr", "general", "suggestion", "Unknown"}; + u8 fault_level; int err; - level = event->event.chip.err_level; - if (level < FAULT_LEVEL_MAX) - strncpy(level_str, fault_level[level], strlen(fault_level[level])); - else - strncpy(level_str, "Unknown", strlen("Unknown")); - - if (level == FAULT_LEVEL_SERIOUS_FLR) { + fault_level = (event->event.chip.err_level < FAULT_LEVEL_MAX) ? + event->event.chip.err_level : FAULT_LEVEL_MAX; + if (fault_level == FAULT_LEVEL_SERIOUS_FLR) { err = devlink_fmsg_u32_pair_put(fmsg, "Function level err func_id", (u32)event->event.chip.func_id); if (err) @@ -361,7 +356,7 @@ static int chip_fault_show(struct devlink_fmsg *fmsg, if (err) return err; - err = devlink_fmsg_string_pair_put(fmsg, "err_level", level_str); + err = devlink_fmsg_string_pair_put(fmsg, "err_level", level_str[fault_level]); if (err) return err; @@ -381,18 +376,15 @@ static int chip_fault_show(struct devlink_fmsg *fmsg, static int fault_report_show(struct devlink_fmsg *fmsg, struct hinic_fault_event *event) { - char fault_type[FAULT_TYPE_MAX][FAULT_SHOW_STR_LEN + 1] = { + const char * const type_str[FAULT_TYPE_MAX + 1] = { "chip", "ucode", "mem rd timeout", "mem wr timeout", - "reg rd timeout", "reg wr timeout", "phy fault"}; - char type_str[FAULT_SHOW_STR_LEN + 1] = {0}; + "reg rd timeout", "reg wr timeout", "phy fault", "Unknown"}; + u8 fault_type; int err; - if (event->type < FAULT_TYPE_MAX) - strncpy(type_str, fault_type[event->type], strlen(fault_type[event->type])); - else - strncpy(type_str, "Unknown", strlen("Unknown")); + fault_type = (event->type < FAULT_TYPE_MAX) ? event->type : FAULT_TYPE_MAX; - err = devlink_fmsg_string_pair_put(fmsg, "Fault type", type_str); + err = devlink_fmsg_string_pair_put(fmsg, "Fault type", type_str[fault_type]); if (err) return err; diff --git a/drivers/net/ethernet/huawei/hinic/hinic_hw_dev.h b/drivers/net/ethernet/huawei/hinic/hinic_hw_dev.h index dc6e645f2689..701eb81e09a7 100644 --- a/drivers/net/ethernet/huawei/hinic/hinic_hw_dev.h +++ b/drivers/net/ethernet/huawei/hinic/hinic_hw_dev.h @@ -504,8 +504,6 @@ enum hinic_fault_type { FAULT_TYPE_MAX, }; -#define FAULT_SHOW_STR_LEN 16 - enum hinic_fault_err_level { FAULT_LEVEL_FATAL, FAULT_LEVEL_SERIOUS_RESET, From 26896f01467a28651f7a536143fe5ac8449d4041 Mon Sep 17 00:00:00 2001 From: Qingyu Li Date: Mon, 10 Aug 2020 09:51:00 +0800 Subject: [PATCH 097/122] net/nfc/rawsock.c: add CAP_NET_RAW check. When creating a raw AF_NFC socket, CAP_NET_RAW needs to be checked first. Signed-off-by: Qingyu Li Signed-off-by: David S. Miller --- net/nfc/rawsock.c | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/net/nfc/rawsock.c b/net/nfc/rawsock.c index b2061b6746ea..955c195ae14b 100644 --- a/net/nfc/rawsock.c +++ b/net/nfc/rawsock.c @@ -328,10 +328,13 @@ static int rawsock_create(struct net *net, struct socket *sock, if ((sock->type != SOCK_SEQPACKET) && (sock->type != SOCK_RAW)) return -ESOCKTNOSUPPORT; - if (sock->type == SOCK_RAW) + if (sock->type == SOCK_RAW) { + if (!capable(CAP_NET_RAW)) + return -EPERM; sock->ops = &rawsock_raw_ops; - else + } else { sock->ops = &rawsock_ops; + } sk = sk_alloc(net, PF_NFC, GFP_ATOMIC, nfc_proto->proto, kern); if (!sk) From e71642009cbd4a874c3af71a4b16b39499f58658 Mon Sep 17 00:00:00 2001 From: Xu Wang Date: Mon, 10 Aug 2020 02:38:07 +0000 Subject: [PATCH 098/122] ionic_lif: Use devm_kcalloc() in ionic_qcq_alloc() A multiplication for the size determination of a memory allocation indicated that an array data structure should be processed. Thus use the corresponding function "devm_kcalloc". Signed-off-by: Xu Wang Acked-by: Shannon Nelson Signed-off-by: David S. Miller --- drivers/net/ethernet/pensando/ionic/ionic_lif.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/net/ethernet/pensando/ionic/ionic_lif.c b/drivers/net/ethernet/pensando/ionic/ionic_lif.c index 1944bf5264db..26988ad7ec97 100644 --- a/drivers/net/ethernet/pensando/ionic/ionic_lif.c +++ b/drivers/net/ethernet/pensando/ionic/ionic_lif.c @@ -412,7 +412,7 @@ static int ionic_qcq_alloc(struct ionic_lif *lif, unsigned int type, new->flags = flags; - new->q.info = devm_kzalloc(dev, sizeof(*new->q.info) * num_descs, + new->q.info = devm_kcalloc(dev, num_descs, sizeof(*new->q.info), GFP_KERNEL); if (!new->q.info) { netdev_err(lif->netdev, "Cannot allocate queue info\n"); @@ -462,7 +462,7 @@ static int ionic_qcq_alloc(struct ionic_lif *lif, unsigned int type, new->intr.index = IONIC_INTR_INDEX_NOT_ASSIGNED; } - new->cq.info = devm_kzalloc(dev, sizeof(*new->cq.info) * num_descs, + new->cq.info = devm_kcalloc(dev, num_descs, sizeof(*new->cq.info), GFP_KERNEL); if (!new->cq.info) { netdev_err(lif->netdev, "Cannot allocate completion queue info\n"); From 50caa777a3a24d7027748e96265728ce748b41ef Mon Sep 17 00:00:00 2001 From: Wang Hai Date: Mon, 10 Aug 2020 10:57:05 +0800 Subject: [PATCH 099/122] net: qcom/emac: add missed clk_disable_unprepare in error path of emac_clks_phase1_init Fix the missing clk_disable_unprepare() before return from emac_clks_phase1_init() in the error handling case. Fixes: b9b17debc69d ("net: emac: emac gigabit ethernet controller driver") Reported-by: Hulk Robot Signed-off-by: Wang Hai Acked-by: Timur Tabi Signed-off-by: David S. Miller --- drivers/net/ethernet/qualcomm/emac/emac.c | 17 ++++++++++++++--- 1 file changed, 14 insertions(+), 3 deletions(-) diff --git a/drivers/net/ethernet/qualcomm/emac/emac.c b/drivers/net/ethernet/qualcomm/emac/emac.c index 20b1b43a0e39..1166b98d8bb2 100644 --- a/drivers/net/ethernet/qualcomm/emac/emac.c +++ b/drivers/net/ethernet/qualcomm/emac/emac.c @@ -474,13 +474,24 @@ static int emac_clks_phase1_init(struct platform_device *pdev, ret = clk_prepare_enable(adpt->clk[EMAC_CLK_CFG_AHB]); if (ret) - return ret; + goto disable_clk_axi; ret = clk_set_rate(adpt->clk[EMAC_CLK_HIGH_SPEED], 19200000); if (ret) - return ret; + goto disable_clk_cfg_ahb; - return clk_prepare_enable(adpt->clk[EMAC_CLK_HIGH_SPEED]); + ret = clk_prepare_enable(adpt->clk[EMAC_CLK_HIGH_SPEED]); + if (ret) + goto disable_clk_cfg_ahb; + + return 0; + +disable_clk_cfg_ahb: + clk_disable_unprepare(adpt->clk[EMAC_CLK_CFG_AHB]); +disable_clk_axi: + clk_disable_unprepare(adpt->clk[EMAC_CLK_AXI]); + + return ret; } /* Enable clocks; needs emac_clks_phase1_init to be called before */ From 0f5907af39137f8183ed536aaa00f322d7365130 Mon Sep 17 00:00:00 2001 From: Miaohe Lin Date: Mon, 10 Aug 2020 08:16:58 -0400 Subject: [PATCH 100/122] net: Fix potential memory leak in proto_register() If we failed to assign proto idx, we free the twsk_slab_name but forget to free the twsk_slab. Add a helper function tw_prot_cleanup() to free these together and also use this helper function in proto_unregister(). Fixes: b45ce32135d1 ("sock: fix potential memory leak in proto_register()") Signed-off-by: Miaohe Lin Signed-off-by: David S. Miller --- net/core/sock.c | 25 +++++++++++++++---------- 1 file changed, 15 insertions(+), 10 deletions(-) diff --git a/net/core/sock.c b/net/core/sock.c index 49cd5ffe673e..c9083ad44ea1 100644 --- a/net/core/sock.c +++ b/net/core/sock.c @@ -3406,6 +3406,16 @@ static void sock_inuse_add(struct net *net, int val) } #endif +static void tw_prot_cleanup(struct timewait_sock_ops *twsk_prot) +{ + if (!twsk_prot) + return; + kfree(twsk_prot->twsk_slab_name); + twsk_prot->twsk_slab_name = NULL; + kmem_cache_destroy(twsk_prot->twsk_slab); + twsk_prot->twsk_slab = NULL; +} + static void req_prot_cleanup(struct request_sock_ops *rsk_prot) { if (!rsk_prot) @@ -3476,7 +3486,7 @@ int proto_register(struct proto *prot, int alloc_slab) prot->slab_flags, NULL); if (prot->twsk_prot->twsk_slab == NULL) - goto out_free_timewait_sock_slab_name; + goto out_free_timewait_sock_slab; } } @@ -3484,15 +3494,15 @@ int proto_register(struct proto *prot, int alloc_slab) ret = assign_proto_idx(prot); if (ret) { mutex_unlock(&proto_list_mutex); - goto out_free_timewait_sock_slab_name; + goto out_free_timewait_sock_slab; } list_add(&prot->node, &proto_list); mutex_unlock(&proto_list_mutex); return ret; -out_free_timewait_sock_slab_name: +out_free_timewait_sock_slab: if (alloc_slab && prot->twsk_prot) - kfree(prot->twsk_prot->twsk_slab_name); + tw_prot_cleanup(prot->twsk_prot); out_free_request_sock_slab: if (alloc_slab) { req_prot_cleanup(prot->rsk_prot); @@ -3516,12 +3526,7 @@ void proto_unregister(struct proto *prot) prot->slab = NULL; req_prot_cleanup(prot->rsk_prot); - - if (prot->twsk_prot != NULL && prot->twsk_prot->twsk_slab != NULL) { - kmem_cache_destroy(prot->twsk_prot->twsk_slab); - kfree(prot->twsk_prot->twsk_slab_name); - prot->twsk_prot->twsk_slab = NULL; - } + tw_prot_cleanup(prot->twsk_prot); } EXPORT_SYMBOL(proto_unregister); From 1b8ef1423dbfd34de2439a2db457b84480b7c8a8 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Marek=20Beh=C3=BAn?= Date: Mon, 10 Aug 2020 17:01:58 +0200 Subject: [PATCH 101/122] net: phy: marvell10g: fix null pointer dereference MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Commit c3e302edca24 ("net: phy: marvell10g: fix temperature sensor on 2110") added a check for PHY ID via phydev->drv->phy_id in a function which is called by devres at a time when phydev->drv is already set to null by phy_remove function. This null pointer dereference can be triggered via SFP subsystem with a SFP module containing this Marvell PHY. When the SFP interface is put down, the SFP subsystem removes the PHY. Fixes: c3e302edca24 ("net: phy: marvell10g: fix temperature sensor on 2110") Signed-off-by: Marek Behún Cc: Maxime Chevallier Cc: Andrew Lunn Cc: Baruch Siach Cc: Russell King Signed-off-by: David S. Miller --- drivers/net/phy/marvell10g.c | 18 +++++++----------- 1 file changed, 7 insertions(+), 11 deletions(-) diff --git a/drivers/net/phy/marvell10g.c b/drivers/net/phy/marvell10g.c index a7610eb55f30..1901ba277413 100644 --- a/drivers/net/phy/marvell10g.c +++ b/drivers/net/phy/marvell10g.c @@ -208,13 +208,6 @@ static int mv3310_hwmon_config(struct phy_device *phydev, bool enable) MV_V2_TEMP_CTRL_MASK, val); } -static void mv3310_hwmon_disable(void *data) -{ - struct phy_device *phydev = data; - - mv3310_hwmon_config(phydev, false); -} - static int mv3310_hwmon_probe(struct phy_device *phydev) { struct device *dev = &phydev->mdio.dev; @@ -238,10 +231,6 @@ static int mv3310_hwmon_probe(struct phy_device *phydev) if (ret) return ret; - ret = devm_add_action_or_reset(dev, mv3310_hwmon_disable, phydev); - if (ret) - return ret; - priv->hwmon_dev = devm_hwmon_device_register_with_info(dev, priv->hwmon_name, phydev, &mv3310_hwmon_chip_info, NULL); @@ -426,6 +415,11 @@ static int mv3310_probe(struct phy_device *phydev) return phy_sfp_probe(phydev, &mv3310_sfp_ops); } +static void mv3310_remove(struct phy_device *phydev) +{ + mv3310_hwmon_config(phydev, false); +} + static int mv3310_suspend(struct phy_device *phydev) { return mv3310_power_down(phydev); @@ -784,6 +778,7 @@ static struct phy_driver mv3310_drivers[] = { .read_status = mv3310_read_status, .get_tunable = mv3310_get_tunable, .set_tunable = mv3310_set_tunable, + .remove = mv3310_remove, }, { .phy_id = MARVELL_PHY_ID_88E2110, @@ -798,6 +793,7 @@ static struct phy_driver mv3310_drivers[] = { .read_status = mv3310_read_status, .get_tunable = mv3310_get_tunable, .set_tunable = mv3310_set_tunable, + .remove = mv3310_remove, }, }; From 62ffc589abb176821662efc4525ee4ac0b9c3894 Mon Sep 17 00:00:00 2001 From: Tim Froidcoeur Date: Tue, 11 Aug 2020 20:33:23 +0200 Subject: [PATCH 102/122] net: refactor bind_bucket fastreuse into helper Refactor the fastreuse update code in inet_csk_get_port into a small helper function that can be called from other places. Acked-by: Matthieu Baerts Signed-off-by: Tim Froidcoeur Signed-off-by: David S. Miller --- include/net/inet_connection_sock.h | 4 ++ net/ipv4/inet_connection_sock.c | 101 ++++++++++++++++------------- 2 files changed, 59 insertions(+), 46 deletions(-) diff --git a/include/net/inet_connection_sock.h b/include/net/inet_connection_sock.h index 1e209ce7d1bd..aa8893c68c50 100644 --- a/include/net/inet_connection_sock.h +++ b/include/net/inet_connection_sock.h @@ -304,6 +304,10 @@ void inet_csk_listen_stop(struct sock *sk); void inet_csk_addr2sockaddr(struct sock *sk, struct sockaddr *uaddr); +/* update the fast reuse flag when adding a socket */ +void inet_csk_update_fastreuse(struct inet_bind_bucket *tb, + struct sock *sk); + struct dst_entry *inet_csk_update_pmtu(struct sock *sk, u32 mtu); #define TCP_PINGPONG_THRESH 3 diff --git a/net/ipv4/inet_connection_sock.c b/net/ipv4/inet_connection_sock.c index d1a3913eebe0..b457dd2d6c75 100644 --- a/net/ipv4/inet_connection_sock.c +++ b/net/ipv4/inet_connection_sock.c @@ -296,55 +296,12 @@ static inline int sk_reuseport_match(struct inet_bind_bucket *tb, ipv6_only_sock(sk), true, false); } -/* Obtain a reference to a local port for the given sock, - * if snum is zero it means select any available local port. - * We try to allocate an odd port (and leave even ports for connect()) - */ -int inet_csk_get_port(struct sock *sk, unsigned short snum) +void inet_csk_update_fastreuse(struct inet_bind_bucket *tb, + struct sock *sk) { - bool reuse = sk->sk_reuse && sk->sk_state != TCP_LISTEN; - struct inet_hashinfo *hinfo = sk->sk_prot->h.hashinfo; - int ret = 1, port = snum; - struct inet_bind_hashbucket *head; - struct net *net = sock_net(sk); - struct inet_bind_bucket *tb = NULL; kuid_t uid = sock_i_uid(sk); - int l3mdev; + bool reuse = sk->sk_reuse && sk->sk_state != TCP_LISTEN; - l3mdev = inet_sk_bound_l3mdev(sk); - - if (!port) { - head = inet_csk_find_open_port(sk, &tb, &port); - if (!head) - return ret; - if (!tb) - goto tb_not_found; - goto success; - } - head = &hinfo->bhash[inet_bhashfn(net, port, - hinfo->bhash_size)]; - spin_lock_bh(&head->lock); - inet_bind_bucket_for_each(tb, &head->chain) - if (net_eq(ib_net(tb), net) && tb->l3mdev == l3mdev && - tb->port == port) - goto tb_found; -tb_not_found: - tb = inet_bind_bucket_create(hinfo->bind_bucket_cachep, - net, head, port, l3mdev); - if (!tb) - goto fail_unlock; -tb_found: - if (!hlist_empty(&tb->owners)) { - if (sk->sk_reuse == SK_FORCE_REUSE) - goto success; - - if ((tb->fastreuse > 0 && reuse) || - sk_reuseport_match(tb, sk)) - goto success; - if (inet_csk_bind_conflict(sk, tb, true, true)) - goto fail_unlock; - } -success: if (hlist_empty(&tb->owners)) { tb->fastreuse = reuse; if (sk->sk_reuseport) { @@ -388,6 +345,58 @@ int inet_csk_get_port(struct sock *sk, unsigned short snum) tb->fastreuseport = 0; } } +} + +/* Obtain a reference to a local port for the given sock, + * if snum is zero it means select any available local port. + * We try to allocate an odd port (and leave even ports for connect()) + */ +int inet_csk_get_port(struct sock *sk, unsigned short snum) +{ + bool reuse = sk->sk_reuse && sk->sk_state != TCP_LISTEN; + struct inet_hashinfo *hinfo = sk->sk_prot->h.hashinfo; + int ret = 1, port = snum; + struct inet_bind_hashbucket *head; + struct net *net = sock_net(sk); + struct inet_bind_bucket *tb = NULL; + int l3mdev; + + l3mdev = inet_sk_bound_l3mdev(sk); + + if (!port) { + head = inet_csk_find_open_port(sk, &tb, &port); + if (!head) + return ret; + if (!tb) + goto tb_not_found; + goto success; + } + head = &hinfo->bhash[inet_bhashfn(net, port, + hinfo->bhash_size)]; + spin_lock_bh(&head->lock); + inet_bind_bucket_for_each(tb, &head->chain) + if (net_eq(ib_net(tb), net) && tb->l3mdev == l3mdev && + tb->port == port) + goto tb_found; +tb_not_found: + tb = inet_bind_bucket_create(hinfo->bind_bucket_cachep, + net, head, port, l3mdev); + if (!tb) + goto fail_unlock; +tb_found: + if (!hlist_empty(&tb->owners)) { + if (sk->sk_reuse == SK_FORCE_REUSE) + goto success; + + if ((tb->fastreuse > 0 && reuse) || + sk_reuseport_match(tb, sk)) + goto success; + if (inet_csk_bind_conflict(sk, tb, true, true)) + goto fail_unlock; + } +success: + inet_csk_update_fastreuse(tb, sk); + if (!inet_csk(sk)->icsk_bind_hash) inet_bind_hash(sk, tb, port); WARN_ON(inet_csk(sk)->icsk_bind_hash != tb); From d76f3351cea2d927fdf70dd7c06898235035e84e Mon Sep 17 00:00:00 2001 From: Tim Froidcoeur Date: Tue, 11 Aug 2020 20:33:24 +0200 Subject: [PATCH 103/122] net: initialize fastreuse on inet_inherit_port In the case of TPROXY, bind_conflict optimizations for SO_REUSEADDR or SO_REUSEPORT are broken, possibly resulting in O(n) instead of O(1) bind behaviour or in the incorrect reuse of a bind. the kernel keeps track for each bind_bucket if all sockets in the bind_bucket support SO_REUSEADDR or SO_REUSEPORT in two fastreuse flags. These flags allow skipping the costly bind_conflict check when possible (meaning when all sockets have the proper SO_REUSE option). For every socket added to a bind_bucket, these flags need to be updated. As soon as a socket that does not support reuse is added, the flag is set to false and will never go back to true, unless the bind_bucket is deleted. Note that there is no mechanism to re-evaluate these flags when a socket is removed (this might make sense when removing a socket that would not allow reuse; this leaves room for a future patch). For this optimization to work, it is mandatory that these flags are properly initialized and updated. When a child socket is created from a listen socket in __inet_inherit_port, the TPROXY case could create a new bind bucket without properly initializing these flags, thus preventing the optimization to work. Alternatively, a socket not allowing reuse could be added to an existing bind bucket without updating the flags, causing bind_conflict to never be called as it should. Call inet_csk_update_fastreuse when __inet_inherit_port decides to create a new bind_bucket or use a different bind_bucket than the one of the listen socket. Fixes: 093d282321da ("tproxy: fix hash locking issue when using port redirection in __inet_inherit_port()") Acked-by: Matthieu Baerts Signed-off-by: Tim Froidcoeur Signed-off-by: David S. Miller --- net/ipv4/inet_hashtables.c | 1 + 1 file changed, 1 insertion(+) diff --git a/net/ipv4/inet_hashtables.c b/net/ipv4/inet_hashtables.c index 4eb4cd8d20dd..239e54474b65 100644 --- a/net/ipv4/inet_hashtables.c +++ b/net/ipv4/inet_hashtables.c @@ -163,6 +163,7 @@ int __inet_inherit_port(const struct sock *sk, struct sock *child) return -ENOMEM; } } + inet_csk_update_fastreuse(tb, child); } inet_bind_hash(child, tb, port); spin_unlock(&head->lock); From 2c7f8937ef91520a8a4bd700d5817b5e9c99803c Mon Sep 17 00:00:00 2001 From: Tetsuhiro Kohada Date: Tue, 16 Jun 2020 11:18:07 +0900 Subject: [PATCH 104/122] exfat: remove EXFAT_SB_DIRTY flag This flag is set/reset in exfat_put_super()/exfat_sync_fs() to avoid sync_blockdev(). - exfat_put_super(): Before calling this, the VFS has already called sync_filesystem(), so sync is never performed here. - exfat_sync_fs(): After calling this, the VFS calls sync_blockdev(), so, it is meaningless to check EXFAT_SB_DIRTY or to bypass sync_blockdev() here. Remove the EXFAT_SB_DIRTY check to ensure synchronization. And remove the code related to the flag. Signed-off-by: Tetsuhiro Kohada Reviewed-by: Sungjong Seo Signed-off-by: Namjae Jeon --- fs/exfat/balloc.c | 4 ++-- fs/exfat/dir.c | 16 ++++++++-------- fs/exfat/exfat_fs.h | 5 +---- fs/exfat/fatent.c | 7 ++----- fs/exfat/misc.c | 3 +-- fs/exfat/namei.c | 12 ++++++------ fs/exfat/super.c | 14 ++++++-------- 7 files changed, 26 insertions(+), 35 deletions(-) diff --git a/fs/exfat/balloc.c b/fs/exfat/balloc.c index 4055eb00ea9b..a987919686c0 100644 --- a/fs/exfat/balloc.c +++ b/fs/exfat/balloc.c @@ -158,7 +158,7 @@ int exfat_set_bitmap(struct inode *inode, unsigned int clu) b = BITMAP_OFFSET_BIT_IN_SECTOR(sb, ent_idx); set_bit_le(b, sbi->vol_amap[i]->b_data); - exfat_update_bh(sb, sbi->vol_amap[i], IS_DIRSYNC(inode)); + exfat_update_bh(sbi->vol_amap[i], IS_DIRSYNC(inode)); return 0; } @@ -180,7 +180,7 @@ void exfat_clear_bitmap(struct inode *inode, unsigned int clu) b = BITMAP_OFFSET_BIT_IN_SECTOR(sb, ent_idx); clear_bit_le(b, sbi->vol_amap[i]->b_data); - exfat_update_bh(sb, sbi->vol_amap[i], IS_DIRSYNC(inode)); + exfat_update_bh(sbi->vol_amap[i], IS_DIRSYNC(inode)); if (opts->discard) { int ret_discard; diff --git a/fs/exfat/dir.c b/fs/exfat/dir.c index 119abf0d8dd6..97296bc2b241 100644 --- a/fs/exfat/dir.c +++ b/fs/exfat/dir.c @@ -470,7 +470,7 @@ int exfat_init_dir_entry(struct inode *inode, struct exfat_chain *p_dir, &ep->dentry.file.access_date, NULL); - exfat_update_bh(sb, bh, IS_DIRSYNC(inode)); + exfat_update_bh(bh, IS_DIRSYNC(inode)); brelse(bh); ep = exfat_get_dentry(sb, p_dir, entry + 1, &bh, §or); @@ -480,7 +480,7 @@ int exfat_init_dir_entry(struct inode *inode, struct exfat_chain *p_dir, exfat_init_stream_entry(ep, (type == TYPE_FILE) ? ALLOC_FAT_CHAIN : ALLOC_NO_FAT_CHAIN, start_clu, size); - exfat_update_bh(sb, bh, IS_DIRSYNC(inode)); + exfat_update_bh(bh, IS_DIRSYNC(inode)); brelse(bh); return 0; @@ -516,7 +516,7 @@ int exfat_update_dir_chksum(struct inode *inode, struct exfat_chain *p_dir, } fep->dentry.file.checksum = cpu_to_le16(chksum); - exfat_update_bh(sb, fbh, IS_DIRSYNC(inode)); + exfat_update_bh(fbh, IS_DIRSYNC(inode)); release_fbh: brelse(fbh); return ret; @@ -538,7 +538,7 @@ int exfat_init_ext_entry(struct inode *inode, struct exfat_chain *p_dir, return -EIO; ep->dentry.file.num_ext = (unsigned char)(num_entries - 1); - exfat_update_bh(sb, bh, sync); + exfat_update_bh(bh, sync); brelse(bh); ep = exfat_get_dentry(sb, p_dir, entry + 1, &bh, §or); @@ -547,7 +547,7 @@ int exfat_init_ext_entry(struct inode *inode, struct exfat_chain *p_dir, ep->dentry.stream.name_len = p_uniname->name_len; ep->dentry.stream.name_hash = cpu_to_le16(p_uniname->name_hash); - exfat_update_bh(sb, bh, sync); + exfat_update_bh(bh, sync); brelse(bh); for (i = EXFAT_FIRST_CLUSTER; i < num_entries; i++) { @@ -556,7 +556,7 @@ int exfat_init_ext_entry(struct inode *inode, struct exfat_chain *p_dir, return -EIO; exfat_init_name_entry(ep, uniname); - exfat_update_bh(sb, bh, sync); + exfat_update_bh(bh, sync); brelse(bh); uniname += EXFAT_FILE_NAME_LEN; } @@ -580,7 +580,7 @@ int exfat_remove_entries(struct inode *inode, struct exfat_chain *p_dir, return -EIO; exfat_set_entry_type(ep, TYPE_DELETED); - exfat_update_bh(sb, bh, IS_DIRSYNC(inode)); + exfat_update_bh(bh, IS_DIRSYNC(inode)); brelse(bh); } @@ -610,7 +610,7 @@ void exfat_free_dentry_set(struct exfat_entry_set_cache *es, int sync) for (i = 0; i < es->num_bh; i++) { if (es->modified) - exfat_update_bh(es->sb, es->bh[i], sync); + exfat_update_bh(es->bh[i], sync); brelse(es->bh[i]); } kfree(es); diff --git a/fs/exfat/exfat_fs.h b/fs/exfat/exfat_fs.h index 75c7bdbeba6d..05553b2924ee 100644 --- a/fs/exfat/exfat_fs.h +++ b/fs/exfat/exfat_fs.h @@ -13,8 +13,6 @@ #define EXFAT_SUPER_MAGIC 0x2011BAB0UL #define EXFAT_ROOT_INO 1 -#define EXFAT_SB_DIRTY 0 - #define EXFAT_CLUSTERS_UNTRACKED (~0u) /* @@ -238,7 +236,6 @@ struct exfat_sb_info { unsigned int clu_srch_ptr; /* cluster search pointer */ unsigned int used_clusters; /* number of used clusters */ - unsigned long s_state; struct mutex s_lock; /* superblock lock */ struct exfat_mount_options options; struct nls_table *nls_io; /* Charset used for input and display */ @@ -515,7 +512,7 @@ void exfat_set_entry_time(struct exfat_sb_info *sbi, struct timespec64 *ts, u8 *tz, __le16 *time, __le16 *date, u8 *time_cs); u16 exfat_calc_chksum16(void *data, int len, u16 chksum, int type); u32 exfat_calc_chksum32(void *data, int len, u32 chksum, int type); -void exfat_update_bh(struct super_block *sb, struct buffer_head *bh, int sync); +void exfat_update_bh(struct buffer_head *bh, int sync); void exfat_chain_set(struct exfat_chain *ec, unsigned int dir, unsigned int size, unsigned char flags); void exfat_chain_dup(struct exfat_chain *dup, struct exfat_chain *ec); diff --git a/fs/exfat/fatent.c b/fs/exfat/fatent.c index 4e5c5c9c0f2d..82ee8246c080 100644 --- a/fs/exfat/fatent.c +++ b/fs/exfat/fatent.c @@ -75,7 +75,7 @@ int exfat_ent_set(struct super_block *sb, unsigned int loc, fat_entry = (__le32 *)&(bh->b_data[off]); *fat_entry = cpu_to_le32(content); - exfat_update_bh(sb, bh, sb->s_flags & SB_SYNCHRONOUS); + exfat_update_bh(bh, sb->s_flags & SB_SYNCHRONOUS); exfat_mirror_bh(sb, sec, bh); brelse(bh); return 0; @@ -174,7 +174,6 @@ int exfat_free_cluster(struct inode *inode, struct exfat_chain *p_chain) return -EIO; } - set_bit(EXFAT_SB_DIRTY, &sbi->s_state); clu = p_chain->dir; if (p_chain->flags == ALLOC_NO_FAT_CHAIN) { @@ -274,7 +273,7 @@ int exfat_zeroed_cluster(struct inode *dir, unsigned int clu) goto release_bhs; } memset(bhs[n]->b_data, 0, sb->s_blocksize); - exfat_update_bh(sb, bhs[n], 0); + exfat_update_bh(bhs[n], 0); n++; blknr++; @@ -358,8 +357,6 @@ int exfat_alloc_cluster(struct inode *inode, unsigned int num_alloc, } } - set_bit(EXFAT_SB_DIRTY, &sbi->s_state); - p_chain->dir = EXFAT_EOF_CLUSTER; while ((new_clu = exfat_find_free_bitmap(sb, hint_clu)) != diff --git a/fs/exfat/misc.c b/fs/exfat/misc.c index 17d41f3d3709..8a3dde59052b 100644 --- a/fs/exfat/misc.c +++ b/fs/exfat/misc.c @@ -163,9 +163,8 @@ u32 exfat_calc_chksum32(void *data, int len, u32 chksum, int type) return chksum; } -void exfat_update_bh(struct super_block *sb, struct buffer_head *bh, int sync) +void exfat_update_bh(struct buffer_head *bh, int sync) { - set_bit(EXFAT_SB_DIRTY, &EXFAT_SB(sb)->s_state); set_buffer_uptodate(bh); mark_buffer_dirty(bh); diff --git a/fs/exfat/namei.c b/fs/exfat/namei.c index 2b9e21094a96..126ed3ba8f47 100644 --- a/fs/exfat/namei.c +++ b/fs/exfat/namei.c @@ -387,7 +387,7 @@ static int exfat_find_empty_entry(struct inode *inode, ep->dentry.stream.valid_size = cpu_to_le64(size); ep->dentry.stream.size = ep->dentry.stream.valid_size; ep->dentry.stream.flags = p_dir->flags; - exfat_update_bh(sb, bh, IS_DIRSYNC(inode)); + exfat_update_bh(bh, IS_DIRSYNC(inode)); brelse(bh); if (exfat_update_dir_chksum(inode, &(ei->dir), ei->entry)) @@ -1071,7 +1071,7 @@ static int exfat_rename_file(struct inode *inode, struct exfat_chain *p_dir, epnew->dentry.file.attr |= cpu_to_le16(ATTR_ARCHIVE); ei->attr |= ATTR_ARCHIVE; } - exfat_update_bh(sb, new_bh, sync); + exfat_update_bh(new_bh, sync); brelse(old_bh); brelse(new_bh); @@ -1087,7 +1087,7 @@ static int exfat_rename_file(struct inode *inode, struct exfat_chain *p_dir, } memcpy(epnew, epold, DENTRY_SIZE); - exfat_update_bh(sb, new_bh, sync); + exfat_update_bh(new_bh, sync); brelse(old_bh); brelse(new_bh); @@ -1104,7 +1104,7 @@ static int exfat_rename_file(struct inode *inode, struct exfat_chain *p_dir, epold->dentry.file.attr |= cpu_to_le16(ATTR_ARCHIVE); ei->attr |= ATTR_ARCHIVE; } - exfat_update_bh(sb, old_bh, sync); + exfat_update_bh(old_bh, sync); brelse(old_bh); ret = exfat_init_ext_entry(inode, p_dir, oldentry, num_new_entries, p_uniname); @@ -1159,7 +1159,7 @@ static int exfat_move_file(struct inode *inode, struct exfat_chain *p_olddir, epnew->dentry.file.attr |= cpu_to_le16(ATTR_ARCHIVE); ei->attr |= ATTR_ARCHIVE; } - exfat_update_bh(sb, new_bh, IS_DIRSYNC(inode)); + exfat_update_bh(new_bh, IS_DIRSYNC(inode)); brelse(mov_bh); brelse(new_bh); @@ -1175,7 +1175,7 @@ static int exfat_move_file(struct inode *inode, struct exfat_chain *p_olddir, } memcpy(epnew, epmov, DENTRY_SIZE); - exfat_update_bh(sb, new_bh, IS_DIRSYNC(inode)); + exfat_update_bh(new_bh, IS_DIRSYNC(inode)); brelse(mov_bh); brelse(new_bh); diff --git a/fs/exfat/super.c b/fs/exfat/super.c index 253a92460d52..b5bf6dedbe11 100644 --- a/fs/exfat/super.c +++ b/fs/exfat/super.c @@ -45,9 +45,6 @@ static void exfat_put_super(struct super_block *sb) struct exfat_sb_info *sbi = EXFAT_SB(sb); mutex_lock(&sbi->s_lock); - if (test_and_clear_bit(EXFAT_SB_DIRTY, &sbi->s_state)) - sync_blockdev(sb->s_bdev); - exfat_set_vol_flags(sb, VOL_CLEAN); exfat_free_bitmap(sbi); brelse(sbi->boot_bh); mutex_unlock(&sbi->s_lock); @@ -60,13 +57,14 @@ static int exfat_sync_fs(struct super_block *sb, int wait) struct exfat_sb_info *sbi = EXFAT_SB(sb); int err = 0; + if (!wait) + return 0; + /* If there are some dirty buffers in the bdev inode */ mutex_lock(&sbi->s_lock); - if (test_and_clear_bit(EXFAT_SB_DIRTY, &sbi->s_state)) { - sync_blockdev(sb->s_bdev); - if (exfat_set_vol_flags(sb, VOL_CLEAN)) - err = -EIO; - } + sync_blockdev(sb->s_bdev); + if (exfat_set_vol_flags(sb, VOL_CLEAN)) + err = -EIO; mutex_unlock(&sbi->s_lock); return err; } From 3db3c3fb840ed4a6c7666d1464959edd40fe54f1 Mon Sep 17 00:00:00 2001 From: Tetsuhiro Kohada Date: Tue, 23 Jun 2020 15:22:19 +0900 Subject: [PATCH 105/122] exfat: write multiple sectors at once Write multiple sectors at once when updating dir-entries. Add exfat_update_bhs() for that. It wait for write completion once instead of sector by sector. It's only effective if sync enabled. Signed-off-by: Tetsuhiro Kohada Signed-off-by: Namjae Jeon --- fs/exfat/dir.c | 15 +++++++++------ fs/exfat/exfat_fs.h | 1 + fs/exfat/misc.c | 19 +++++++++++++++++++ 3 files changed, 29 insertions(+), 6 deletions(-) diff --git a/fs/exfat/dir.c b/fs/exfat/dir.c index 97296bc2b241..542f1a5ab56f 100644 --- a/fs/exfat/dir.c +++ b/fs/exfat/dir.c @@ -606,13 +606,16 @@ void exfat_update_dir_chksum_with_entry_set(struct exfat_entry_set_cache *es) void exfat_free_dentry_set(struct exfat_entry_set_cache *es, int sync) { - int i; + int i, err = 0; - for (i = 0; i < es->num_bh; i++) { - if (es->modified) - exfat_update_bh(es->bh[i], sync); - brelse(es->bh[i]); - } + if (es->modified) + err = exfat_update_bhs(es->bh, es->num_bh, sync); + + for (i = 0; i < es->num_bh; i++) + if (err) + bforget(es->bh[i]); + else + brelse(es->bh[i]); kfree(es); } diff --git a/fs/exfat/exfat_fs.h b/fs/exfat/exfat_fs.h index 05553b2924ee..af0f526eece7 100644 --- a/fs/exfat/exfat_fs.h +++ b/fs/exfat/exfat_fs.h @@ -513,6 +513,7 @@ void exfat_set_entry_time(struct exfat_sb_info *sbi, struct timespec64 *ts, u16 exfat_calc_chksum16(void *data, int len, u16 chksum, int type); u32 exfat_calc_chksum32(void *data, int len, u32 chksum, int type); void exfat_update_bh(struct buffer_head *bh, int sync); +int exfat_update_bhs(struct buffer_head **bhs, int nr_bhs, int sync); void exfat_chain_set(struct exfat_chain *ec, unsigned int dir, unsigned int size, unsigned char flags); void exfat_chain_dup(struct exfat_chain *dup, struct exfat_chain *ec); diff --git a/fs/exfat/misc.c b/fs/exfat/misc.c index 8a3dde59052b..d34e6193258d 100644 --- a/fs/exfat/misc.c +++ b/fs/exfat/misc.c @@ -172,6 +172,25 @@ void exfat_update_bh(struct buffer_head *bh, int sync) sync_dirty_buffer(bh); } +int exfat_update_bhs(struct buffer_head **bhs, int nr_bhs, int sync) +{ + int i, err = 0; + + for (i = 0; i < nr_bhs; i++) { + set_buffer_uptodate(bhs[i]); + mark_buffer_dirty(bhs[i]); + if (sync) + write_dirty_buffer(bhs[i], 0); + } + + for (i = 0; i < nr_bhs && sync; i++) { + wait_on_buffer(bhs[i]); + if (!err && !buffer_uptodate(bhs[i])) + err = -EIO; + } + return err; +} + void exfat_chain_set(struct exfat_chain *ec, unsigned int dir, unsigned int size, unsigned char flags) { From 8b0c471773819c8201dc0b0123e1580639ee1570 Mon Sep 17 00:00:00 2001 From: Tetsuhiro Kohada Date: Wed, 24 Jun 2020 09:54:54 +0900 Subject: [PATCH 106/122] exfat: add error check when updating dir-entries Add error check when synchronously updating dir-entries. Suggested-by: Sungjong Seo Signed-off-by: Tetsuhiro Kohada Signed-off-by: Namjae Jeon --- fs/exfat/dir.c | 3 ++- fs/exfat/exfat_fs.h | 2 +- fs/exfat/file.c | 5 ++++- fs/exfat/inode.c | 9 +++++---- 4 files changed, 12 insertions(+), 7 deletions(-) diff --git a/fs/exfat/dir.c b/fs/exfat/dir.c index 542f1a5ab56f..573659bfbc55 100644 --- a/fs/exfat/dir.c +++ b/fs/exfat/dir.c @@ -604,7 +604,7 @@ void exfat_update_dir_chksum_with_entry_set(struct exfat_entry_set_cache *es) es->modified = true; } -void exfat_free_dentry_set(struct exfat_entry_set_cache *es, int sync) +int exfat_free_dentry_set(struct exfat_entry_set_cache *es, int sync) { int i, err = 0; @@ -617,6 +617,7 @@ void exfat_free_dentry_set(struct exfat_entry_set_cache *es, int sync) else brelse(es->bh[i]); kfree(es); + return err; } static int exfat_walk_fat_chain(struct super_block *sb, diff --git a/fs/exfat/exfat_fs.h b/fs/exfat/exfat_fs.h index af0f526eece7..cb51d6e83199 100644 --- a/fs/exfat/exfat_fs.h +++ b/fs/exfat/exfat_fs.h @@ -460,7 +460,7 @@ struct exfat_dentry *exfat_get_dentry_cached(struct exfat_entry_set_cache *es, int num); struct exfat_entry_set_cache *exfat_get_dentry_set(struct super_block *sb, struct exfat_chain *p_dir, int entry, unsigned int type); -void exfat_free_dentry_set(struct exfat_entry_set_cache *es, int sync); +int exfat_free_dentry_set(struct exfat_entry_set_cache *es, int sync); int exfat_count_dir_entries(struct super_block *sb, struct exfat_chain *p_dir); /* inode.c */ diff --git a/fs/exfat/file.c b/fs/exfat/file.c index a6a063830edc..6bdabfd4b134 100644 --- a/fs/exfat/file.c +++ b/fs/exfat/file.c @@ -154,6 +154,7 @@ int __exfat_truncate(struct inode *inode, loff_t new_size) struct timespec64 ts; struct exfat_dentry *ep, *ep2; struct exfat_entry_set_cache *es; + int err; es = exfat_get_dentry_set(sb, &(ei->dir), ei->entry, ES_ALL_ENTRIES); @@ -188,7 +189,9 @@ int __exfat_truncate(struct inode *inode, loff_t new_size) } exfat_update_dir_chksum_with_entry_set(es); - exfat_free_dentry_set(es, inode_needs_sync(inode)); + err = exfat_free_dentry_set(es, inode_needs_sync(inode)); + if (err) + return err; } /* cut off from the FAT chain */ diff --git a/fs/exfat/inode.c b/fs/exfat/inode.c index cf9ca6c4d046..f0160a7892a8 100644 --- a/fs/exfat/inode.c +++ b/fs/exfat/inode.c @@ -77,8 +77,7 @@ static int __exfat_write_inode(struct inode *inode, int sync) ep2->dentry.stream.size = ep2->dentry.stream.valid_size; exfat_update_dir_chksum_with_entry_set(es); - exfat_free_dentry_set(es, sync); - return 0; + return exfat_free_dentry_set(es, sync); } int exfat_write_inode(struct inode *inode, struct writeback_control *wbc) @@ -222,6 +221,7 @@ static int exfat_map_cluster(struct inode *inode, unsigned int clu_offset, if (ei->dir.dir != DIR_DELETED && modified) { struct exfat_dentry *ep; struct exfat_entry_set_cache *es; + int err; es = exfat_get_dentry_set(sb, &(ei->dir), ei->entry, ES_ALL_ENTRIES); @@ -240,8 +240,9 @@ static int exfat_map_cluster(struct inode *inode, unsigned int clu_offset, ep->dentry.stream.valid_size; exfat_update_dir_chksum_with_entry_set(es); - exfat_free_dentry_set(es, inode_needs_sync(inode)); - + err = exfat_free_dentry_set(es, inode_needs_sync(inode)); + if (err) + return err; } /* end of if != DIR_DELETED */ inode->i_blocks += From 4dc7d35e09ba78aa0a3bcaa9fad1c19952e018a7 Mon Sep 17 00:00:00 2001 From: Tetsuhiro Kohada Date: Wed, 24 Jun 2020 11:30:40 +0900 Subject: [PATCH 107/122] exfat: optimize exfat_zeroed_cluster() Replace part of exfat_zeroed_cluster() with exfat_update_bhs(). And remove exfat_sync_bhs(). Signed-off-by: Tetsuhiro Kohada Reviewed-by: Sungjong Seo Signed-off-by: Namjae Jeon --- fs/exfat/fatent.c | 53 +++++++++-------------------------------------- 1 file changed, 10 insertions(+), 43 deletions(-) diff --git a/fs/exfat/fatent.c b/fs/exfat/fatent.c index 82ee8246c080..c3c9afee7418 100644 --- a/fs/exfat/fatent.c +++ b/fs/exfat/fatent.c @@ -229,21 +229,6 @@ int exfat_find_last_cluster(struct super_block *sb, struct exfat_chain *p_chain, return 0; } -static inline int exfat_sync_bhs(struct buffer_head **bhs, int nr_bhs) -{ - int i, err = 0; - - for (i = 0; i < nr_bhs; i++) - write_dirty_buffer(bhs[i], 0); - - for (i = 0; i < nr_bhs; i++) { - wait_on_buffer(bhs[i]); - if (!err && !buffer_uptodate(bhs[i])) - err = -EIO; - } - return err; -} - int exfat_zeroed_cluster(struct inode *dir, unsigned int clu) { struct super_block *sb = dir->i_sb; @@ -265,41 +250,23 @@ int exfat_zeroed_cluster(struct inode *dir, unsigned int clu) } /* Zeroing the unused blocks on this cluster */ - n = 0; while (blknr < last_blknr) { - bhs[n] = sb_getblk(sb, blknr); - if (!bhs[n]) { - err = -ENOMEM; - goto release_bhs; - } - memset(bhs[n]->b_data, 0, sb->s_blocksize); - exfat_update_bh(bhs[n], 0); - - n++; - blknr++; - - if (n == nr_bhs) { - if (IS_DIRSYNC(dir)) { - err = exfat_sync_bhs(bhs, n); - if (err) - goto release_bhs; + for (n = 0; n < nr_bhs && blknr < last_blknr; n++, blknr++) { + bhs[n] = sb_getblk(sb, blknr); + if (!bhs[n]) { + err = -ENOMEM; + goto release_bhs; } - - for (i = 0; i < n; i++) - brelse(bhs[i]); - n = 0; + memset(bhs[n]->b_data, 0, sb->s_blocksize); } - } - if (IS_DIRSYNC(dir)) { - err = exfat_sync_bhs(bhs, n); + err = exfat_update_bhs(bhs, n, IS_DIRSYNC(dir)); if (err) goto release_bhs; + + for (i = 0; i < n; i++) + brelse(bhs[i]); } - - for (i = 0; i < n; i++) - brelse(bhs[i]); - return 0; release_bhs: From 7018ec68f08249de17cb131b324d5a48e89ed898 Mon Sep 17 00:00:00 2001 From: Tetsuhiro Kohada Date: Fri, 31 Jul 2020 14:58:26 +0900 Subject: [PATCH 108/122] exfat: retain 'VolumeFlags' properly MediaFailure and VolumeDirty should be retained if these are set before mounting. In '3.1.13.3 Media Failure Field' of exfat specification describe: If, upon mounting a volume, the value of this field is 1, implementations which scan the entire volume for media failures and record all failures as "bad" clusters in the FAT (or otherwise resolve media failures) may clear the value of this field to 0. Therefore, We should not clear MediaFailure without scanning volume. In '8.1 Recommended Write Ordering' of exfat specification describe: Clear the value of the VolumeDirty field to 0, if its value prior to the first step was 0. Therefore, We should not clear VolumeDirty after mounting. Also rename ERR_MEDIUM to MEDIA_FAILURE. Signed-off-by: Tetsuhiro Kohada Signed-off-by: Namjae Jeon --- fs/exfat/exfat_fs.h | 6 ++++-- fs/exfat/exfat_raw.h | 5 ++--- fs/exfat/file.c | 4 ++-- fs/exfat/inode.c | 4 ++-- fs/exfat/namei.c | 20 ++++++++++---------- fs/exfat/super.c | 36 +++++++++++++++++++++++++++--------- 6 files changed, 47 insertions(+), 28 deletions(-) diff --git a/fs/exfat/exfat_fs.h b/fs/exfat/exfat_fs.h index cb51d6e83199..95d717f8620c 100644 --- a/fs/exfat/exfat_fs.h +++ b/fs/exfat/exfat_fs.h @@ -224,7 +224,8 @@ struct exfat_sb_info { unsigned int num_FAT_sectors; /* num of FAT sectors */ unsigned int root_dir; /* root dir cluster */ unsigned int dentries_per_clu; /* num of dentries per cluster */ - unsigned int vol_flag; /* volume dirty flag */ + unsigned int vol_flags; /* volume flags */ + unsigned int vol_flags_persistent; /* volume flags to retain */ struct buffer_head *boot_bh; /* buffer_head of BOOT sector */ unsigned int map_clu; /* allocation bitmap start cluster */ @@ -380,7 +381,8 @@ static inline int exfat_sector_to_cluster(struct exfat_sb_info *sbi, } /* super.c */ -int exfat_set_vol_flags(struct super_block *sb, unsigned short new_flag); +int exfat_set_volume_dirty(struct super_block *sb); +int exfat_clear_volume_dirty(struct super_block *sb); /* fatent.c */ #define exfat_get_next_cluster(sb, pclu) exfat_ent_get(sb, *(pclu), pclu) diff --git a/fs/exfat/exfat_raw.h b/fs/exfat/exfat_raw.h index 350ce59cc324..6aec6288e1f2 100644 --- a/fs/exfat/exfat_raw.h +++ b/fs/exfat/exfat_raw.h @@ -14,9 +14,8 @@ #define EXFAT_MAX_FILE_LEN 255 -#define VOL_CLEAN 0x0000 -#define VOL_DIRTY 0x0002 -#define ERR_MEDIUM 0x0004 +#define VOLUME_DIRTY 0x0002 +#define MEDIA_FAILURE 0x0004 #define EXFAT_EOF_CLUSTER 0xFFFFFFFFu #define EXFAT_BAD_CLUSTER 0xFFFFFFF7u diff --git a/fs/exfat/file.c b/fs/exfat/file.c index 6bdabfd4b134..f41f523a58ad 100644 --- a/fs/exfat/file.c +++ b/fs/exfat/file.c @@ -106,7 +106,7 @@ int __exfat_truncate(struct inode *inode, loff_t new_size) if (ei->type != TYPE_FILE && ei->type != TYPE_DIR) return -EPERM; - exfat_set_vol_flags(sb, VOL_DIRTY); + exfat_set_volume_dirty(sb); num_clusters_new = EXFAT_B_TO_CLU_ROUND_UP(i_size_read(inode), sbi); num_clusters_phys = @@ -220,7 +220,7 @@ int __exfat_truncate(struct inode *inode, loff_t new_size) if (exfat_free_cluster(inode, &clu)) return -EIO; - exfat_set_vol_flags(sb, VOL_CLEAN); + exfat_clear_volume_dirty(sb); return 0; } diff --git a/fs/exfat/inode.c b/fs/exfat/inode.c index f0160a7892a8..7f90204adef5 100644 --- a/fs/exfat/inode.c +++ b/fs/exfat/inode.c @@ -39,7 +39,7 @@ static int __exfat_write_inode(struct inode *inode, int sync) if (is_dir && ei->dir.dir == sbi->root_dir && ei->entry == -1) return 0; - exfat_set_vol_flags(sb, VOL_DIRTY); + exfat_set_volume_dirty(sb); /* get the directory entry of given file or directory */ es = exfat_get_dentry_set(sb, &(ei->dir), ei->entry, ES_ALL_ENTRIES); @@ -167,7 +167,7 @@ static int exfat_map_cluster(struct inode *inode, unsigned int clu_offset, } if (*clu == EXFAT_EOF_CLUSTER) { - exfat_set_vol_flags(sb, VOL_DIRTY); + exfat_set_volume_dirty(sb); new_clu.dir = (last_clu == EXFAT_EOF_CLUSTER) ? EXFAT_EOF_CLUSTER : last_clu + 1; diff --git a/fs/exfat/namei.c b/fs/exfat/namei.c index 126ed3ba8f47..e73f20f66cb2 100644 --- a/fs/exfat/namei.c +++ b/fs/exfat/namei.c @@ -562,10 +562,10 @@ static int exfat_create(struct inode *dir, struct dentry *dentry, umode_t mode, int err; mutex_lock(&EXFAT_SB(sb)->s_lock); - exfat_set_vol_flags(sb, VOL_DIRTY); + exfat_set_volume_dirty(sb); err = exfat_add_entry(dir, dentry->d_name.name, &cdir, TYPE_FILE, &info); - exfat_set_vol_flags(sb, VOL_CLEAN); + exfat_clear_volume_dirty(sb); if (err) goto unlock; @@ -834,7 +834,7 @@ static int exfat_unlink(struct inode *dir, struct dentry *dentry) num_entries++; brelse(bh); - exfat_set_vol_flags(sb, VOL_DIRTY); + exfat_set_volume_dirty(sb); /* update the directory entry */ if (exfat_remove_entries(dir, &cdir, entry, 0, num_entries)) { err = -EIO; @@ -843,7 +843,7 @@ static int exfat_unlink(struct inode *dir, struct dentry *dentry) /* This doesn't modify ei */ ei->dir.dir = DIR_DELETED; - exfat_set_vol_flags(sb, VOL_CLEAN); + exfat_clear_volume_dirty(sb); inode_inc_iversion(dir); dir->i_mtime = dir->i_atime = current_time(dir); @@ -873,10 +873,10 @@ static int exfat_mkdir(struct inode *dir, struct dentry *dentry, umode_t mode) int err; mutex_lock(&EXFAT_SB(sb)->s_lock); - exfat_set_vol_flags(sb, VOL_DIRTY); + exfat_set_volume_dirty(sb); err = exfat_add_entry(dir, dentry->d_name.name, &cdir, TYPE_DIR, &info); - exfat_set_vol_flags(sb, VOL_CLEAN); + exfat_clear_volume_dirty(sb); if (err) goto unlock; @@ -1001,14 +1001,14 @@ static int exfat_rmdir(struct inode *dir, struct dentry *dentry) num_entries++; brelse(bh); - exfat_set_vol_flags(sb, VOL_DIRTY); + exfat_set_volume_dirty(sb); err = exfat_remove_entries(dir, &cdir, entry, 0, num_entries); if (err) { exfat_err(sb, "failed to exfat_remove_entries : err(%d)", err); goto unlock; } ei->dir.dir = DIR_DELETED; - exfat_set_vol_flags(sb, VOL_CLEAN); + exfat_clear_volume_dirty(sb); inode_inc_iversion(dir); dir->i_mtime = dir->i_atime = current_time(dir); @@ -1300,7 +1300,7 @@ static int __exfat_rename(struct inode *old_parent_inode, if (ret) goto out; - exfat_set_vol_flags(sb, VOL_DIRTY); + exfat_set_volume_dirty(sb); if (olddir.dir == newdir.dir) ret = exfat_rename_file(new_parent_inode, &olddir, dentry, @@ -1355,7 +1355,7 @@ static int __exfat_rename(struct inode *old_parent_inode, */ new_ei->dir.dir = DIR_DELETED; } - exfat_set_vol_flags(sb, VOL_CLEAN); + exfat_clear_volume_dirty(sb); out: return ret; } diff --git a/fs/exfat/super.c b/fs/exfat/super.c index b5bf6dedbe11..3b6a1659892f 100644 --- a/fs/exfat/super.c +++ b/fs/exfat/super.c @@ -63,7 +63,7 @@ static int exfat_sync_fs(struct super_block *sb, int wait) /* If there are some dirty buffers in the bdev inode */ mutex_lock(&sbi->s_lock); sync_blockdev(sb->s_bdev); - if (exfat_set_vol_flags(sb, VOL_CLEAN)) + if (exfat_clear_volume_dirty(sb)) err = -EIO; mutex_unlock(&sbi->s_lock); return err; @@ -96,17 +96,20 @@ static int exfat_statfs(struct dentry *dentry, struct kstatfs *buf) return 0; } -int exfat_set_vol_flags(struct super_block *sb, unsigned short new_flag) +static int exfat_set_vol_flags(struct super_block *sb, unsigned short new_flags) { struct exfat_sb_info *sbi = EXFAT_SB(sb); struct boot_sector *p_boot = (struct boot_sector *)sbi->boot_bh->b_data; bool sync; + /* retain persistent-flags */ + new_flags |= sbi->vol_flags_persistent; + /* flags are not changed */ - if (sbi->vol_flag == new_flag) + if (sbi->vol_flags == new_flags) return 0; - sbi->vol_flag = new_flag; + sbi->vol_flags = new_flags; /* skip updating volume dirty flag, * if this volume has been mounted with read-only @@ -114,9 +117,9 @@ int exfat_set_vol_flags(struct super_block *sb, unsigned short new_flag) if (sb_rdonly(sb)) return 0; - p_boot->vol_flags = cpu_to_le16(new_flag); + p_boot->vol_flags = cpu_to_le16(new_flags); - if (new_flag == VOL_DIRTY && !buffer_dirty(sbi->boot_bh)) + if ((new_flags & VOLUME_DIRTY) && !buffer_dirty(sbi->boot_bh)) sync = true; else sync = false; @@ -129,6 +132,20 @@ int exfat_set_vol_flags(struct super_block *sb, unsigned short new_flag) return 0; } +int exfat_set_volume_dirty(struct super_block *sb) +{ + struct exfat_sb_info *sbi = EXFAT_SB(sb); + + return exfat_set_vol_flags(sb, sbi->vol_flags | VOLUME_DIRTY); +} + +int exfat_clear_volume_dirty(struct super_block *sb) +{ + struct exfat_sb_info *sbi = EXFAT_SB(sb); + + return exfat_set_vol_flags(sb, sbi->vol_flags & ~VOLUME_DIRTY); +} + static int exfat_show_options(struct seq_file *m, struct dentry *root) { struct super_block *sb = root->d_sb; @@ -457,7 +474,8 @@ static int exfat_read_boot_sector(struct super_block *sb) sbi->dentries_per_clu = 1 << (sbi->cluster_size_bits - DENTRY_SIZE_BITS); - sbi->vol_flag = le16_to_cpu(p_boot->vol_flags); + sbi->vol_flags = le16_to_cpu(p_boot->vol_flags); + sbi->vol_flags_persistent = sbi->vol_flags & (VOLUME_DIRTY | MEDIA_FAILURE); sbi->clu_srch_ptr = EXFAT_FIRST_CLUSTER; sbi->used_clusters = EXFAT_CLUSTERS_UNTRACKED; @@ -472,9 +490,9 @@ static int exfat_read_boot_sector(struct super_block *sb) exfat_err(sb, "bogus data start sector"); return -EINVAL; } - if (sbi->vol_flag & VOL_DIRTY) + if (sbi->vol_flags & VOLUME_DIRTY) exfat_warn(sb, "Volume was not properly unmounted. Some data may be corrupt. Please run fsck."); - if (sbi->vol_flag & ERR_MEDIUM) + if (sbi->vol_flags & MEDIA_FAILURE) exfat_warn(sb, "Medium has reported failures. Some data may be lost."); /* exFAT file size is limited by a disk volume size */ From c57dd1f2f6a7cd1bb61802344f59ccdc5278c983 Mon Sep 17 00:00:00 2001 From: Qu Wenruo Date: Fri, 31 Jul 2020 19:29:11 +0800 Subject: [PATCH 109/122] btrfs: trim: fix underflow in trim length to prevent access beyond device boundary [BUG] The following script can lead to tons of beyond device boundary access: mkfs.btrfs -f $dev -b 10G mount $dev $mnt trimfs $mnt btrfs filesystem resize 1:-1G $mnt trimfs $mnt [CAUSE] Since commit 929be17a9b49 ("btrfs: Switch btrfs_trim_free_extents to find_first_clear_extent_bit"), we try to avoid trimming ranges that's already trimmed. So we check device->alloc_state by finding the first range which doesn't have CHUNK_TRIMMED and CHUNK_ALLOCATED not set. But if we shrunk the device, that bits are not cleared, thus we could easily got a range starts beyond the shrunk device size. This results the returned @start and @end are all beyond device size, then we call "end = min(end, device->total_bytes -1);" making @end smaller than device size. Then finally we goes "len = end - start + 1", totally underflow the result, and lead to the beyond-device-boundary access. [FIX] This patch will fix the problem in two ways: - Clear CHUNK_TRIMMED | CHUNK_ALLOCATED bits when shrinking device This is the root fix - Add extra safety check when trimming free device extents We check and warn if the returned range is already beyond current device. Link: https://github.com/kdave/btrfs-progs/issues/282 Fixes: 929be17a9b49 ("btrfs: Switch btrfs_trim_free_extents to find_first_clear_extent_bit") CC: stable@vger.kernel.org # 5.4+ Signed-off-by: Qu Wenruo Reviewed-by: Filipe Manana Signed-off-by: David Sterba --- fs/btrfs/extent-io-tree.h | 2 ++ fs/btrfs/extent-tree.c | 14 ++++++++++++++ fs/btrfs/volumes.c | 4 ++++ 3 files changed, 20 insertions(+) diff --git a/fs/btrfs/extent-io-tree.h b/fs/btrfs/extent-io-tree.h index f39d47a2d01a..219a09a2b734 100644 --- a/fs/btrfs/extent-io-tree.h +++ b/fs/btrfs/extent-io-tree.h @@ -34,6 +34,8 @@ struct io_failure_record; */ #define CHUNK_ALLOCATED EXTENT_DIRTY #define CHUNK_TRIMMED EXTENT_DEFRAG +#define CHUNK_STATE_MASK (CHUNK_ALLOCATED | \ + CHUNK_TRIMMED) enum { IO_TREE_FS_PINNED_EXTENTS, diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c index 61ede335f6c3..de6fe176fdfb 100644 --- a/fs/btrfs/extent-tree.c +++ b/fs/btrfs/extent-tree.c @@ -33,6 +33,7 @@ #include "delalloc-space.h" #include "block-group.h" #include "discard.h" +#include "rcu-string.h" #undef SCRAMBLE_DELAYED_REFS @@ -5668,6 +5669,19 @@ static int btrfs_trim_free_extents(struct btrfs_device *device, u64 *trimmed) &start, &end, CHUNK_TRIMMED | CHUNK_ALLOCATED); + /* Check if there are any CHUNK_* bits left */ + if (start > device->total_bytes) { + WARN_ON(IS_ENABLED(CONFIG_BTRFS_DEBUG)); + btrfs_warn_in_rcu(fs_info, +"ignoring attempt to trim beyond device size: offset %llu length %llu device %s device size %llu", + start, end - start + 1, + rcu_str_deref(device->name), + device->total_bytes); + mutex_unlock(&fs_info->chunk_mutex); + ret = 0; + break; + } + /* Ensure we skip the reserved area in the first 1M */ start = max_t(u64, start, SZ_1M); diff --git a/fs/btrfs/volumes.c b/fs/btrfs/volumes.c index d7670e2a9f39..ee96c5869f57 100644 --- a/fs/btrfs/volumes.c +++ b/fs/btrfs/volumes.c @@ -4720,6 +4720,10 @@ int btrfs_shrink_device(struct btrfs_device *device, u64 new_size) } mutex_lock(&fs_info->chunk_mutex); + /* Clear all state bits beyond the shrunk device size */ + clear_extent_bits(&device->alloc_state, new_size, (u64)-1, + CHUNK_STATE_MASK); + btrfs_device_set_disk_total_bytes(device, new_size); if (list_empty(&device->post_commit_list)) list_add_tail(&device->post_commit_list, From 41077c99026643c633a1f79376d369fffc757e06 Mon Sep 17 00:00:00 2001 From: Edward Cree Date: Wed, 12 Aug 2020 10:32:49 +0100 Subject: [PATCH 110/122] sfc: fix ef100 design-param checking The handling of the RXQ/TXQ size granularity design-params had two problems: it had a 64-bit divide that didn't build on 32-bit platforms, and it could divide by zero if the NIC supplied 0 as the value of the design-param. Fix both by checking for 0 and for a granularity bigger than our min-size; if the granularity <= EFX_MIN_DMAQ_SIZE then it fits in 32 bits, so we can cast it to u32 for the divide. Reported-by: kernel test robot Signed-off-by: Edward Cree Reviewed-by: Guenter Roeck Signed-off-by: David S. Miller --- drivers/net/ethernet/sfc/ef100_nic.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/net/ethernet/sfc/ef100_nic.c b/drivers/net/ethernet/sfc/ef100_nic.c index 36598d0542ed..206d70f9d95b 100644 --- a/drivers/net/ethernet/sfc/ef100_nic.c +++ b/drivers/net/ethernet/sfc/ef100_nic.c @@ -979,7 +979,8 @@ static int ef100_process_design_param(struct efx_nic *efx, * EFX_MIN_DMAQ_SIZE is divisible by GRANULARITY. * This is very unlikely to fail. */ - if (EFX_MIN_DMAQ_SIZE % reader->value) { + if (!reader->value || reader->value > EFX_MIN_DMAQ_SIZE || + EFX_MIN_DMAQ_SIZE % (u32)reader->value) { netif_err(efx, probe, efx->net_dev, "%s size granularity is %llu, can't guarantee safety\n", reader->type == ESE_EF100_DP_GZ_RXQ_SIZE_GRANULARITY ? "RXQ" : "TXQ", From 1980c05844830a44708c98c96d600833aa3fae08 Mon Sep 17 00:00:00 2001 From: Stefano Garzarella Date: Wed, 12 Aug 2020 14:56:02 +0200 Subject: [PATCH 111/122] vsock: fix potential null pointer dereference in vsock_poll() syzbot reported this issue where in the vsock_poll() we find the socket state at TCP_ESTABLISHED, but 'transport' is null: general protection fault, probably for non-canonical address 0xdffffc0000000012: 0000 [#1] PREEMPT SMP KASAN KASAN: null-ptr-deref in range [0x0000000000000090-0x0000000000000097] CPU: 0 PID: 8227 Comm: syz-executor.2 Not tainted 5.8.0-rc7-syzkaller #0 Hardware name: Google Google Compute Engine/Google Compute Engine, BIOS Google 01/01/2011 RIP: 0010:vsock_poll+0x75a/0x8e0 net/vmw_vsock/af_vsock.c:1038 Call Trace: sock_poll+0x159/0x460 net/socket.c:1266 vfs_poll include/linux/poll.h:90 [inline] do_pollfd fs/select.c:869 [inline] do_poll fs/select.c:917 [inline] do_sys_poll+0x607/0xd40 fs/select.c:1011 __do_sys_poll fs/select.c:1069 [inline] __se_sys_poll fs/select.c:1057 [inline] __x64_sys_poll+0x18c/0x440 fs/select.c:1057 do_syscall_64+0x60/0xe0 arch/x86/entry/common.c:384 entry_SYSCALL_64_after_hwframe+0x44/0xa9 This issue can happen if the TCP_ESTABLISHED state is set after we read the vsk->transport in the vsock_poll(). We could put barriers to synchronize, but this can only happen during connection setup, so we can simply check that 'transport' is valid. Fixes: c0cfa2d8a788 ("vsock: add multi-transports support") Reported-and-tested-by: syzbot+a61bac2fcc1a7c6623fe@syzkaller.appspotmail.com Signed-off-by: Stefano Garzarella Reviewed-by: Jorgen Hansen Signed-off-by: David S. Miller --- net/vmw_vsock/af_vsock.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/vmw_vsock/af_vsock.c b/net/vmw_vsock/af_vsock.c index 27bbcfad9c17..9e93bc201cc0 100644 --- a/net/vmw_vsock/af_vsock.c +++ b/net/vmw_vsock/af_vsock.c @@ -1032,7 +1032,7 @@ static __poll_t vsock_poll(struct file *file, struct socket *sock, } /* Connected sockets that can produce data can be written. */ - if (sk->sk_state == TCP_ESTABLISHED) { + if (transport && sk->sk_state == TCP_ESTABLISHED) { if (!(sk->sk_shutdown & SEND_SHUTDOWN)) { bool space_avail_now = false; int ret = transport->notify_poll_out( From 06a7a37be55e29961c9ba2abec4d07c8e0e21861 Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Wed, 12 Aug 2020 21:08:53 +0200 Subject: [PATCH 112/122] ipv4: tunnel: fix compilation on ARCH=um With certain configurations, a 64-bit ARCH=um errors out here with an unknown csum_ipv6_magic() function. Include the right header file to always have it. Signed-off-by: Johannes Berg Signed-off-by: David S. Miller --- net/ipv4/ip_tunnel_core.c | 1 + 1 file changed, 1 insertion(+) diff --git a/net/ipv4/ip_tunnel_core.c b/net/ipv4/ip_tunnel_core.c index 75c6013ff9a4..5cb30b7aa752 100644 --- a/net/ipv4/ip_tunnel_core.c +++ b/net/ipv4/ip_tunnel_core.c @@ -38,6 +38,7 @@ #include #include #include +#include const struct ip_tunnel_encap_ops __rcu * iptun_encaps[MAX_IPTUN_ENCAP_OPS] __read_mostly; From b1eef236f50ba6afea680da039ef3a2ca9c43d11 Mon Sep 17 00:00:00 2001 From: Dhananjay Phadke Date: Mon, 10 Aug 2020 17:42:40 -0700 Subject: [PATCH 113/122] i2c: iproc: fix race between client unreg and isr When i2c client unregisters, synchronize irq before setting iproc_i2c->slave to NULL. (1) disable_irq() (2) Mask event enable bits in control reg (3) Erase slave address (avoid further writes to rx fifo) (4) Flush tx and rx FIFOs (5) Clear pending event (interrupt) bits in status reg (6) enable_irq() (7) Set client pointer to NULL Unable to handle kernel NULL pointer dereference at virtual address 0000000000000318 [ 371.020421] pc : bcm_iproc_i2c_isr+0x530/0x11f0 [ 371.025098] lr : __handle_irq_event_percpu+0x6c/0x170 [ 371.030309] sp : ffff800010003e40 [ 371.033727] x29: ffff800010003e40 x28: 0000000000000060 [ 371.039206] x27: ffff800010ca9de0 x26: ffff800010f895df [ 371.044686] x25: ffff800010f18888 x24: ffff0008f7ff3600 [ 371.050165] x23: 0000000000000003 x22: 0000000001600000 [ 371.055645] x21: ffff800010f18888 x20: 0000000001600000 [ 371.061124] x19: ffff0008f726f080 x18: 0000000000000000 [ 371.066603] x17: 0000000000000000 x16: 0000000000000000 [ 371.072082] x15: 0000000000000000 x14: 0000000000000000 [ 371.077561] x13: 0000000000000000 x12: 0000000000000001 [ 371.083040] x11: 0000000000000000 x10: 0000000000000040 [ 371.088519] x9 : ffff800010f317c8 x8 : ffff800010f317c0 [ 371.093999] x7 : ffff0008f805b3b0 x6 : 0000000000000000 [ 371.099478] x5 : ffff0008f7ff36a4 x4 : ffff8008ee43d000 [ 371.104957] x3 : 0000000000000000 x2 : ffff8000107d64c0 [ 371.110436] x1 : 00000000c00000af x0 : 0000000000000000 [ 371.115916] Call trace: [ 371.118439] bcm_iproc_i2c_isr+0x530/0x11f0 [ 371.122754] __handle_irq_event_percpu+0x6c/0x170 [ 371.127606] handle_irq_event_percpu+0x34/0x88 [ 371.132189] handle_irq_event+0x40/0x120 [ 371.136234] handle_fasteoi_irq+0xcc/0x1a0 [ 371.140459] generic_handle_irq+0x24/0x38 [ 371.144594] __handle_domain_irq+0x60/0xb8 [ 371.148820] gic_handle_irq+0xc0/0x158 [ 371.152687] el1_irq+0xb8/0x140 [ 371.155927] arch_cpu_idle+0x10/0x18 [ 371.159615] do_idle+0x204/0x290 [ 371.162943] cpu_startup_entry+0x24/0x60 [ 371.166990] rest_init+0xb0/0xbc [ 371.170322] arch_call_rest_init+0xc/0x14 [ 371.174458] start_kernel+0x404/0x430 Fixes: c245d94ed106 ("i2c: iproc: Add multi byte read-write support for slave mode") Signed-off-by: Dhananjay Phadke Reviewed-by: Florian Fainelli Acked-by: Ray Jui Signed-off-by: Wolfram Sang --- drivers/i2c/busses/i2c-bcm-iproc.c | 13 ++++++++++++- 1 file changed, 12 insertions(+), 1 deletion(-) diff --git a/drivers/i2c/busses/i2c-bcm-iproc.c b/drivers/i2c/busses/i2c-bcm-iproc.c index 8a3c98866fb7..688e92818821 100644 --- a/drivers/i2c/busses/i2c-bcm-iproc.c +++ b/drivers/i2c/busses/i2c-bcm-iproc.c @@ -1078,7 +1078,7 @@ static int bcm_iproc_i2c_unreg_slave(struct i2c_client *slave) if (!iproc_i2c->slave) return -EINVAL; - iproc_i2c->slave = NULL; + disable_irq(iproc_i2c->irq); /* disable all slave interrupts */ tmp = iproc_i2c_rd_reg(iproc_i2c, IE_OFFSET); @@ -1091,6 +1091,17 @@ static int bcm_iproc_i2c_unreg_slave(struct i2c_client *slave) tmp &= ~BIT(S_CFG_EN_NIC_SMB_ADDR3_SHIFT); iproc_i2c_wr_reg(iproc_i2c, S_CFG_SMBUS_ADDR_OFFSET, tmp); + /* flush TX/RX FIFOs */ + tmp = (BIT(S_FIFO_RX_FLUSH_SHIFT) | BIT(S_FIFO_TX_FLUSH_SHIFT)); + iproc_i2c_wr_reg(iproc_i2c, S_FIFO_CTRL_OFFSET, tmp); + + /* clear all pending slave interrupts */ + iproc_i2c_wr_reg(iproc_i2c, IS_OFFSET, ISR_MASK_SLAVE); + + iproc_i2c->slave = NULL; + + enable_irq(iproc_i2c->irq); + return 0; } From 592d751c1e174df5ff219946908b005eb48934b3 Mon Sep 17 00:00:00 2001 From: Jonathan McDowell Date: Wed, 12 Aug 2020 20:37:01 +0100 Subject: [PATCH 114/122] net: stmmac: dwmac1000: provide multicast filter fallback If we don't have a hardware multicast filter available then instead of silently failing to listen for the requested ethernet broadcast addresses fall back to receiving all multicast packets, in a similar fashion to other drivers with no multicast filter. Cc: stable@vger.kernel.org Signed-off-by: Jonathan McDowell Signed-off-by: David S. Miller --- drivers/net/ethernet/stmicro/stmmac/dwmac1000_core.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/net/ethernet/stmicro/stmmac/dwmac1000_core.c b/drivers/net/ethernet/stmicro/stmmac/dwmac1000_core.c index efc6ec1b8027..fc8759f146c7 100644 --- a/drivers/net/ethernet/stmicro/stmmac/dwmac1000_core.c +++ b/drivers/net/ethernet/stmicro/stmmac/dwmac1000_core.c @@ -164,6 +164,9 @@ static void dwmac1000_set_filter(struct mac_device_info *hw, value = GMAC_FRAME_FILTER_PR | GMAC_FRAME_FILTER_PCF; } else if (dev->flags & IFF_ALLMULTI) { value = GMAC_FRAME_FILTER_PM; /* pass all multi */ + } else if (!netdev_mc_empty(dev) && (mcbitslog2 == 0)) { + /* Fall back to all multicast if we've no filter */ + value = GMAC_FRAME_FILTER_PM; } else if (!netdev_mc_empty(dev)) { struct netdev_hw_addr *ha; From df43dd526e6609769ae513a81443c7aa727c8ca3 Mon Sep 17 00:00:00 2001 From: Jonathan McDowell Date: Wed, 12 Aug 2020 20:37:23 +0100 Subject: [PATCH 115/122] net: ethernet: stmmac: Disable hardware multicast filter The IPQ806x does not appear to have a functional multicast ethernet address filter. This was observed as a failure to correctly receive IPv6 packets on a LAN to the all stations address. Checking the vendor driver shows that it does not attempt to enable the multicast filter and instead falls back to receiving all multicast packets, internally setting ALLMULTI. Use the new fallback support in the dwmac1000 driver to correctly achieve the same with the mainline IPQ806x driver. Confirmed to fix IPv6 functionality on an RB3011 router. Cc: stable@vger.kernel.org Signed-off-by: Jonathan McDowell Signed-off-by: David S. Miller --- drivers/net/ethernet/stmicro/stmmac/dwmac-ipq806x.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/net/ethernet/stmicro/stmmac/dwmac-ipq806x.c b/drivers/net/ethernet/stmicro/stmmac/dwmac-ipq806x.c index 02102c781a8c..bf3250e0e59c 100644 --- a/drivers/net/ethernet/stmicro/stmmac/dwmac-ipq806x.c +++ b/drivers/net/ethernet/stmicro/stmmac/dwmac-ipq806x.c @@ -351,6 +351,7 @@ static int ipq806x_gmac_probe(struct platform_device *pdev) plat_dat->has_gmac = true; plat_dat->bsp_priv = gmac; plat_dat->fix_mac_speed = ipq806x_gmac_fix_mac_speed; + plat_dat->multicast_filter_bins = 0; err = stmmac_dvr_probe(&pdev->dev, plat_dat, &stmmac_res); if (err) From 2e0d8fef5f76bce0887c73b824d9e625a08e7406 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Tue, 11 Aug 2020 18:34:40 -0700 Subject: [PATCH 116/122] net: accept an empty mask in /sys/class/net/*/queues/rx-*/rps_cpus MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit We must accept an empty mask in store_rps_map(), or we are not able to disable RPS on a queue. Fixes: 07bbecb34106 ("net: Restrict receive packets queuing to housekeeping CPUs") Signed-off-by: Eric Dumazet Reported-by: Maciej Żenczykowski Cc: Alex Belits Cc: Nitesh Narayan Lal Cc: Peter Zijlstra (Intel) Reviewed-by: Maciej Żenczykowski Acked-by: Peter Zijlstra (Intel) Acked-by: Nitesh Narayan Lal Signed-off-by: David S. Miller --- net/core/net-sysfs.c | 12 +++++++----- 1 file changed, 7 insertions(+), 5 deletions(-) diff --git a/net/core/net-sysfs.c b/net/core/net-sysfs.c index 9de33b594ff2..efec66fa78b7 100644 --- a/net/core/net-sysfs.c +++ b/net/core/net-sysfs.c @@ -757,11 +757,13 @@ static ssize_t store_rps_map(struct netdev_rx_queue *queue, return err; } - hk_flags = HK_FLAG_DOMAIN | HK_FLAG_WQ; - cpumask_and(mask, mask, housekeeping_cpumask(hk_flags)); - if (cpumask_empty(mask)) { - free_cpumask_var(mask); - return -EINVAL; + if (!cpumask_empty(mask)) { + hk_flags = HK_FLAG_DOMAIN | HK_FLAG_WQ; + cpumask_and(mask, mask, housekeeping_cpumask(hk_flags)); + if (cpumask_empty(mask)) { + free_cpumask_var(mask); + return -EINVAL; + } } map = kzalloc(max_t(unsigned int, From 9643609423c7667fb748cc3ccff023d761d0ac90 Mon Sep 17 00:00:00 2001 From: "David S. Miller" Date: Wed, 12 Aug 2020 13:26:37 -0700 Subject: [PATCH 117/122] Revert "ipv4: tunnel: fix compilation on ARCH=um" This reverts commit 06a7a37be55e29961c9ba2abec4d07c8e0e21861. The bug was already fixed, this added a dup include. Reported-by: Eric Dumazet Signed-off-by: David S. Miller --- net/ipv4/ip_tunnel_core.c | 1 - 1 file changed, 1 deletion(-) diff --git a/net/ipv4/ip_tunnel_core.c b/net/ipv4/ip_tunnel_core.c index 5cb30b7aa752..75c6013ff9a4 100644 --- a/net/ipv4/ip_tunnel_core.c +++ b/net/ipv4/ip_tunnel_core.c @@ -38,7 +38,6 @@ #include #include #include -#include const struct ip_tunnel_encap_ops __rcu * iptun_encaps[MAX_IPTUN_ENCAP_OPS] __read_mostly; From 9f457179244a1c0316546b1760f8993d0d718861 Mon Sep 17 00:00:00 2001 From: Johannes Weiner Date: Thu, 13 Aug 2020 10:40:54 -0400 Subject: [PATCH 118/122] mm: memcontrol: fix warning when allocating the root cgroup Commit 3e38e0aaca9e ("mm: memcg: charge memcg percpu memory to the parent cgroup") adds memory tracking to the memcg kernel structures themselves to make cgroups liable for the memory they are consuming through the allocation of child groups (which can be significant). This code is a bit awkward as it's spread out through several functions: The outermost function does memalloc_use_memcg(parent) to set up current->active_memcg, which designates which cgroup to charge, and the inner functions pass GFP_ACCOUNT to request charging for specific allocations. To make sure this dependency is satisfied at all times - to make sure we don't randomly charge whoever is calling the functions - the inner functions warn on !current->active_memcg. However, this triggers a false warning when the root memcg itself is allocated. No parent exists in this case, and so current->active_memcg is rightfully NULL. It's a false positive, not indicative of a bug. Delete the warnings for now, we can revisit this later. Fixes: 3e38e0aaca9e ("mm: memcg: charge memcg percpu memory to the parent cgroup") Signed-off-by: Johannes Weiner Reported-by: Stephen Rothwell Acked-by: Roman Gushchin Cc: Andrew Morton Signed-off-by: Linus Torvalds --- mm/memcontrol.c | 6 ------ 1 file changed, 6 deletions(-) diff --git a/mm/memcontrol.c b/mm/memcontrol.c index d59fd9af6e63..9d87082e64aa 100644 --- a/mm/memcontrol.c +++ b/mm/memcontrol.c @@ -5137,9 +5137,6 @@ static int alloc_mem_cgroup_per_node_info(struct mem_cgroup *memcg, int node) if (!pn) return 1; - /* We charge the parent cgroup, never the current task */ - WARN_ON_ONCE(!current->active_memcg); - pn->lruvec_stat_local = alloc_percpu_gfp(struct lruvec_stat, GFP_KERNEL_ACCOUNT); if (!pn->lruvec_stat_local) { @@ -5222,9 +5219,6 @@ static struct mem_cgroup *mem_cgroup_alloc(void) goto fail; } - /* We charge the parent cgroup, never the current task */ - WARN_ON_ONCE(!current->active_memcg); - memcg->vmstats_local = alloc_percpu_gfp(struct memcg_vmstats_percpu, GFP_KERNEL_ACCOUNT); if (!memcg->vmstats_local) From 1edcd4675e44dc0e6e3b34e8e29000d8a05f998c Mon Sep 17 00:00:00 2001 From: Huang Shijie Date: Thu, 13 Aug 2020 14:02:20 +0800 Subject: [PATCH 119/122] Documentation/locking/locktypes: fix the typo We have three categories locks, not two. Signed-off-by: Huang Shijie Acked-by: Will Deacon Link: https://lore.kernel.org/r/20200813060220.18199-1-sjhuang@iluvatar.ai Signed-off-by: Jonathan Corbet --- Documentation/locking/locktypes.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Documentation/locking/locktypes.rst b/Documentation/locking/locktypes.rst index 1b577a8bf982..4cefed8048ca 100644 --- a/Documentation/locking/locktypes.rst +++ b/Documentation/locking/locktypes.rst @@ -10,7 +10,7 @@ Introduction ============ The kernel provides a variety of locking primitives which can be divided -into two categories: +into three categories: - Sleeping locks - CPU local locks From 94c7eb54c4b8e81618ec79f414fe1ca5767f9720 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Thu, 13 Aug 2020 10:06:43 -0700 Subject: [PATCH 120/122] random32: add a tracepoint for prandom_u32() There has been some heat around prandom_u32() lately, and some people were wondering if there was a simple way to determine how often it was used, before considering making it maybe 10 times more expensive. This tracepoint exports the generated pseudo random value. Tested: perf list | grep prandom_u32 random:prandom_u32 [Tracepoint event] perf record -a [-g] [-C1] -e random:prandom_u32 sleep 1 [ perf record: Woken up 0 times to write data ] [ perf record: Captured and wrote 259.748 MB perf.data (924087 samples) ] perf report --nochildren ... 97.67% ksoftirqd/1 [kernel.vmlinux] [k] prandom_u32 | ---prandom_u32 prandom_u32 | |--48.86%--tcp_v4_syn_recv_sock | tcp_check_req | tcp_v4_rcv | ... --48.81%--tcp_conn_request tcp_v4_conn_request tcp_rcv_state_process ... perf script Signed-off-by: Eric Dumazet Cc: Willy Tarreau Cc: Sedat Dilek Tested-by: Sedat Dilek Signed-off-by: David S. Miller --- include/trace/events/random.h | 17 +++++++++++++++++ lib/random32.c | 2 ++ 2 files changed, 19 insertions(+) diff --git a/include/trace/events/random.h b/include/trace/events/random.h index 32c10a515e2d..9570a10cb949 100644 --- a/include/trace/events/random.h +++ b/include/trace/events/random.h @@ -307,6 +307,23 @@ TRACE_EVENT(urandom_read, __entry->pool_left, __entry->input_left) ); +TRACE_EVENT(prandom_u32, + + TP_PROTO(unsigned int ret), + + TP_ARGS(ret), + + TP_STRUCT__entry( + __field( unsigned int, ret) + ), + + TP_fast_assign( + __entry->ret = ret; + ), + + TP_printk("ret=%u" , __entry->ret) +); + #endif /* _TRACE_RANDOM_H */ /* This part must be outside protection */ diff --git a/lib/random32.c b/lib/random32.c index 3d749abb9e80..932345323af0 100644 --- a/lib/random32.c +++ b/lib/random32.c @@ -39,6 +39,7 @@ #include #include #include +#include #ifdef CONFIG_RANDOM32_SELFTEST static void __init prandom_state_selftest(void); @@ -82,6 +83,7 @@ u32 prandom_u32(void) u32 res; res = prandom_u32_state(state); + trace_prandom_u32(res); put_cpu_var(net_rand_state); return res; From 88fd1cb80daa20af063bce81e1fad14e945a8dc4 Mon Sep 17 00:00:00 2001 From: John Ogness Date: Thu, 13 Aug 2020 21:45:25 +0206 Subject: [PATCH 121/122] af_packet: TPACKET_V3: fix fill status rwlock imbalance After @blk_fill_in_prog_lock is acquired there is an early out vnet situation that can occur. In that case, the rwlock needs to be released. Also, since @blk_fill_in_prog_lock is only acquired when @tp_version is exactly TPACKET_V3, only release it on that exact condition as well. And finally, add sparse annotation so that it is clearer that prb_fill_curr_block() and prb_clear_blk_fill_status() are acquiring and releasing @blk_fill_in_prog_lock, respectively. sparse is still unable to understand the balance, but the warnings are now on a higher level that make more sense. Fixes: 632ca50f2cbd ("af_packet: TPACKET_V3: replace busy-wait loop") Signed-off-by: John Ogness Reported-by: kernel test robot Signed-off-by: David S. Miller --- net/packet/af_packet.c | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/net/packet/af_packet.c b/net/packet/af_packet.c index 0b8160d1a6e0..479c257ded73 100644 --- a/net/packet/af_packet.c +++ b/net/packet/af_packet.c @@ -941,6 +941,7 @@ static int prb_queue_frozen(struct tpacket_kbdq_core *pkc) } static void prb_clear_blk_fill_status(struct packet_ring_buffer *rb) + __releases(&pkc->blk_fill_in_prog_lock) { struct tpacket_kbdq_core *pkc = GET_PBDQC_FROM_RB(rb); @@ -989,6 +990,7 @@ static void prb_fill_curr_block(char *curr, struct tpacket_kbdq_core *pkc, struct tpacket_block_desc *pbd, unsigned int len) + __acquires(&pkc->blk_fill_in_prog_lock) { struct tpacket3_hdr *ppd; @@ -2286,8 +2288,11 @@ static int tpacket_rcv(struct sk_buff *skb, struct net_device *dev, if (do_vnet && virtio_net_hdr_from_skb(skb, h.raw + macoff - sizeof(struct virtio_net_hdr), - vio_le(), true, 0)) + vio_le(), true, 0)) { + if (po->tp_version == TPACKET_V3) + prb_clear_blk_fill_status(&po->rx_ring); goto drop_n_account; + } if (po->tp_version <= TPACKET_V2) { packet_increment_rx_head(po, &po->rx_ring); @@ -2393,7 +2398,7 @@ static int tpacket_rcv(struct sk_buff *skb, struct net_device *dev, __clear_bit(slot_id, po->rx_ring.rx_owner_map); spin_unlock(&sk->sk_receive_queue.lock); sk->sk_data_ready(sk); - } else { + } else if (po->tp_version == TPACKET_V3) { prb_clear_blk_fill_status(&po->rx_ring); } From 1f3a090b9033f69de380c03db3ea1a1015c850cf Mon Sep 17 00:00:00 2001 From: Tonghao Zhang Date: Wed, 12 Aug 2020 17:56:39 +0800 Subject: [PATCH 122/122] net: openvswitch: introduce common code for flushing flows MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit To avoid some issues, for example RCU usage warning and double free, we should flush the flows under ovs_lock. This patch refactors table_instance_destroy and introduces table_instance_flow_flush which can be invoked by __dp_destroy or ovs_flow_tbl_flush. Fixes: 50b0e61b32ee ("net: openvswitch: fix possible memleak on destroy flow-table") Reported-by: Johan Knöös Reported-at: https://mail.openvswitch.org/pipermail/ovs-discuss/2020-August/050489.html Signed-off-by: Tonghao Zhang Reviewed-by: Cong Wang Signed-off-by: David S. Miller --- net/openvswitch/datapath.c | 10 +++++++++- net/openvswitch/flow_table.c | 35 +++++++++++++++-------------------- net/openvswitch/flow_table.h | 3 +++ 3 files changed, 27 insertions(+), 21 deletions(-) diff --git a/net/openvswitch/datapath.c b/net/openvswitch/datapath.c index 42f8cc70bb2c..6e47ef7ef036 100644 --- a/net/openvswitch/datapath.c +++ b/net/openvswitch/datapath.c @@ -1756,6 +1756,7 @@ static int ovs_dp_cmd_new(struct sk_buff *skb, struct genl_info *info) /* Called with ovs_mutex. */ static void __dp_destroy(struct datapath *dp) { + struct flow_table *table = &dp->table; int i; for (i = 0; i < DP_VPORT_HASH_BUCKETS; i++) { @@ -1774,7 +1775,14 @@ static void __dp_destroy(struct datapath *dp) */ ovs_dp_detach_port(ovs_vport_ovsl(dp, OVSP_LOCAL)); - /* RCU destroy the flow table */ + /* Flush sw_flow in the tables. RCU cb only releases resource + * such as dp, ports and tables. That may avoid some issues + * such as RCU usage warning. + */ + table_instance_flow_flush(table, ovsl_dereference(table->ti), + ovsl_dereference(table->ufid_ti)); + + /* RCU destroy the ports, meters and flow tables. */ call_rcu(&dp->rcu, destroy_dp_rcu); } diff --git a/net/openvswitch/flow_table.c b/net/openvswitch/flow_table.c index 8c12675cbb67..e2235849a57e 100644 --- a/net/openvswitch/flow_table.c +++ b/net/openvswitch/flow_table.c @@ -473,19 +473,15 @@ static void table_instance_flow_free(struct flow_table *table, flow_mask_remove(table, flow->mask); } -static void table_instance_destroy(struct flow_table *table, - struct table_instance *ti, - struct table_instance *ufid_ti, - bool deferred) +/* Must be called with OVS mutex held. */ +void table_instance_flow_flush(struct flow_table *table, + struct table_instance *ti, + struct table_instance *ufid_ti) { int i; - if (!ti) - return; - - BUG_ON(!ufid_ti); if (ti->keep_flows) - goto skip_flows; + return; for (i = 0; i < ti->n_buckets; i++) { struct sw_flow *flow; @@ -497,18 +493,16 @@ static void table_instance_destroy(struct flow_table *table, table_instance_flow_free(table, ti, ufid_ti, flow, false); - ovs_flow_free(flow, deferred); + ovs_flow_free(flow, true); } } +} -skip_flows: - if (deferred) { - call_rcu(&ti->rcu, flow_tbl_destroy_rcu_cb); - call_rcu(&ufid_ti->rcu, flow_tbl_destroy_rcu_cb); - } else { - __table_instance_destroy(ti); - __table_instance_destroy(ufid_ti); - } +static void table_instance_destroy(struct table_instance *ti, + struct table_instance *ufid_ti) +{ + call_rcu(&ti->rcu, flow_tbl_destroy_rcu_cb); + call_rcu(&ufid_ti->rcu, flow_tbl_destroy_rcu_cb); } /* No need for locking this function is called from RCU callback or @@ -523,7 +517,7 @@ void ovs_flow_tbl_destroy(struct flow_table *table) call_rcu(&mc->rcu, mask_cache_rcu_cb); call_rcu(&ma->rcu, mask_array_rcu_cb); - table_instance_destroy(table, ti, ufid_ti, false); + table_instance_destroy(ti, ufid_ti); } struct sw_flow *ovs_flow_tbl_dump_next(struct table_instance *ti, @@ -641,7 +635,8 @@ int ovs_flow_tbl_flush(struct flow_table *flow_table) flow_table->count = 0; flow_table->ufid_count = 0; - table_instance_destroy(flow_table, old_ti, old_ufid_ti, true); + table_instance_flow_flush(flow_table, old_ti, old_ufid_ti); + table_instance_destroy(old_ti, old_ufid_ti); return 0; err_free_ti: diff --git a/net/openvswitch/flow_table.h b/net/openvswitch/flow_table.h index 74ce48fecba9..6e7d4ac59353 100644 --- a/net/openvswitch/flow_table.h +++ b/net/openvswitch/flow_table.h @@ -105,5 +105,8 @@ void ovs_flow_mask_key(struct sw_flow_key *dst, const struct sw_flow_key *src, bool full, const struct sw_flow_mask *mask); void ovs_flow_masks_rebalance(struct flow_table *table); +void table_instance_flow_flush(struct flow_table *table, + struct table_instance *ti, + struct table_instance *ufid_ti); #endif /* flow_table.h */