From a3d13a0a23ea20a83d7148279a0ab68cc9a5962c Mon Sep 17 00:00:00 2001 From: "Alexander A. Klimov" Date: Thu, 16 Jul 2020 11:46:24 +0200 Subject: [PATCH 001/243] Replace HTTP links with HTTPS ones: DMI/SMBIOS SUPPORT Rationale: Reduces attack surface on kernel devs opening the links for MITM as HTTPS traffic is much harder to manipulate. Deterministic algorithm: For each file: If not .svg: For each line: If doesn't contain `\bxmlns\b`: For each link, `\bhttp://[^# \t\r\n]*(?:\w|/)`: If neither `\bgnu\.org/license`, nor `\bmozilla\.org/MPL\b`: If both the HTTP and HTTPS versions return 200 OK and serve the same content: Replace HTTP with HTTPS. Signed-off-by: Alexander A. Klimov Signed-off-by: Jean Delvare --- drivers/firmware/dmi_scan.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/firmware/dmi_scan.c b/drivers/firmware/dmi_scan.c index 5066d1f1d687..d51ca0428bb8 100644 --- a/drivers/firmware/dmi_scan.c +++ b/drivers/firmware/dmi_scan.c @@ -21,7 +21,7 @@ EXPORT_SYMBOL_GPL(dmi_kobj); /* * DMI stands for "Desktop Management Interface". It is part * of and an antecedent to, SMBIOS, which stands for System - * Management BIOS. See further: http://www.dmtf.org/standards + * Management BIOS. See further: https://www.dmtf.org/standards */ static const char dmi_empty_string[] = ""; From a4423cedc56fd16405240243bdfe6d02823cb26a Mon Sep 17 00:00:00 2001 From: Jean Delvare Date: Fri, 7 Aug 2020 15:00:05 +0200 Subject: [PATCH 002/243] eeprom: at24: Add support for the Sony VAIO EEPROMs Special handling of the Sony VAIO EEPROMs is the last feature of the legacy eeprom driver that the at24 driver does not support. Adding this would let us deprecate and eventually remove the legacy eeprom driver. So add the option to specify a post-processing callback function that is called after reading data from the EEPROM, before it is returned to the user. 
The 24c02-vaio type is the first use case of that option: the callback function will mask the sensitive data for non-root users exactly as the legacy eeprom driver was doing. Signed-off-by: Jean Delvare Cc: Bartosz Golaszewski Cc: Arnd Bergmann Cc: Greg Kroah-Hartman [Bartosz: removed a stray newline] Signed-off-by: Bartosz Golaszewski --- drivers/misc/eeprom/at24.c | 41 ++++++++++++++++++++++++++++++++++++++ 1 file changed, 41 insertions(+) diff --git a/drivers/misc/eeprom/at24.c b/drivers/misc/eeprom/at24.c index 2591c21b2b5d..fb0b8375d5ae 100644 --- a/drivers/misc/eeprom/at24.c +++ b/drivers/misc/eeprom/at24.c @@ -8,6 +8,7 @@ #include #include +#include #include #include #include @@ -89,6 +90,7 @@ struct at24_data { struct nvmem_device *nvmem; struct regulator *vcc_reg; + void (*read_post)(unsigned int off, char *buf, size_t count); /* * Some chips tie up multiple I2C addresses; dummy devices reserve @@ -121,6 +123,7 @@ MODULE_PARM_DESC(at24_write_timeout, "Time (in ms) to try writes (default 25)"); struct at24_chip_data { u32 byte_len; u8 flags; + void (*read_post)(unsigned int off, char *buf, size_t count); }; #define AT24_CHIP_DATA(_name, _len, _flags) \ @@ -128,6 +131,32 @@ struct at24_chip_data { .byte_len = _len, .flags = _flags, \ } +#define AT24_CHIP_DATA_CB(_name, _len, _flags, _read_post) \ + static const struct at24_chip_data _name = { \ + .byte_len = _len, .flags = _flags, \ + .read_post = _read_post, \ + } + +static void at24_read_post_vaio(unsigned int off, char *buf, size_t count) +{ + int i; + + if (capable(CAP_SYS_ADMIN)) + return; + + /* + * Hide VAIO private settings to regular users: + * - BIOS passwords: bytes 0x00 to 0x0f + * - UUID: bytes 0x10 to 0x1f + * - Serial number: 0xc0 to 0xdf + */ + for (i = 0; i < count; i++) { + if ((off + i <= 0x1f) || + (off + i >= 0xc0 && off + i <= 0xdf)) + buf[i] = 0; + } +} + /* needs 8 addresses as A0-A2 are ignored */ AT24_CHIP_DATA(at24_data_24c00, 128 / 8, AT24_FLAG_TAKE8ADDR); /* old variants can't be 
handled with this generic entry! */ @@ -144,6 +173,10 @@ AT24_CHIP_DATA(at24_data_24mac602, 64 / 8, /* spd is a 24c02 in memory DIMMs */ AT24_CHIP_DATA(at24_data_spd, 2048 / 8, AT24_FLAG_READONLY | AT24_FLAG_IRUGO); +/* 24c02_vaio is a 24c02 on some Sony laptops */ +AT24_CHIP_DATA_CB(at24_data_24c02_vaio, 2048 / 8, + AT24_FLAG_READONLY | AT24_FLAG_IRUGO, + at24_read_post_vaio); AT24_CHIP_DATA(at24_data_24c04, 4096 / 8, 0); AT24_CHIP_DATA(at24_data_24cs04, 16, AT24_FLAG_SERIAL | AT24_FLAG_READONLY); @@ -177,6 +210,7 @@ static const struct i2c_device_id at24_ids[] = { { "24mac402", (kernel_ulong_t)&at24_data_24mac402 }, { "24mac602", (kernel_ulong_t)&at24_data_24mac602 }, { "spd", (kernel_ulong_t)&at24_data_spd }, + { "24c02-vaio", (kernel_ulong_t)&at24_data_24c02_vaio }, { "24c04", (kernel_ulong_t)&at24_data_24c04 }, { "24cs04", (kernel_ulong_t)&at24_data_24cs04 }, { "24c08", (kernel_ulong_t)&at24_data_24c08 }, @@ -389,6 +423,9 @@ static int at24_read(void *priv, unsigned int off, void *val, size_t count) struct device *dev; char *buf = val; int ret; + unsigned int orig_off = off; + char *orig_buf = buf; + size_t orig_count = count; at24 = priv; dev = at24_base_client_dev(at24); @@ -427,6 +464,9 @@ static int at24_read(void *priv, unsigned int off, void *val, size_t count) pm_runtime_put(dev); + if (unlikely(at24->read_post)) + at24->read_post(orig_off, orig_buf, orig_count); + return 0; } @@ -654,6 +694,7 @@ static int at24_probe(struct i2c_client *client) at24->byte_len = byte_len; at24->page_size = page_size; at24->flags = flags; + at24->read_post = cdata->read_post; at24->num_addresses = num_addresses; at24->offset_adj = at24_get_offset_adj(flags, byte_len); at24->client[0].client = client; From c254bcd7231a3eeafc453f6ee3a483a2e7ff486e Mon Sep 17 00:00:00 2001 From: Victor Ding Date: Fri, 14 Aug 2020 19:17:30 +1000 Subject: [PATCH 003/243] rtc: cmos: zero-init wkalrm when reading from CMOS cmos_read_alarm() may leave certain fields of a struct rtc_wkalrm 
untouched; therefore, these fields contain garbage if not properly initialized, leading to inconsistent values when converting into time64_t. This patch zero-initializes the struct before calling cmos_read_alarm(). Note that this patch is not intended to produce a correct time64_t, it is only to produce a consistent value. In the case of suspend/resume, a correct time64_t is not necessary; a consistent value is sufficient to correctly perform an equality test for t_current_expires and t_saved_expires. Logic to deduce a correct time64_t is expensive and hence should be avoided. Signed-off-by: Victor Ding Signed-off-by: Alexandre Belloni Link: https://lore.kernel.org/r/20200814191654.v2.1.Iaf7638a2f2a87ff68d85fcb8dec615e41340c97f@changeid --- drivers/rtc/rtc-cmos.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/rtc/rtc-cmos.c b/drivers/rtc/rtc-cmos.c index bcc96ab7793f..c633319cdb91 100644 --- a/drivers/rtc/rtc-cmos.c +++ b/drivers/rtc/rtc-cmos.c @@ -1006,6 +1006,7 @@ static int cmos_suspend(struct device *dev) enable_irq_wake(cmos->irq); } + memset(&cmos->saved_wkalrm, 0, sizeof(struct rtc_wkalrm)); cmos_read_alarm(dev, &cmos->saved_wkalrm); dev_dbg(dev, "suspend%s, ctrl %02x\n", @@ -1054,6 +1055,7 @@ static void cmos_check_wkalrm(struct device *dev) return; } + memset(&current_alarm, 0, sizeof(struct rtc_wkalrm)); cmos_read_alarm(dev, &current_alarm); t_current_expires = rtc_tm_to_time64(&current_alarm.time); t_saved_expires = rtc_tm_to_time64(&cmos->saved_wkalrm.time); From fc9656a370499e5a32425b715f8fed241e832458 Mon Sep 17 00:00:00 2001 From: Geert Uytterhoeven Date: Fri, 14 Aug 2020 13:07:29 +0200 Subject: [PATCH 004/243] rtc: rtc-rs5c313: Drop obsolete platform_set_drvdata() call Commit 284e2fa1da00a998 ("rtc: rtc-rs5c313: use devm_rtc_device_register()"), removed the last user of the driver-specific data. Hence there is no longer a need to set it. 
Signed-off-by: Geert Uytterhoeven Signed-off-by: Alexandre Belloni Link: https://lore.kernel.org/r/20200814110731.29029-2-geert+renesas@glider.be --- drivers/rtc/rtc-rs5c313.c | 7 +------ 1 file changed, 1 insertion(+), 6 deletions(-) diff --git a/drivers/rtc/rtc-rs5c313.c b/drivers/rtc/rtc-rs5c313.c index 89f38e3e917d..00b5ef753935 100644 --- a/drivers/rtc/rtc-rs5c313.c +++ b/drivers/rtc/rtc-rs5c313.c @@ -369,12 +369,7 @@ static int rs5c313_rtc_probe(struct platform_device *pdev) struct rtc_device *rtc = devm_rtc_device_register(&pdev->dev, "rs5c313", &rs5c313_rtc_ops, THIS_MODULE); - if (IS_ERR(rtc)) - return PTR_ERR(rtc); - - platform_set_drvdata(pdev, rtc); - - return 0; + return PTR_ERR_OR_ZERO(rtc); } static struct platform_driver rs5c313_rtc_platform_driver = { From f65e727464d7c0090f05548e8f323779eaa97eda Mon Sep 17 00:00:00 2001 From: Geert Uytterhoeven Date: Fri, 14 Aug 2020 13:07:30 +0200 Subject: [PATCH 005/243] rtc: rtc-rs5c313: Fix late hardware init rs5c313_rtc_init() calls platform_driver_register(), and initializes the hardware. This is wrong because of two reasons: 1. As soon as the driver has been registered, the device may be probed. If devm_rtc_device_register() is called before hardware initialization, reading the current time will fail: rs5c313 rs5c313: rs5c313_rtc_read_time: timeout error rs5c313 rs5c313: registered as rtc0 rs5c313 rs5c313: rs5c313_rtc_read_time: timeout error rs5c313 rs5c313: hctosys: unable to read the hardware clock 2. If the platform device does not exist, the driver will still write to a hardware device that may not be present. Fix this by moving the hardware initialization sequence to the driver's .probe() method. 
Signed-off-by: Geert Uytterhoeven Signed-off-by: Alexandre Belloni Link: https://lore.kernel.org/r/20200814110731.29029-3-geert+renesas@glider.be --- drivers/rtc/rtc-rs5c313.c | 20 ++++++++------------ 1 file changed, 8 insertions(+), 12 deletions(-) diff --git a/drivers/rtc/rtc-rs5c313.c b/drivers/rtc/rtc-rs5c313.c index 00b5ef753935..af72e428b218 100644 --- a/drivers/rtc/rtc-rs5c313.c +++ b/drivers/rtc/rtc-rs5c313.c @@ -366,8 +366,13 @@ static const struct rtc_class_ops rs5c313_rtc_ops = { static int rs5c313_rtc_probe(struct platform_device *pdev) { - struct rtc_device *rtc = devm_rtc_device_register(&pdev->dev, "rs5c313", - &rs5c313_rtc_ops, THIS_MODULE); + struct rtc_device *rtc; + + rs5c313_init_port(); + rs5c313_check_xstp_bit(); + + rtc = devm_rtc_device_register(&pdev->dev, "rs5c313", &rs5c313_rtc_ops, + THIS_MODULE); return PTR_ERR_OR_ZERO(rtc); } @@ -381,16 +386,7 @@ static struct platform_driver rs5c313_rtc_platform_driver = { static int __init rs5c313_rtc_init(void) { - int err; - - err = platform_driver_register(&rs5c313_rtc_platform_driver); - if (err) - return err; - - rs5c313_init_port(); - rs5c313_check_xstp_bit(); - - return 0; + return platform_driver_register(&rs5c313_rtc_platform_driver); } static void __exit rs5c313_rtc_exit(void) From 163a512cd929d6db712a3021720362749653998b Mon Sep 17 00:00:00 2001 From: Geert Uytterhoeven Date: Fri, 14 Aug 2020 13:07:31 +0200 Subject: [PATCH 006/243] rtc: rtc-rs5c313: Convert to module_platform_driver() Reduce boilerplate by using the module_platform_driver() helper. 
Signed-off-by: Geert Uytterhoeven Signed-off-by: Alexandre Belloni Link: https://lore.kernel.org/r/20200814110731.29029-4-geert+renesas@glider.be --- drivers/rtc/rtc-rs5c313.c | 13 +------------ 1 file changed, 1 insertion(+), 12 deletions(-) diff --git a/drivers/rtc/rtc-rs5c313.c b/drivers/rtc/rtc-rs5c313.c index af72e428b218..e98f85f34206 100644 --- a/drivers/rtc/rtc-rs5c313.c +++ b/drivers/rtc/rtc-rs5c313.c @@ -384,18 +384,7 @@ static struct platform_driver rs5c313_rtc_platform_driver = { .probe = rs5c313_rtc_probe, }; -static int __init rs5c313_rtc_init(void) -{ - return platform_driver_register(&rs5c313_rtc_platform_driver); -} - -static void __exit rs5c313_rtc_exit(void) -{ - platform_driver_unregister(&rs5c313_rtc_platform_driver); -} - -module_init(rs5c313_rtc_init); -module_exit(rs5c313_rtc_exit); +module_platform_driver(rs5c313_rtc_platform_driver); MODULE_AUTHOR("kogiidena , Nobuhiro Iwamatsu "); MODULE_DESCRIPTION("Ricoh RS5C313 RTC device driver"); From 59ed0127155201863db49f3dc5fb41316433340a Mon Sep 17 00:00:00 2001 From: Chris Packham Date: Mon, 17 Aug 2020 11:57:31 +1200 Subject: [PATCH 007/243] rtc: ds1307: Ensure oscillator is enabled for DS1388 Similar to the other variants the DS1388 has a bit to stop the oscillator to reduce the power consumption from VBAT. Ensure that the oscillator is enabled when the system is up. 
Signed-off-by: Chris Packham Signed-off-by: Alexandre Belloni Link: https://lore.kernel.org/r/20200816235731.21071-1-chris.packham@alliedtelesis.co.nz --- drivers/rtc/rtc-ds1307.c | 14 ++++++++++++++ 1 file changed, 14 insertions(+) diff --git a/drivers/rtc/rtc-ds1307.c b/drivers/rtc/rtc-ds1307.c index 54c85cdd019d..2182f4e97c0a 100644 --- a/drivers/rtc/rtc-ds1307.c +++ b/drivers/rtc/rtc-ds1307.c @@ -153,6 +153,7 @@ enum ds_type { #define DS1388_REG_CONTROL 0x0c # define DS1388_BIT_RST BIT(0) # define DS1388_BIT_WDE BIT(1) +# define DS1388_BIT_nEOSC BIT(7) /* negative offset step is -2.034ppm */ #define M41TXX_NEG_OFFSET_STEP_PPB 2034 @@ -1881,6 +1882,19 @@ static int ds1307_probe(struct i2c_client *client, DS1307_REG_HOUR << 4 | 0x08, hour); } break; + case ds_1388: + err = regmap_read(ds1307->regmap, DS1388_REG_CONTROL, &tmp); + if (err) { + dev_dbg(ds1307->dev, "read error %d\n", err); + goto exit; + } + + /* oscillator off? turn it on, so clock can tick. */ + if (tmp & DS1388_BIT_nEOSC) { + tmp &= ~DS1388_BIT_nEOSC; + regmap_write(ds1307->regmap, DS1388_REG_CONTROL, tmp); + } + break; default: break; } From f471b05f76e4b1b6ba07ebc7681920a5c5b97c5d Mon Sep 17 00:00:00 2001 From: Chris Packham Date: Tue, 18 Aug 2020 13:35:43 +1200 Subject: [PATCH 008/243] rtc: ds1307: Clear OSF flag on DS1388 when setting time Ensure the OSF flag is cleared on the DS1388 when the clock is set. 
Fixes: df11b323b16f ("rtc: ds1307: handle oscillator failure flags for ds1388 variant") Signed-off-by: Chris Packham Signed-off-by: Alexandre Belloni Link: https://lore.kernel.org/r/20200818013543.4283-1-chris.packham@alliedtelesis.co.nz --- drivers/rtc/rtc-ds1307.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/drivers/rtc/rtc-ds1307.c b/drivers/rtc/rtc-ds1307.c index 2182f4e97c0a..8f4ddbaa2052 100644 --- a/drivers/rtc/rtc-ds1307.c +++ b/drivers/rtc/rtc-ds1307.c @@ -353,6 +353,10 @@ static int ds1307_set_time(struct device *dev, struct rtc_time *t) regmap_update_bits(ds1307->regmap, DS1340_REG_FLAG, DS1340_BIT_OSF, 0); break; + case ds_1388: + regmap_update_bits(ds1307->regmap, DS1388_REG_FLAG, + DS1388_BIT_OSF, 0); + break; case mcp794xx: /* * these bits were cleared when preparing the date/time From 0d982de3e27e8091dfa62368cd3eefbc7c17c8a2 Mon Sep 17 00:00:00 2001 From: Peng Ma Date: Tue, 18 Aug 2020 14:36:09 +0800 Subject: [PATCH 009/243] rtc: fsl-ftm-alarm: update acpi device id Original device id would conflict with crypto driver, change it. Signed-off-by: Peng Ma Signed-off-by: Ran Wang Signed-off-by: Alexandre Belloni Link: https://lore.kernel.org/r/20200818063609.39859-1-ran.wang_1@nxp.com --- drivers/rtc/rtc-fsl-ftm-alarm.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/rtc/rtc-fsl-ftm-alarm.c b/drivers/rtc/rtc-fsl-ftm-alarm.c index 68f0a1801a2e..48d3b38ea348 100644 --- a/drivers/rtc/rtc-fsl-ftm-alarm.c +++ b/drivers/rtc/rtc-fsl-ftm-alarm.c @@ -3,7 +3,7 @@ * Freescale FlexTimer Module (FTM) alarm device driver. * * Copyright 2014 Freescale Semiconductor, Inc. 
- * Copyright 2019 NXP + * Copyright 2019-2020 NXP * */ @@ -312,7 +312,7 @@ static const struct of_device_id ftm_rtc_match[] = { }; static const struct acpi_device_id ftm_imx_acpi_ids[] = { - {"NXP0011",}, + {"NXP0014",}, { } }; MODULE_DEVICE_TABLE(acpi, ftm_imx_acpi_ids); From ab70935d37bbd2637125bea489308b896b2756de Mon Sep 17 00:00:00 2001 From: Jarkko Nikula Date: Thu, 20 Aug 2020 11:05:25 +0300 Subject: [PATCH 010/243] i2c: Remove 'default n' from busses/Kconfig The default value for a config option defaults to 'n' so it doesn't need to be set here. Signed-off-by: Jarkko Nikula Reviewed-by: Jean Delvare [jdelvare: found another one] Signed-off-by: Wolfram Sang --- drivers/i2c/Kconfig | 1 - drivers/i2c/busses/Kconfig | 3 --- 2 files changed, 4 deletions(-) diff --git a/drivers/i2c/Kconfig b/drivers/i2c/Kconfig index bae1dc08ec9a..5449729cdb87 100644 --- a/drivers/i2c/Kconfig +++ b/drivers/i2c/Kconfig @@ -101,7 +101,6 @@ source "drivers/i2c/busses/Kconfig" config I2C_STUB tristate "I2C/SMBus Test Stub" depends on m - default 'n' help This module may be useful to developers of SMBus client drivers, especially for certain kinds of sensor chips. diff --git a/drivers/i2c/busses/Kconfig b/drivers/i2c/busses/Kconfig index 293e7a0760e7..5969d111b34f 100644 --- a/drivers/i2c/busses/Kconfig +++ b/drivers/i2c/busses/Kconfig @@ -840,7 +840,6 @@ config I2C_PASEMI config I2C_PCA_PLATFORM tristate "PCA9564/PCA9665 as platform device" select I2C_ALGOPCA - default n help This driver supports a memory mapped Philips PCA9564/PCA9665 parallel bus to I2C bus controller. @@ -1240,7 +1239,6 @@ config I2C_TAOS_EVM depends on TTY select SERIO select SERIO_SERPORT - default n help This supports TAOS evaluation modules on serial port. 
In order to use this driver, you will need the inputattach tool, which is part @@ -1324,7 +1322,6 @@ config I2C_PCA_ISA tristate "PCA9564/PCA9665 on an ISA bus" depends on ISA select I2C_ALGOPCA - default n help This driver supports ISA boards using the Philips PCA9564/PCA9665 parallel bus to I2C bus controller. From 99363d1c26c825055f8a879d9d5c2b78168cf655 Mon Sep 17 00:00:00 2001 From: Jean Delvare Date: Tue, 25 Aug 2020 09:20:37 +0200 Subject: [PATCH 011/243] eeprom: at24: Tidy at24_read() The elegant code in at24_read() has the drawback that we now need to make a copy of all parameters to pass them to the post-processing callback function if there is one. Rewrite the loop in such a way that the parameters are not modified, so saving them is no longer needed. Signed-off-by: Jean Delvare Cc: Bartosz Golaszewski Cc: Arnd Bergmann Cc: Greg Kroah-Hartman Signed-off-by: Bartosz Golaszewski --- drivers/misc/eeprom/at24.c | 14 ++++---------- 1 file changed, 4 insertions(+), 10 deletions(-) diff --git a/drivers/misc/eeprom/at24.c b/drivers/misc/eeprom/at24.c index fb0b8375d5ae..8f5de5f10bbe 100644 --- a/drivers/misc/eeprom/at24.c +++ b/drivers/misc/eeprom/at24.c @@ -422,10 +422,7 @@ static int at24_read(void *priv, unsigned int off, void *val, size_t count) struct at24_data *at24; struct device *dev; char *buf = val; - int ret; - unsigned int orig_off = off; - char *orig_buf = buf; - size_t orig_count = count; + int i, ret; at24 = priv; dev = at24_base_client_dev(at24); @@ -448,16 +445,13 @@ static int at24_read(void *priv, unsigned int off, void *val, size_t count) */ mutex_lock(&at24->lock); - while (count) { - ret = at24_regmap_read(at24, buf, off, count); + for (i = 0; count; i += ret, count -= ret) { + ret = at24_regmap_read(at24, buf + i, off + i, count); if (ret < 0) { mutex_unlock(&at24->lock); pm_runtime_put(dev); return ret; } - buf += ret; - off += ret; - count -= ret; } mutex_unlock(&at24->lock); @@ -465,7 +459,7 @@ static int at24_read(void *priv, unsigned 
int off, void *val, size_t count) pm_runtime_put(dev); if (unlikely(at24->read_post)) - at24->read_post(orig_off, orig_buf, orig_count); + at24->read_post(off, buf, i); return 0; } From 0a9336ee133deb39f962e16b5eca2a48fec4eb52 Mon Sep 17 00:00:00 2001 From: Jaakko Laine Date: Thu, 27 Aug 2020 17:48:46 +0300 Subject: [PATCH 012/243] i2c: xiic: Change code alignment to 1 space only Alignment removed and replaced with 1 space only to reduce need for future alignment changes affecting multiple lines, when new variables are added. Signed-off-by: Jaakko Laine Acked-by: Michal Simek Signed-off-by: Wolfram Sang --- drivers/i2c/busses/i2c-xiic.c | 48 +++++++++++++++++------------------ 1 file changed, 24 insertions(+), 24 deletions(-) diff --git a/drivers/i2c/busses/i2c-xiic.c b/drivers/i2c/busses/i2c-xiic.c index 90c1c362394d..10380531d45c 100644 --- a/drivers/i2c/busses/i2c-xiic.c +++ b/drivers/i2c/busses/i2c-xiic.c @@ -46,33 +46,33 @@ enum xiic_endian { /** * struct xiic_i2c - Internal representation of the XIIC I2C bus - * @dev: Pointer to device structure - * @base: Memory base of the HW registers - * @wait: Wait queue for callers - * @adap: Kernel adapter representation - * @tx_msg: Messages from above to be sent - * @lock: Mutual exclusion - * @tx_pos: Current pos in TX message - * @nmsgs: Number of messages in tx_msg - * @state: See STATE_ - * @rx_msg: Current RX message - * @rx_pos: Position within current RX message + * @dev: Pointer to device structure + * @base: Memory base of the HW registers + * @wait: Wait queue for callers + * @adap: Kernel adapter representation + * @tx_msg: Messages from above to be sent + * @lock: Mutual exclusion + * @tx_pos: Current pos in TX message + * @nmsgs: Number of messages in tx_msg + * @state: See STATE_ + * @rx_msg: Current RX message + * @rx_pos: Position within current RX message * @endianness: big/little-endian byte order - * @clk: Pointer to AXI4-lite input clock + * @clk: Pointer to AXI4-lite input clock */ struct 
xiic_i2c { - struct device *dev; - void __iomem *base; - wait_queue_head_t wait; - struct i2c_adapter adap; - struct i2c_msg *tx_msg; - struct mutex lock; - unsigned int tx_pos; - unsigned int nmsgs; - enum xilinx_i2c_state state; - struct i2c_msg *rx_msg; - int rx_pos; - enum xiic_endian endianness; + struct device *dev; + void __iomem *base; + wait_queue_head_t wait; + struct i2c_adapter adap; + struct i2c_msg *tx_msg; + struct mutex lock; + unsigned int tx_pos; + unsigned int nmsgs; + enum xilinx_i2c_state state; + struct i2c_msg *rx_msg; + int rx_pos; + enum xiic_endian endianness; struct clk *clk; }; From 9106e45ceaafd3c3a7264c68dc5dc08874f6c8ce Mon Sep 17 00:00:00 2001 From: Jaakko Laine Date: Thu, 27 Aug 2020 17:48:47 +0300 Subject: [PATCH 013/243] i2c: xiic: Improve struct memory alignment xiic_i2c struct alignment causes the struct to take more space in memory than strictly required. Move state -member to end of struct to get less padding. Signed-off-by: Jaakko Laine Suggested-by: Michal Simek Acked-by: Michal Simek Signed-off-by: Wolfram Sang --- drivers/i2c/busses/i2c-xiic.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/i2c/busses/i2c-xiic.c b/drivers/i2c/busses/i2c-xiic.c index 10380531d45c..1453d82bb664 100644 --- a/drivers/i2c/busses/i2c-xiic.c +++ b/drivers/i2c/busses/i2c-xiic.c @@ -54,11 +54,11 @@ enum xiic_endian { * @lock: Mutual exclusion * @tx_pos: Current pos in TX message * @nmsgs: Number of messages in tx_msg - * @state: See STATE_ * @rx_msg: Current RX message * @rx_pos: Position within current RX message * @endianness: big/little-endian byte order * @clk: Pointer to AXI4-lite input clock + * @state: See STATE_ */ struct xiic_i2c { struct device *dev; @@ -69,11 +69,11 @@ struct xiic_i2c { struct mutex lock; unsigned int tx_pos; unsigned int nmsgs; - enum xilinx_i2c_state state; struct i2c_msg *rx_msg; int rx_pos; enum xiic_endian endianness; struct clk *clk; + enum xilinx_i2c_state state; }; From 
9e3b184b3b4fff3451b4a4089a727988ca14bee3 Mon Sep 17 00:00:00 2001 From: Jaakko Laine Date: Thu, 27 Aug 2020 17:48:48 +0300 Subject: [PATCH 014/243] i2c: xiic: Support forcing single-master in DT I2C master operating in multimaster mode can get stuck indefinitely if I2C start is detected on bus, but no master has a transaction going. This is a weakness in I2C standard, which defines no way to recover, since all masters are indefinitely disallowed from interrupting the currently operating master. A start condition can be created for example by an electromagnetic discharge applied near physical I2C lines. Or an already operating master could get reset immediately after sending a start. If it is known during device tree creation that only a single I2C master will be present on the bus, this deadlock of the I2C bus could be avoided in the driver by ignoring the bus_is_busy register of the xiic, since bus can never be reserved by any other master. This patch adds this support for detecting single-master flag in device tree and when provided, improves I2C reliability by ignoring the therefore unnecessary xiic bus_is_busy register. Error can be reproduced by pulling I2C SDA -line temporarily low by shorting it to ground, while linux I2C master is operating on it using the xiic driver. The application using the bus will start receiving linux error code 16: "Device or resource busy" indefinitely: kernel: pca953x 0-0020: failed writing register app: Error writing file, error: 16 With multi-master disabled device will instead receive error code 5: "I/O error" while SDA is grounded, but recover normal operation once short is removed. 
kernel: pca953x 0-0020: failed reading register app: Error reading file, error: 5 Signed-off-by: Jaakko Laine Acked-by: Michal Simek Signed-off-by: Wolfram Sang --- drivers/i2c/busses/i2c-xiic.c | 14 ++++++++++++++ 1 file changed, 14 insertions(+) diff --git a/drivers/i2c/busses/i2c-xiic.c b/drivers/i2c/busses/i2c-xiic.c index 1453d82bb664..087b2951942e 100644 --- a/drivers/i2c/busses/i2c-xiic.c +++ b/drivers/i2c/busses/i2c-xiic.c @@ -59,6 +59,7 @@ enum xiic_endian { * @endianness: big/little-endian byte order * @clk: Pointer to AXI4-lite input clock * @state: See STATE_ + * @singlemaster: Indicates bus is single master */ struct xiic_i2c { struct device *dev; @@ -74,6 +75,7 @@ struct xiic_i2c { enum xiic_endian endianness; struct clk *clk; enum xilinx_i2c_state state; + bool singlemaster; }; @@ -526,6 +528,15 @@ static int xiic_busy(struct xiic_i2c *i2c) if (i2c->tx_msg) return -EBUSY; + /* In single master mode bus can only be busy, when in use by this + * driver. If the register indicates bus being busy for some reason we + * should ignore it, since bus will never be released and i2c will be + * stuck forever. + */ + if (i2c->singlemaster) { + return 0; + } + /* for instance if previous transfer was terminated due to TX error * it might be that the bus is on it's way to become available * give it at most 3 ms to wake @@ -811,6 +822,9 @@ static int xiic_i2c_probe(struct platform_device *pdev) goto err_clk_dis; } + i2c->singlemaster = + of_property_read_bool(pdev->dev.of_node, "single-master"); + /* * Detect endianness * Try to reset the TX FIFO. Then check the EMPTY flag. If it is not From 5b9bacf28a973a6b16510493416baeefa2c06289 Mon Sep 17 00:00:00 2001 From: Dirk Behme Date: Thu, 27 Aug 2020 11:23:30 +0200 Subject: [PATCH 015/243] i2c: rcar: Auto select RESET_CONTROLLER The i2c-rcar driver utilizes the Generic Reset Controller kernel feature, so select the RESET_CONTROLLER option when the I2C_RCAR option is selected with a Gen3 SoC. 
Fixes: 2b16fd63059ab9 ("i2c: rcar: handle RXDMA HW behaviour on Gen3") Signed-off-by: Dirk Behme Signed-off-by: Andy Lowe [erosca: Add "if ARCH_RCAR_GEN3" per Wolfram's request] Signed-off-by: Eugeniu Rosca Signed-off-by: Wolfram Sang --- drivers/i2c/busses/Kconfig | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/i2c/busses/Kconfig b/drivers/i2c/busses/Kconfig index 5969d111b34f..38639d53a171 100644 --- a/drivers/i2c/busses/Kconfig +++ b/drivers/i2c/busses/Kconfig @@ -1180,6 +1180,7 @@ config I2C_RCAR tristate "Renesas R-Car I2C Controller" depends on ARCH_RENESAS || COMPILE_TEST select I2C_SLAVE + select RESET_CONTROLLER if ARCH_RCAR_GEN3 help If you say yes to this option, support will be included for the R-Car I2C controller. From 62df579a86741436efae3d1bae29d0281078a7d3 Mon Sep 17 00:00:00 2001 From: Jeffrey Lin Date: Wed, 2 Sep 2020 00:51:37 -0400 Subject: [PATCH 016/243] i2c: i801: Register lis3lv02d I2C device on Dell Latitude 5480 Value of /sys/devices/platform/lis3lv02d/position when Horizontal: (36,-108,-1152) Left elevated: (-432,-126,-1062) Front elevated: (36,594,-936) Upside down: (-126,-252,1098) Signed-off-by: Jeffrey Lin Reviewed-by: Jean Delvare Signed-off-by: Wolfram Sang --- drivers/i2c/busses/i2c-i801.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/i2c/busses/i2c-i801.c b/drivers/i2c/busses/i2c-i801.c index e32ef3f01fe8..efab1e71ad6a 100644 --- a/drivers/i2c/busses/i2c-i801.c +++ b/drivers/i2c/busses/i2c-i801.c @@ -1274,6 +1274,7 @@ static const struct { /* * Additional individual entries were added after verification. */ + { "Latitude 5480", 0x29 }, { "Vostro V131", 0x1d }, }; From 60a9f851f6bb9ea180b09e14cd461a14448f0b56 Mon Sep 17 00:00:00 2001 From: Anson Huang Date: Wed, 12 Aug 2020 11:45:54 +0800 Subject: [PATCH 017/243] i2c: imx: Use dev_err_probe() to simplify error handling dev_err_probe() can reduce code size, uniform error handling and record the defer probe reason etc., use it to simplify the code. 
Signed-off-by: Anson Huang Acked-by: Oleksij Rempel Signed-off-by: Wolfram Sang --- drivers/i2c/busses/i2c-imx.c | 8 +++----- 1 file changed, 3 insertions(+), 5 deletions(-) diff --git a/drivers/i2c/busses/i2c-imx.c b/drivers/i2c/busses/i2c-imx.c index 0ab5381aa012..63f4367c312b 100644 --- a/drivers/i2c/busses/i2c-imx.c +++ b/drivers/i2c/busses/i2c-imx.c @@ -1159,11 +1159,9 @@ static int i2c_imx_probe(struct platform_device *pdev) /* Get I2C clock */ i2c_imx->clk = devm_clk_get(&pdev->dev, NULL); - if (IS_ERR(i2c_imx->clk)) { - if (PTR_ERR(i2c_imx->clk) != -EPROBE_DEFER) - dev_err(&pdev->dev, "can't get I2C clock\n"); - return PTR_ERR(i2c_imx->clk); - } + if (IS_ERR(i2c_imx->clk)) + return dev_err_probe(&pdev->dev, PTR_ERR(i2c_imx->clk), + "can't get I2C clock\n"); ret = clk_prepare_enable(i2c_imx->clk); if (ret) { From e6277308ac214af04516564c516a0ff5f753d009 Mon Sep 17 00:00:00 2001 From: Wolfram Sang Date: Wed, 1 Jul 2020 23:48:30 +0200 Subject: [PATCH 018/243] i2c: add binding to mark a bus as SMBus SMBus is largely compatible with I2C but there are some specifics. In case we need them on a bus, we can now use this new binding. Signed-off-by: Wolfram Sang Signed-off-by: Wolfram Sang --- Documentation/devicetree/bindings/i2c/i2c.txt | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/Documentation/devicetree/bindings/i2c/i2c.txt b/Documentation/devicetree/bindings/i2c/i2c.txt index a21c359b9f02..df41f72afc87 100644 --- a/Documentation/devicetree/bindings/i2c/i2c.txt +++ b/Documentation/devicetree/bindings/i2c/i2c.txt @@ -87,6 +87,11 @@ wants to support one of the below features, it should adapt these bindings. this information to detect a stalled bus more reliably, for example. Can not be combined with 'multi-master'. +- smbus + states that additional SMBus restrictions and features apply to this bus. + Examples of features are SMBusHostNotify and SMBusAlert. Examples of + restrictions are more reserved addresses and timeout definitions. 
+ Required properties (per child device) -------------------------------------- From 2a71593da34d473461f2f5c3dbb53b883596188a Mon Sep 17 00:00:00 2001 From: Alain Volmat Date: Mon, 3 Aug 2020 07:17:55 +0200 Subject: [PATCH 019/243] i2c: smbus: add core function handling SMBus host-notify SMBus Host-Notify protocol, from the adapter point of view consist of receiving a message from a client, including the client address and some other data. It can be simply handled by creating a new slave device and registering a callback performing the parsing of the message received from the client. This commit introduces two new core functions * i2c_new_slave_host_notify_device * i2c_free_slave_host_notify_device that take care of registration of the new slave device and callback and will call i2c_handle_smbus_host_notify once a Host-Notify event is received. Signed-off-by: Alain Volmat Reviewed-by: Pierre-Yves MORDRET Signed-off-by: Wolfram Sang --- drivers/i2c/i2c-smbus.c | 107 ++++++++++++++++++++++++++++++++++++++ include/linux/i2c-smbus.h | 12 +++++ 2 files changed, 119 insertions(+) diff --git a/drivers/i2c/i2c-smbus.c b/drivers/i2c/i2c-smbus.c index dc0108287ccf..d3d06e3b4f3b 100644 --- a/drivers/i2c/i2c-smbus.c +++ b/drivers/i2c/i2c-smbus.c @@ -197,6 +197,113 @@ EXPORT_SYMBOL_GPL(i2c_handle_smbus_alert); module_i2c_driver(smbalert_driver); +#if IS_ENABLED(CONFIG_I2C_SLAVE) +#define SMBUS_HOST_NOTIFY_LEN 3 +struct i2c_slave_host_notify_status { + u8 index; + u8 addr; +}; + +static int i2c_slave_host_notify_cb(struct i2c_client *client, + enum i2c_slave_event event, u8 *val) +{ + struct i2c_slave_host_notify_status *status = client->dev.platform_data; + + switch (event) { + case I2C_SLAVE_WRITE_RECEIVED: + /* We only retrieve the first byte received (addr) + * since there is currently no support to retrieve the data + * parameter from the client. 
+ */ + if (status->index == 0) + status->addr = *val; + if (status->index < U8_MAX) + status->index++; + break; + case I2C_SLAVE_STOP: + if (status->index == SMBUS_HOST_NOTIFY_LEN) + i2c_handle_smbus_host_notify(client->adapter, + status->addr); + fallthrough; + case I2C_SLAVE_WRITE_REQUESTED: + status->index = 0; + break; + case I2C_SLAVE_READ_REQUESTED: + case I2C_SLAVE_READ_PROCESSED: + *val = 0xff; + break; + } + + return 0; +} + +/** + * i2c_new_slave_host_notify_device - get a client for SMBus host-notify support + * @adapter: the target adapter + * Context: can sleep + * + * Setup handling of the SMBus host-notify protocol on a given I2C bus segment. + * + * Handling is done by creating a device and its callback and handling data + * received via the SMBus host-notify address (0x8) + * + * This returns the client, which should be ultimately freed using + * i2c_free_slave_host_notify_device(); or an ERRPTR to indicate an error. + */ +struct i2c_client *i2c_new_slave_host_notify_device(struct i2c_adapter *adapter) +{ + struct i2c_board_info host_notify_board_info = { + I2C_BOARD_INFO("smbus_host_notify", 0x08), + .flags = I2C_CLIENT_SLAVE, + }; + struct i2c_slave_host_notify_status *status; + struct i2c_client *client; + int ret; + + status = kzalloc(sizeof(struct i2c_slave_host_notify_status), + GFP_KERNEL); + if (!status) + return ERR_PTR(-ENOMEM); + + host_notify_board_info.platform_data = status; + + client = i2c_new_client_device(adapter, &host_notify_board_info); + if (IS_ERR(client)) { + kfree(status); + return client; + } + + ret = i2c_slave_register(client, i2c_slave_host_notify_cb); + if (ret) { + i2c_unregister_device(client); + kfree(status); + return ERR_PTR(ret); + } + + return client; +} +EXPORT_SYMBOL_GPL(i2c_new_slave_host_notify_device); + +/** + * i2c_free_slave_host_notify_device - free the client for SMBus host-notify + * support + * @client: the client to free + * Context: can sleep + * + * Free the i2c_client allocated via 
i2c_new_slave_host_notify_device + */ +void i2c_free_slave_host_notify_device(struct i2c_client *client) +{ + if (IS_ERR_OR_NULL(client)) + return; + + i2c_slave_unregister(client); + kfree(client->dev.platform_data); + i2c_unregister_device(client); +} +EXPORT_SYMBOL_GPL(i2c_free_slave_host_notify_device); +#endif + /* * SPD is not part of SMBus but we include it here for convenience as the * target systems are the same. diff --git a/include/linux/i2c-smbus.h b/include/linux/i2c-smbus.h index 1e4e0de4ef8b..1ef421818d3a 100644 --- a/include/linux/i2c-smbus.h +++ b/include/linux/i2c-smbus.h @@ -38,6 +38,18 @@ static inline int of_i2c_setup_smbus_alert(struct i2c_adapter *adap) return 0; } #endif +#if IS_ENABLED(CONFIG_I2C_SMBUS) && IS_ENABLED(CONFIG_I2C_SLAVE) +struct i2c_client *i2c_new_slave_host_notify_device(struct i2c_adapter *adapter); +void i2c_free_slave_host_notify_device(struct i2c_client *client); +#else +static inline struct i2c_client *i2c_new_slave_host_notify_device(struct i2c_adapter *adapter) +{ + return ERR_PTR(-ENOSYS); +} +static inline void i2c_free_slave_host_notify_device(struct i2c_client *client) +{ +} +#endif #if IS_ENABLED(CONFIG_I2C_SMBUS) && IS_ENABLED(CONFIG_DMI) void i2c_register_spd(struct i2c_adapter *adap); From c52d270c68a02f94c5c081b7fc57119058e4670a Mon Sep 17 00:00:00 2001 From: Krzysztof Kozlowski Date: Sun, 30 Aug 2020 10:09:37 +0200 Subject: [PATCH 020/243] rtc: s3c: Simplify with dev_err_probe() Common pattern of handling deferred probe can be simplified with dev_err_probe(). Less code and the error value gets printed. 
Signed-off-by: Krzysztof Kozlowski Signed-off-by: Alexandre Belloni Link: https://lore.kernel.org/r/20200830080937.14367-1-krzk@kernel.org --- drivers/rtc/rtc-s3c.c | 9 ++------- 1 file changed, 2 insertions(+), 7 deletions(-) diff --git a/drivers/rtc/rtc-s3c.c b/drivers/rtc/rtc-s3c.c index e1b50e682fc4..24a41909f049 100644 --- a/drivers/rtc/rtc-s3c.c +++ b/drivers/rtc/rtc-s3c.c @@ -494,13 +494,8 @@ static int s3c_rtc_probe(struct platform_device *pdev) if (info->data->needs_src_clk) { info->rtc_src_clk = devm_clk_get(&pdev->dev, "rtc_src"); if (IS_ERR(info->rtc_src_clk)) { - ret = PTR_ERR(info->rtc_src_clk); - if (ret != -EPROBE_DEFER) - dev_err(&pdev->dev, - "failed to find rtc source clock\n"); - else - dev_dbg(&pdev->dev, - "probe deferred due to missing rtc src clk\n"); + ret = dev_err_probe(&pdev->dev, PTR_ERR(info->rtc_src_clk), + "failed to find rtc source clock\n"); goto err_src_clk; } ret = clk_prepare_enable(info->rtc_src_clk); From d3b14296da69adb7825022f3224ac6137eb30abf Mon Sep 17 00:00:00 2001 From: Bartosz Golaszewski Date: Mon, 14 Sep 2020 17:45:48 +0200 Subject: [PATCH 021/243] rtc: rx8010: don't modify the global rtc ops The way the driver is implemented is buggy for the (admittedly unlikely) use case where there are two RTCs with one having an interrupt configured and the second not. This is caused by the fact that we use a global rtc_class_ops struct which we modify depending on whether the irq number is present or not. Fix it by using two const ops structs with and without alarm operations. While at it: not being able to request a configured interrupt is an error so don't ignore it and bail out of probe(). 
Fixes: ed13d89b08e3 ("rtc: Add Epson RX8010SJ RTC driver") Signed-off-by: Bartosz Golaszewski Signed-off-by: Alexandre Belloni Cc: stable@vger.kernel.org Link: https://lore.kernel.org/r/20200914154601.32245-2-brgl@bgdev.pl --- drivers/rtc/rtc-rx8010.c | 24 +++++++++++++++++------- 1 file changed, 17 insertions(+), 7 deletions(-) diff --git a/drivers/rtc/rtc-rx8010.c b/drivers/rtc/rtc-rx8010.c index fe010151ec8f..08c93d492494 100644 --- a/drivers/rtc/rtc-rx8010.c +++ b/drivers/rtc/rtc-rx8010.c @@ -407,16 +407,26 @@ static int rx8010_ioctl(struct device *dev, unsigned int cmd, unsigned long arg) } } -static struct rtc_class_ops rx8010_rtc_ops = { +static const struct rtc_class_ops rx8010_rtc_ops_default = { .read_time = rx8010_get_time, .set_time = rx8010_set_time, .ioctl = rx8010_ioctl, }; +static const struct rtc_class_ops rx8010_rtc_ops_alarm = { + .read_time = rx8010_get_time, + .set_time = rx8010_set_time, + .ioctl = rx8010_ioctl, + .read_alarm = rx8010_read_alarm, + .set_alarm = rx8010_set_alarm, + .alarm_irq_enable = rx8010_alarm_irq_enable, +}; + static int rx8010_probe(struct i2c_client *client, const struct i2c_device_id *id) { struct i2c_adapter *adapter = client->adapter; + const struct rtc_class_ops *rtc_ops; struct rx8010_data *rx8010; int err = 0; @@ -447,16 +457,16 @@ static int rx8010_probe(struct i2c_client *client, if (err) { dev_err(&client->dev, "unable to request IRQ\n"); - client->irq = 0; - } else { - rx8010_rtc_ops.read_alarm = rx8010_read_alarm; - rx8010_rtc_ops.set_alarm = rx8010_set_alarm; - rx8010_rtc_ops.alarm_irq_enable = rx8010_alarm_irq_enable; + return err; } + + rtc_ops = &rx8010_rtc_ops_alarm; + } else { + rtc_ops = &rx8010_rtc_ops_default; } rx8010->rtc = devm_rtc_device_register(&client->dev, client->name, - &rx8010_rtc_ops, THIS_MODULE); + rtc_ops, THIS_MODULE); if (IS_ERR(rx8010->rtc)) { dev_err(&client->dev, "unable to register the class device\n"); From 2e0ce569102ccb1ca9bacc499c8411fb8fa53069 Mon Sep 17 00:00:00 2001 From: 
Bartosz Golaszewski Date: Mon, 14 Sep 2020 17:45:49 +0200 Subject: [PATCH 022/243] rtc: rx8010: remove a stray newline Remove an unnecessary newline after requesting the interrupt. Signed-off-by: Bartosz Golaszewski Signed-off-by: Alexandre Belloni Link: https://lore.kernel.org/r/20200914154601.32245-3-brgl@bgdev.pl --- drivers/rtc/rtc-rx8010.c | 1 - 1 file changed, 1 deletion(-) diff --git a/drivers/rtc/rtc-rx8010.c b/drivers/rtc/rtc-rx8010.c index 08c93d492494..c6797ec0aba1 100644 --- a/drivers/rtc/rtc-rx8010.c +++ b/drivers/rtc/rtc-rx8010.c @@ -454,7 +454,6 @@ static int rx8010_probe(struct i2c_client *client, rx8010_irq_1_handler, IRQF_TRIGGER_LOW | IRQF_ONESHOT, "rx8010", client); - if (err) { dev_err(&client->dev, "unable to request IRQ\n"); return err; From 28c86f30c979f9d4460dd7680610c3470b4d009b Mon Sep 17 00:00:00 2001 From: Bartosz Golaszewski Date: Mon, 14 Sep 2020 17:45:50 +0200 Subject: [PATCH 023/243] rtc: rx8010: remove unnecessary brackets Remove brackets wherever they guard a single line. 
Signed-off-by: Bartosz Golaszewski Signed-off-by: Alexandre Belloni Link: https://lore.kernel.org/r/20200914154601.32245-4-brgl@bgdev.pl --- drivers/rtc/rtc-rx8010.c | 9 +++------ 1 file changed, 3 insertions(+), 6 deletions(-) diff --git a/drivers/rtc/rtc-rx8010.c b/drivers/rtc/rtc-rx8010.c index c6797ec0aba1..79a3d90d2c43 100644 --- a/drivers/rtc/rtc-rx8010.c +++ b/drivers/rtc/rtc-rx8010.c @@ -181,9 +181,8 @@ static int rx8010_set_time(struct device *dev, struct rtc_time *dt) return ret; flagreg = i2c_smbus_read_byte_data(rx8010->client, RX8010_FLAG); - if (flagreg < 0) { + if (flagreg < 0) return flagreg; - } if (flagreg & RX8010_FLAG_VLF) ret = i2c_smbus_write_byte_data(rx8010->client, RX8010_FLAG, @@ -284,17 +283,15 @@ static int rx8010_set_alarm(struct device *dev, struct rtc_wkalrm *t) int err; flagreg = i2c_smbus_read_byte_data(client, RX8010_FLAG); - if (flagreg < 0) { + if (flagreg < 0) return flagreg; - } if (rx8010->ctrlreg & (RX8010_CTRL_AIE | RX8010_CTRL_UIE)) { rx8010->ctrlreg &= ~(RX8010_CTRL_AIE | RX8010_CTRL_UIE); err = i2c_smbus_write_byte_data(rx8010->client, RX8010_CTRL, rx8010->ctrlreg); - if (err < 0) { + if (err < 0) return err; - } } flagreg &= ~RX8010_FLAG_AF; From 75677971991940581e76bcd5176ea40d0baf8fcd Mon Sep 17 00:00:00 2001 From: Bartosz Golaszewski Date: Mon, 14 Sep 2020 17:45:51 +0200 Subject: [PATCH 024/243] rtc: rx8010: consolidate local variables of the same type Move local variables of the same type into a single line for better readability. 
Signed-off-by: Bartosz Golaszewski Signed-off-by: Alexandre Belloni Link: https://lore.kernel.org/r/20200914154601.32245-5-brgl@bgdev.pl --- drivers/rtc/rtc-rx8010.c | 18 ++++++------------ 1 file changed, 6 insertions(+), 12 deletions(-) diff --git a/drivers/rtc/rtc-rx8010.c b/drivers/rtc/rtc-rx8010.c index 79a3d90d2c43..153fa58f0365 100644 --- a/drivers/rtc/rtc-rx8010.c +++ b/drivers/rtc/rtc-rx8010.c @@ -109,8 +109,7 @@ static int rx8010_get_time(struct device *dev, struct rtc_time *dt) { struct rx8010_data *rx8010 = dev_get_drvdata(dev); u8 date[7]; - int flagreg; - int err; + int flagreg, err; flagreg = i2c_smbus_read_byte_data(rx8010->client, RX8010_FLAG); if (flagreg < 0) @@ -141,8 +140,7 @@ static int rx8010_set_time(struct device *dev, struct rtc_time *dt) { struct rx8010_data *rx8010 = dev_get_drvdata(dev); u8 date[7]; - int ctrl, flagreg; - int ret; + int ctrl, flagreg, ret; if ((dt->tm_year < 100) || (dt->tm_year > 199)) return -EINVAL; @@ -250,8 +248,7 @@ static int rx8010_read_alarm(struct device *dev, struct rtc_wkalrm *t) struct rx8010_data *rx8010 = dev_get_drvdata(dev); struct i2c_client *client = rx8010->client; u8 alarmvals[3]; - int flagreg; - int err; + int flagreg, err; err = i2c_smbus_read_i2c_block_data(client, RX8010_ALMIN, 3, alarmvals); if (err != 3) @@ -279,8 +276,7 @@ static int rx8010_set_alarm(struct device *dev, struct rtc_wkalrm *t) struct i2c_client *client = to_i2c_client(dev); struct rx8010_data *rx8010 = dev_get_drvdata(dev); u8 alarmvals[3]; - int extreg, flagreg; - int err; + int extreg, flagreg, err; flagreg = i2c_smbus_read_byte_data(client, RX8010_FLAG); if (flagreg < 0) @@ -346,9 +342,8 @@ static int rx8010_alarm_irq_enable(struct device *dev, { struct i2c_client *client = to_i2c_client(dev); struct rx8010_data *rx8010 = dev_get_drvdata(dev); - int flagreg; + int flagreg, err; u8 ctrl; - int err; ctrl = rx8010->ctrlreg; @@ -387,8 +382,7 @@ static int rx8010_alarm_irq_enable(struct device *dev, static int 
rx8010_ioctl(struct device *dev, unsigned int cmd, unsigned long arg) { struct rx8010_data *rx8010 = dev_get_drvdata(dev); - int tmp; - int flagreg; + int tmp, flagreg; switch (cmd) { case RTC_VL_READ: From e9e4c2dae4313b88c62ee9df9d177a71c23121b2 Mon Sep 17 00:00:00 2001 From: Bartosz Golaszewski Date: Mon, 14 Sep 2020 17:45:52 +0200 Subject: [PATCH 025/243] rtc: rx8010: use tabs instead of spaces for code formatting The define values in this driver are close to their names and they are separated by spaces. Use tabs instead and align all defines. Signed-off-by: Bartosz Golaszewski Signed-off-by: Alexandre Belloni Link: https://lore.kernel.org/r/20200914154601.32245-6-brgl@bgdev.pl --- drivers/rtc/rtc-rx8010.c | 58 ++++++++++++++++++++-------------------- 1 file changed, 29 insertions(+), 29 deletions(-) diff --git a/drivers/rtc/rtc-rx8010.c b/drivers/rtc/rtc-rx8010.c index 153fa58f0365..51ac4fac8f19 100644 --- a/drivers/rtc/rtc-rx8010.c +++ b/drivers/rtc/rtc-rx8010.c @@ -13,40 +13,40 @@ #include #include -#define RX8010_SEC 0x10 -#define RX8010_MIN 0x11 -#define RX8010_HOUR 0x12 -#define RX8010_WDAY 0x13 -#define RX8010_MDAY 0x14 -#define RX8010_MONTH 0x15 -#define RX8010_YEAR 0x16 -#define RX8010_RESV17 0x17 -#define RX8010_ALMIN 0x18 -#define RX8010_ALHOUR 0x19 -#define RX8010_ALWDAY 0x1A -#define RX8010_TCOUNT0 0x1B -#define RX8010_TCOUNT1 0x1C -#define RX8010_EXT 0x1D -#define RX8010_FLAG 0x1E -#define RX8010_CTRL 0x1F +#define RX8010_SEC 0x10 +#define RX8010_MIN 0x11 +#define RX8010_HOUR 0x12 +#define RX8010_WDAY 0x13 +#define RX8010_MDAY 0x14 +#define RX8010_MONTH 0x15 +#define RX8010_YEAR 0x16 +#define RX8010_RESV17 0x17 +#define RX8010_ALMIN 0x18 +#define RX8010_ALHOUR 0x19 +#define RX8010_ALWDAY 0x1A +#define RX8010_TCOUNT0 0x1B +#define RX8010_TCOUNT1 0x1C +#define RX8010_EXT 0x1D +#define RX8010_FLAG 0x1E +#define RX8010_CTRL 0x1F /* 0x20 to 0x2F are user registers */ -#define RX8010_RESV30 0x30 -#define RX8010_RESV31 0x31 -#define RX8010_IRQ 0x32 
+#define RX8010_RESV30 0x30 +#define RX8010_RESV31 0x31 +#define RX8010_IRQ 0x32 -#define RX8010_EXT_WADA BIT(3) +#define RX8010_EXT_WADA BIT(3) -#define RX8010_FLAG_VLF BIT(1) -#define RX8010_FLAG_AF BIT(3) -#define RX8010_FLAG_TF BIT(4) -#define RX8010_FLAG_UF BIT(5) +#define RX8010_FLAG_VLF BIT(1) +#define RX8010_FLAG_AF BIT(3) +#define RX8010_FLAG_TF BIT(4) +#define RX8010_FLAG_UF BIT(5) -#define RX8010_CTRL_AIE BIT(3) -#define RX8010_CTRL_UIE BIT(5) -#define RX8010_CTRL_STOP BIT(6) -#define RX8010_CTRL_TEST BIT(7) +#define RX8010_CTRL_AIE BIT(3) +#define RX8010_CTRL_UIE BIT(5) +#define RX8010_CTRL_STOP BIT(6) +#define RX8010_CTRL_TEST BIT(7) -#define RX8010_ALARM_AE BIT(7) +#define RX8010_ALARM_AE BIT(7) static const struct i2c_device_id rx8010_id[] = { { "rx8010", 0 }, From 13952c9e35384fd7f63a5ce8261108695491bb56 Mon Sep 17 00:00:00 2001 From: Bartosz Golaszewski Date: Mon, 14 Sep 2020 17:45:53 +0200 Subject: [PATCH 026/243] rtc: rx8010: rename ret to err in rx8010_set_time() All other functions in this driver use 'err' for integer return values. Do the same in rx8010_set_time() for consistency. 
Signed-off-by: Bartosz Golaszewski Signed-off-by: Alexandre Belloni Link: https://lore.kernel.org/r/20200914154601.32245-7-brgl@bgdev.pl --- drivers/rtc/rtc-rx8010.c | 22 +++++++++++----------- 1 file changed, 11 insertions(+), 11 deletions(-) diff --git a/drivers/rtc/rtc-rx8010.c b/drivers/rtc/rtc-rx8010.c index 51ac4fac8f19..300314ab7b6d 100644 --- a/drivers/rtc/rtc-rx8010.c +++ b/drivers/rtc/rtc-rx8010.c @@ -140,7 +140,7 @@ static int rx8010_set_time(struct device *dev, struct rtc_time *dt) { struct rx8010_data *rx8010 = dev_get_drvdata(dev); u8 date[7]; - int ctrl, flagreg, ret; + int ctrl, flagreg, err; if ((dt->tm_year < 100) || (dt->tm_year > 199)) return -EINVAL; @@ -150,10 +150,10 @@ static int rx8010_set_time(struct device *dev, struct rtc_time *dt) if (ctrl < 0) return ctrl; rx8010->ctrlreg = ctrl | RX8010_CTRL_STOP; - ret = i2c_smbus_write_byte_data(rx8010->client, RX8010_CTRL, + err = i2c_smbus_write_byte_data(rx8010->client, RX8010_CTRL, rx8010->ctrlreg); - if (ret < 0) - return ret; + if (err < 0) + return err; date[RX8010_SEC - RX8010_SEC] = bin2bcd(dt->tm_sec); date[RX8010_MIN - RX8010_SEC] = bin2bcd(dt->tm_min); @@ -163,27 +163,27 @@ static int rx8010_set_time(struct device *dev, struct rtc_time *dt) date[RX8010_YEAR - RX8010_SEC] = bin2bcd(dt->tm_year - 100); date[RX8010_WDAY - RX8010_SEC] = bin2bcd(1 << dt->tm_wday); - ret = i2c_smbus_write_i2c_block_data(rx8010->client, + err = i2c_smbus_write_i2c_block_data(rx8010->client, RX8010_SEC, 7, date); - if (ret < 0) - return ret; + if (err < 0) + return err; /* clear STOP bit after changing clock/calendar */ ctrl = i2c_smbus_read_byte_data(rx8010->client, RX8010_CTRL); if (ctrl < 0) return ctrl; rx8010->ctrlreg = ctrl & ~RX8010_CTRL_STOP; - ret = i2c_smbus_write_byte_data(rx8010->client, RX8010_CTRL, + err = i2c_smbus_write_byte_data(rx8010->client, RX8010_CTRL, rx8010->ctrlreg); - if (ret < 0) - return ret; + if (err < 0) + return err; flagreg = i2c_smbus_read_byte_data(rx8010->client, RX8010_FLAG); 
if (flagreg < 0) return flagreg; if (flagreg & RX8010_FLAG_VLF) - ret = i2c_smbus_write_byte_data(rx8010->client, RX8010_FLAG, + err = i2c_smbus_write_byte_data(rx8010->client, RX8010_FLAG, flagreg & ~RX8010_FLAG_VLF); return 0; From f702699c67d315e4a232c64801b2de9af87fd9f4 Mon Sep 17 00:00:00 2001 From: Bartosz Golaszewski Date: Mon, 14 Sep 2020 17:45:54 +0200 Subject: [PATCH 027/243] rtc: rx8010: don't use magic values for time buffer length The time buffer len is used directly in this driver. For readability it's better to define it as the difference between the date register offsets and use sizeof() whenever referencing it. Signed-off-by: Bartosz Golaszewski Signed-off-by: Alexandre Belloni Link: https://lore.kernel.org/r/20200914154601.32245-8-brgl@bgdev.pl --- drivers/rtc/rtc-rx8010.c | 11 ++++++----- 1 file changed, 6 insertions(+), 5 deletions(-) diff --git a/drivers/rtc/rtc-rx8010.c b/drivers/rtc/rtc-rx8010.c index 300314ab7b6d..2c894e7aab6d 100644 --- a/drivers/rtc/rtc-rx8010.c +++ b/drivers/rtc/rtc-rx8010.c @@ -108,7 +108,7 @@ static irqreturn_t rx8010_irq_1_handler(int irq, void *dev_id) static int rx8010_get_time(struct device *dev, struct rtc_time *dt) { struct rx8010_data *rx8010 = dev_get_drvdata(dev); - u8 date[7]; + u8 date[RX8010_YEAR - RX8010_SEC + 1]; int flagreg, err; flagreg = i2c_smbus_read_byte_data(rx8010->client, RX8010_FLAG); @@ -121,8 +121,8 @@ static int rx8010_get_time(struct device *dev, struct rtc_time *dt) } err = i2c_smbus_read_i2c_block_data(rx8010->client, RX8010_SEC, - 7, date); - if (err != 7) + sizeof(date), date); + if (err != sizeof(date)) return err < 0 ? 
err : -EIO; dt->tm_sec = bcd2bin(date[RX8010_SEC - RX8010_SEC] & 0x7f); @@ -139,7 +139,7 @@ static int rx8010_get_time(struct device *dev, struct rtc_time *dt) static int rx8010_set_time(struct device *dev, struct rtc_time *dt) { struct rx8010_data *rx8010 = dev_get_drvdata(dev); - u8 date[7]; + u8 date[RX8010_YEAR - RX8010_SEC + 1]; int ctrl, flagreg, err; if ((dt->tm_year < 100) || (dt->tm_year > 199)) @@ -164,7 +164,8 @@ static int rx8010_set_time(struct device *dev, struct rtc_time *dt) date[RX8010_WDAY - RX8010_SEC] = bin2bcd(1 << dt->tm_wday); err = i2c_smbus_write_i2c_block_data(rx8010->client, - RX8010_SEC, 7, date); + RX8010_SEC, sizeof(date), + date); if (err < 0) return err; From b3ff7fd68d925de2159a5312f28dcd178d0d3715 Mon Sep 17 00:00:00 2001 From: Bartosz Golaszewski Date: Mon, 14 Sep 2020 17:45:55 +0200 Subject: [PATCH 028/243] rtc: rx8010: drop unnecessary initialization The 'err' local variable in rx8010_init_client() doesn't need to be initialized. Signed-off-by: Bartosz Golaszewski Signed-off-by: Alexandre Belloni Link: https://lore.kernel.org/r/20200914154601.32245-9-brgl@bgdev.pl --- drivers/rtc/rtc-rx8010.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/rtc/rtc-rx8010.c b/drivers/rtc/rtc-rx8010.c index 2c894e7aab6d..64a9964eb5e0 100644 --- a/drivers/rtc/rtc-rx8010.c +++ b/drivers/rtc/rtc-rx8010.c @@ -194,7 +194,7 @@ static int rx8010_init_client(struct i2c_client *client) { struct rx8010_data *rx8010 = i2c_get_clientdata(client); u8 ctrl[2]; - int need_clear = 0, err = 0; + int need_clear = 0, err; /* Initialize reserved registers as specified in datasheet */ err = i2c_smbus_write_byte_data(client, RX8010_RESV17, 0xD8); From 955a123c14906e3adc43d43281f8fde91f631f7f Mon Sep 17 00:00:00 2001 From: Bartosz Golaszewski Date: Mon, 14 Sep 2020 17:45:56 +0200 Subject: [PATCH 029/243] rtc: rx8010: use a helper variable for client->dev in probe() Simple 'dev' looks better than repeated &client->dev and has the added benefit
of avoiding unnecessary line breaks. Signed-off-by: Bartosz Golaszewski Signed-off-by: Alexandre Belloni Link: https://lore.kernel.org/r/20200914154601.32245-10-brgl@bgdev.pl --- drivers/rtc/rtc-rx8010.c | 15 +++++++-------- 1 file changed, 7 insertions(+), 8 deletions(-) diff --git a/drivers/rtc/rtc-rx8010.c b/drivers/rtc/rtc-rx8010.c index 64a9964eb5e0..dba7c3f87d9e 100644 --- a/drivers/rtc/rtc-rx8010.c +++ b/drivers/rtc/rtc-rx8010.c @@ -419,6 +419,7 @@ static int rx8010_probe(struct i2c_client *client, { struct i2c_adapter *adapter = client->adapter; const struct rtc_class_ops *rtc_ops; + struct device *dev = &client->dev; struct rx8010_data *rx8010; int err = 0; @@ -428,8 +429,7 @@ static int rx8010_probe(struct i2c_client *client, return -EIO; } - rx8010 = devm_kzalloc(&client->dev, sizeof(struct rx8010_data), - GFP_KERNEL); + rx8010 = devm_kzalloc(dev, sizeof(struct rx8010_data), GFP_KERNEL); if (!rx8010) return -ENOMEM; @@ -441,13 +441,13 @@ static int rx8010_probe(struct i2c_client *client, return err; if (client->irq > 0) { - dev_info(&client->dev, "IRQ %d supplied\n", client->irq); - err = devm_request_threaded_irq(&client->dev, client->irq, NULL, + dev_info(dev, "IRQ %d supplied\n", client->irq); + err = devm_request_threaded_irq(dev, client->irq, NULL, rx8010_irq_1_handler, IRQF_TRIGGER_LOW | IRQF_ONESHOT, "rx8010", client); if (err) { - dev_err(&client->dev, "unable to request IRQ\n"); + dev_err(dev, "unable to request IRQ\n"); return err; } @@ -456,11 +456,10 @@ static int rx8010_probe(struct i2c_client *client, rtc_ops = &rx8010_rtc_ops_default; } - rx8010->rtc = devm_rtc_device_register(&client->dev, client->name, + rx8010->rtc = devm_rtc_device_register(dev, client->name, rtc_ops, THIS_MODULE); - if (IS_ERR(rx8010->rtc)) { - dev_err(&client->dev, "unable to register the class device\n"); + dev_err(dev, "unable to register the class device\n"); return PTR_ERR(rx8010->rtc); } From 666f21413b881e159efaf862f119d4d058fa2c4a Mon Sep 17 00:00:00 2001 
From: Bartosz Golaszewski Date: Mon, 14 Sep 2020 17:45:57 +0200 Subject: [PATCH 030/243] rtc: rx8010: prefer sizeof(*val) over sizeof(struct type_of_val) Using the size of the variable is preferred over using the size of its type when allocating memory. Convert the call to devm_kzalloc() in probe(). Signed-off-by: Bartosz Golaszewski Signed-off-by: Alexandre Belloni Link: https://lore.kernel.org/r/20200914154601.32245-11-brgl@bgdev.pl --- drivers/rtc/rtc-rx8010.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/rtc/rtc-rx8010.c b/drivers/rtc/rtc-rx8010.c index dba7c3f87d9e..aa357f800ad4 100644 --- a/drivers/rtc/rtc-rx8010.c +++ b/drivers/rtc/rtc-rx8010.c @@ -429,7 +429,7 @@ static int rx8010_probe(struct i2c_client *client, return -EIO; } - rx8010 = devm_kzalloc(dev, sizeof(struct rx8010_data), GFP_KERNEL); + rx8010 = devm_kzalloc(dev, sizeof(*rx8010), GFP_KERNEL); if (!rx8010) return -ENOMEM; From 0ce627785afa730d8f6568eb8738d1700cbc4569 Mon Sep 17 00:00:00 2001 From: Bartosz Golaszewski Date: Mon, 14 Sep 2020 17:45:58 +0200 Subject: [PATCH 031/243] rtc: rx8010: switch to using the preferred RTC API Use devm_rtc_allocate_device() + rtc_register_device() instead of the deprecated devm_rtc_device_register(). 
Signed-off-by: Bartosz Golaszewski Signed-off-by: Alexandre Belloni Link: https://lore.kernel.org/r/20200914154601.32245-12-brgl@bgdev.pl --- drivers/rtc/rtc-rx8010.c | 18 +++++++----------- 1 file changed, 7 insertions(+), 11 deletions(-) diff --git a/drivers/rtc/rtc-rx8010.c b/drivers/rtc/rtc-rx8010.c index aa357f800ad4..6aeed3802670 100644 --- a/drivers/rtc/rtc-rx8010.c +++ b/drivers/rtc/rtc-rx8010.c @@ -418,7 +418,6 @@ static int rx8010_probe(struct i2c_client *client, const struct i2c_device_id *id) { struct i2c_adapter *adapter = client->adapter; - const struct rtc_class_ops *rtc_ops; struct device *dev = &client->dev; struct rx8010_data *rx8010; int err = 0; @@ -440,6 +439,10 @@ static int rx8010_probe(struct i2c_client *client, if (err) return err; + rx8010->rtc = devm_rtc_allocate_device(dev); + if (IS_ERR(rx8010->rtc)) + return PTR_ERR(rx8010->rtc); + if (client->irq > 0) { dev_info(dev, "IRQ %d supplied\n", client->irq); err = devm_request_threaded_irq(dev, client->irq, NULL, @@ -451,21 +454,14 @@ static int rx8010_probe(struct i2c_client *client, return err; } - rtc_ops = &rx8010_rtc_ops_alarm; + rx8010->rtc->ops = &rx8010_rtc_ops_alarm; } else { - rtc_ops = &rx8010_rtc_ops_default; - } - - rx8010->rtc = devm_rtc_device_register(dev, client->name, - rtc_ops, THIS_MODULE); - if (IS_ERR(rx8010->rtc)) { - dev_err(dev, "unable to register the class device\n"); - return PTR_ERR(rx8010->rtc); + rx8010->rtc->ops = &rx8010_rtc_ops_default; } rx8010->rtc->max_user_freq = 1; - return 0; + return rtc_register_device(rx8010->rtc); } static struct i2c_driver rx8010_driver = { From cee015d90d96495d8376871af0f1a33027303d5e Mon Sep 17 00:00:00 2001 From: Bartosz Golaszewski Date: Mon, 14 Sep 2020 17:45:59 +0200 Subject: [PATCH 032/243] rtc: rx8010: switch to using the preferred i2c API We should generally use probe_new instead of probe when registering i2c drivers. Convert rx8010 to using it. 
Signed-off-by: Bartosz Golaszewski Signed-off-by: Alexandre Belloni Link: https://lore.kernel.org/r/20200914154601.32245-13-brgl@bgdev.pl --- drivers/rtc/rtc-rx8010.c | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/drivers/rtc/rtc-rx8010.c b/drivers/rtc/rtc-rx8010.c index 6aeed3802670..3c82f7d48a65 100644 --- a/drivers/rtc/rtc-rx8010.c +++ b/drivers/rtc/rtc-rx8010.c @@ -414,8 +414,7 @@ static const struct rtc_class_ops rx8010_rtc_ops_alarm = { .alarm_irq_enable = rx8010_alarm_irq_enable, }; -static int rx8010_probe(struct i2c_client *client, - const struct i2c_device_id *id) +static int rx8010_probe(struct i2c_client *client) { struct i2c_adapter *adapter = client->adapter; struct device *dev = &client->dev; @@ -469,7 +468,7 @@ static struct i2c_driver rx8010_driver = { .name = "rtc-rx8010", .of_match_table = of_match_ptr(rx8010_of_match), }, - .probe = rx8010_probe, + .probe_new = rx8010_probe, .id_table = rx8010_id, }; From 9868bc1ce272dc0387488e779c585e7a12cf7a1b Mon Sep 17 00:00:00 2001 From: Bartosz Golaszewski Date: Mon, 14 Sep 2020 17:46:00 +0200 Subject: [PATCH 033/243] rtc: rx8010: convert to using regmap This driver requires SMBUS to work. We can relax this requirement if we switch to using i2c regmap and let the regmap sub-system figure out how to talk to the bus. This also has the advantage of shrinking the code for register updates. 
Signed-off-by: Bartosz Golaszewski Signed-off-by: Alexandre Belloni Link: https://lore.kernel.org/r/20200914154601.32245-14-brgl@bgdev.pl --- drivers/rtc/rtc-rx8010.c | 198 ++++++++++++++++----------------------- 1 file changed, 79 insertions(+), 119 deletions(-) diff --git a/drivers/rtc/rtc-rx8010.c b/drivers/rtc/rtc-rx8010.c index 3c82f7d48a65..b8aa98fb62de 100644 --- a/drivers/rtc/rtc-rx8010.c +++ b/drivers/rtc/rtc-rx8010.c @@ -11,6 +11,7 @@ #include #include #include +#include #include #define RX8010_SEC 0x10 @@ -61,7 +62,7 @@ static const struct of_device_id rx8010_of_match[] = { MODULE_DEVICE_TABLE(of, rx8010_of_match); struct rx8010_data { - struct i2c_client *client; + struct regmap *regs; struct rtc_device *rtc; u8 ctrlreg; }; @@ -70,13 +71,12 @@ static irqreturn_t rx8010_irq_1_handler(int irq, void *dev_id) { struct i2c_client *client = dev_id; struct rx8010_data *rx8010 = i2c_get_clientdata(client); - int flagreg; + int flagreg, err; mutex_lock(&rx8010->rtc->ops_lock); - flagreg = i2c_smbus_read_byte_data(client, RX8010_FLAG); - - if (flagreg <= 0) { + err = regmap_read(rx8010->regs, RX8010_FLAG, &flagreg); + if (err) { mutex_unlock(&rx8010->rtc->ops_lock); return IRQ_NONE; } @@ -99,10 +99,9 @@ static irqreturn_t rx8010_irq_1_handler(int irq, void *dev_id) rtc_update_irq(rx8010->rtc, 1, RTC_UF | RTC_IRQF); } - i2c_smbus_write_byte_data(client, RX8010_FLAG, flagreg); - + err = regmap_write(rx8010->regs, RX8010_FLAG, flagreg); mutex_unlock(&rx8010->rtc->ops_lock); - return IRQ_HANDLED; + return err ? 
IRQ_NONE : IRQ_HANDLED; } static int rx8010_get_time(struct device *dev, struct rtc_time *dt) @@ -111,19 +110,18 @@ static int rx8010_get_time(struct device *dev, struct rtc_time *dt) u8 date[RX8010_YEAR - RX8010_SEC + 1]; int flagreg, err; - flagreg = i2c_smbus_read_byte_data(rx8010->client, RX8010_FLAG); - if (flagreg < 0) - return flagreg; + err = regmap_read(rx8010->regs, RX8010_FLAG, &flagreg); + if (err) + return err; if (flagreg & RX8010_FLAG_VLF) { dev_warn(dev, "Frequency stop detected\n"); return -EINVAL; } - err = i2c_smbus_read_i2c_block_data(rx8010->client, RX8010_SEC, - sizeof(date), date); - if (err != sizeof(date)) - return err < 0 ? err : -EIO; + err = regmap_bulk_read(rx8010->regs, RX8010_SEC, date, sizeof(date)); + if (err) + return err; dt->tm_sec = bcd2bin(date[RX8010_SEC - RX8010_SEC] & 0x7f); dt->tm_min = bcd2bin(date[RX8010_MIN - RX8010_SEC] & 0x7f); @@ -140,19 +138,14 @@ static int rx8010_set_time(struct device *dev, struct rtc_time *dt) { struct rx8010_data *rx8010 = dev_get_drvdata(dev); u8 date[RX8010_YEAR - RX8010_SEC + 1]; - int ctrl, flagreg, err; + int err; if ((dt->tm_year < 100) || (dt->tm_year > 199)) return -EINVAL; /* set STOP bit before changing clock/calendar */ - ctrl = i2c_smbus_read_byte_data(rx8010->client, RX8010_CTRL); - if (ctrl < 0) - return ctrl; - rx8010->ctrlreg = ctrl | RX8010_CTRL_STOP; - err = i2c_smbus_write_byte_data(rx8010->client, RX8010_CTRL, - rx8010->ctrlreg); - if (err < 0) + err = regmap_set_bits(rx8010->regs, RX8010_CTRL, RX8010_CTRL_STOP); + if (err) return err; date[RX8010_SEC - RX8010_SEC] = bin2bcd(dt->tm_sec); @@ -163,66 +156,54 @@ static int rx8010_set_time(struct device *dev, struct rtc_time *dt) date[RX8010_YEAR - RX8010_SEC] = bin2bcd(dt->tm_year - 100); date[RX8010_WDAY - RX8010_SEC] = bin2bcd(1 << dt->tm_wday); - err = i2c_smbus_write_i2c_block_data(rx8010->client, - RX8010_SEC, sizeof(date), - date); - if (err < 0) + err = regmap_bulk_write(rx8010->regs, RX8010_SEC, date, sizeof(date)); + if 
(err) return err; /* clear STOP bit after changing clock/calendar */ - ctrl = i2c_smbus_read_byte_data(rx8010->client, RX8010_CTRL); - if (ctrl < 0) - return ctrl; - rx8010->ctrlreg = ctrl & ~RX8010_CTRL_STOP; - err = i2c_smbus_write_byte_data(rx8010->client, RX8010_CTRL, - rx8010->ctrlreg); - if (err < 0) + err = regmap_clear_bits(rx8010->regs, RX8010_CTRL, RX8010_CTRL_STOP); + if (err) return err; - flagreg = i2c_smbus_read_byte_data(rx8010->client, RX8010_FLAG); - if (flagreg < 0) - return flagreg; - - if (flagreg & RX8010_FLAG_VLF) - err = i2c_smbus_write_byte_data(rx8010->client, RX8010_FLAG, - flagreg & ~RX8010_FLAG_VLF); + err = regmap_clear_bits(rx8010->regs, RX8010_FLAG, RX8010_FLAG_VLF); + if (err) + return err; return 0; } -static int rx8010_init_client(struct i2c_client *client) +static int rx8010_init_client(struct device *dev) { - struct rx8010_data *rx8010 = i2c_get_clientdata(client); + struct rx8010_data *rx8010 = dev_get_drvdata(dev); u8 ctrl[2]; int need_clear = 0, err; /* Initialize reserved registers as specified in datasheet */ - err = i2c_smbus_write_byte_data(client, RX8010_RESV17, 0xD8); - if (err < 0) + err = regmap_write(rx8010->regs, RX8010_RESV17, 0xD8); + if (err) return err; - err = i2c_smbus_write_byte_data(client, RX8010_RESV30, 0x00); - if (err < 0) + err = regmap_write(rx8010->regs, RX8010_RESV30, 0x00); + if (err) return err; - err = i2c_smbus_write_byte_data(client, RX8010_RESV31, 0x08); - if (err < 0) + err = regmap_write(rx8010->regs, RX8010_RESV31, 0x08); + if (err) return err; - err = i2c_smbus_write_byte_data(client, RX8010_IRQ, 0x00); - if (err < 0) + err = regmap_write(rx8010->regs, RX8010_IRQ, 0x00); + if (err) return err; - err = i2c_smbus_read_i2c_block_data(rx8010->client, RX8010_FLAG, - 2, ctrl); - if (err != 2) - return err < 0 ? 
err : -EIO; + err = regmap_bulk_read(rx8010->regs, RX8010_FLAG, ctrl, 2); + if (err) + return err; if (ctrl[0] & RX8010_FLAG_VLF) - dev_warn(&client->dev, "Frequency stop was detected\n"); + dev_warn(dev, "Frequency stop was detected\n"); if (ctrl[0] & RX8010_FLAG_AF) { - dev_warn(&client->dev, "Alarm was detected\n"); + dev_warn(dev, "Alarm was detected\n"); need_clear = 1; } @@ -234,8 +215,8 @@ static int rx8010_init_client(struct i2c_client *client) if (need_clear) { ctrl[0] &= ~(RX8010_FLAG_AF | RX8010_FLAG_TF | RX8010_FLAG_UF); - err = i2c_smbus_write_byte_data(client, RX8010_FLAG, ctrl[0]); - if (err < 0) + err = regmap_write(rx8010->regs, RX8010_FLAG, ctrl[0]); + if (err) return err; } @@ -247,17 +228,16 @@ static int rx8010_init_client(struct i2c_client *client) static int rx8010_read_alarm(struct device *dev, struct rtc_wkalrm *t) { struct rx8010_data *rx8010 = dev_get_drvdata(dev); - struct i2c_client *client = rx8010->client; u8 alarmvals[3]; int flagreg, err; - err = i2c_smbus_read_i2c_block_data(client, RX8010_ALMIN, 3, alarmvals); - if (err != 3) - return err < 0 ? 
err : -EIO; + err = regmap_bulk_read(rx8010->regs, RX8010_ALMIN, alarmvals, 3); + if (err) + return err; - flagreg = i2c_smbus_read_byte_data(client, RX8010_FLAG); - if (flagreg < 0) - return flagreg; + err = regmap_read(rx8010->regs, RX8010_FLAG, &flagreg); + if (err) + return err; t->time.tm_sec = 0; t->time.tm_min = bcd2bin(alarmvals[0] & 0x7f); @@ -274,52 +254,38 @@ static int rx8010_read_alarm(struct device *dev, struct rtc_wkalrm *t) static int rx8010_set_alarm(struct device *dev, struct rtc_wkalrm *t) { - struct i2c_client *client = to_i2c_client(dev); struct rx8010_data *rx8010 = dev_get_drvdata(dev); u8 alarmvals[3]; - int extreg, flagreg, err; - - flagreg = i2c_smbus_read_byte_data(client, RX8010_FLAG); - if (flagreg < 0) - return flagreg; + int err; if (rx8010->ctrlreg & (RX8010_CTRL_AIE | RX8010_CTRL_UIE)) { rx8010->ctrlreg &= ~(RX8010_CTRL_AIE | RX8010_CTRL_UIE); - err = i2c_smbus_write_byte_data(rx8010->client, RX8010_CTRL, - rx8010->ctrlreg); - if (err < 0) + err = regmap_write(rx8010->regs, RX8010_CTRL, rx8010->ctrlreg); + if (err) return err; } - flagreg &= ~RX8010_FLAG_AF; - err = i2c_smbus_write_byte_data(rx8010->client, RX8010_FLAG, flagreg); - if (err < 0) + err = regmap_clear_bits(rx8010->regs, RX8010_FLAG, RX8010_FLAG_AF); + if (err) return err; alarmvals[0] = bin2bcd(t->time.tm_min); alarmvals[1] = bin2bcd(t->time.tm_hour); alarmvals[2] = bin2bcd(t->time.tm_mday); - err = i2c_smbus_write_i2c_block_data(rx8010->client, RX8010_ALMIN, - 2, alarmvals); - if (err < 0) + err = regmap_bulk_write(rx8010->regs, RX8010_ALMIN, alarmvals, 2); + if (err) return err; - extreg = i2c_smbus_read_byte_data(client, RX8010_EXT); - if (extreg < 0) - return extreg; - - extreg |= RX8010_EXT_WADA; - err = i2c_smbus_write_byte_data(rx8010->client, RX8010_EXT, extreg); - if (err < 0) + err = regmap_clear_bits(rx8010->regs, RX8010_EXT, RX8010_EXT_WADA); + if (err) return err; if (alarmvals[2] == 0) alarmvals[2] |= RX8010_ALARM_AE; - err = 
i2c_smbus_write_byte_data(rx8010->client, RX8010_ALWDAY, - alarmvals[2]); - if (err < 0) + err = regmap_write(rx8010->regs, RX8010_ALWDAY, alarmvals[2]); + if (err) return err; if (t->enabled) { @@ -329,9 +295,8 @@ static int rx8010_set_alarm(struct device *dev, struct rtc_wkalrm *t) rx8010->ctrlreg |= (RX8010_CTRL_AIE | RX8010_CTRL_UIE); - err = i2c_smbus_write_byte_data(rx8010->client, RX8010_CTRL, - rx8010->ctrlreg); - if (err < 0) + err = regmap_write(rx8010->regs, RX8010_CTRL, rx8010->ctrlreg); + if (err) return err; } @@ -341,9 +306,8 @@ static int rx8010_set_alarm(struct device *dev, struct rtc_wkalrm *t) static int rx8010_alarm_irq_enable(struct device *dev, unsigned int enabled) { - struct i2c_client *client = to_i2c_client(dev); struct rx8010_data *rx8010 = dev_get_drvdata(dev); - int flagreg, err; + int err; u8 ctrl; ctrl = rx8010->ctrlreg; @@ -360,20 +324,14 @@ static int rx8010_alarm_irq_enable(struct device *dev, ctrl &= ~RX8010_CTRL_AIE; } - flagreg = i2c_smbus_read_byte_data(client, RX8010_FLAG); - if (flagreg < 0) - return flagreg; - - flagreg &= ~RX8010_FLAG_AF; - err = i2c_smbus_write_byte_data(rx8010->client, RX8010_FLAG, flagreg); - if (err < 0) + err = regmap_clear_bits(rx8010->regs, RX8010_FLAG, RX8010_FLAG_AF); + if (err) return err; if (ctrl != rx8010->ctrlreg) { rx8010->ctrlreg = ctrl; - err = i2c_smbus_write_byte_data(rx8010->client, RX8010_CTRL, - rx8010->ctrlreg); - if (err < 0) + err = regmap_write(rx8010->regs, RX8010_CTRL, rx8010->ctrlreg); + if (err) return err; } @@ -383,13 +341,13 @@ static int rx8010_alarm_irq_enable(struct device *dev, static int rx8010_ioctl(struct device *dev, unsigned int cmd, unsigned long arg) { struct rx8010_data *rx8010 = dev_get_drvdata(dev); - int tmp, flagreg; + int tmp, flagreg, err; switch (cmd) { case RTC_VL_READ: - flagreg = i2c_smbus_read_byte_data(rx8010->client, RX8010_FLAG); - if (flagreg < 0) - return flagreg; + err = regmap_read(rx8010->regs, RX8010_FLAG, &flagreg); + if (err) + return err; 
tmp = flagreg & RX8010_FLAG_VLF ? RTC_VL_DATA_INVALID : 0; return put_user(tmp, (unsigned int __user *)arg); @@ -414,27 +372,29 @@ static const struct rtc_class_ops rx8010_rtc_ops_alarm = { .alarm_irq_enable = rx8010_alarm_irq_enable, }; +static const struct regmap_config rx8010_regmap_config = { + .name = "rx8010-rtc", + .reg_bits = 8, + .val_bits = 8, +}; + static int rx8010_probe(struct i2c_client *client) { - struct i2c_adapter *adapter = client->adapter; struct device *dev = &client->dev; struct rx8010_data *rx8010; int err = 0; - if (!i2c_check_functionality(adapter, I2C_FUNC_SMBUS_BYTE_DATA - | I2C_FUNC_SMBUS_I2C_BLOCK)) { - dev_err(&adapter->dev, "doesn't support required functionality\n"); - return -EIO; - } - rx8010 = devm_kzalloc(dev, sizeof(*rx8010), GFP_KERNEL); if (!rx8010) return -ENOMEM; - rx8010->client = client; i2c_set_clientdata(client, rx8010); - err = rx8010_init_client(client); + rx8010->regs = devm_regmap_init_i2c(client, &rx8010_regmap_config); + if (IS_ERR(rx8010->regs)) + return PTR_ERR(rx8010->regs); + + err = rx8010_init_client(dev); if (err) return err; From 2fc1af3095af5cbcd8fc406610dc196b62e3ed21 Mon Sep 17 00:00:00 2001 From: Bartosz Golaszewski Date: Mon, 14 Sep 2020 17:46:01 +0200 Subject: [PATCH 034/243] rtc: rx8010: use range checking provided by core RTC code We don't need to check the time range manually in set_time(), we can use range_min and range_max exposed by struct rtc_device. 
Signed-off-by: Bartosz Golaszewski Signed-off-by: Alexandre Belloni Link: https://lore.kernel.org/r/20200914154601.32245-15-brgl@bgdev.pl --- drivers/rtc/rtc-rx8010.c | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/drivers/rtc/rtc-rx8010.c b/drivers/rtc/rtc-rx8010.c index b8aa98fb62de..01e9017d4025 100644 --- a/drivers/rtc/rtc-rx8010.c +++ b/drivers/rtc/rtc-rx8010.c @@ -140,9 +140,6 @@ static int rx8010_set_time(struct device *dev, struct rtc_time *dt) u8 date[RX8010_YEAR - RX8010_SEC + 1]; int err; - if ((dt->tm_year < 100) || (dt->tm_year > 199)) - return -EINVAL; - /* set STOP bit before changing clock/calendar */ err = regmap_set_bits(rx8010->regs, RX8010_CTRL, RX8010_CTRL_STOP); if (err) @@ -419,6 +416,8 @@ static int rx8010_probe(struct i2c_client *client) } rx8010->rtc->max_user_freq = 1; + rx8010->rtc->range_min = RTC_TIMESTAMP_BEGIN_2000; + rx8010->rtc->range_max = RTC_TIMESTAMP_END_2099; return rtc_register_device(rx8010->rtc); } From d0a3b65052f041852c855ea1135659770ba0bc09 Mon Sep 17 00:00:00 2001 From: Rikard Falkeborn Date: Sun, 13 Sep 2020 14:26:44 +0200 Subject: [PATCH 035/243] rtc: st-lpc: Constify st_rtc_ops The only usage of st_rtc_ops is to assign its address to the ops field in the rtc_device struct, which is a const pointer. Make it const to allow the compiler to put it in read-only memory.
Signed-off-by: Rikard Falkeborn Signed-off-by: Alexandre Belloni Link: https://lore.kernel.org/r/20200913122644.35515-1-rikard.falkeborn@gmail.com --- drivers/rtc/rtc-st-lpc.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/rtc/rtc-st-lpc.c b/drivers/rtc/rtc-st-lpc.c index 51041dc08af4..0c65448b85ee 100644 --- a/drivers/rtc/rtc-st-lpc.c +++ b/drivers/rtc/rtc-st-lpc.c @@ -173,7 +173,7 @@ static int st_rtc_set_alarm(struct device *dev, struct rtc_wkalrm *t) return 0; } -static struct rtc_class_ops st_rtc_ops = { +static const struct rtc_class_ops st_rtc_ops = { .read_time = st_rtc_read_time, .set_time = st_rtc_set_time, .read_alarm = st_rtc_read_alarm, From 9f8010e71f091b0609452742cfed4650ee633c44 Mon Sep 17 00:00:00 2001 From: Thomas Bogendoerfer Date: Thu, 10 Sep 2020 10:41:24 +0200 Subject: [PATCH 036/243] rtc: ds1685: Fix bank switching to avoid endless loop ds1685_rtc_begin_data_access() tried to access an extended register before enabling access to it by switching to bank 1. Depending on content in NVRAM this could lead to an endless loop. While at it fix also switch back to bank 0 in ds1685_rtc_end_data_access(). Signed-off-by: Thomas Bogendoerfer Signed-off-by: Alexandre Belloni Acked-by: Joshua Kinard Link: https://lore.kernel.org/r/20200910084124.138560-1-tsbogend@alpha.franken.de --- drivers/rtc/rtc-ds1685.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/drivers/rtc/rtc-ds1685.c b/drivers/rtc/rtc-ds1685.c index 56c670af2e50..dfbd7b88b2b9 100644 --- a/drivers/rtc/rtc-ds1685.c +++ b/drivers/rtc/rtc-ds1685.c @@ -193,12 +193,12 @@ ds1685_rtc_begin_data_access(struct ds1685_priv *rtc) rtc->write(rtc, RTC_CTRL_B, (rtc->read(rtc, RTC_CTRL_B) | RTC_CTRL_B_SET)); + /* Switch to Bank 1 */ + ds1685_rtc_switch_to_bank1(rtc); + /* Read Ext Ctrl 4A and check the INCR bit to avoid a lockout. 
*/ while (rtc->read(rtc, RTC_EXT_CTRL_4A) & RTC_CTRL_4A_INCR) cpu_relax(); - - /* Switch to Bank 1 */ - ds1685_rtc_switch_to_bank1(rtc); } /** @@ -213,7 +213,7 @@ static inline void ds1685_rtc_end_data_access(struct ds1685_priv *rtc) { /* Switch back to Bank 0 */ - ds1685_rtc_switch_to_bank1(rtc); + ds1685_rtc_switch_to_bank0(rtc); /* Clear the SET bit in Ctrl B */ rtc->write(rtc, RTC_CTRL_B, From 35425bafc772ee189e3c3790d7c672b80ba65909 Mon Sep 17 00:00:00 2001 From: Biwen Li Date: Tue, 15 Sep 2020 15:32:09 +0800 Subject: [PATCH 037/243] rtc: pcf2127: fix a bug when not specify interrupts property Fix a bug when not specify interrupts property in dts as follows, rtc-pcf2127-i2c 1-0051: failed to request alarm irq rtc-pcf2127-i2c: probe of 1-0051 failed with error -22 Signed-off-by: Biwen Li Signed-off-by: Alexandre Belloni Link: https://lore.kernel.org/r/20200915073213.12779-1-biwen.li@oss.nxp.com --- drivers/rtc/rtc-pcf2127.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/rtc/rtc-pcf2127.c b/drivers/rtc/rtc-pcf2127.c index ed6316992cbb..07a5630ec841 100644 --- a/drivers/rtc/rtc-pcf2127.c +++ b/drivers/rtc/rtc-pcf2127.c @@ -559,7 +559,7 @@ static int pcf2127_probe(struct device *dev, struct regmap *regmap, pcf2127->rtc->set_start_time = true; /* Sets actual start to 1970 */ pcf2127->rtc->uie_unsupported = 1; - if (alarm_irq >= 0) { + if (alarm_irq > 0) { ret = devm_request_threaded_irq(dev, alarm_irq, NULL, pcf2127_rtc_irq, IRQF_TRIGGER_LOW | IRQF_ONESHOT, @@ -570,7 +570,7 @@ static int pcf2127_probe(struct device *dev, struct regmap *regmap, } } - if (alarm_irq >= 0 || device_property_read_bool(dev, "wakeup-source")) { + if (alarm_irq > 0 || device_property_read_bool(dev, "wakeup-source")) { device_init_wakeup(dev, true); pcf2127->rtc->ops = &pcf2127_rtc_alrm_ops; } From 774b9f43716d5a79272e052bcae2f3939b02a2c6 Mon Sep 17 00:00:00 2001 From: Vadym Kochan Date: Wed, 16 Sep 2020 20:09:31 +0300 Subject: [PATCH 038/243] eeprom: at24: 
set type id as EEPROM Set type as NVMEM_TYPE_EEPROM to expose this info via sysfs: $ cat /sys/bus/nvmem/devices/{DEVICE}/type EEPROM Signed-off-by: Vadym Kochan Signed-off-by: Bartosz Golaszewski --- drivers/misc/eeprom/at24.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/misc/eeprom/at24.c b/drivers/misc/eeprom/at24.c index 8f5de5f10bbe..00c8ac0677b4 100644 --- a/drivers/misc/eeprom/at24.c +++ b/drivers/misc/eeprom/at24.c @@ -713,6 +713,7 @@ static int at24_probe(struct i2c_client *client) return err; } + nvmem_config.type = NVMEM_TYPE_EEPROM; nvmem_config.name = dev_name(dev); nvmem_config.dev = dev; nvmem_config.read_only = !writable; From 6af077194600d5445f802ce4587a3254a215a851 Mon Sep 17 00:00:00 2001 From: Alain Volmat Date: Mon, 3 Aug 2020 07:17:56 +0200 Subject: [PATCH 039/243] i2c: stm32f7: Add SMBus Host-Notify protocol support Rely on the core functions to implement the host-notify protocol via an I2C slave device. Signed-off-by: Alain Volmat Reviewed-by: Pierre-Yves MORDRET Signed-off-by: Wolfram Sang --- drivers/i2c/busses/Kconfig | 1 + drivers/i2c/busses/i2c-stm32f7.c | 112 ++++++++++++++++++++++++++----- 2 files changed, 97 insertions(+), 16 deletions(-) diff --git a/drivers/i2c/busses/Kconfig b/drivers/i2c/busses/Kconfig index 38639d53a171..46a24faef352 100644 --- a/drivers/i2c/busses/Kconfig +++ b/drivers/i2c/busses/Kconfig @@ -1025,6 +1025,7 @@ config I2C_STM32F7 tristate "STMicroelectronics STM32F7 I2C support" depends on ARCH_STM32 || COMPILE_TEST select I2C_SLAVE + select I2C_SMBUS help Enable this option to add support for STM32 I2C controller embedded in STM32F7 SoCs.
diff --git a/drivers/i2c/busses/i2c-stm32f7.c b/drivers/i2c/busses/i2c-stm32f7.c index bff3479fe122..425eff5b18e0 100644 --- a/drivers/i2c/busses/i2c-stm32f7.c +++ b/drivers/i2c/busses/i2c-stm32f7.c @@ -18,6 +18,7 @@ #include #include #include +#include #include #include #include @@ -50,6 +51,7 @@ /* STM32F7 I2C control 1 */ #define STM32F7_I2C_CR1_PECEN BIT(23) +#define STM32F7_I2C_CR1_SMBHEN BIT(20) #define STM32F7_I2C_CR1_WUPEN BIT(18) #define STM32F7_I2C_CR1_SBC BIT(16) #define STM32F7_I2C_CR1_RXDMAEN BIT(15) @@ -150,7 +152,7 @@ #define STM32F7_I2C_MAX_LEN 0xff #define STM32F7_I2C_DMA_LEN_MIN 0x16 -#define STM32F7_I2C_MAX_SLAVE 0x2 +#define STM32F7_I2C_MAX_SLAVE 0x3 #define STM32F7_I2C_DNF_DEFAULT 0 #define STM32F7_I2C_DNF_MAX 16 @@ -301,6 +303,8 @@ struct stm32f7_i2c_msg { * @fmp_creg: register address for clearing Fast Mode Plus bits * @fmp_mask: mask for Fast Mode Plus bits in set register * @wakeup_src: boolean to know if the device is a wakeup source + * @smbus_mode: states that the controller is configured in SMBus mode + * @host_notify_client: SMBus host-notify client */ struct stm32f7_i2c_dev { struct i2c_adapter adap; @@ -327,6 +331,8 @@ struct stm32f7_i2c_dev { u32 fmp_creg; u32 fmp_mask; bool wakeup_src; + bool smbus_mode; + struct i2c_client *host_notify_client; }; /* @@ -1321,11 +1327,19 @@ static int stm32f7_i2c_get_free_slave_id(struct stm32f7_i2c_dev *i2c_dev, int i; /* - * slave[0] supports 7-bit and 10-bit slave address - * slave[1] supports 7-bit slave address only + * slave[0] support only SMBus Host address (0x8) + * slave[1] supports 7-bit and 10-bit slave address + * slave[2] supports 7-bit slave address only */ - for (i = STM32F7_I2C_MAX_SLAVE - 1; i >= 0; i--) { - if (i == 1 && (slave->flags & I2C_CLIENT_TEN)) + if (i2c_dev->smbus_mode && (slave->addr == 0x08)) { + if (i2c_dev->slave[0]) + goto fail; + *id = 0; + return 0; + } + + for (i = STM32F7_I2C_MAX_SLAVE - 1; i > 0; i--) { + if (i == 2 && (slave->flags & I2C_CLIENT_TEN)) 
continue; if (!i2c_dev->slave[i]) { *id = i; @@ -1333,6 +1347,7 @@ static int stm32f7_i2c_get_free_slave_id(struct stm32f7_i2c_dev *i2c_dev, } } +fail: dev_err(dev, "Slave 0x%x could not be registered\n", slave->addr); return -EINVAL; @@ -1776,7 +1791,13 @@ static int stm32f7_i2c_reg_slave(struct i2c_client *slave) if (!stm32f7_i2c_is_slave_registered(i2c_dev)) stm32f7_i2c_enable_wakeup(i2c_dev, true); - if (id == 0) { + switch (id) { + case 0: + /* Slave SMBus Host */ + i2c_dev->slave[id] = slave; + break; + + case 1: /* Configure Own Address 1 */ oar1 = readl_relaxed(i2c_dev->base + STM32F7_I2C_OAR1); oar1 &= ~STM32F7_I2C_OAR1_MASK; @@ -1789,7 +1810,9 @@ static int stm32f7_i2c_reg_slave(struct i2c_client *slave) oar1 |= STM32F7_I2C_OAR1_OA1EN; i2c_dev->slave[id] = slave; writel_relaxed(oar1, i2c_dev->base + STM32F7_I2C_OAR1); - } else if (id == 1) { + break; + + case 2: /* Configure Own Address 2 */ oar2 = readl_relaxed(i2c_dev->base + STM32F7_I2C_OAR2); oar2 &= ~STM32F7_I2C_OAR2_MASK; @@ -1802,7 +1825,10 @@ static int stm32f7_i2c_reg_slave(struct i2c_client *slave) oar2 |= STM32F7_I2C_OAR2_OA2EN; i2c_dev->slave[id] = slave; writel_relaxed(oar2, i2c_dev->base + STM32F7_I2C_OAR2); - } else { + break; + + default: + dev_err(dev, "I2C slave id not supported\n"); ret = -ENODEV; goto pm_free; } @@ -1843,10 +1869,10 @@ static int stm32f7_i2c_unreg_slave(struct i2c_client *slave) if (ret < 0) return ret; - if (id == 0) { + if (id == 1) { mask = STM32F7_I2C_OAR1_OA1EN; stm32f7_i2c_clr_bits(base + STM32F7_I2C_OAR1, mask); - } else { + } else if (id == 2) { mask = STM32F7_I2C_OAR2_OA2EN; stm32f7_i2c_clr_bits(base + STM32F7_I2C_OAR2, mask); } @@ -1911,14 +1937,51 @@ static int stm32f7_i2c_setup_fm_plus_bits(struct platform_device *pdev, &i2c_dev->fmp_mask); } +static int stm32f7_i2c_enable_smbus_host(struct stm32f7_i2c_dev *i2c_dev) +{ + struct i2c_adapter *adap = &i2c_dev->adap; + void __iomem *base = i2c_dev->base; + struct i2c_client *client; + + client = 
i2c_new_slave_host_notify_device(adap); + if (IS_ERR(client)) + return PTR_ERR(client); + + i2c_dev->host_notify_client = client; + + /* Enable SMBus Host address */ + stm32f7_i2c_set_bits(base + STM32F7_I2C_CR1, STM32F7_I2C_CR1_SMBHEN); + + return 0; +} + +static void stm32f7_i2c_disable_smbus_host(struct stm32f7_i2c_dev *i2c_dev) +{ + void __iomem *base = i2c_dev->base; + + if (i2c_dev->host_notify_client) { + /* Disable SMBus Host address */ + stm32f7_i2c_clr_bits(base + STM32F7_I2C_CR1, + STM32F7_I2C_CR1_SMBHEN); + i2c_free_slave_host_notify_device(i2c_dev->host_notify_client); + } +} + static u32 stm32f7_i2c_func(struct i2c_adapter *adap) { - return I2C_FUNC_I2C | I2C_FUNC_10BIT_ADDR | I2C_FUNC_SLAVE | - I2C_FUNC_SMBUS_QUICK | I2C_FUNC_SMBUS_BYTE | - I2C_FUNC_SMBUS_BYTE_DATA | I2C_FUNC_SMBUS_WORD_DATA | - I2C_FUNC_SMBUS_BLOCK_DATA | I2C_FUNC_SMBUS_BLOCK_PROC_CALL | - I2C_FUNC_SMBUS_PROC_CALL | I2C_FUNC_SMBUS_PEC | - I2C_FUNC_SMBUS_I2C_BLOCK; + struct stm32f7_i2c_dev *i2c_dev = i2c_get_adapdata(adap); + + u32 func = I2C_FUNC_I2C | I2C_FUNC_10BIT_ADDR | I2C_FUNC_SLAVE | + I2C_FUNC_SMBUS_QUICK | I2C_FUNC_SMBUS_BYTE | + I2C_FUNC_SMBUS_BYTE_DATA | I2C_FUNC_SMBUS_WORD_DATA | + I2C_FUNC_SMBUS_BLOCK_DATA | I2C_FUNC_SMBUS_BLOCK_PROC_CALL | + I2C_FUNC_SMBUS_PROC_CALL | I2C_FUNC_SMBUS_PEC | + I2C_FUNC_SMBUS_I2C_BLOCK; + + if (i2c_dev->smbus_mode) + func |= I2C_FUNC_SMBUS_HOST_NOTIFY; + + return func; } static const struct i2c_algorithm stm32f7_i2c_algo = { @@ -2084,10 +2147,22 @@ static int stm32f7_i2c_probe(struct platform_device *pdev) stm32f7_i2c_hw_config(i2c_dev); + i2c_dev->smbus_mode = of_property_read_bool(pdev->dev.of_node, "smbus"); + ret = i2c_add_adapter(adap); if (ret) goto pm_disable; + if (i2c_dev->smbus_mode) { + ret = stm32f7_i2c_enable_smbus_host(i2c_dev); + if (ret) { + dev_err(i2c_dev->dev, + "failed to enable SMBus Host-Notify protocol (%d)\n", + ret); + goto i2c_adapter_remove; + } + } + dev_info(i2c_dev->dev, "STM32F7 I2C-%d bus adapter\n", 
adap->nr); pm_runtime_mark_last_busy(i2c_dev->dev); @@ -2095,6 +2170,9 @@ static int stm32f7_i2c_probe(struct platform_device *pdev) return 0; +i2c_adapter_remove: + i2c_del_adapter(adap); + pm_disable: pm_runtime_put_noidle(i2c_dev->dev); pm_runtime_disable(i2c_dev->dev); @@ -2126,6 +2204,8 @@ static int stm32f7_i2c_remove(struct platform_device *pdev) { struct stm32f7_i2c_dev *i2c_dev = platform_get_drvdata(pdev); + stm32f7_i2c_disable_smbus_host(i2c_dev); + i2c_del_adapter(&i2c_dev->adap); pm_runtime_get_sync(i2c_dev->dev); From 9374ed1dfe1f862f6dd7552b8164252fe5cc424f Mon Sep 17 00:00:00 2001 From: Wolfram Sang Date: Sat, 29 Aug 2020 22:38:09 +0200 Subject: [PATCH 040/243] i2c: rcar: improve bus busy detection MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit I2C doesn't define a timeout for bus busy, so an arbitrary value like LOOP_TIMEOUT is not a good idea. Let's use the timeout value in struct adapter which is meant for such cases and is user-configurable (via IOCTL). To reduce the load, wait 10us instead of 1us which is good enough for the slow frequencies used by I2C. Finally, use the poll_timeout helper instead of open coding it. 
Signed-off-by: Wolfram Sang Reviewed-by: Niklas Söderlund Signed-off-by: Wolfram Sang --- drivers/i2c/busses/i2c-rcar.c | 19 ++++++++++--------- 1 file changed, 10 insertions(+), 9 deletions(-) diff --git a/drivers/i2c/busses/i2c-rcar.c b/drivers/i2c/busses/i2c-rcar.c index 9e883474db8c..c5ba018d74a7 100644 --- a/drivers/i2c/busses/i2c-rcar.c +++ b/drivers/i2c/busses/i2c-rcar.c @@ -19,6 +19,7 @@ #include #include #include +#include #include #include #include @@ -221,18 +222,18 @@ static void rcar_i2c_init(struct rcar_i2c_priv *priv) static int rcar_i2c_bus_barrier(struct rcar_i2c_priv *priv) { - int i; + int ret; + u32 val; - for (i = 0; i < LOOP_TIMEOUT; i++) { - /* make sure that bus is not busy */ - if (!(rcar_i2c_read(priv, ICMCR) & FSDA)) - return 0; - udelay(1); + ret = readl_poll_timeout(priv->io + ICMCR, val, !(val & FSDA), 10, + priv->adap.timeout); + if (ret) { + /* Waiting did not help, try to recover */ + priv->recovery_icmcr = MDBS | OBPC | FSDA | FSCL; + ret = i2c_recover_bus(&priv->adap); } - /* Waiting did not help, try to recover */ - priv->recovery_icmcr = MDBS | OBPC | FSDA | FSCL; - return i2c_recover_bus(&priv->adap); + return ret; } static int rcar_i2c_clock_calculate(struct rcar_i2c_priv *priv) From 74779f6eeb5a1bbe0acc046a99a2ac5042e9a505 Mon Sep 17 00:00:00 2001 From: Wolfram Sang Date: Sat, 29 Aug 2020 22:38:10 +0200 Subject: [PATCH 041/243] i2c: rcar: refactor and shorten timeout when resetting MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit LOOP_TIMEOUT was only used back then because we didn't want to introduce another constant. The timeout value can easily be a magnitude shorter because the typical range is 3us - 8us. Refactor the code to use the poll_timeout helper, use a specific timeout value and get rid of the ugly LOOP_TIMEOUT constant. 
Signed-off-by: Wolfram Sang Reviewed-by: Niklas Söderlund Signed-off-by: Wolfram Sang --- drivers/i2c/busses/i2c-rcar.c | 15 +++------------ 1 file changed, 3 insertions(+), 12 deletions(-) diff --git a/drivers/i2c/busses/i2c-rcar.c b/drivers/i2c/busses/i2c-rcar.c index c5ba018d74a7..bab90d3f68e6 100644 --- a/drivers/i2c/busses/i2c-rcar.c +++ b/drivers/i2c/busses/i2c-rcar.c @@ -146,9 +146,6 @@ struct rcar_i2c_priv { #define rcar_i2c_priv_to_dev(p) ((p)->adap.dev.parent) #define rcar_i2c_is_recv(p) ((p)->msg->flags & I2C_M_RD) -#define LOOP_TIMEOUT 1024 - - static void rcar_i2c_write(struct rcar_i2c_priv *priv, int reg, u32 val) { writel(val, priv->io + reg); @@ -760,20 +757,14 @@ static void rcar_i2c_release_dma(struct rcar_i2c_priv *priv) /* I2C is a special case, we need to poll the status of a reset */ static int rcar_i2c_do_reset(struct rcar_i2c_priv *priv) { - int i, ret; + int ret; ret = reset_control_reset(priv->rstc); if (ret) return ret; - for (i = 0; i < LOOP_TIMEOUT; i++) { - ret = reset_control_status(priv->rstc); - if (ret == 0) - return 0; - udelay(1); - } - - return -ETIMEDOUT; + return read_poll_timeout_atomic(reset_control_status, ret, ret == 0, 1, + 100, false, priv->rstc); } static int rcar_i2c_master_xfer(struct i2c_adapter *adap, From 432d159a02bdca446578290015d0874e26fc5342 Mon Sep 17 00:00:00 2001 From: Krzysztof Kozlowski Date: Wed, 2 Sep 2020 17:06:42 +0200 Subject: [PATCH 042/243] i2c: mux: gpmux: Simplify with dev_err_probe() Common pattern of handling deferred probe can be simplified with dev_err_probe(). Less code and the error value gets printed. 
Signed-off-by: Krzysztof Kozlowski Acked-by: Peter Rosin Signed-off-by: Wolfram Sang --- drivers/i2c/muxes/i2c-mux-gpmux.c | 16 ++++++---------- 1 file changed, 6 insertions(+), 10 deletions(-) diff --git a/drivers/i2c/muxes/i2c-mux-gpmux.c b/drivers/i2c/muxes/i2c-mux-gpmux.c index f830535cff12..d3acd8d66c32 100644 --- a/drivers/i2c/muxes/i2c-mux-gpmux.c +++ b/drivers/i2c/muxes/i2c-mux-gpmux.c @@ -85,18 +85,14 @@ static int i2c_mux_probe(struct platform_device *pdev) return -ENOMEM; mux->control = devm_mux_control_get(dev, NULL); - if (IS_ERR(mux->control)) { - if (PTR_ERR(mux->control) != -EPROBE_DEFER) - dev_err(dev, "failed to get control-mux\n"); - return PTR_ERR(mux->control); - } + if (IS_ERR(mux->control)) + return dev_err_probe(dev, PTR_ERR(mux->control), + "failed to get control-mux\n"); parent = mux_parent_adapter(dev); - if (IS_ERR(parent)) { - if (PTR_ERR(parent) != -EPROBE_DEFER) - dev_err(dev, "failed to get i2c-parent adapter\n"); - return PTR_ERR(parent); - } + if (IS_ERR(parent)) + return dev_err_probe(dev, PTR_ERR(parent), + "failed to get i2c-parent adapter\n"); children = of_get_child_count(np); From 43f83cd020bea8bcbfdd3f87d8be889b4356ea34 Mon Sep 17 00:00:00 2001 From: Krzysztof Kozlowski Date: Wed, 2 Sep 2020 17:06:43 +0200 Subject: [PATCH 043/243] i2c: mux: reg: Simplify with dev_err_probe() Common pattern of handling deferred probe can be simplified with dev_err_probe(). Less code and the error value gets printed. 
Signed-off-by: Krzysztof Kozlowski Acked-by: Peter Rosin Signed-off-by: Wolfram Sang --- drivers/i2c/muxes/i2c-mux-reg.c | 10 +++------- 1 file changed, 3 insertions(+), 7 deletions(-) diff --git a/drivers/i2c/muxes/i2c-mux-reg.c b/drivers/i2c/muxes/i2c-mux-reg.c index b59a62f8d7a6..0e0679f65cf7 100644 --- a/drivers/i2c/muxes/i2c-mux-reg.c +++ b/drivers/i2c/muxes/i2c-mux-reg.c @@ -171,13 +171,9 @@ static int i2c_mux_reg_probe(struct platform_device *pdev) sizeof(mux->data)); } else { ret = i2c_mux_reg_probe_dt(mux, pdev); - if (ret == -EPROBE_DEFER) - return ret; - - if (ret < 0) { - dev_err(&pdev->dev, "Error parsing device tree"); - return ret; - } + if (ret < 0) + return dev_err_probe(&pdev->dev, ret, + "Error parsing device tree"); } parent = i2c_get_adapter(mux->data.parent); From b713aa86df99f117f2ccee0ce22be03a54d24a90 Mon Sep 17 00:00:00 2001 From: Krzysztof Kozlowski Date: Wed, 2 Sep 2020 17:06:37 +0200 Subject: [PATCH 044/243] i2c: bcm2835: Simplify with dev_err_probe() Common pattern of handling deferred probe can be simplified with dev_err_probe(). Less code and the error value gets printed. 
Signed-off-by: Krzysztof Kozlowski Acked-by: Florian Fainelli Signed-off-by: Wolfram Sang --- drivers/i2c/busses/i2c-bcm2835.c | 8 +++----- 1 file changed, 3 insertions(+), 5 deletions(-) diff --git a/drivers/i2c/busses/i2c-bcm2835.c b/drivers/i2c/busses/i2c-bcm2835.c index 5dc519516292..37443edbf754 100644 --- a/drivers/i2c/busses/i2c-bcm2835.c +++ b/drivers/i2c/busses/i2c-bcm2835.c @@ -421,11 +421,9 @@ static int bcm2835_i2c_probe(struct platform_device *pdev) return PTR_ERR(i2c_dev->regs); mclk = devm_clk_get(&pdev->dev, NULL); - if (IS_ERR(mclk)) { - if (PTR_ERR(mclk) != -EPROBE_DEFER) - dev_err(&pdev->dev, "Could not get clock\n"); - return PTR_ERR(mclk); - } + if (IS_ERR(mclk)) + return dev_err_probe(&pdev->dev, PTR_ERR(mclk), + "Could not get clock\n"); i2c_dev->bus_clk = bcm2835_i2c_register_div(&pdev->dev, mclk, i2c_dev); From b62590a92058f3edf8884b46312848e8aafe0922 Mon Sep 17 00:00:00 2001 From: Alain Volmat Date: Tue, 15 Sep 2020 11:11:41 +0200 Subject: [PATCH 045/243] i2c: stm32: name slave slot to ease maintenance Name slave slots in order to ease code maintenance. 
Signed-off-by: Alain Volmat Signed-off-by: Wolfram Sang --- drivers/i2c/busses/i2c-stm32f7.c | 22 ++++++++++++++-------- 1 file changed, 14 insertions(+), 8 deletions(-) diff --git a/drivers/i2c/busses/i2c-stm32f7.c b/drivers/i2c/busses/i2c-stm32f7.c index 425eff5b18e0..e02dca90a54b 100644 --- a/drivers/i2c/busses/i2c-stm32f7.c +++ b/drivers/i2c/busses/i2c-stm32f7.c @@ -152,7 +152,12 @@ #define STM32F7_I2C_MAX_LEN 0xff #define STM32F7_I2C_DMA_LEN_MIN 0x16 -#define STM32F7_I2C_MAX_SLAVE 0x3 +enum { + STM32F7_SLAVE_HOSTNOTIFY, + STM32F7_SLAVE_7_10_BITS_ADDR, + STM32F7_SLAVE_7_BITS_ADDR, + STM32F7_I2C_MAX_SLAVE +}; #define STM32F7_I2C_DNF_DEFAULT 0 #define STM32F7_I2C_DNF_MAX 16 @@ -1327,19 +1332,20 @@ static int stm32f7_i2c_get_free_slave_id(struct stm32f7_i2c_dev *i2c_dev, int i; /* - * slave[0] support only SMBus Host address (0x8) - * slave[1] supports 7-bit and 10-bit slave address - * slave[2] supports 7-bit slave address only + * slave[STM32F7_SLAVE_HOSTNOTIFY] support only SMBus Host address (0x8) + * slave[STM32F7_SLAVE_7_10_BITS_ADDR] supports 7-bit and 10-bit slave address + * slave[STM32F7_SLAVE_7_BITS_ADDR] supports 7-bit slave address only */ if (i2c_dev->smbus_mode && (slave->addr == 0x08)) { - if (i2c_dev->slave[0]) + if (i2c_dev->slave[STM32F7_SLAVE_HOSTNOTIFY]) goto fail; - *id = 0; + *id = STM32F7_SLAVE_HOSTNOTIFY; return 0; } - for (i = STM32F7_I2C_MAX_SLAVE - 1; i > 0; i--) { - if (i == 2 && (slave->flags & I2C_CLIENT_TEN)) + for (i = STM32F7_I2C_MAX_SLAVE - 1; i > STM32F7_SLAVE_HOSTNOTIFY; i--) { + if ((i == STM32F7_SLAVE_7_BITS_ADDR) && + (slave->flags & I2C_CLIENT_TEN)) continue; if (!i2c_dev->slave[i]) { *id = i; From a8335c64c5f0916c9ee8b88835ce114085c3642e Mon Sep 17 00:00:00 2001 From: Wolfram Sang Date: Fri, 11 Sep 2020 09:16:58 +0200 Subject: [PATCH 046/243] i2c: add slave testunit driver Here is an I2C slave backend driver which allows to test some uncommon functionalities of the I2C and SMBus world. 
Usually, you need specific devices to test e.g. SMBus Host Notify and such. With this driver you just need the slave interface of another I2C controller. This initial version has testcases for multi-master and SMBus Host Notify. Already planned but not yet implemented are SMBus Alert and messages with I2C_M_RECV_LEN. Please read the documentation for further details. Signed-off-by: Wolfram Sang Signed-off-by: Wolfram Sang --- Documentation/i2c/slave-testunit-backend.rst | 66 +++++++ drivers/i2c/Kconfig | 8 + drivers/i2c/Makefile | 1 + drivers/i2c/i2c-slave-testunit.c | 175 +++++++++++++++++++ 4 files changed, 250 insertions(+) create mode 100644 Documentation/i2c/slave-testunit-backend.rst create mode 100644 drivers/i2c/i2c-slave-testunit.c diff --git a/Documentation/i2c/slave-testunit-backend.rst b/Documentation/i2c/slave-testunit-backend.rst new file mode 100644 index 000000000000..f537c62a8a83 --- /dev/null +++ b/Documentation/i2c/slave-testunit-backend.rst @@ -0,0 +1,66 @@ +.. SPDX-License-Identifier: GPL-2.0 + +================================ +Linux I2C slave testunit backend +================================ + +by Wolfram Sang in 2020 + +This backend can be used to trigger test cases for I2C bus masters which +require a remote device with certain capabilities (and which are usually not so +easy to obtain). Examples include multi-master testing, and SMBus Host Notify +testing. For some tests, the I2C slave controller must be able to switch +between master and slave mode because it needs to send data, too. + +Note that this is a device for testing and debugging. It should not be enabled +in a production build. And while there is some versioning and we try hard to +keep backward compatibility, there is no stable ABI guaranteed! + +Instantiating the device is regular. Example for bus 0, address 0x30: + +# echo "slave-testunit 0x1030" > /sys/bus/i2c/devices/i2c-0/new_device + +After that, you will have a write-only device listening. 
Reads will return an +8-bit version number. The device consists of 4 8-bit registers and all must be +written to start a testcase, i.e. you must always write 4 bytes to the device. +The registers are: + +0x00 CMD - which test to trigger +0x01 DATAL - configuration byte 1 for the test +0x02 DATAH - configuration byte 2 for the test +0x03 DELAY - delay in n * 10ms until test is started + +Using 'i2cset' from the i2c-tools package, the generic command looks like: + +# i2cset -y <bus_num> <testunit_address> <CMD> <DATAL> <DATAH> <DELAY> i + +DELAY is a generic parameter which will delay the execution of the test in CMD. +The commands are described in the following section. An invalid command will +result in the transfer not being acknowledged. + +Commands +-------- + +0x00 NOOP (reserved for future use) + +0x01 READ_BYTES (also needs master mode) + DATAL - address to read data from + DATAH - number of bytes to read + +This is useful to test if your bus master driver is handling multi-master +correctly. You can trigger the testunit to read bytes from another device on +the bus. If the bus master under test also wants to access the bus at the same +time, the bus will be busy. Example to read 128 bytes from device 0x50 after +50ms of delay: + +# i2cset -y 0 0x30 0x01 0x50 0x80 0x05 i + +0x02 SMBUS_HOST_NOTIFY (also needs master mode) + DATAL - low byte of the status word to send + DATAH - high byte of the status word to send + +This test will send an SMBUS_HOST_NOTIFY message to the host. Note that the +status word is currently ignored in the Linux Kernel. Example to send a +notification after 10ms: + +# i2cset -y 0 0x30 0x02 0x42 0x64 0x01 i diff --git a/drivers/i2c/Kconfig b/drivers/i2c/Kconfig index 5449729cdb87..438905e2a1d0 100644 --- a/drivers/i2c/Kconfig +++ b/drivers/i2c/Kconfig @@ -125,6 +125,14 @@ config I2C_SLAVE_EEPROM This backend makes Linux behave like an I2C EEPROM. Please read Documentation/i2c/slave-eeprom-backend.rst for further details.
+config I2C_SLAVE_TESTUNIT + tristate "I2C eeprom testunit driver" + help + This backend can be used to trigger test cases for I2C bus masters + which require a remote device with certain capabilities, e.g. + multi-master, SMBus Host Notify, etc. Please read + Documentation/i2c/slave-testunit-backend.rst for further details. + endif config I2C_DEBUG_CORE diff --git a/drivers/i2c/Makefile b/drivers/i2c/Makefile index bed6ba63c983..c1d493dc9bac 100644 --- a/drivers/i2c/Makefile +++ b/drivers/i2c/Makefile @@ -16,5 +16,6 @@ obj-$(CONFIG_I2C_MUX) += i2c-mux.o obj-y += algos/ busses/ muxes/ obj-$(CONFIG_I2C_STUB) += i2c-stub.o obj-$(CONFIG_I2C_SLAVE_EEPROM) += i2c-slave-eeprom.o +obj-$(CONFIG_I2C_SLAVE_TESTUNIT) += i2c-slave-testunit.o ccflags-$(CONFIG_I2C_DEBUG_CORE) := -DDEBUG diff --git a/drivers/i2c/i2c-slave-testunit.c b/drivers/i2c/i2c-slave-testunit.c new file mode 100644 index 000000000000..c288102de324 --- /dev/null +++ b/drivers/i2c/i2c-slave-testunit.c @@ -0,0 +1,175 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* + * I2C slave mode testunit + * + * Copyright (C) 2020 by Wolfram Sang, Sang Engineering + * Copyright (C) 2020 by Renesas Electronics Corporation + */ + +#include +#include +#include +#include +#include +#include +#include /* FIXME: is system_long_wq the best choice? 
*/ + +#define TU_CUR_VERSION 0x01 + +enum testunit_cmds { + TU_CMD_READ_BYTES = 1, /* save 0 for ABORT, RESET or similar */ + TU_CMD_HOST_NOTIFY, + TU_NUM_CMDS +}; + +enum testunit_regs { + TU_REG_CMD, + TU_REG_DATAL, + TU_REG_DATAH, + TU_REG_DELAY, + TU_NUM_REGS +}; + +enum testunit_flags { + TU_FLAG_IN_PROCESS, +}; + +struct testunit_data { + unsigned long flags; + u8 regs[TU_NUM_REGS]; + u8 reg_idx; + struct i2c_client *client; + struct delayed_work worker; +}; + +static void i2c_slave_testunit_work(struct work_struct *work) +{ + struct testunit_data *tu = container_of(work, struct testunit_data, worker.work); + struct i2c_msg msg; + u8 msgbuf[256]; + int ret = 0; + + msg.addr = I2C_CLIENT_END; + msg.buf = msgbuf; + + switch (tu->regs[TU_REG_CMD]) { + case TU_CMD_READ_BYTES: + msg.addr = tu->regs[TU_REG_DATAL]; + msg.flags = I2C_M_RD; + msg.len = tu->regs[TU_REG_DATAH]; + break; + + case TU_CMD_HOST_NOTIFY: + msg.addr = 0x08; + msg.flags = 0; + msg.len = 3; + msgbuf[0] = tu->client->addr; + msgbuf[1] = tu->regs[TU_REG_DATAL]; + msgbuf[2] = tu->regs[TU_REG_DATAH]; + break; + + default: + break; + } + + if (msg.addr != I2C_CLIENT_END) { + ret = i2c_transfer(tu->client->adapter, &msg, 1); + /* convert '0 msgs transferred' to errno */ + ret = (ret == 0) ? 
-EIO : ret; + } + + if (ret < 0) + dev_err(&tu->client->dev, "CMD%02X failed (%d)\n", tu->regs[TU_REG_CMD], ret); + + clear_bit(TU_FLAG_IN_PROCESS, &tu->flags); +} + +static int i2c_slave_testunit_slave_cb(struct i2c_client *client, + enum i2c_slave_event event, u8 *val) +{ + struct testunit_data *tu = i2c_get_clientdata(client); + int ret = 0; + + switch (event) { + case I2C_SLAVE_WRITE_RECEIVED: + if (test_bit(TU_FLAG_IN_PROCESS, &tu->flags)) + return -EBUSY; + + if (tu->reg_idx < TU_NUM_REGS) + tu->regs[tu->reg_idx] = *val; + else + ret = -EMSGSIZE; + + if (tu->reg_idx <= TU_NUM_REGS) + tu->reg_idx++; + + /* TU_REG_CMD always written at this point */ + if (tu->regs[TU_REG_CMD] >= TU_NUM_CMDS) + ret = -EINVAL; + + break; + + case I2C_SLAVE_STOP: + if (tu->reg_idx == TU_NUM_REGS) { + set_bit(TU_FLAG_IN_PROCESS, &tu->flags); + queue_delayed_work(system_long_wq, &tu->worker, + msecs_to_jiffies(10 * tu->regs[TU_REG_DELAY])); + } + fallthrough; + + case I2C_SLAVE_WRITE_REQUESTED: + tu->reg_idx = 0; + break; + + case I2C_SLAVE_READ_REQUESTED: + case I2C_SLAVE_READ_PROCESSED: + *val = TU_CUR_VERSION; + break; + } + + return ret; +} + +static int i2c_slave_testunit_probe(struct i2c_client *client) +{ + struct testunit_data *tu; + + tu = devm_kzalloc(&client->dev, sizeof(struct testunit_data), GFP_KERNEL); + if (!tu) + return -ENOMEM; + + tu->client = client; + i2c_set_clientdata(client, tu); + INIT_DELAYED_WORK(&tu->worker, i2c_slave_testunit_work); + + return i2c_slave_register(client, i2c_slave_testunit_slave_cb); +}; + +static int i2c_slave_testunit_remove(struct i2c_client *client) +{ + struct testunit_data *tu = i2c_get_clientdata(client); + + cancel_delayed_work_sync(&tu->worker); + i2c_slave_unregister(client); + return 0; +} + +static const struct i2c_device_id i2c_slave_testunit_id[] = { + { "slave-testunit", 0 }, + { } +}; +MODULE_DEVICE_TABLE(i2c, i2c_slave_testunit_id); + +static struct i2c_driver i2c_slave_testunit_driver = { + .driver = { + .name = 
"i2c-slave-testunit", + }, + .probe_new = i2c_slave_testunit_probe, + .remove = i2c_slave_testunit_remove, + .id_table = i2c_slave_testunit_id, +}; +module_i2c_driver(i2c_slave_testunit_driver); + +MODULE_AUTHOR("Wolfram Sang "); +MODULE_DESCRIPTION("I2C slave mode test unit"); +MODULE_LICENSE("GPL v2"); From c4651f11d09ac975b6593653edbf74b892523a8f Mon Sep 17 00:00:00 2001 From: Wolfram Sang Date: Thu, 10 Sep 2020 11:11:18 +0200 Subject: [PATCH 047/243] i2c: rcar: add HostNotify support The I2C core can now utilize a slave interface to handle SMBus HostNotify events. Enable it in this driver. Signed-off-by: Wolfram Sang Signed-off-by: Wolfram Sang --- drivers/i2c/busses/Kconfig | 1 + drivers/i2c/busses/i2c-rcar.c | 31 ++++++++++++++++++++++++++++--- 2 files changed, 29 insertions(+), 3 deletions(-) diff --git a/drivers/i2c/busses/Kconfig b/drivers/i2c/busses/Kconfig index 46a24faef352..2077ed8de681 100644 --- a/drivers/i2c/busses/Kconfig +++ b/drivers/i2c/busses/Kconfig @@ -1181,6 +1181,7 @@ config I2C_RCAR tristate "Renesas R-Car I2C Controller" depends on ARCH_RENESAS || COMPILE_TEST select I2C_SLAVE + select I2C_SMBUS select RESET_CONTROLLER if ARCH_RCAR_GEN3 help If you say yes to this option, support will be included for the diff --git a/drivers/i2c/busses/i2c-rcar.c b/drivers/i2c/busses/i2c-rcar.c index bab90d3f68e6..93b872e41546 100644 --- a/drivers/i2c/busses/i2c-rcar.c +++ b/drivers/i2c/busses/i2c-rcar.c @@ -21,6 +21,7 @@ #include #include #include +#include #include #include #include @@ -106,10 +107,11 @@ #define ID_ARBLOST (1 << 3) #define ID_NACK (1 << 4) /* persistent flags */ +#define ID_P_HOST_NOTIFY BIT(28) #define ID_P_REP_AFTER_RD BIT(29) #define ID_P_NO_RXDMA BIT(30) /* HW forbids RXDMA sometimes */ #define ID_P_PM_BLOCKED BIT(31) -#define ID_P_MASK GENMASK(31, 29) +#define ID_P_MASK GENMASK(31, 28) enum rcar_i2c_type { I2C_RCAR_GEN1, @@ -141,6 +143,8 @@ struct rcar_i2c_priv { struct reset_control *rstc; int irq; + + struct i2c_client 
*host_notify_client; }; #define rcar_i2c_priv_to_dev(p) ((p)->adap.dev.parent) @@ -875,14 +879,21 @@ static int rcar_unreg_slave(struct i2c_client *slave) static u32 rcar_i2c_func(struct i2c_adapter *adap) { + struct rcar_i2c_priv *priv = i2c_get_adapdata(adap); + /* * This HW can't do: * I2C_SMBUS_QUICK (setting FSB during START didn't work) * I2C_M_NOSTART (automatically sends address after START) * I2C_M_IGNORE_NAK (automatically sends STOP after NAK) */ - return I2C_FUNC_I2C | I2C_FUNC_SLAVE | - (I2C_FUNC_SMBUS_EMUL & ~I2C_FUNC_SMBUS_QUICK); + u32 func = I2C_FUNC_I2C | I2C_FUNC_SLAVE | + (I2C_FUNC_SMBUS_EMUL & ~I2C_FUNC_SMBUS_QUICK); + + if (priv->flags & ID_P_HOST_NOTIFY) + func |= I2C_FUNC_SMBUS_HOST_NOTIFY; + + return func; } static const struct i2c_algorithm rcar_i2c_algo = { @@ -982,6 +993,8 @@ static int rcar_i2c_probe(struct platform_device *pdev) else pm_runtime_put(dev); + if (of_property_read_bool(dev->of_node, "smbus")) + priv->flags |= ID_P_HOST_NOTIFY; priv->irq = platform_get_irq(pdev, 0); ret = devm_request_irq(dev, priv->irq, rcar_i2c_irq, 0, dev_name(dev), priv); @@ -996,10 +1009,20 @@ static int rcar_i2c_probe(struct platform_device *pdev) if (ret < 0) goto out_pm_disable; + if (priv->flags & ID_P_HOST_NOTIFY) { + priv->host_notify_client = i2c_new_slave_host_notify_device(adap); + if (IS_ERR(priv->host_notify_client)) { + ret = PTR_ERR(priv->host_notify_client); + goto out_del_device; + } + } + dev_info(dev, "probed\n"); return 0; + out_del_device: + i2c_del_adapter(&priv->adap); out_pm_put: pm_runtime_put(dev); out_pm_disable: @@ -1012,6 +1035,8 @@ static int rcar_i2c_remove(struct platform_device *pdev) struct rcar_i2c_priv *priv = platform_get_drvdata(pdev); struct device *dev = &pdev->dev; + if (priv->host_notify_client) + i2c_free_slave_host_notify_device(priv->host_notify_client); i2c_del_adapter(&priv->adap); rcar_i2c_release_dma(priv); if (priv->flags & ID_P_PM_BLOCKED) From b1d4dc15b2f430a4f541ab6c91e63a71cf230b7d Mon Sep 17 00:00:00 
2001 From: Tian Tao Date: Tue, 15 Sep 2020 09:38:18 +0800 Subject: [PATCH 048/243] i2c: Switch to using the new API kobj_to_dev() Switch to using the new API kobj_to_dev(). Signed-off-by: Tian Tao Signed-off-by: Wolfram Sang --- include/linux/i2c.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/include/linux/i2c.h b/include/linux/i2c.h index fc55ea41d323..56622658b215 100644 --- a/include/linux/i2c.h +++ b/include/linux/i2c.h @@ -344,7 +344,7 @@ const struct i2c_device_id *i2c_match_id(const struct i2c_device_id *id, static inline struct i2c_client *kobj_to_i2c_client(struct kobject *kobj) { - struct device * const dev = container_of(kobj, struct device, kobj); + struct device * const dev = kobj_to_dev(kobj); return to_i2c_client(dev); } From 8ce98dd21fcfaab3b6177cfc167d6b6b71dc9fc1 Mon Sep 17 00:00:00 2001 From: Wolfram Sang Date: Sat, 19 Sep 2020 08:56:15 +0200 Subject: [PATCH 049/243] misc: eeprom: use helper to get i2c_client from kobj Slightly easier to read. Signed-off-by: Wolfram Sang Reviewed-by: Jean Delvare Signed-off-by: Wolfram Sang --- drivers/misc/eeprom/eeprom.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/misc/eeprom/eeprom.c b/drivers/misc/eeprom/eeprom.c index 226b5efa6a77..34fa385dfd4b 100644 --- a/drivers/misc/eeprom/eeprom.c +++ b/drivers/misc/eeprom/eeprom.c @@ -76,7 +76,7 @@ static ssize_t eeprom_read(struct file *filp, struct kobject *kobj, struct bin_attribute *bin_attr, char *buf, loff_t off, size_t count) { - struct i2c_client *client = to_i2c_client(kobj_to_dev(kobj)); + struct i2c_client *client = kobj_to_i2c_client(kobj); struct eeprom_data *data = i2c_get_clientdata(client); u8 slice; From 27c90870e7018cd5b93991ca398444a9b0f74113 Mon Sep 17 00:00:00 2001 From: Alain Volmat Date: Mon, 14 Sep 2020 12:40:33 +0200 Subject: [PATCH 050/243] i2c: stm32: fix error message on upon dma_request_chan & defer handling DMA usage is optional for the I2C driver. 
check for the -ENODEV error in order to avoid displaying an error when no DMA has been requested. Cleaning up the error messages during probe, remove the additional -EPROBE_DEFER within probe function since additional error message doesn't give much more information than what is already reported within the stm32_i2c_dma_request function. Signed-off-by: Alain Volmat Signed-off-by: Wolfram Sang --- drivers/i2c/busses/i2c-stm32.c | 6 ++---- drivers/i2c/busses/i2c-stm32f7.c | 13 ++++++------- 2 files changed, 8 insertions(+), 11 deletions(-) diff --git a/drivers/i2c/busses/i2c-stm32.c b/drivers/i2c/busses/i2c-stm32.c index 3f69a3bb6119..468620db9ea5 100644 --- a/drivers/i2c/busses/i2c-stm32.c +++ b/drivers/i2c/busses/i2c-stm32.c @@ -26,7 +26,7 @@ struct stm32_i2c_dma *stm32_i2c_dma_request(struct device *dev, dma->chan_tx = dma_request_chan(dev, "tx"); if (IS_ERR(dma->chan_tx)) { ret = PTR_ERR(dma->chan_tx); - if (ret != -EPROBE_DEFER) + if ((ret != -ENODEV) && (ret != -EPROBE_DEFER)) dev_err(dev, "can't request DMA tx channel\n"); goto fail_al; } @@ -46,7 +46,7 @@ struct stm32_i2c_dma *stm32_i2c_dma_request(struct device *dev, dma->chan_rx = dma_request_chan(dev, "rx"); if (IS_ERR(dma->chan_rx)) { ret = PTR_ERR(dma->chan_rx); - if (ret != -EPROBE_DEFER) + if ((ret != -ENODEV) && (ret != -EPROBE_DEFER)) dev_err(dev, "can't request DMA rx channel\n"); goto fail_tx; @@ -76,8 +76,6 @@ struct stm32_i2c_dma *stm32_i2c_dma_request(struct device *dev, dma_release_channel(dma->chan_tx); fail_al: devm_kfree(dev, dma); - if (ret != -EPROBE_DEFER) - dev_info(dev, "can't use DMA\n"); return ERR_PTR(ret); } diff --git a/drivers/i2c/busses/i2c-stm32f7.c b/drivers/i2c/busses/i2c-stm32f7.c index e02dca90a54b..2b9f0400f5d7 100644 --- a/drivers/i2c/busses/i2c-stm32f7.c +++ b/drivers/i2c/busses/i2c-stm32f7.c @@ -2121,14 +2121,13 @@ static int stm32f7_i2c_probe(struct platform_device *pdev) i2c_dev->dma = stm32_i2c_dma_request(i2c_dev->dev, phy_addr, STM32F7_I2C_TXDR, STM32F7_I2C_RXDR); - 
if (PTR_ERR(i2c_dev->dma) == -ENODEV) - i2c_dev->dma = NULL; - else if (IS_ERR(i2c_dev->dma)) { + if (IS_ERR(i2c_dev->dma)) { ret = PTR_ERR(i2c_dev->dma); - if (ret != -EPROBE_DEFER) - dev_err(&pdev->dev, - "Failed to request dma error %i\n", ret); - goto fmp_clear; + /* DMA support is optional, only report other errors */ + if (ret != -ENODEV) + goto fmp_clear; + dev_dbg(i2c_dev->dev, "No DMA option: fallback using interrupts\n"); + i2c_dev->dma = NULL; } if (i2c_dev->wakeup_src) { From 703b3228a8fffcadf4651fde527a1b68c693e3a1 Mon Sep 17 00:00:00 2001 From: Krzysztof Kozlowski Date: Mon, 14 Sep 2020 12:40:34 +0200 Subject: [PATCH 051/243] i2c: stm32: Simplify with dev_err_probe() Common pattern of handling deferred probe can be simplified with dev_err_probe(). Less code and the error value gets printed. Signed-off-by: Krzysztof Kozlowski Signed-off-by: Alain Volmat Signed-off-by: Wolfram Sang --- drivers/i2c/busses/i2c-stm32.c | 10 ++++++---- drivers/i2c/busses/i2c-stm32f4.c | 6 ++---- drivers/i2c/busses/i2c-stm32f7.c | 14 +++++--------- 3 files changed, 13 insertions(+), 17 deletions(-) diff --git a/drivers/i2c/busses/i2c-stm32.c b/drivers/i2c/busses/i2c-stm32.c index 468620db9ea5..157c64e27d0b 100644 --- a/drivers/i2c/busses/i2c-stm32.c +++ b/drivers/i2c/busses/i2c-stm32.c @@ -26,8 +26,9 @@ struct stm32_i2c_dma *stm32_i2c_dma_request(struct device *dev, dma->chan_tx = dma_request_chan(dev, "tx"); if (IS_ERR(dma->chan_tx)) { ret = PTR_ERR(dma->chan_tx); - if ((ret != -ENODEV) && (ret != -EPROBE_DEFER)) - dev_err(dev, "can't request DMA tx channel\n"); + if (ret != -ENODEV) + ret = dev_err_probe(dev, ret, + "can't request DMA tx channel\n"); goto fail_al; } @@ -46,8 +47,9 @@ struct stm32_i2c_dma *stm32_i2c_dma_request(struct device *dev, dma->chan_rx = dma_request_chan(dev, "rx"); if (IS_ERR(dma->chan_rx)) { ret = PTR_ERR(dma->chan_rx); - if ((ret != -ENODEV) && (ret != -EPROBE_DEFER)) - dev_err(dev, "can't request DMA rx channel\n"); + if (ret != -ENODEV) + ret = 
dev_err_probe(dev, ret, + "can't request DMA rx channel\n"); goto fail_tx; } diff --git a/drivers/i2c/busses/i2c-stm32f4.c b/drivers/i2c/busses/i2c-stm32f4.c index 48e269284369..937c2c8fd349 100644 --- a/drivers/i2c/busses/i2c-stm32f4.c +++ b/drivers/i2c/busses/i2c-stm32f4.c @@ -797,10 +797,8 @@ static int stm32f4_i2c_probe(struct platform_device *pdev) rst = devm_reset_control_get_exclusive(&pdev->dev, NULL); if (IS_ERR(rst)) { - ret = PTR_ERR(rst); - if (ret != -EPROBE_DEFER) - dev_err(&pdev->dev, "Error: Missing reset ctrl\n"); - + ret = dev_err_probe(&pdev->dev, PTR_ERR(rst), + "Error: Missing reset ctrl\n"); goto clk_free; } reset_control_assert(rst); diff --git a/drivers/i2c/busses/i2c-stm32f7.c b/drivers/i2c/busses/i2c-stm32f7.c index 2b9f0400f5d7..f41f51a176a1 100644 --- a/drivers/i2c/busses/i2c-stm32f7.c +++ b/drivers/i2c/busses/i2c-stm32f7.c @@ -2037,11 +2037,9 @@ static int stm32f7_i2c_probe(struct platform_device *pdev) "wakeup-source"); i2c_dev->clk = devm_clk_get(&pdev->dev, NULL); - if (IS_ERR(i2c_dev->clk)) { - if (PTR_ERR(i2c_dev->clk) != -EPROBE_DEFER) - dev_err(&pdev->dev, "Failed to get controller clock\n"); - return PTR_ERR(i2c_dev->clk); - } + if (IS_ERR(i2c_dev->clk)) + return dev_err_probe(&pdev->dev, PTR_ERR(i2c_dev->clk), + "Failed to get controller clock\n"); ret = clk_prepare_enable(i2c_dev->clk); if (ret) { @@ -2051,10 +2049,8 @@ static int stm32f7_i2c_probe(struct platform_device *pdev) rst = devm_reset_control_get(&pdev->dev, NULL); if (IS_ERR(rst)) { - ret = PTR_ERR(rst); - if (ret != -EPROBE_DEFER) - dev_err(&pdev->dev, "Error: Missing reset ctrl\n"); - + ret = dev_err_probe(&pdev->dev, PTR_ERR(rst), + "Error: Missing reset ctrl\n"); goto clk_free; } reset_control_assert(rst); From 97c93e06f506107362f1630aa25ec3e3ae71d070 Mon Sep 17 00:00:00 2001 From: Tom Rix Date: Fri, 4 Sep 2020 11:06:47 -0700 Subject: [PATCH 052/243] i2c: amd_mp2: handle num is 0 input for i2c_amd_xfer clang static analyzer reports this problem 
i2c-amd-mp2-plat.c:174:9: warning: Branch condition evaluates to a garbage value return err ? err : num; ^~~ err is not initialized, it depends on being set in the transfer loop which will not happen if num is 0. Surveying other master_xfer() implementations shows that all handle a 0 num. Because returning 0 is expected, initialize err to 0. Signed-off-by: Tom Rix Acked-by: Elie Morisse Signed-off-by: Wolfram Sang --- drivers/i2c/busses/i2c-amd-mp2-plat.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/i2c/busses/i2c-amd-mp2-plat.c b/drivers/i2c/busses/i2c-amd-mp2-plat.c index 17df9e8845b6..506433bc0ff2 100644 --- a/drivers/i2c/busses/i2c-amd-mp2-plat.c +++ b/drivers/i2c/busses/i2c-amd-mp2-plat.c @@ -155,7 +155,7 @@ static int i2c_amd_xfer(struct i2c_adapter *adap, struct i2c_msg *msgs, int num) struct amd_i2c_dev *i2c_dev = i2c_get_adapdata(adap); int i; struct i2c_msg *pmsg; - int err; + int err = 0; /* the adapter might have been deleted while waiting for the bus lock */ if (unlikely(!i2c_dev->common.mp2_dev)) From b74b33f5f7eb5e3cdc134b3f7fb3daa224415ffd Mon Sep 17 00:00:00 2001 From: Paul Cercueil Date: Fri, 4 Sep 2020 15:11:50 +0200 Subject: [PATCH 053/243] dt-bindings: i2c: ingenic: Add compatible string for the JZ4770 The I2C controller in the JZ4770 SoC seems to work the exact same as in the JZ4780 SoC. We could use "ingenic,jz4780-i2c" as a fallback string in the Device Tree, but that would be awkward, since the JZ4780 is newer. Instead, add a "ingenic,jz4770-i2c" string and use it as fallback for the "ingenic,jz4780-i2c" string.
Signed-off-by: Paul Cercueil Reviewed-by: Rob Herring Signed-off-by: Wolfram Sang --- .../devicetree/bindings/i2c/ingenic,i2c.yaml | 12 ++++++++---- 1 file changed, 8 insertions(+), 4 deletions(-) diff --git a/Documentation/devicetree/bindings/i2c/ingenic,i2c.yaml b/Documentation/devicetree/bindings/i2c/ingenic,i2c.yaml index 682ed1bbf5c6..0e7b4b8a7e48 100644 --- a/Documentation/devicetree/bindings/i2c/ingenic,i2c.yaml +++ b/Documentation/devicetree/bindings/i2c/ingenic,i2c.yaml @@ -17,9 +17,13 @@ properties: pattern: "^i2c@[0-9a-f]+$" compatible: - enum: - - ingenic,jz4780-i2c - - ingenic,x1000-i2c + oneOf: + - enum: + - ingenic,jz4770-i2c + - ingenic,x1000-i2c + - items: + - const: ingenic,jz4780-i2c + - const: ingenic,jz4770-i2c reg: maxItems: 1 @@ -60,7 +64,7 @@ examples: #include #include i2c@10054000 { - compatible = "ingenic,jz4780-i2c"; + compatible = "ingenic,jz4780-i2c", "ingenic,jz4770-i2c"; #address-cells = <1>; #size-cells = <0>; reg = <0x10054000 0x1000>; From 266fd91f2b2fb4efe50550298e8bb0692fb9477f Mon Sep 17 00:00:00 2001 From: Paul Cercueil Date: Fri, 4 Sep 2020 15:11:51 +0200 Subject: [PATCH 054/243] i2c: jz4780: Add compatible string for JZ4770 SoC The I2C controller in the JZ4770 SoC seems to work the exact same as in the JZ4780 SoC. 
Signed-off-by: Paul Cercueil Signed-off-by: Wolfram Sang --- drivers/i2c/busses/i2c-jz4780.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/i2c/busses/i2c-jz4780.c b/drivers/i2c/busses/i2c-jz4780.c index ba831df6661e..ed2ec86f6f1a 100644 --- a/drivers/i2c/busses/i2c-jz4780.c +++ b/drivers/i2c/busses/i2c-jz4780.c @@ -752,6 +752,7 @@ static const struct ingenic_i2c_config x1000_i2c_config = { }; static const struct of_device_id jz4780_i2c_of_matches[] = { + { .compatible = "ingenic,jz4770-i2c", .data = &jz4780_i2c_config }, { .compatible = "ingenic,jz4780-i2c", .data = &jz4780_i2c_config }, { .compatible = "ingenic,x1000-i2c", .data = &x1000_i2c_config }, { /* sentinel */ } From 7eecca9daf9acafd8d3b910af52bd1453aa501da Mon Sep 17 00:00:00 2001 From: Paul Cercueil Date: Fri, 4 Sep 2020 15:11:52 +0200 Subject: [PATCH 055/243] i2c: jz4780: Remove of_match_ptr() CONFIG_OF is selected by CONFIG_MACH_INGENIC, therefore we don't need to handle the case where Device Tree is not supported. 
Signed-off-by: Paul Cercueil Signed-off-by: Wolfram Sang --- drivers/i2c/busses/i2c-jz4780.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/i2c/busses/i2c-jz4780.c b/drivers/i2c/busses/i2c-jz4780.c index ed2ec86f6f1a..cb4a25ebb890 100644 --- a/drivers/i2c/busses/i2c-jz4780.c +++ b/drivers/i2c/busses/i2c-jz4780.c @@ -857,7 +857,7 @@ static struct platform_driver jz4780_i2c_driver = { .remove = jz4780_i2c_remove, .driver = { .name = "jz4780-i2c", - .of_match_table = of_match_ptr(jz4780_i2c_of_matches), + .of_match_table = jz4780_i2c_of_matches, }, }; From 77dae8056a13158251d012a90abb0e076fc3c98a Mon Sep 17 00:00:00 2001 From: Andy Shevchenko Date: Wed, 22 Jul 2020 16:43:55 +0300 Subject: [PATCH 056/243] i2c: ismt: Describe parameters in kernel doc Kernel doc validation script complains: CHECK .../drivers/i2c/busses/i2c-ismt.c .../i2c-ismt.c:182: warning: cannot understand function prototype: 'const struct pci_device_id ismt_ids[] = ' .../i2c-ismt.c:202: warning: Function parameter or member 'dev' not described in '__ismt_desc_dump' .../i2c-ismt.c:202: warning: Function parameter or member 'desc' not described in '__ismt_desc_dump' .../i2c-ismt.c:649: warning: cannot understand function prototype: 'const struct i2c_algorithm smbus_algorithm = ' Fix corresponding descriptions to make reader and kernel doc validator happy. 
Signed-off-by: Andy Shevchenko Reviewed-by: Seth Heasley Signed-off-by: Wolfram Sang --- drivers/i2c/busses/i2c-ismt.c | 10 ++-------- 1 file changed, 2 insertions(+), 8 deletions(-) diff --git a/drivers/i2c/busses/i2c-ismt.c b/drivers/i2c/busses/i2c-ismt.c index 2f95e25a10f7..006c0dbec68f 100644 --- a/drivers/i2c/busses/i2c-ismt.c +++ b/drivers/i2c/busses/i2c-ismt.c @@ -176,9 +176,6 @@ struct ismt_priv { u8 buffer[I2C_SMBUS_BLOCK_MAX + 16]; /* temp R/W data buffer */ }; -/** - * ismt_ids - PCI device IDs supported by this driver - */ static const struct pci_device_id ismt_ids[] = { { PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_S1200_SMT0) }, { PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_S1200_SMT1) }, @@ -197,6 +194,8 @@ MODULE_PARM_DESC(bus_speed, "Bus Speed in kHz (0 = BIOS default)"); /** * __ismt_desc_dump() - dump the contents of a specific descriptor + * @dev: the iSMT device + * @desc: the iSMT hardware descriptor */ static void __ismt_desc_dump(struct device *dev, const struct ismt_desc *desc) { @@ -628,11 +627,6 @@ static u32 ismt_func(struct i2c_adapter *adap) I2C_FUNC_SMBUS_PEC; } -/** - * smbus_algorithm - the adapter algorithm and supported functionality - * @smbus_xfer: the adapter algorithm - * @functionality: functionality supported by the adapter - */ static const struct i2c_algorithm smbus_algorithm = { .smbus_xfer = ismt_access, .functionality = ismt_func, From 86d36a5e9c1b3c3aeb4b60d0f5f496d1162321f4 Mon Sep 17 00:00:00 2001 From: Andy Shevchenko Date: Mon, 15 Jun 2020 19:10:01 +0300 Subject: [PATCH 057/243] i2c: ismt: Add support for Intel Emmitsburg PCH Add PCI ID for the Intel Emmitsburg PCH iSMT SMBus controller. 
Signed-off-by: Andy Shevchenko Reviewed-by: Seth Heasley Signed-off-by: Wolfram Sang --- drivers/i2c/busses/i2c-ismt.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/i2c/busses/i2c-ismt.c b/drivers/i2c/busses/i2c-ismt.c index 006c0dbec68f..a35a27c320e7 100644 --- a/drivers/i2c/busses/i2c-ismt.c +++ b/drivers/i2c/busses/i2c-ismt.c @@ -77,6 +77,7 @@ #define PCI_DEVICE_ID_INTEL_S1200_SMT1 0x0c5a #define PCI_DEVICE_ID_INTEL_CDF_SMT 0x18ac #define PCI_DEVICE_ID_INTEL_DNV_SMT 0x19ac +#define PCI_DEVICE_ID_INTEL_EBG_SMT 0x1bff #define PCI_DEVICE_ID_INTEL_AVOTON_SMT 0x1f15 #define ISMT_DESC_ENTRIES 2 /* number of descriptor entries */ @@ -181,6 +182,7 @@ static const struct pci_device_id ismt_ids[] = { { PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_S1200_SMT1) }, { PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_CDF_SMT) }, { PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_DNV_SMT) }, + { PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_EBG_SMT) }, { PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_AVOTON_SMT) }, { 0, } }; From 9b65b020fff70bc070c4cb8fcf8ea8a1f70b111b Mon Sep 17 00:00:00 2001 From: Andy Shevchenko Date: Thu, 16 Jul 2020 17:53:19 +0300 Subject: [PATCH 058/243] i2c: nvidia-gpu: Use put_unaligned_be24() This makes the driver code slightly easier to read. 
Signed-off-by: Andy Shevchenko Reviewed-by: Ajay Gupta Signed-off-by: Wolfram Sang --- drivers/i2c/busses/i2c-nvidia-gpu.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/drivers/i2c/busses/i2c-nvidia-gpu.c b/drivers/i2c/busses/i2c-nvidia-gpu.c index f480105000b8..f9a69b109e5c 100644 --- a/drivers/i2c/busses/i2c-nvidia-gpu.c +++ b/drivers/i2c/busses/i2c-nvidia-gpu.c @@ -125,8 +125,7 @@ static int gpu_i2c_read(struct gpu_i2c_dev *i2cd, u8 *data, u16 len) put_unaligned_be16(val, data); break; case 3: - put_unaligned_be16(val >> 8, data); - data[2] = val; + put_unaligned_be24(val, data); break; case 4: put_unaligned_be32(val, data); From 48bb6ec17cde09122931438add817a69a91c3ab6 Mon Sep 17 00:00:00 2001 From: Colin Ian King Date: Wed, 10 Jun 2020 00:22:57 +0100 Subject: [PATCH 059/243] NFS: remove redundant pointer clnt The pointer clnt is being initialized with a value that is never read and so this assignment is redundant and can be removed. The pointer can be removed because it is being used as a temporary variable and it is clearer to make the direct assignment and remove it completely.
Addresses-Coverity: ("Unused value") Signed-off-by: Colin Ian King Signed-off-by: Anna Schumaker --- fs/nfs/nfs4proc.c | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c index 6e95c85fe395..1d23dc640208 100644 --- a/fs/nfs/nfs4proc.c +++ b/fs/nfs/nfs4proc.c @@ -9693,7 +9693,6 @@ _nfs41_proc_secinfo_no_name(struct nfs_server *server, struct nfs_fh *fhandle, .rpc_argp = &args, .rpc_resp = &res, }; - struct rpc_clnt *clnt = server->client; struct nfs4_call_sync_data data = { .seq_server = server, .seq_args = &args.seq_args, @@ -9710,8 +9709,7 @@ _nfs41_proc_secinfo_no_name(struct nfs_server *server, struct nfs_fh *fhandle, int status; if (use_integrity) { - clnt = server->nfs_client->cl_rpcclient; - task_setup.rpc_client = clnt; + task_setup.rpc_client = server->nfs_client->cl_rpcclient; cred = nfs4_get_clid_cred(server->nfs_client); msg.rpc_cred = cred; From 82c596ebaa104f994d25256523ae2f9047323fe7 Mon Sep 17 00:00:00 2001 From: Chengguang Xu Date: Wed, 17 Jun 2020 09:09:39 +0800 Subject: [PATCH 060/243] nfs4: strengthen error check to avoid unexpected result The variable error is ssize_t, which is signed and will be cast to unsigned when compared with variable size, so add a check to avoid unexpected result in case of negative value of error.
Signed-off-by: Chengguang Xu Signed-off-by: Anna Schumaker --- fs/nfs/nfs4proc.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c index 1d23dc640208..aca52e52538f 100644 --- a/fs/nfs/nfs4proc.c +++ b/fs/nfs/nfs4proc.c @@ -7440,7 +7440,7 @@ nfs4_listxattr_nfs4_label(struct inode *inode, char *list, size_t list_len) if (nfs_server_capable(inode, NFS_CAP_SECURITY_LABEL)) { len = security_inode_listsecurity(inode, list, list_len); - if (list_len && len > list_len) + if (len >= 0 && list_len && len > list_len) return -ERANGE; } return len; From 88428cc4ae7abcc879295fbb19373dd76aad2bdd Mon Sep 17 00:00:00 2001 From: Olga Kornievskaia Date: Tue, 23 Jun 2020 11:24:09 -0400 Subject: [PATCH 061/243] SUNRPC dont update timeout value on connection reset Current behaviour: every time a v3 operation is re-sent to the server we update (double) the timeout. There is no distinction between whether or not the previous timer had expired before the re-send happened. Here's the scenario: 1. Client sends a v3 operation 2. Server RST-s the connection (prior to the timeout) (eg., connection is immediately reset) 3. Client re-sends a v3 operation but the timeout is now 120sec. As a result, an application sees 2mins pause before a retry in case server again does not reply. Whereas if a connection reset didn't change the timeout value, the client would have re-tried (the 3rd time) after 60secs.
Signed-off-by: Olga Kornievskaia Signed-off-by: Anna Schumaker --- net/sunrpc/clnt.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/net/sunrpc/clnt.c b/net/sunrpc/clnt.c index 62e0b6c1e8cf..35bbd9c16e87 100644 --- a/net/sunrpc/clnt.c +++ b/net/sunrpc/clnt.c @@ -2405,7 +2405,8 @@ call_status(struct rpc_task *task) goto out_exit; } task->tk_action = call_encode; - rpc_check_timeout(task); + if (status != -ECONNRESET && status != -ECONNABORTED) + rpc_check_timeout(task); return; out_exit: rpc_call_rpcerror(task, status); From e4378a0fdd43a676bb4dbd858bc9e61c6661193b Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Wed, 8 Jul 2020 16:09:05 -0400 Subject: [PATCH 062/243] SUNRPC: Remove trace_xprt_complete_rqst() Request completion is already recorded by an "rpc_task_wakeup queue=xprt_pending" trace record. A subsequent rpc_xdr_recvfrom trace record shows the number of bytes received. Signed-off-by: Chuck Lever Signed-off-by: Anna Schumaker --- include/trace/events/sunrpc.h | 1 - net/sunrpc/xprt.c | 2 -- 2 files changed, 3 deletions(-) diff --git a/include/trace/events/sunrpc.h b/include/trace/events/sunrpc.h index 65d7dfbbc9cd..75b5df2a02fa 100644 --- a/include/trace/events/sunrpc.h +++ b/include/trace/events/sunrpc.h @@ -969,7 +969,6 @@ DECLARE_EVENT_CLASS(rpc_xprt_event, DEFINE_RPC_XPRT_EVENT(timer); DEFINE_RPC_XPRT_EVENT(lookup_rqst); -DEFINE_RPC_XPRT_EVENT(complete_rqst); TRACE_EVENT(xprt_transmit, TP_PROTO( diff --git a/net/sunrpc/xprt.c b/net/sunrpc/xprt.c index 5a8e47bbfb9f..7ad4009c9033 100644 --- a/net/sunrpc/xprt.c +++ b/net/sunrpc/xprt.c @@ -1131,8 +1131,6 @@ void xprt_complete_rqst(struct rpc_task *task, int copied) struct rpc_rqst *req = task->tk_rqstp; struct rpc_xprt *xprt = req->rq_xprt; - trace_xprt_complete_rqst(xprt, req->rq_xid, copied); - xprt->stat.recvs++; req->rq_private_buf.len = copied; From 06e234c6132784c56198423c653f1ad0e1e1fdc1 Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Wed, 8 Jul 2020 16:09:11 -0400 Subject: 
[PATCH 063/243] SUNRPC: Hoist trace_xprtrdma_op_allocate into generic code Introduce a tracepoint in call_allocate that reports the exact sizes in the RPC buffer allocation request and the status of the result. This helps catch problems with XDR buffer provisioning, and replaces transport-specific debugging instrumentation. Signed-off-by: Chuck Lever Signed-off-by: Anna Schumaker --- include/trace/events/rpcrdma.h | 30 ------------------------------ include/trace/events/sunrpc.h | 30 ++++++++++++++++++++++++++++++ net/sunrpc/clnt.c | 3 +-- net/sunrpc/sched.c | 2 -- net/sunrpc/xprtrdma/transport.c | 2 -- 5 files changed, 31 insertions(+), 36 deletions(-) diff --git a/include/trace/events/rpcrdma.h b/include/trace/events/rpcrdma.h index abe942225637..c187a9a8ead6 100644 --- a/include/trace/events/rpcrdma.h +++ b/include/trace/events/rpcrdma.h @@ -1191,36 +1191,6 @@ TRACE_EVENT(xprtrdma_decode_seg, ** Allocation/release of rpcrdma_reqs and rpcrdma_reps **/ -TRACE_EVENT(xprtrdma_op_allocate, - TP_PROTO( - const struct rpc_task *task, - const struct rpcrdma_req *req - ), - - TP_ARGS(task, req), - - TP_STRUCT__entry( - __field(unsigned int, task_id) - __field(unsigned int, client_id) - __field(const void *, req) - __field(size_t, callsize) - __field(size_t, rcvsize) - ), - - TP_fast_assign( - __entry->task_id = task->tk_pid; - __entry->client_id = task->tk_client->cl_clid; - __entry->req = req; - __entry->callsize = task->tk_rqstp->rq_callsize; - __entry->rcvsize = task->tk_rqstp->rq_rcvsize; - ), - - TP_printk("task:%u@%u req=%p (%zu, %zu)", - __entry->task_id, __entry->client_id, - __entry->req, __entry->callsize, __entry->rcvsize - ) -); - TRACE_EVENT(xprtrdma_op_free, TP_PROTO( const struct rpc_task *task, diff --git a/include/trace/events/sunrpc.h b/include/trace/events/sunrpc.h index 75b5df2a02fa..7addf7d1596b 100644 --- a/include/trace/events/sunrpc.h +++ b/include/trace/events/sunrpc.h @@ -517,6 +517,36 @@ DEFINE_RPC_REPLY_EVENT(stale_creds); 
DEFINE_RPC_REPLY_EVENT(bad_creds); DEFINE_RPC_REPLY_EVENT(auth_tooweak); +TRACE_EVENT(rpc_buf_alloc, + TP_PROTO( + const struct rpc_task *task, + int status + ), + + TP_ARGS(task, status), + + TP_STRUCT__entry( + __field(unsigned int, task_id) + __field(unsigned int, client_id) + __field(size_t, callsize) + __field(size_t, recvsize) + __field(int, status) + ), + + TP_fast_assign( + __entry->task_id = task->tk_pid; + __entry->client_id = task->tk_client->cl_clid; + __entry->callsize = task->tk_rqstp->rq_callsize; + __entry->recvsize = task->tk_rqstp->rq_rcvsize; + __entry->status = status; + ), + + TP_printk("task:%u@%u callsize=%zu recvsize=%zu status=%d", + __entry->task_id, __entry->client_id, + __entry->callsize, __entry->recvsize, __entry->status + ) +); + TRACE_EVENT(rpc_call_rpcerror, TP_PROTO( const struct rpc_task *task, diff --git a/net/sunrpc/clnt.c b/net/sunrpc/clnt.c index 35bbd9c16e87..3ab9e6a97898 100644 --- a/net/sunrpc/clnt.c +++ b/net/sunrpc/clnt.c @@ -1823,6 +1823,7 @@ call_allocate(struct rpc_task *task) req->rq_rcvsize <<= 2; status = xprt->ops->buf_alloc(task); + trace_rpc_buf_alloc(task, status); xprt_inject_disconnect(xprt); if (status == 0) return; @@ -1831,8 +1832,6 @@ call_allocate(struct rpc_task *task) return; } - dprintk("RPC: %5u rpc_buffer allocation failed\n", task->tk_pid); - if (RPC_IS_ASYNC(task) || !fatal_signal_pending(current)) { task->tk_action = call_allocate; rpc_delay(task, HZ>>4); diff --git a/net/sunrpc/sched.c b/net/sunrpc/sched.c index 7eba20a88438..adce1e2ed10d 100644 --- a/net/sunrpc/sched.c +++ b/net/sunrpc/sched.c @@ -1036,8 +1036,6 @@ int rpc_malloc(struct rpc_task *task) return -ENOMEM; buf->len = size; - dprintk("RPC: %5u allocated buffer of size %zu at %p\n", - task->tk_pid, size, buf); rqst->rq_buffer = buf->data; rqst->rq_rbuffer = (char *)rqst->rq_buffer + rqst->rq_callsize; return 0; diff --git a/net/sunrpc/xprtrdma/transport.c b/net/sunrpc/xprtrdma/transport.c index 053c8ab1265a..612b60f31302 100644 --- 
a/net/sunrpc/xprtrdma/transport.c +++ b/net/sunrpc/xprtrdma/transport.c @@ -586,11 +586,9 @@ xprt_rdma_allocate(struct rpc_task *task) rqst->rq_buffer = rdmab_data(req->rl_sendbuf); rqst->rq_rbuffer = rdmab_data(req->rl_recvbuf); - trace_xprtrdma_op_allocate(task, req); return 0; out_fail: - trace_xprtrdma_op_allocate(task, NULL); return -ENOMEM; } From 78069487539dbb18e10aae15644019b468829ab2 Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Wed, 8 Jul 2020 16:09:16 -0400 Subject: [PATCH 064/243] SUNRPC: Remove debugging instrumentation from xprt_release These instruments don't appear to add any substantial value. We already have this at the termination of each RPC: iozone-2617 [002] 975.713126: rpc_stats_latency: task:418@5 xid=0x260eab5d nfsv3 LOOKUP backlog=15 rtt=32 execute=58 iozone-2617 [002] 975.713127: xprt_release_cong: task:418@5 snd_task:4294967295 cong=256 cwnd=16384 iozone-2617 [002] 975.713127: xprt_put_cong: task:418@5 snd_task:4294967295 cong=0 cwnd=16384 Signed-off-by: Chuck Lever Signed-off-by: Anna Schumaker --- include/trace/events/rpcrdma.h | 32 -------------------------------- net/sunrpc/sched.c | 3 --- net/sunrpc/xprt.c | 1 - net/sunrpc/xprtrdma/transport.c | 2 -- 4 files changed, 38 deletions(-) diff --git a/include/trace/events/rpcrdma.h b/include/trace/events/rpcrdma.h index c187a9a8ead6..1e17c2fc9640 100644 --- a/include/trace/events/rpcrdma.h +++ b/include/trace/events/rpcrdma.h @@ -1187,38 +1187,6 @@ TRACE_EVENT(xprtrdma_decode_seg, ) ); -/** - ** Allocation/release of rpcrdma_reqs and rpcrdma_reps - **/ - -TRACE_EVENT(xprtrdma_op_free, - TP_PROTO( - const struct rpc_task *task, - const struct rpcrdma_req *req - ), - - TP_ARGS(task, req), - - TP_STRUCT__entry( - __field(unsigned int, task_id) - __field(unsigned int, client_id) - __field(const void *, req) - __field(const void *, rep) - ), - - TP_fast_assign( - __entry->task_id = task->tk_pid; - __entry->client_id = task->tk_client->cl_clid; - __entry->req = req; - __entry->rep = 
req->rl_reply; - ), - - TP_printk("task:%u@%u req=%p rep=%p", - __entry->task_id, __entry->client_id, - __entry->req, __entry->rep - ) -); - /** ** Callback events **/ diff --git a/net/sunrpc/sched.c b/net/sunrpc/sched.c index adce1e2ed10d..402b1c8869fd 100644 --- a/net/sunrpc/sched.c +++ b/net/sunrpc/sched.c @@ -1056,9 +1056,6 @@ void rpc_free(struct rpc_task *task) buf = container_of(buffer, struct rpc_buffer, data); size = buf->len; - dprintk("RPC: freeing buffer of size %zu at %p\n", - size, buf); - if (size <= RPC_BUFFER_MAXSIZE) mempool_free(buf, rpc_buffer_mempool); else diff --git a/net/sunrpc/xprt.c b/net/sunrpc/xprt.c index 7ad4009c9033..2217dfed8f88 100644 --- a/net/sunrpc/xprt.c +++ b/net/sunrpc/xprt.c @@ -1836,7 +1836,6 @@ void xprt_release(struct rpc_task *task) if (req->rq_release_snd_buf) req->rq_release_snd_buf(req); - dprintk("RPC: %5u release request %p\n", task->tk_pid, req); if (likely(!bc_prealloc(req))) xprt->ops->free_slot(xprt, req); else diff --git a/net/sunrpc/xprtrdma/transport.c b/net/sunrpc/xprtrdma/transport.c index 612b60f31302..819a922830da 100644 --- a/net/sunrpc/xprtrdma/transport.c +++ b/net/sunrpc/xprtrdma/transport.c @@ -605,8 +605,6 @@ xprt_rdma_free(struct rpc_task *task) struct rpcrdma_xprt *r_xprt = rpcx_to_rdmax(rqst->rq_xprt); struct rpcrdma_req *req = rpcr_to_rdmar(rqst); - trace_xprtrdma_op_free(task, req); - if (!list_empty(&req->rl_registered)) frwr_unmap_sync(r_xprt, req); From 09d2ba0cb1b311b2e3b144a0bbccc99f3c0d82d1 Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Wed, 8 Jul 2020 16:09:21 -0400 Subject: [PATCH 065/243] SUNRPC: Update debugging instrumentation in xprt_do_reserve() Replace a dprintk() with a tracepoint. The tracepoint marks the point where an RPC request is assigned an XID. Additional clean up: Remove trace_xprt_enq_xmit, which reports much the same thing. That tracepoint was added for debugging commit 918f3c1fe83c ("SUNRPC: Improve latency for interactive tasks"). 
Signed-off-by: Chuck Lever Signed-off-by: Anna Schumaker --- include/trace/events/sunrpc.h | 55 +++++++++++++++-------------------- net/sunrpc/xprt.c | 8 ++--- 2 files changed, 26 insertions(+), 37 deletions(-) diff --git a/include/trace/events/sunrpc.h b/include/trace/events/sunrpc.h index 7addf7d1596b..781277bb9cb2 100644 --- a/include/trace/events/sunrpc.h +++ b/include/trace/events/sunrpc.h @@ -1031,37 +1031,6 @@ TRACE_EVENT(xprt_transmit, __entry->seqno, __entry->status) ); -TRACE_EVENT(xprt_enq_xmit, - TP_PROTO( - const struct rpc_task *task, - int stage - ), - - TP_ARGS(task, stage), - - TP_STRUCT__entry( - __field(unsigned int, task_id) - __field(unsigned int, client_id) - __field(u32, xid) - __field(u32, seqno) - __field(int, stage) - ), - - TP_fast_assign( - __entry->task_id = task->tk_pid; - __entry->client_id = task->tk_client ? - task->tk_client->cl_clid : -1; - __entry->xid = be32_to_cpu(task->tk_rqstp->rq_xid); - __entry->seqno = task->tk_rqstp->rq_seqno; - __entry->stage = stage; - ), - - TP_printk( - "task:%u@%u xid=0x%08x seqno=%u stage=%d", - __entry->task_id, __entry->client_id, __entry->xid, - __entry->seqno, __entry->stage) -); - TRACE_EVENT(xprt_ping, TP_PROTO(const struct rpc_xprt *xprt, int status), @@ -1176,6 +1145,30 @@ DEFINE_CONG_EVENT(release_cong); DEFINE_CONG_EVENT(get_cong); DEFINE_CONG_EVENT(put_cong); +TRACE_EVENT(xprt_reserve, + TP_PROTO( + const struct rpc_rqst *rqst + ), + + TP_ARGS(rqst), + + TP_STRUCT__entry( + __field(unsigned int, task_id) + __field(unsigned int, client_id) + __field(u32, xid) + ), + + TP_fast_assign( + __entry->task_id = rqst->rq_task->tk_pid; + __entry->client_id = rqst->rq_task->tk_client->cl_clid; + __entry->xid = be32_to_cpu(rqst->rq_xid); + ), + + TP_printk("task:%u@%u xid=0x%08x", + __entry->task_id, __entry->client_id, __entry->xid + ) +); + TRACE_EVENT(xs_stream_read_data, TP_PROTO(struct rpc_xprt *xprt, ssize_t err, size_t total), diff --git a/net/sunrpc/xprt.c b/net/sunrpc/xprt.c index 
2217dfed8f88..0e4659bd72f4 100644 --- a/net/sunrpc/xprt.c +++ b/net/sunrpc/xprt.c @@ -1267,7 +1267,6 @@ xprt_request_enqueue_transmit(struct rpc_task *task) /* Note: req is added _before_ pos */ list_add_tail(&req->rq_xmit, &pos->rq_xmit); INIT_LIST_HEAD(&req->rq_xmit2); - trace_xprt_enq_xmit(task, 1); goto out; } } else if (RPC_IS_SWAPPER(task)) { @@ -1279,7 +1278,6 @@ xprt_request_enqueue_transmit(struct rpc_task *task) /* Note: req is added _before_ pos */ list_add_tail(&req->rq_xmit, &pos->rq_xmit); INIT_LIST_HEAD(&req->rq_xmit2); - trace_xprt_enq_xmit(task, 2); goto out; } } else if (!req->rq_seqno) { @@ -1288,13 +1286,11 @@ xprt_request_enqueue_transmit(struct rpc_task *task) continue; list_add_tail(&req->rq_xmit2, &pos->rq_xmit2); INIT_LIST_HEAD(&req->rq_xmit); - trace_xprt_enq_xmit(task, 3); goto out; } } list_add_tail(&req->rq_xmit, &xprt->xmit_queue); INIT_LIST_HEAD(&req->rq_xmit2); - trace_xprt_enq_xmit(task, 4); out: set_bit(RPC_TASK_NEED_XMIT, &task->tk_runstate); spin_unlock(&xprt->queue_lock); @@ -1745,8 +1741,8 @@ xprt_request_init(struct rpc_task *task) req->rq_rcv_buf.bvec = NULL; req->rq_release_snd_buf = NULL; xprt_init_majortimeo(task, req); - dprintk("RPC: %5u reserved req %p xid %08x\n", task->tk_pid, - req, ntohl(req->rq_xid)); + + trace_xprt_reserve(req); } static void From 9ce07ae5eb1d968c975a4dace0cac25d9e602c9a Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Wed, 8 Jul 2020 16:09:26 -0400 Subject: [PATCH 066/243] SUNRPC: Replace dprintk() call site in xprt_prepare_transmit Generate a trace event when an RPC request is queued without being sent immediately. 
Signed-off-by: Chuck Lever Signed-off-by: Anna Schumaker --- include/trace/events/sunrpc.h | 1 + net/sunrpc/xprt.c | 4 ++-- 2 files changed, 3 insertions(+), 2 deletions(-) diff --git a/include/trace/events/sunrpc.h b/include/trace/events/sunrpc.h index 781277bb9cb2..9746a722c5a2 100644 --- a/include/trace/events/sunrpc.h +++ b/include/trace/events/sunrpc.h @@ -1093,6 +1093,7 @@ DECLARE_EVENT_CLASS(xprt_writelock_event, DEFINE_WRITELOCK_EVENT(reserve_xprt); DEFINE_WRITELOCK_EVENT(release_xprt); +DEFINE_WRITELOCK_EVENT(transmit_queued); DECLARE_EVENT_CLASS(xprt_cong_event, TP_PROTO( diff --git a/net/sunrpc/xprt.c b/net/sunrpc/xprt.c index 0e4659bd72f4..9da7c6e72c2d 100644 --- a/net/sunrpc/xprt.c +++ b/net/sunrpc/xprt.c @@ -1408,9 +1408,9 @@ bool xprt_prepare_transmit(struct rpc_task *task) struct rpc_rqst *req = task->tk_rqstp; struct rpc_xprt *xprt = req->rq_xprt; - dprintk("RPC: %5u xprt_prepare_transmit\n", task->tk_pid); - if (!xprt_lock_write(xprt, task)) { + trace_xprt_transmit_queued(xprt, task); + /* Race breaker: someone may have transmitted us */ if (!test_bit(RPC_TASK_NEED_XMIT, &task->tk_runstate)) rpc_wake_up_queued_task_set_status(&xprt->sending, From 015747d296798510f94035a832252fcf477db434 Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Wed, 8 Jul 2020 16:09:32 -0400 Subject: [PATCH 067/243] SUNRPC: Replace dprintk() call site in xs_nospace() "no socket space" is an exceptional and infrequent condition that troubleshooters want to know about. 
Signed-off-by: Chuck Lever Signed-off-by: Anna Schumaker --- include/trace/events/sunrpc.h | 28 ++++++++++++++++++++++++++++ net/sunrpc/xprtsock.c | 5 +---- 2 files changed, 29 insertions(+), 4 deletions(-) diff --git a/include/trace/events/sunrpc.h b/include/trace/events/sunrpc.h index 9746a722c5a2..90d7b9916fe3 100644 --- a/include/trace/events/sunrpc.h +++ b/include/trace/events/sunrpc.h @@ -898,6 +898,34 @@ DEFINE_RPC_SOCKET_EVENT_DONE(rpc_socket_reset_connection); DEFINE_RPC_SOCKET_EVENT(rpc_socket_close); DEFINE_RPC_SOCKET_EVENT(rpc_socket_shutdown); +TRACE_EVENT(rpc_socket_nospace, + TP_PROTO( + const struct rpc_rqst *rqst, + const struct sock_xprt *transport + ), + + TP_ARGS(rqst, transport), + + TP_STRUCT__entry( + __field(unsigned int, task_id) + __field(unsigned int, client_id) + __field(unsigned int, total) + __field(unsigned int, remaining) + ), + + TP_fast_assign( + __entry->task_id = rqst->rq_task->tk_pid; + __entry->client_id = rqst->rq_task->tk_client->cl_clid; + __entry->total = rqst->rq_slen; + __entry->remaining = rqst->rq_slen - transport->xmit.offset; + ), + + TP_printk("task:%u@%u total=%u remaining=%u", + __entry->task_id, __entry->client_id, + __entry->total, __entry->remaining + ) +); + TRACE_DEFINE_ENUM(XPRT_LOCKED); TRACE_DEFINE_ENUM(XPRT_CONNECTED); TRACE_DEFINE_ENUM(XPRT_CONNECTING); diff --git a/net/sunrpc/xprtsock.c b/net/sunrpc/xprtsock.c index 554e1bb4c1c7..7090bbee0ec5 100644 --- a/net/sunrpc/xprtsock.c +++ b/net/sunrpc/xprtsock.c @@ -762,10 +762,7 @@ static int xs_nospace(struct rpc_rqst *req) struct sock *sk = transport->inet; int ret = -EAGAIN; - dprintk("RPC: %5u xmit incomplete (%u left of %u)\n", - req->rq_task->tk_pid, - req->rq_slen - transport->xmit.offset, - req->rq_slen); + trace_rpc_socket_nospace(req, transport); /* Protect against races with write_space */ spin_lock(&xprt->transport_lock); From 6387039d6d753e02d9a6eed9a80a38a386d996d3 Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Wed, 8 Jul 2020 16:09:37 -0400 
Subject: [PATCH 068/243] SUNRPC: Remove the dprint_status() macro Clean up: The rpc_task_run_action tracepoint serves the same purpose. Signed-off-by: Chuck Lever Signed-off-by: Anna Schumaker --- net/sunrpc/clnt.c | 31 +------------------------------ 1 file changed, 1 insertion(+), 30 deletions(-) diff --git a/net/sunrpc/clnt.c b/net/sunrpc/clnt.c index 3ab9e6a97898..b51e744206d2 100644 --- a/net/sunrpc/clnt.c +++ b/net/sunrpc/clnt.c @@ -47,10 +47,6 @@ # define RPCDBG_FACILITY RPCDBG_CALL #endif -#define dprint_status(t) \ - dprintk("RPC: %5u %s (status %d)\n", t->tk_pid, \ - __func__, t->tk_status) - /* * All RPC clients are linked into this list */ @@ -1658,8 +1654,6 @@ call_start(struct rpc_task *task) static void call_reserve(struct rpc_task *task) { - dprint_status(task); - task->tk_status = 0; task->tk_action = call_reserveresult; xprt_reserve(task); @@ -1675,8 +1669,6 @@ call_reserveresult(struct rpc_task *task) { int status = task->tk_status; - dprint_status(task); - /* * After a call to xprt_reserve(), we must have either * a request slot or else an error status. 
@@ -1717,8 +1709,6 @@ call_reserveresult(struct rpc_task *task) static void call_retry_reserve(struct rpc_task *task) { - dprint_status(task); - task->tk_status = 0; task->tk_action = call_reserveresult; xprt_retry_reserve(task); @@ -1730,8 +1720,6 @@ call_retry_reserve(struct rpc_task *task) static void call_refresh(struct rpc_task *task) { - dprint_status(task); - task->tk_action = call_refreshresult; task->tk_status = 0; task->tk_client->cl_stats->rpcauthrefresh++; @@ -1746,8 +1734,6 @@ call_refreshresult(struct rpc_task *task) { int status = task->tk_status; - dprint_status(task); - task->tk_status = 0; task->tk_action = call_refresh; switch (status) { @@ -1792,8 +1778,6 @@ call_allocate(struct rpc_task *task) const struct rpc_procinfo *proc = task->tk_msg.rpc_proc; int status; - dprint_status(task); - task->tk_status = 0; task->tk_action = call_encode; @@ -1882,7 +1866,7 @@ call_encode(struct rpc_task *task) { if (!rpc_task_need_encode(task)) goto out; - dprint_status(task); + /* Dequeue task from the receive queue while we're encoding */ xprt_request_dequeue_xprt(task); /* Encode here so that rpcsec_gss can use correct sequence number. 
*/ @@ -1959,8 +1943,6 @@ call_bind(struct rpc_task *task) return; } - dprint_status(task); - task->tk_action = call_bind_status; if (!xprt_prepare_transmit(task)) return; @@ -1982,7 +1964,6 @@ call_bind_status(struct rpc_task *task) return; } - dprint_status(task); trace_rpc_bind_status(task); if (task->tk_status >= 0) goto out_next; @@ -2109,7 +2090,6 @@ call_connect_status(struct rpc_task *task) return; } - dprint_status(task); trace_rpc_connect_status(task); if (task->tk_status == 0) { @@ -2177,8 +2157,6 @@ call_transmit(struct rpc_task *task) return; } - dprint_status(task); - task->tk_action = call_transmit_status; if (!xprt_prepare_transmit(task)) return; @@ -2213,7 +2191,6 @@ call_transmit_status(struct rpc_task *task) switch (task->tk_status) { default: - dprint_status(task); break; case -EBADMSG: task->tk_status = 0; @@ -2295,8 +2272,6 @@ call_bc_transmit_status(struct rpc_task *task) if (rpc_task_transmitted(task)) task->tk_status = 0; - dprint_status(task); - switch (task->tk_status) { case 0: /* Success */ @@ -2356,8 +2331,6 @@ call_status(struct rpc_task *task) if (!task->tk_msg.rpc_proc->p_proc) trace_xprt_ping(task->tk_xprt, task->tk_status); - dprint_status(task); - status = task->tk_status; if (status >= 0) { task->tk_action = call_decode; @@ -2492,8 +2465,6 @@ call_decode(struct rpc_task *task) struct xdr_stream xdr; int err; - dprint_status(task); - if (!task->tk_msg.rpc_proc->p_decode) { task->tk_action = rpc_exit_task; return; From 0ec36cc9cd991d0cd9055949abd582921bbcfea8 Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Wed, 8 Jul 2020 16:09:42 -0400 Subject: [PATCH 069/243] SUNRPC: Remove dprintk call site in call_start() Clean up: The rpc_rpc_request tracepoint serves the same purpose. 
Signed-off-by: Chuck Lever Signed-off-by: Anna Schumaker --- net/sunrpc/clnt.c | 4 ---- 1 file changed, 4 deletions(-) diff --git a/net/sunrpc/clnt.c b/net/sunrpc/clnt.c index b51e744206d2..7d97d18df9bd 100644 --- a/net/sunrpc/clnt.c +++ b/net/sunrpc/clnt.c @@ -1635,10 +1635,6 @@ call_start(struct rpc_task *task) int idx = task->tk_msg.rpc_proc->p_statidx; trace_rpc_request(task); - dprintk("RPC: %5u call_start %s%d proc %s (%s)\n", task->tk_pid, - clnt->cl_program->name, clnt->cl_vers, - rpc_proc_name(task), - (RPC_IS_ASYNC(task) ? "async" : "sync")); /* Increment call count (version might not be valid for ping) */ if (clnt->cl_program->version[clnt->cl_vers]) From db0a86c4266ac36769f54e2ce33fff0300c9bc00 Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Wed, 8 Jul 2020 16:09:47 -0400 Subject: [PATCH 070/243] SUNRPC: Replace connect dprintk call sites with a tracepoint This trace event can be used to audit transport connections from the client. Signed-off-by: Chuck Lever Signed-off-by: Anna Schumaker --- include/trace/events/sunrpc.h | 1 + net/sunrpc/clnt.c | 4 ---- net/sunrpc/xprt.c | 3 +-- 3 files changed, 2 insertions(+), 6 deletions(-) diff --git a/include/trace/events/sunrpc.h b/include/trace/events/sunrpc.h index 90d7b9916fe3..0aa15cc3985d 100644 --- a/include/trace/events/sunrpc.h +++ b/include/trace/events/sunrpc.h @@ -983,6 +983,7 @@ DECLARE_EVENT_CLASS(rpc_xprt_lifetime_class, TP_ARGS(xprt)) DEFINE_RPC_XPRT_LIFETIME_EVENT(create); +DEFINE_RPC_XPRT_LIFETIME_EVENT(connect); DEFINE_RPC_XPRT_LIFETIME_EVENT(disconnect_auto); DEFINE_RPC_XPRT_LIFETIME_EVENT(disconnect_done); DEFINE_RPC_XPRT_LIFETIME_EVENT(disconnect_force); diff --git a/net/sunrpc/clnt.c b/net/sunrpc/clnt.c index 7d97d18df9bd..48d86814b942 100644 --- a/net/sunrpc/clnt.c +++ b/net/sunrpc/clnt.c @@ -2055,10 +2055,6 @@ call_connect(struct rpc_task *task) return; } - dprintk("RPC: %5u call_connect xprt %p %s connected\n", - task->tk_pid, xprt, - (xprt_connected(xprt) ? 
"is" : "is not")); - task->tk_action = call_connect_status; if (task->tk_status < 0) return; diff --git a/net/sunrpc/xprt.c b/net/sunrpc/xprt.c index 9da7c6e72c2d..ceb190d1754d 100644 --- a/net/sunrpc/xprt.c +++ b/net/sunrpc/xprt.c @@ -834,8 +834,7 @@ void xprt_connect(struct rpc_task *task) { struct rpc_xprt *xprt = task->tk_rqstp->rq_xprt; - dprintk("RPC: %5u xprt_connect xprt %p %s connected\n", task->tk_pid, - xprt, (xprt_connected(xprt) ? "is" : "is not")); + trace_xprt_connect(xprt); if (!xprt_bound(xprt)) { task->tk_status = -EAGAIN; From 6f9f17287e78e5049931af2037b15b26d134a32a Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Wed, 8 Jul 2020 16:09:53 -0400 Subject: [PATCH 071/243] SUNRPC: Mitigate cond_resched() in xprt_transmit() The original purpose of this expensive call is to prevent a long queue of requests from blocking other work. The cond_resched() call is unnecessary after just a single send operation. For longer queues, instead of invoking the kernel scheduler, simply release the transport send lock and return to the RPC scheduler. 
Signed-off-by: Chuck Lever Signed-off-by: Anna Schumaker --- net/sunrpc/xprt.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/net/sunrpc/xprt.c b/net/sunrpc/xprt.c index ceb190d1754d..f6c17e75f20e 100644 --- a/net/sunrpc/xprt.c +++ b/net/sunrpc/xprt.c @@ -1513,10 +1513,13 @@ xprt_transmit(struct rpc_task *task) { struct rpc_rqst *next, *req = task->tk_rqstp; struct rpc_xprt *xprt = req->rq_xprt; - int status; + int counter, status; spin_lock(&xprt->queue_lock); + counter = 0; while (!list_empty(&xprt->xmit_queue)) { + if (++counter == 20) + break; next = list_first_entry(&xprt->xmit_queue, struct rpc_rqst, rq_xmit); xprt_pin_rqst(next); @@ -1524,7 +1527,6 @@ xprt_transmit(struct rpc_task *task) status = xprt_request_transmit(next, task); if (status == -EBADMSG && next != req) status = 0; - cond_resched(); spin_lock(&xprt->queue_lock); xprt_unpin_rqst(next); if (status == 0) { From 914cdcc78a668a90bc627542f82fc3a92525141c Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Wed, 8 Jul 2020 16:09:58 -0400 Subject: [PATCH 072/243] SUNRPC: Add trace_rpc_timeout_status() For a long while we've wanted a tracepoint that fires when a major timeout is reported in the system log. Such a tracepoint can be attached to other actions that can take place when a timeout is detected (e.g., server or connection health assessment). 
Signed-off-by: Chuck Lever Signed-off-by: Anna Schumaker --- include/trace/events/sunrpc.h | 1 + net/sunrpc/clnt.c | 2 +- 2 files changed, 2 insertions(+), 1 deletion(-) diff --git a/include/trace/events/sunrpc.h b/include/trace/events/sunrpc.h index 0aa15cc3985d..edb41e187cf6 100644 --- a/include/trace/events/sunrpc.h +++ b/include/trace/events/sunrpc.h @@ -261,6 +261,7 @@ DECLARE_EVENT_CLASS(rpc_task_status, DEFINE_RPC_STATUS_EVENT(call); DEFINE_RPC_STATUS_EVENT(bind); DEFINE_RPC_STATUS_EVENT(connect); +DEFINE_RPC_STATUS_EVENT(timeout); TRACE_EVENT(rpc_request, TP_PROTO(const struct rpc_task *task), diff --git a/net/sunrpc/clnt.c b/net/sunrpc/clnt.c index 48d86814b942..cabde92f283d 100644 --- a/net/sunrpc/clnt.c +++ b/net/sunrpc/clnt.c @@ -2398,7 +2398,7 @@ rpc_check_timeout(struct rpc_task *task) if (xprt_adjust_timeout(task->tk_rqstp) == 0) return; - dprintk("RPC: %5u call_timeout (major)\n", task->tk_pid); + trace_rpc_timeout_status(task); task->tk_timeouts++; if (RPC_IS_SOFTCONN(task) && !rpc_check_connected(task->tk_rqstp)) { From 7c8099f6ada2654ef87aa992ec1b69b86249a353 Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Wed, 8 Jul 2020 16:10:03 -0400 Subject: [PATCH 073/243] SUNRPC: Trace call_refresh events Clean up: Replace dprintk call sites. Note that rpc_call_rpcerror() already has a trace point, so perhaps adding trace_rpc_refresh_status() isn't necessary. However, it does report a particular category of error. 
Signed-off-by: Chuck Lever Signed-off-by: Anna Schumaker --- include/trace/events/sunrpc.h | 2 ++ net/sunrpc/clnt.c | 9 +++------ 2 files changed, 5 insertions(+), 6 deletions(-) diff --git a/include/trace/events/sunrpc.h b/include/trace/events/sunrpc.h index edb41e187cf6..3c5d707f7d54 100644 --- a/include/trace/events/sunrpc.h +++ b/include/trace/events/sunrpc.h @@ -262,6 +262,8 @@ DEFINE_RPC_STATUS_EVENT(call); DEFINE_RPC_STATUS_EVENT(bind); DEFINE_RPC_STATUS_EVENT(connect); DEFINE_RPC_STATUS_EVENT(timeout); +DEFINE_RPC_STATUS_EVENT(retry_refresh); +DEFINE_RPC_STATUS_EVENT(refresh); TRACE_EVENT(rpc_request, TP_PROTO(const struct rpc_task *task), diff --git a/net/sunrpc/clnt.c b/net/sunrpc/clnt.c index cabde92f283d..af7c88e41e11 100644 --- a/net/sunrpc/clnt.c +++ b/net/sunrpc/clnt.c @@ -1752,12 +1752,10 @@ call_refreshresult(struct rpc_task *task) if (!task->tk_cred_retry) break; task->tk_cred_retry--; - dprintk("RPC: %5u %s: retry refresh creds\n", - task->tk_pid, __func__); + trace_rpc_retry_refresh_status(task); return; } - dprintk("RPC: %5u %s: refresh creds failed with error %d\n", - task->tk_pid, __func__, status); + trace_rpc_refresh_status(task); rpc_call_rpcerror(task, status); } @@ -1881,8 +1879,7 @@ call_encode(struct rpc_task *task) } else { task->tk_action = call_refresh; task->tk_cred_retry--; - dprintk("RPC: %5u %s: retry refresh creds\n", - task->tk_pid, __func__); + trace_rpc_retry_refresh_status(task); } break; default: From fd66e2a79d96efcbea78e0dc33797e25fe353a71 Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Wed, 8 Jul 2020 16:10:08 -0400 Subject: [PATCH 074/243] SUNRPC: Remove dprintk call site in call_decode Clean up. When enabled, this dprintk adds a line in /var/log/messages after every RPC that reports the task ID (no connection to on-the-wire XID values) and the RPC's result (no connection to the program, operation, or the arguments and results). Thus its value is pretty low. Let's remove it. 
Signed-off-by: Chuck Lever Signed-off-by: Anna Schumaker --- net/sunrpc/clnt.c | 2 -- 1 file changed, 2 deletions(-) diff --git a/net/sunrpc/clnt.c b/net/sunrpc/clnt.c index af7c88e41e11..e13db512b164 100644 --- a/net/sunrpc/clnt.c +++ b/net/sunrpc/clnt.c @@ -2497,8 +2497,6 @@ call_decode(struct rpc_task *task) case 0: task->tk_action = rpc_exit_task; task->tk_status = rpcauth_unwrap_resp(task, &xdr); - dprintk("RPC: %5u %s result %d\n", - task->tk_pid, __func__, task->tk_status); return; case -EAGAIN: task->tk_status = 0; From 42ebfc2cbf22df0abf2a17414db256d1db87c154 Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Wed, 8 Jul 2020 16:10:14 -0400 Subject: [PATCH 075/243] SUNRPC: Clean up call_bind_status() observability Time to remove dprintk call sites in here. Regarding the rpc_bind_status tracepoint: It's friendlier to administrators if they don't have to look up the error code to figure out what went wrong. Replace trace_rpc_bind_status with a set of tracepoints that report more specifically what the problem was, and what RPC program/version was being queried. 
Signed-off-by: Chuck Lever Signed-off-by: Anna Schumaker --- include/trace/events/sunrpc.h | 14 +++++++++++++- net/sunrpc/clnt.c | 20 ++++++-------------- 2 files changed, 19 insertions(+), 15 deletions(-) diff --git a/include/trace/events/sunrpc.h b/include/trace/events/sunrpc.h index 3c5d707f7d54..353d9a18e254 100644 --- a/include/trace/events/sunrpc.h +++ b/include/trace/events/sunrpc.h @@ -259,7 +259,6 @@ DECLARE_EVENT_CLASS(rpc_task_status, TP_ARGS(task)) DEFINE_RPC_STATUS_EVENT(call); -DEFINE_RPC_STATUS_EVENT(bind); DEFINE_RPC_STATUS_EVENT(connect); DEFINE_RPC_STATUS_EVENT(timeout); DEFINE_RPC_STATUS_EVENT(retry_refresh); @@ -520,6 +519,19 @@ DEFINE_RPC_REPLY_EVENT(stale_creds); DEFINE_RPC_REPLY_EVENT(bad_creds); DEFINE_RPC_REPLY_EVENT(auth_tooweak); +#define DEFINE_RPCB_ERROR_EVENT(name) \ + DEFINE_EVENT(rpc_reply_event, rpcb_##name##_err, \ + TP_PROTO( \ + const struct rpc_task *task \ + ), \ + TP_ARGS(task)) + +DEFINE_RPCB_ERROR_EVENT(prog_unavail); +DEFINE_RPCB_ERROR_EVENT(timeout); +DEFINE_RPCB_ERROR_EVENT(bind_version); +DEFINE_RPCB_ERROR_EVENT(unreachable); +DEFINE_RPCB_ERROR_EVENT(unrecognized); + TRACE_EVENT(rpc_buf_alloc, TP_PROTO( const struct rpc_task *task, diff --git a/net/sunrpc/clnt.c b/net/sunrpc/clnt.c index e13db512b164..3259120462ed 100644 --- a/net/sunrpc/clnt.c +++ b/net/sunrpc/clnt.c @@ -1957,7 +1957,6 @@ call_bind_status(struct rpc_task *task) return; } - trace_rpc_bind_status(task); if (task->tk_status >= 0) goto out_next; if (xprt_bound(xprt)) { @@ -1967,12 +1966,10 @@ call_bind_status(struct rpc_task *task) switch (task->tk_status) { case -ENOMEM: - dprintk("RPC: %5u rpcbind out of memory\n", task->tk_pid); rpc_delay(task, HZ >> 2); goto retry_timeout; case -EACCES: - dprintk("RPC: %5u remote rpcbind: RPC program/version " - "unavailable\n", task->tk_pid); + trace_rpcb_prog_unavail_err(task); /* fail immediately if this is an RPC ping */ if (task->tk_msg.rpc_proc->p_proc == 0) { status = -EOPNOTSUPP; @@ -1989,17 +1986,14 @@ 
call_bind_status(struct rpc_task *task) case -EAGAIN: goto retry_timeout; case -ETIMEDOUT: - dprintk("RPC: %5u rpcbind request timed out\n", - task->tk_pid); + trace_rpcb_timeout_err(task); goto retry_timeout; case -EPFNOSUPPORT: /* server doesn't support any rpcbind version we know of */ - dprintk("RPC: %5u unrecognized remote rpcbind service\n", - task->tk_pid); + trace_rpcb_bind_version_err(task); break; case -EPROTONOSUPPORT: - dprintk("RPC: %5u remote rpcbind version unavailable, retrying\n", - task->tk_pid); + trace_rpcb_bind_version_err(task); goto retry_timeout; case -ECONNREFUSED: /* connection problems */ case -ECONNRESET: @@ -2010,8 +2004,7 @@ call_bind_status(struct rpc_task *task) case -EHOSTUNREACH: case -ENETUNREACH: case -EPIPE: - dprintk("RPC: %5u remote rpcbind unreachable: %d\n", - task->tk_pid, task->tk_status); + trace_rpcb_unreachable_err(task); if (!RPC_IS_SOFTCONN(task)) { rpc_delay(task, 5*HZ); goto retry_timeout; @@ -2019,8 +2012,7 @@ call_bind_status(struct rpc_task *task) status = task->tk_status; break; default: - dprintk("RPC: %5u unrecognized rpcbind error (%d)\n", - task->tk_pid, -task->tk_status); + trace_rpcb_unrecognized_err(task); } rpc_call_rpcerror(task, status); From e465cc3fa86341121205d3faca26866bdf331ed5 Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Wed, 8 Jul 2020 16:10:19 -0400 Subject: [PATCH 076/243] SUNRPC: Remove rpcb_getport_async dprintk call sites In many cases, tracepoints already report these errors. In others, the dprintks were mainly useful when this code was less mature. 
Signed-off-by: Chuck Lever Signed-off-by: Anna Schumaker --- include/trace/events/sunrpc.h | 35 +++++++++++++++++++++++++++++++++++ net/sunrpc/rpcb_clnt.c | 23 +++-------------------- 2 files changed, 38 insertions(+), 20 deletions(-) diff --git a/include/trace/events/sunrpc.h b/include/trace/events/sunrpc.h index 353d9a18e254..6e5eba54fd30 100644 --- a/include/trace/events/sunrpc.h +++ b/include/trace/events/sunrpc.h @@ -1269,6 +1269,41 @@ TRACE_EVENT(xs_stream_read_request, __entry->copied, __entry->reclen, __entry->offset) ); +TRACE_EVENT(rpcb_getport, + TP_PROTO( + const struct rpc_clnt *clnt, + const struct rpc_task *task, + unsigned int bind_version + ), + + TP_ARGS(clnt, task, bind_version), + + TP_STRUCT__entry( + __field(unsigned int, task_id) + __field(unsigned int, client_id) + __field(unsigned int, program) + __field(unsigned int, version) + __field(int, protocol) + __field(unsigned int, bind_version) + __string(servername, task->tk_xprt->servername) + ), + + TP_fast_assign( + __entry->task_id = task->tk_pid; + __entry->client_id = clnt->cl_clid; + __entry->program = clnt->cl_prog; + __entry->version = clnt->cl_vers; + __entry->protocol = task->tk_xprt->prot; + __entry->bind_version = bind_version; + __assign_str(servername, task->tk_xprt->servername); + ), + + TP_printk("task:%u@%u server=%s program=%u version=%u protocol=%d bind_version=%u", + __entry->task_id, __entry->client_id, __get_str(servername), + __entry->program, __entry->version, __entry->protocol, + __entry->bind_version + ) +); DECLARE_EVENT_CLASS(svc_xdr_buf_class, TP_PROTO( diff --git a/net/sunrpc/rpcb_clnt.c b/net/sunrpc/rpcb_clnt.c index 4a67685c83eb..e306aab89a0b 100644 --- a/net/sunrpc/rpcb_clnt.c +++ b/net/sunrpc/rpcb_clnt.c @@ -31,6 +31,8 @@ #include #include +#include + #include "netns.h" #if IS_ENABLED(CONFIG_SUNRPC_DEBUG) @@ -693,18 +695,12 @@ void rpcb_getport_async(struct rpc_task *task) rcu_read_unlock(); xprt = xprt_get(task->tk_xprt); - dprintk("RPC: %5u %s(%s, %u, %u, 
%d)\n", - task->tk_pid, __func__, - xprt->servername, clnt->cl_prog, clnt->cl_vers, xprt->prot); - /* Put self on the wait queue to ensure we get notified if * some other task is already attempting to bind the port */ rpc_sleep_on_timeout(&xprt->binding, task, NULL, jiffies + xprt->bind_timeout); if (xprt_test_and_set_binding(xprt)) { - dprintk("RPC: %5u %s: waiting for another binder\n", - task->tk_pid, __func__); xprt_put(xprt); return; } @@ -712,8 +708,6 @@ void rpcb_getport_async(struct rpc_task *task) /* Someone else may have bound if we slept */ if (xprt_bound(xprt)) { status = 0; - dprintk("RPC: %5u %s: already bound\n", - task->tk_pid, __func__); goto bailout_nofree; } @@ -732,20 +726,15 @@ void rpcb_getport_async(struct rpc_task *task) break; default: status = -EAFNOSUPPORT; - dprintk("RPC: %5u %s: bad address family\n", - task->tk_pid, __func__); goto bailout_nofree; } if (proc == NULL) { xprt->bind_index = 0; status = -EPFNOSUPPORT; - dprintk("RPC: %5u %s: no more getport versions available\n", - task->tk_pid, __func__); goto bailout_nofree; } - dprintk("RPC: %5u %s: trying rpcbind version %u\n", - task->tk_pid, __func__, bind_version); + trace_rpcb_getport(clnt, task, bind_version); rpcb_clnt = rpcb_create(xprt->xprt_net, clnt->cl_nodename, @@ -754,16 +743,12 @@ void rpcb_getport_async(struct rpc_task *task) clnt->cl_cred); if (IS_ERR(rpcb_clnt)) { status = PTR_ERR(rpcb_clnt); - dprintk("RPC: %5u %s: rpcb_create failed, error %ld\n", - task->tk_pid, __func__, PTR_ERR(rpcb_clnt)); goto bailout_nofree; } map = kzalloc(sizeof(struct rpcbind_args), GFP_NOFS); if (!map) { status = -ENOMEM; - dprintk("RPC: %5u %s: no memory available\n", - task->tk_pid, __func__); goto bailout_release_client; } map->r_prog = clnt->cl_prog; @@ -780,8 +765,6 @@ void rpcb_getport_async(struct rpc_task *task) map->r_addr = rpc_sockaddr2uaddr(sap, GFP_NOFS); if (!map->r_addr) { status = -ENOMEM; - dprintk("RPC: %5u %s: no memory available\n", - task->tk_pid, __func__); goto 
bailout_free_args; } map->r_owner = ""; From ac1ae534215b9b0a346547654b4720abd0882f15 Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Wed, 8 Jul 2020 16:10:24 -0400 Subject: [PATCH 077/243] SUNRPC: Hoist trace_xprtrdma_op_setport into generic code Signed-off-by: Chuck Lever Signed-off-by: Anna Schumaker --- include/trace/events/rpcrdma.h | 1 - include/trace/events/sunrpc.h | 29 +++++++++++++++++++++++++++++ net/sunrpc/rpcb_clnt.c | 29 ++++++++++++++--------------- net/sunrpc/xprtrdma/transport.c | 3 --- 4 files changed, 43 insertions(+), 19 deletions(-) diff --git a/include/trace/events/rpcrdma.h b/include/trace/events/rpcrdma.h index 1e17c2fc9640..ad46bec3a65e 100644 --- a/include/trace/events/rpcrdma.h +++ b/include/trace/events/rpcrdma.h @@ -423,7 +423,6 @@ DEFINE_CONN_EVENT(connect); DEFINE_CONN_EVENT(disconnect); DEFINE_RXPRT_EVENT(xprtrdma_op_inject_dsc); -DEFINE_RXPRT_EVENT(xprtrdma_op_setport); TRACE_EVENT(xprtrdma_op_connect, TP_PROTO( diff --git a/include/trace/events/sunrpc.h b/include/trace/events/sunrpc.h index 6e5eba54fd30..05291ce099d6 100644 --- a/include/trace/events/sunrpc.h +++ b/include/trace/events/sunrpc.h @@ -1305,6 +1305,35 @@ TRACE_EVENT(rpcb_getport, ) ); +TRACE_EVENT(rpcb_setport, + TP_PROTO( + const struct rpc_task *task, + int status, + unsigned short port + ), + + TP_ARGS(task, status, port), + + TP_STRUCT__entry( + __field(unsigned int, task_id) + __field(unsigned int, client_id) + __field(int, status) + __field(unsigned short, port) + ), + + TP_fast_assign( + __entry->task_id = task->tk_pid; + __entry->client_id = task->tk_client->cl_clid; + __entry->status = status; + __entry->port = port; + ), + + TP_printk("task:%u@%u status=%d port=%u", + __entry->task_id, __entry->client_id, + __entry->status, __entry->port + ) +); + DECLARE_EVENT_CLASS(svc_xdr_buf_class, TP_PROTO( const struct svc_rqst *rqst, diff --git a/net/sunrpc/rpcb_clnt.c b/net/sunrpc/rpcb_clnt.c index e306aab89a0b..cdf86c0580d3 100644 --- a/net/sunrpc/rpcb_clnt.c +++ 
b/net/sunrpc/rpcb_clnt.c @@ -801,34 +801,33 @@ static void rpcb_getport_done(struct rpc_task *child, void *data) { struct rpcbind_args *map = data; struct rpc_xprt *xprt = map->r_xprt; - int status = child->tk_status; + + map->r_status = child->tk_status; /* Garbage reply: retry with a lesser rpcbind version */ - if (status == -EIO) - status = -EPROTONOSUPPORT; + if (map->r_status == -EIO) + map->r_status = -EPROTONOSUPPORT; /* rpcbind server doesn't support this rpcbind protocol version */ - if (status == -EPROTONOSUPPORT) + if (map->r_status == -EPROTONOSUPPORT) xprt->bind_index++; - if (status < 0) { + if (map->r_status < 0) { /* rpcbind server not available on remote host? */ - xprt->ops->set_port(xprt, 0); + map->r_port = 0; + } else if (map->r_port == 0) { /* Requested RPC service wasn't registered on remote host */ - xprt->ops->set_port(xprt, 0); - status = -EACCES; + map->r_status = -EACCES; } else { /* Succeeded */ - xprt->ops->set_port(xprt, map->r_port); - xprt_set_bound(xprt); - status = 0; + map->r_status = 0; } - dprintk("RPC: %5u rpcb_getport_done(status %d, port %u)\n", - child->tk_pid, status, map->r_port); - - map->r_status = status; + trace_rpcb_setport(child, map->r_status, map->r_port); + xprt->ops->set_port(xprt, map->r_port); + if (map->r_port) + xprt_set_bound(xprt); } /* diff --git a/net/sunrpc/xprtrdma/transport.c b/net/sunrpc/xprtrdma/transport.c index 819a922830da..8915e42240d3 100644 --- a/net/sunrpc/xprtrdma/transport.c +++ b/net/sunrpc/xprtrdma/transport.c @@ -413,9 +413,6 @@ xprt_rdma_set_port(struct rpc_xprt *xprt, u16 port) kfree(xprt->address_strings[RPC_DISPLAY_HEX_PORT]); snprintf(buf, sizeof(buf), "%4hx", port); xprt->address_strings[RPC_DISPLAY_HEX_PORT] = kstrdup(buf, GFP_KERNEL); - - trace_xprtrdma_op_setport(container_of(xprt, struct rpcrdma_xprt, - rx_xprt)); } /** From 15a798d6ce7ea67680973541e8a690edee9a71e9 Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Wed, 8 Jul 2020 16:10:29 -0400 Subject: [PATCH 078/243] SUNRPC: 
Remove dprintk call sites in rpcbind XDR functions Clean up: Other XDR functions no longer have dprintk call sites. These were added during development and can be removed now that the code is mature. Signed-off-by: Chuck Lever Signed-off-by: Anna Schumaker --- net/sunrpc/rpcb_clnt.c | 28 +--------------------------- 1 file changed, 1 insertion(+), 27 deletions(-) diff --git a/net/sunrpc/rpcb_clnt.c b/net/sunrpc/rpcb_clnt.c index cdf86c0580d3..6df12a13edc6 100644 --- a/net/sunrpc/rpcb_clnt.c +++ b/net/sunrpc/rpcb_clnt.c @@ -840,11 +840,6 @@ static void rpcb_enc_mapping(struct rpc_rqst *req, struct xdr_stream *xdr, const struct rpcbind_args *rpcb = data; __be32 *p; - dprintk("RPC: %5u encoding PMAP_%s call (%u, %u, %d, %u)\n", - req->rq_task->tk_pid, - req->rq_task->tk_msg.rpc_proc->p_name, - rpcb->r_prog, rpcb->r_vers, rpcb->r_prot, rpcb->r_port); - p = xdr_reserve_space(xdr, RPCB_mappingargs_sz << 2); *p++ = cpu_to_be32(rpcb->r_prog); *p++ = cpu_to_be32(rpcb->r_vers); @@ -866,8 +861,6 @@ static int rpcb_dec_getport(struct rpc_rqst *req, struct xdr_stream *xdr, return -EIO; port = be32_to_cpup(p); - dprintk("RPC: %5u PMAP_%s result: %lu\n", req->rq_task->tk_pid, - req->rq_task->tk_msg.rpc_proc->p_name, port); if (unlikely(port > USHRT_MAX)) return -EIO; @@ -888,11 +881,6 @@ static int rpcb_dec_set(struct rpc_rqst *req, struct xdr_stream *xdr, *boolp = 0; if (*p != xdr_zero) *boolp = 1; - - dprintk("RPC: %5u RPCB_%s call %s\n", - req->rq_task->tk_pid, - req->rq_task->tk_msg.rpc_proc->p_name, - (*boolp ? 
"succeeded" : "failed")); return 0; } @@ -917,12 +905,6 @@ static void rpcb_enc_getaddr(struct rpc_rqst *req, struct xdr_stream *xdr, const struct rpcbind_args *rpcb = data; __be32 *p; - dprintk("RPC: %5u encoding RPCB_%s call (%u, %u, '%s', '%s')\n", - req->rq_task->tk_pid, - req->rq_task->tk_msg.rpc_proc->p_name, - rpcb->r_prog, rpcb->r_vers, - rpcb->r_netid, rpcb->r_addr); - p = xdr_reserve_space(xdr, (RPCB_program_sz + RPCB_version_sz) << 2); *p++ = cpu_to_be32(rpcb->r_prog); *p = cpu_to_be32(rpcb->r_vers); @@ -952,11 +934,8 @@ static int rpcb_dec_getaddr(struct rpc_rqst *req, struct xdr_stream *xdr, * If the returned universal address is a null string, * the requested RPC service was not registered. */ - if (len == 0) { - dprintk("RPC: %5u RPCB reply: program not registered\n", - req->rq_task->tk_pid); + if (len == 0) return 0; - } if (unlikely(len > RPCBIND_MAXUADDRLEN)) goto out_fail; @@ -964,8 +943,6 @@ static int rpcb_dec_getaddr(struct rpc_rqst *req, struct xdr_stream *xdr, p = xdr_inline_decode(xdr, len); if (unlikely(p == NULL)) goto out_fail; - dprintk("RPC: %5u RPCB_%s reply: %*pE\n", req->rq_task->tk_pid, - req->rq_task->tk_msg.rpc_proc->p_name, len, (char *)p); if (rpc_uaddr2sockaddr(req->rq_xprt->xprt_net, (char *)p, len, sap, sizeof(address)) == 0) @@ -975,9 +952,6 @@ static int rpcb_dec_getaddr(struct rpc_rqst *req, struct xdr_stream *xdr, return 0; out_fail: - dprintk("RPC: %5u malformed RPCB_%s reply\n", - req->rq_task->tk_pid, - req->rq_task->tk_msg.rpc_proc->p_name); return -EIO; } From 1e664987a9165ada0dfb347a9e6cf935a6d495e7 Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Wed, 8 Jul 2020 16:10:35 -0400 Subject: [PATCH 079/243] SUNRPC: Remove more dprintks in rpcb_clnt.c Clean up: These are superfluous now that rpc_create() and friends have tracepoints to report errors. 
Signed-off-by: Chuck Lever Signed-off-by: Anna Schumaker --- net/sunrpc/rpcb_clnt.c | 25 +++---------------------- 1 file changed, 3 insertions(+), 22 deletions(-) diff --git a/net/sunrpc/rpcb_clnt.c b/net/sunrpc/rpcb_clnt.c index 6df12a13edc6..af2882c62a3b 100644 --- a/net/sunrpc/rpcb_clnt.c +++ b/net/sunrpc/rpcb_clnt.c @@ -218,10 +218,6 @@ static void rpcb_set_local(struct net *net, struct rpc_clnt *clnt, sn->rpcb_is_af_local = is_af_local ? 1 : 0; smp_wmb(); sn->rpcb_users = 1; - dprintk("RPC: created new rpcb local clients (rpcb_local_clnt: " - "%p, rpcb_local_clnt4: %p) for net %x%s\n", - sn->rpcb_local_clnt, sn->rpcb_local_clnt4, - net->ns.inum, (net == &init_net) ? " (init_net)" : ""); } /* @@ -263,19 +259,13 @@ static int rpcb_create_local_unix(struct net *net) */ clnt = rpc_create(&args); if (IS_ERR(clnt)) { - dprintk("RPC: failed to create AF_LOCAL rpcbind " - "client (errno %ld).\n", PTR_ERR(clnt)); result = PTR_ERR(clnt); goto out; } clnt4 = rpc_bind_new_program(clnt, &rpcb_program, RPCBVERS_4); - if (IS_ERR(clnt4)) { - dprintk("RPC: failed to bind second program to " - "rpcbind v4 client (errno %ld).\n", - PTR_ERR(clnt4)); + if (IS_ERR(clnt4)) clnt4 = NULL; - } rpcb_set_local(net, clnt, clnt4, true); @@ -311,8 +301,6 @@ static int rpcb_create_local_net(struct net *net) clnt = rpc_create(&args); if (IS_ERR(clnt)) { - dprintk("RPC: failed to create local rpcbind " - "client (errno %ld).\n", PTR_ERR(clnt)); result = PTR_ERR(clnt); goto out; } @@ -323,12 +311,8 @@ static int rpcb_create_local_net(struct net *net) * v4 upcalls. 
*/ clnt4 = rpc_bind_new_program(clnt, &rpcb_program, RPCBVERS_4); - if (IS_ERR(clnt4)) { - dprintk("RPC: failed to bind second program to " - "rpcbind v4 client (errno %ld).\n", - PTR_ERR(clnt4)); + if (IS_ERR(clnt4)) clnt4 = NULL; - } rpcb_set_local(net, clnt, clnt4, false); @@ -405,11 +389,8 @@ static int rpcb_register_call(struct sunrpc_net *sn, struct rpc_clnt *clnt, stru msg->rpc_resp = &result; error = rpc_call_sync(clnt, msg, flags); - if (error < 0) { - dprintk("RPC: failed to contact local rpcbind " - "server (errno %d).\n", -error); + if (error < 0) return error; - } if (!result) return -EACCES; From c3adcc7dfbfcb1bd7e29fa95f03d1f96b514e03e Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Wed, 8 Jul 2020 16:10:40 -0400 Subject: [PATCH 080/243] SUNRPC: Replace rpcbind dprintk call sites with tracepoints In many cases, tracepoints already report these errors. In others, the dprintks were mainly useful when this code was less mature. Signed-off-by: Chuck Lever Signed-off-by: Anna Schumaker --- include/trace/events/sunrpc.h | 86 +++++++++++++++++++++++++++++++++++ net/sunrpc/rpcb_clnt.c | 24 ++-------- 2 files changed, 90 insertions(+), 20 deletions(-) diff --git a/include/trace/events/sunrpc.h b/include/trace/events/sunrpc.h index 05291ce099d6..b5d4cbbaf4b0 100644 --- a/include/trace/events/sunrpc.h +++ b/include/trace/events/sunrpc.h @@ -1334,6 +1334,92 @@ TRACE_EVENT(rpcb_setport, ) ); +TRACE_EVENT(pmap_register, + TP_PROTO( + u32 program, + u32 version, + int protocol, + unsigned short port + ), + + TP_ARGS(program, version, protocol, port), + + TP_STRUCT__entry( + __field(unsigned int, program) + __field(unsigned int, version) + __field(int, protocol) + __field(unsigned int, port) + ), + + TP_fast_assign( + __entry->program = program; + __entry->version = version; + __entry->protocol = protocol; + __entry->port = port; + ), + + TP_printk("program=%u version=%u protocol=%d port=%u", + __entry->program, __entry->version, + __entry->protocol, __entry->port 
+ ) +); + +TRACE_EVENT(rpcb_register, + TP_PROTO( + u32 program, + u32 version, + const char *addr, + const char *netid + ), + + TP_ARGS(program, version, addr, netid), + + TP_STRUCT__entry( + __field(unsigned int, program) + __field(unsigned int, version) + __string(addr, addr) + __string(netid, netid) + ), + + TP_fast_assign( + __entry->program = program; + __entry->version = version; + __assign_str(addr, addr); + __assign_str(netid, netid); + ), + + TP_printk("program=%u version=%u addr=%s netid=%s", + __entry->program, __entry->version, + __get_str(addr), __get_str(netid) + ) +); + +TRACE_EVENT(rpcb_unregister, + TP_PROTO( + u32 program, + u32 version, + const char *netid + ), + + TP_ARGS(program, version, netid), + + TP_STRUCT__entry( + __field(unsigned int, program) + __field(unsigned int, version) + __string(netid, netid) + ), + + TP_fast_assign( + __entry->program = program; + __entry->version = version; + __assign_str(netid, netid); + ), + + TP_printk("program=%u version=%u netid=%s", + __entry->program, __entry->version, __get_str(netid) + ) +); + DECLARE_EVENT_CLASS(svc_xdr_buf_class, TP_PROTO( const struct svc_rqst *rqst, diff --git a/net/sunrpc/rpcb_clnt.c b/net/sunrpc/rpcb_clnt.c index af2882c62a3b..38fe2ce8a5aa 100644 --- a/net/sunrpc/rpcb_clnt.c +++ b/net/sunrpc/rpcb_clnt.c @@ -35,10 +35,6 @@ #include "netns.h" -#if IS_ENABLED(CONFIG_SUNRPC_DEBUG) -# define RPCDBG_FACILITY RPCDBG_BIND -#endif - #define RPCBIND_SOCK_PATHNAME "/var/run/rpcbind.sock" #define RPCBIND_PROGRAM (100000u) @@ -444,9 +440,7 @@ int rpcb_register(struct net *net, u32 prog, u32 vers, int prot, unsigned short struct sunrpc_net *sn = net_generic(net, sunrpc_net_id); bool is_set = false; - dprintk("RPC: %sregistering (%u, %u, %d, %u) with local " - "rpcbind\n", (port ? 
"" : "un"), - prog, vers, prot, port); + trace_pmap_register(prog, vers, prot, port); msg.rpc_proc = &rpcb_procedures2[RPCBPROC_UNSET]; if (port != 0) { @@ -472,11 +466,6 @@ static int rpcb_register_inet4(struct sunrpc_net *sn, map->r_addr = rpc_sockaddr2uaddr(sap, GFP_KERNEL); - dprintk("RPC: %sregistering [%u, %u, %s, '%s'] with " - "local rpcbind\n", (port ? "" : "un"), - map->r_prog, map->r_vers, - map->r_addr, map->r_netid); - msg->rpc_proc = &rpcb_procedures4[RPCBPROC_UNSET]; if (port != 0) { msg->rpc_proc = &rpcb_procedures4[RPCBPROC_SET]; @@ -503,11 +492,6 @@ static int rpcb_register_inet6(struct sunrpc_net *sn, map->r_addr = rpc_sockaddr2uaddr(sap, GFP_KERNEL); - dprintk("RPC: %sregistering [%u, %u, %s, '%s'] with " - "local rpcbind\n", (port ? "" : "un"), - map->r_prog, map->r_vers, - map->r_addr, map->r_netid); - msg->rpc_proc = &rpcb_procedures4[RPCBPROC_UNSET]; if (port != 0) { msg->rpc_proc = &rpcb_procedures4[RPCBPROC_SET]; @@ -524,9 +508,7 @@ static int rpcb_unregister_all_protofamilies(struct sunrpc_net *sn, { struct rpcbind_args *map = msg->rpc_argp; - dprintk("RPC: unregistering [%u, %u, '%s'] with " - "local rpcbind\n", - map->r_prog, map->r_vers, map->r_netid); + trace_rpcb_unregister(map->r_prog, map->r_vers, map->r_netid); map->r_addr = ""; msg->rpc_proc = &rpcb_procedures4[RPCBPROC_UNSET]; @@ -598,6 +580,8 @@ int rpcb_v4_register(struct net *net, const u32 program, const u32 version, if (address == NULL) return rpcb_unregister_all_protofamilies(sn, &msg); + trace_rpcb_register(map.r_prog, map.r_vers, map.r_addr, map.r_netid); + switch (address->sa_family) { case AF_INET: return rpcb_register_inet4(sn, address, &msg); From 1466c2216382fc392817fc8888e4ebefb2ef4816 Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Wed, 8 Jul 2020 16:10:45 -0400 Subject: [PATCH 081/243] SUNRPC: Clean up RPC scheduler tracepoints Remove several redundant dprintk call sites, and replace a couple of potentially useful ones with tracepoints. 
Signed-off-by: Chuck Lever Signed-off-by: Anna Schumaker --- include/trace/events/sunrpc.h | 2 ++ net/sunrpc/sched.c | 15 +++------------ 2 files changed, 5 insertions(+), 12 deletions(-) diff --git a/include/trace/events/sunrpc.h b/include/trace/events/sunrpc.h index b5d4cbbaf4b0..11e5e52f4ce5 100644 --- a/include/trace/events/sunrpc.h +++ b/include/trace/events/sunrpc.h @@ -387,6 +387,8 @@ DECLARE_EVENT_CLASS(rpc_task_running, DEFINE_RPC_RUNNING_EVENT(begin); DEFINE_RPC_RUNNING_EVENT(run_action); +DEFINE_RPC_RUNNING_EVENT(sync_sleep); +DEFINE_RPC_RUNNING_EVENT(sync_wake); DEFINE_RPC_RUNNING_EVENT(complete); DEFINE_RPC_RUNNING_EVENT(signalled); DEFINE_RPC_RUNNING_EVENT(end); diff --git a/net/sunrpc/sched.c b/net/sunrpc/sched.c index 402b1c8869fd..a0d5a98fbf32 100644 --- a/net/sunrpc/sched.c +++ b/net/sunrpc/sched.c @@ -885,9 +885,6 @@ static void __rpc_execute(struct rpc_task *task) int task_is_async = RPC_IS_ASYNC(task); int status = 0; - dprintk("RPC: %5u __rpc_execute flags=0x%x\n", - task->tk_pid, task->tk_flags); - WARN_ON_ONCE(RPC_IS_QUEUED(task)); if (RPC_IS_QUEUED(task)) return; @@ -947,7 +944,7 @@ static void __rpc_execute(struct rpc_task *task) return; /* sync task: sleep here */ - dprintk("RPC: %5u sync task going to sleep\n", task->tk_pid); + trace_rpc_task_sync_sleep(task, task->tk_action); status = out_of_line_wait_on_bit(&task->tk_runstate, RPC_TASK_QUEUED, rpc_wait_bit_killable, TASK_KILLABLE); @@ -963,11 +960,9 @@ static void __rpc_execute(struct rpc_task *task) task->tk_rpc_status = -ERESTARTSYS; rpc_exit(task, -ERESTARTSYS); } - dprintk("RPC: %5u sync task resuming\n", task->tk_pid); + trace_rpc_task_sync_wake(task, task->tk_action); } - dprintk("RPC: %5u return %d, status %d\n", task->tk_pid, status, - task->tk_status); /* Release all resources associated with the task */ rpc_release_task(task); } @@ -1146,10 +1141,8 @@ static void rpc_free_task(struct rpc_task *task) put_rpccred(task->tk_op_cred); rpc_release_calldata(task->tk_ops, 
task->tk_calldata); - if (tk_flags & RPC_TASK_DYNAMIC) { - dprintk("RPC: %5u freeing task\n", task->tk_pid); + if (tk_flags & RPC_TASK_DYNAMIC) mempool_free(task, rpc_task_mempool); - } } static void rpc_async_release(struct work_struct *work) @@ -1203,8 +1196,6 @@ EXPORT_SYMBOL_GPL(rpc_put_task_async); static void rpc_release_task(struct rpc_task *task) { - dprintk("RPC: %5u release task\n", task->tk_pid); - WARN_ON_ONCE(RPC_IS_QUEUED(task)); rpc_release_resources_task(task); From 721a1d388b5536adb220aba25775a256f09790c3 Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Wed, 8 Jul 2020 16:10:50 -0400 Subject: [PATCH 082/243] SUNRPC: Remove dprintk call sites in RPC queuing functions Remove redundant call sites or call sites that are already covered by tracepoints. Signed-off-by: Chuck Lever Signed-off-by: Anna Schumaker --- include/trace/events/sunrpc.h | 1 + net/sunrpc/sched.c | 22 +--------------------- 2 files changed, 2 insertions(+), 21 deletions(-) diff --git a/include/trace/events/sunrpc.h b/include/trace/events/sunrpc.h index 11e5e52f4ce5..f45b3c01370c 100644 --- a/include/trace/events/sunrpc.h +++ b/include/trace/events/sunrpc.h @@ -390,6 +390,7 @@ DEFINE_RPC_RUNNING_EVENT(run_action); DEFINE_RPC_RUNNING_EVENT(sync_sleep); DEFINE_RPC_RUNNING_EVENT(sync_wake); DEFINE_RPC_RUNNING_EVENT(complete); +DEFINE_RPC_RUNNING_EVENT(timeout); DEFINE_RPC_RUNNING_EVENT(signalled); DEFINE_RPC_RUNNING_EVENT(end); diff --git a/net/sunrpc/sched.c b/net/sunrpc/sched.c index a0d5a98fbf32..116b3abaed3f 100644 --- a/net/sunrpc/sched.c +++ b/net/sunrpc/sched.c @@ -85,7 +85,6 @@ __rpc_disable_timer(struct rpc_wait_queue *queue, struct rpc_task *task) { if (list_empty(&task->u.tk_wait.timer_list)) return; - dprintk("RPC: %5u disabling timer\n", task->tk_pid); task->tk_timeout = 0; list_del(&task->u.tk_wait.timer_list); if (list_empty(&queue->timer_list.list)) @@ -111,9 +110,6 @@ static void __rpc_add_timer(struct rpc_wait_queue *queue, struct rpc_task *task, unsigned long timeout) 
{ - dprintk("RPC: %5u setting alarm for %u ms\n", - task->tk_pid, jiffies_to_msecs(timeout - jiffies)); - task->tk_timeout = timeout; if (list_empty(&queue->timer_list.list) || time_before(timeout, queue->timer_list.expires)) rpc_set_queue_timer(queue, timeout); @@ -216,9 +212,6 @@ static void __rpc_add_wait_queue(struct rpc_wait_queue *queue, /* barrier matches the read in rpc_wake_up_task_queue_locked() */ smp_wmb(); rpc_set_queued(task); - - dprintk("RPC: %5u added to queue %p \"%s\"\n", - task->tk_pid, queue, rpc_qname(queue)); } /* @@ -241,8 +234,6 @@ static void __rpc_remove_wait_queue(struct rpc_wait_queue *queue, struct rpc_tas else list_del(&task->u.tk_wait.list); queue->qlen--; - dprintk("RPC: %5u removed from queue %p \"%s\"\n", - task->tk_pid, queue, rpc_qname(queue)); } static void __rpc_init_priority_wait_queue(struct rpc_wait_queue *queue, const char *qname, unsigned char nr_queues) @@ -382,13 +373,9 @@ static void __rpc_do_sleep_on_priority(struct rpc_wait_queue *q, struct rpc_task *task, unsigned char queue_priority) { - dprintk("RPC: %5u sleep_on(queue \"%s\" time %lu)\n", - task->tk_pid, rpc_qname(q), jiffies); - trace_rpc_task_sleep(task, q); __rpc_add_wait_queue(q, task, queue_priority); - } static void __rpc_sleep_on_priority(struct rpc_wait_queue *q, @@ -510,9 +497,6 @@ static void __rpc_do_wake_up_task_on_wq(struct workqueue_struct *wq, struct rpc_wait_queue *queue, struct rpc_task *task) { - dprintk("RPC: %5u __rpc_wake_up_task (now %lu)\n", - task->tk_pid, jiffies); - /* Has the task been executed yet? If not, we cannot wake it up! 
*/ if (!RPC_IS_ACTIVATED(task)) { printk(KERN_ERR "RPC: Inactive task (%p) being woken up!\n", task); @@ -524,8 +508,6 @@ static void __rpc_do_wake_up_task_on_wq(struct workqueue_struct *wq, __rpc_remove_wait_queue(queue, task); rpc_make_runnable(wq, task); - - dprintk("RPC: __rpc_wake_up_task done\n"); } /* @@ -663,8 +645,6 @@ struct rpc_task *rpc_wake_up_first_on_wq(struct workqueue_struct *wq, { struct rpc_task *task = NULL; - dprintk("RPC: wake_up_first(%p \"%s\")\n", - queue, rpc_qname(queue)); spin_lock(&queue->lock); task = __rpc_find_next_queued(queue); if (task != NULL) @@ -770,7 +750,7 @@ static void __rpc_queue_timer_fn(struct work_struct *work) list_for_each_entry_safe(task, n, &queue->timer_list.list, u.tk_wait.timer_list) { timeo = task->tk_timeout; if (time_after_eq(now, timeo)) { - dprintk("RPC: %5u timeout\n", task->tk_pid); + trace_rpc_task_timeout(task, task->tk_action); task->tk_status = -ETIMEDOUT; rpc_wake_up_task_queue_locked(queue, task); continue; From 5589cc4778e23424c44c3a82cd0685f87904fe91 Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Wed, 8 Jul 2020 16:10:56 -0400 Subject: [PATCH 083/243] SUNRPC: Remove remaining dprintks from sched.c Clean up. 
Signed-off-by: Chuck Lever Signed-off-by: Anna Schumaker --- net/sunrpc/sched.c | 10 ---------- 1 file changed, 10 deletions(-) diff --git a/net/sunrpc/sched.c b/net/sunrpc/sched.c index 116b3abaed3f..f06d7c315017 100644 --- a/net/sunrpc/sched.c +++ b/net/sunrpc/sched.c @@ -27,10 +27,6 @@ #include "sunrpc.h" -#if IS_ENABLED(CONFIG_SUNRPC_DEBUG) -#define RPCDBG_FACILITY RPCDBG_SCHED -#endif - #define CREATE_TRACE_POINTS #include @@ -1065,9 +1061,6 @@ static void rpc_init_task(struct rpc_task *task, const struct rpc_task_setup *ta task->tk_action = rpc_prepare_task; rpc_init_task_statistics(task); - - dprintk("RPC: new task initialized, procpid %u\n", - task_pid_nr(current)); } static struct rpc_task * @@ -1091,7 +1084,6 @@ struct rpc_task *rpc_new_task(const struct rpc_task_setup *setup_data) rpc_init_task(task, setup_data); task->tk_flags |= flags; - dprintk("RPC: allocated task %p\n", task); return task; } @@ -1216,7 +1208,6 @@ static int rpciod_start(void) /* * Create the rpciod thread and wait for it to start. */ - dprintk("RPC: creating workqueue rpciod\n"); wq = alloc_workqueue("rpciod", WQ_MEM_RECLAIM | WQ_UNBOUND, 0); if (!wq) goto out_failed; @@ -1241,7 +1232,6 @@ static void rpciod_stop(void) if (rpciod_workqueue == NULL) return; - dprintk("RPC: destroying workqueue rpciod\n"); wq = rpciod_workqueue; rpciod_workqueue = NULL; From 1138ce1cf60954d1c0e2d7b4eba5b4df5813fd86 Mon Sep 17 00:00:00 2001 From: Randy Dunlap Date: Wed, 15 Jul 2020 18:31:30 -0700 Subject: [PATCH 084/243] sunrpc: fix duplicated word in <linux/sunrpc/cache.h> Change "time time" to "time expiry_time" to match the field name. Signed-off-by: Randy Dunlap Cc: "J. 
Bruce Fields" Cc: Chuck Lever Cc: Trond Myklebust Cc: Anna Schumaker Cc: linux-nfs@vger.kernel.org Signed-off-by: Anna Schumaker --- include/linux/sunrpc/cache.h | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/include/linux/sunrpc/cache.h b/include/linux/sunrpc/cache.h index 10891b70fc7b..d0965e2997b0 100644 --- a/include/linux/sunrpc/cache.h +++ b/include/linux/sunrpc/cache.h @@ -45,7 +45,8 @@ */ struct cache_head { struct hlist_node cache_list; - time64_t expiry_time; /* After time time, don't use the data */ + time64_t expiry_time; /* After time expiry_time, don't use + * the data */ time64_t last_refresh; /* If CACHE_PENDING, this is when upcall was * sent, else this is when update was * received, though it is alway set to From 0bdd4cea12a9fd79a7eb7de8493a5fef54d0eea6 Mon Sep 17 00:00:00 2001 From: "Alexander A. Klimov" Date: Tue, 7 Jul 2020 21:50:12 +0200 Subject: [PATCH 085/243] Replace HTTP links with HTTPS ones: NFS, SUNRPC, and LOCKD clients Rationale: Reduces attack surface on kernel devs opening the links for MITM as HTTPS traffic is much harder to manipulate. Deterministic algorithm: For each file: If not .svg: For each line: If doesn't contain `\bxmlns\b`: For each link, `\bhttp://[^# \t\r\n]*(?:\w|/)`: If both the HTTP and HTTPS versions return 200 OK and serve the same content: Replace HTTP with HTTPS. Signed-off-by: Alexander A. Klimov Signed-off-by: Anna Schumaker --- fs/lockd/mon.c | 2 +- include/linux/sunrpc/bc_xprt.h | 2 +- include/linux/sunrpc/msg_prot.h | 2 +- net/sunrpc/backchannel_rqst.c | 2 +- net/sunrpc/sunrpc.h | 2 +- 5 files changed, 5 insertions(+), 5 deletions(-) diff --git a/fs/lockd/mon.c b/fs/lockd/mon.c index 1eabd91870e6..1d9488cf0534 100644 --- a/fs/lockd/mon.c +++ b/fs/lockd/mon.c @@ -417,7 +417,7 @@ void nsm_release(struct nsm_handle *nsm) /* * XDR functions for NSM. 
* - * See http://www.opengroup.org/ for details on the Network + * See https://www.opengroup.org/ for details on the Network * Status Monitor wire protocol. */ diff --git a/include/linux/sunrpc/bc_xprt.h b/include/linux/sunrpc/bc_xprt.h index d796058cdff2..f07c334c599f 100644 --- a/include/linux/sunrpc/bc_xprt.h +++ b/include/linux/sunrpc/bc_xprt.h @@ -4,7 +4,7 @@ NetApp provides this source code under the GPL v2 License. The GPL v2 license is available at -http://opensource.org/licenses/gpl-license.php. +https://opensource.org/licenses/gpl-license.php. THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT diff --git a/include/linux/sunrpc/msg_prot.h b/include/linux/sunrpc/msg_prot.h index bea40d9f03a1..43f854487539 100644 --- a/include/linux/sunrpc/msg_prot.h +++ b/include/linux/sunrpc/msg_prot.h @@ -143,7 +143,7 @@ typedef __be32 rpc_fraghdr; /* * Well-known netids. See: * - * http://www.iana.org/assignments/rpc-netids/rpc-netids.xhtml + * https://www.iana.org/assignments/rpc-netids/rpc-netids.xhtml */ #define RPCBIND_NETID_UDP "udp" #define RPCBIND_NETID_TCP "tcp" diff --git a/net/sunrpc/backchannel_rqst.c b/net/sunrpc/backchannel_rqst.c index 195b40c5dae4..3fecad369592 100644 --- a/net/sunrpc/backchannel_rqst.c +++ b/net/sunrpc/backchannel_rqst.c @@ -5,7 +5,7 @@ NetApp provides this source code under the GPL v2 License. The GPL v2 license is available at -http://opensource.org/licenses/gpl-license.php. +https://opensource.org/licenses/gpl-license.php. THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT diff --git a/net/sunrpc/sunrpc.h b/net/sunrpc/sunrpc.h index f6fe2e6cd65a..2f59464e6524 100644 --- a/net/sunrpc/sunrpc.h +++ b/net/sunrpc/sunrpc.h @@ -4,7 +4,7 @@ NetApp provides this source code under the GPL v2 License. The GPL v2 license is available at -http://opensource.org/licenses/gpl-license.php. 
+https://opensource.org/licenses/gpl-license.php. THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT From d8a6ad913c286d4763ae20b14c02fe6f39d7cd9f Mon Sep 17 00:00:00 2001 From: Dave Wysochanski Date: Tue, 4 Aug 2020 12:11:47 -0400 Subject: [PATCH 086/243] NFS4: Fix oops when copy_file_range is attempted with NFS4.0 source The following oops is seen during xfstest/565 when the 'test' (source of the copy) is NFS4.0 and 'scratch' (destination) is NFS4.2 [ 59.692458] run fstests generic/565 at 2020-08-01 05:50:35 [ 60.613588] BUG: kernel NULL pointer dereference, address: 0000000000000008 [ 60.624970] #PF: supervisor read access in kernel mode [ 60.627671] #PF: error_code(0x0000) - not-present page [ 60.630347] PGD 0 P4D 0 [ 60.631853] Oops: 0000 [#1] SMP PTI [ 60.634086] CPU: 6 PID: 2828 Comm: xfs_io Kdump: loaded Not tainted 5.8.0-rc3 #1 [ 60.637676] Hardware name: Red Hat KVM, BIOS 0.5.1 01/01/2011 [ 60.639901] RIP: 0010:nfs4_check_serverowner_major_id+0x5/0x30 [nfsv4] [ 60.642719] Code: 89 ff e8 3e b3 b8 e1 e9 71 fe ff ff 41 bc da d8 ff ff e9 c3 fe ff ff e8 e9 9d 08 e2 66 0f 1f 84 00 00 00 00 00 66 66 66 66 90 <8b> 57 08 31 c0 3b 56 08 75 12 48 83 c6 0c 48 83 c7 0c e8 c4 97 bb [ 60.652629] RSP: 0018:ffffc265417f7e10 EFLAGS: 00010287 [ 60.655379] RAX: ffffa0664b066400 RBX: 0000000000000000 RCX: 0000000000000001 [ 60.658754] RDX: ffffa066725fb000 RSI: ffffa066725fd000 RDI: 0000000000000000 [ 60.662292] RBP: 0000000000020000 R08: 0000000000020000 R09: 0000000000000000 [ 60.666189] R10: 0000000000000003 R11: 0000000000000000 R12: ffffa06648258d00 [ 60.669914] R13: 0000000000000000 R14: 0000000000000000 R15: ffffa06648258100 [ 60.673645] FS: 00007faa9fb35800(0000) GS:ffffa06677d80000(0000) knlGS:0000000000000000 [ 60.677698] CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 [ 60.680773] CR2: 0000000000000008 CR3: 0000000203f14000 CR4: 00000000000406e0 [ 60.684476] Call Trace: [ 60.685809] 
nfs4_copy_file_range+0xfc/0x230 [nfsv4] [ 60.688704] vfs_copy_file_range+0x2ee/0x310 [ 60.691104] __x64_sys_copy_file_range+0xd6/0x210 [ 60.693527] do_syscall_64+0x4d/0x90 [ 60.695512] entry_SYSCALL_64_after_hwframe+0x44/0xa9 [ 60.698006] RIP: 0033:0x7faa9febc1bd Signed-off-by: Dave Wysochanski Signed-off-by: Anna Schumaker --- fs/nfs/nfs4file.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/fs/nfs/nfs4file.c b/fs/nfs/nfs4file.c index fdfc77486ace..91be7f628e4a 100644 --- a/fs/nfs/nfs4file.c +++ b/fs/nfs/nfs4file.c @@ -145,7 +145,8 @@ static ssize_t __nfs4_copy_file_range(struct file *file_in, loff_t pos_in, /* Only offload copy if superblock is the same */ if (file_in->f_op != &nfs4_file_operations) return -EXDEV; - if (!nfs_server_capable(file_inode(file_out), NFS_CAP_COPY)) + if (!nfs_server_capable(file_inode(file_out), NFS_CAP_COPY) || + !nfs_server_capable(file_inode(file_in), NFS_CAP_COPY)) return -EOPNOTSUPP; if (file_inode(file_in) == file_inode(file_out)) return -EOPNOTSUPP; From cf65e49f89f2ccad54b1d560691cfa3cd371b2d2 Mon Sep 17 00:00:00 2001 From: Miaohe Lin Date: Thu, 20 Aug 2020 08:01:49 -0400 Subject: [PATCH 087/243] nfs: Convert to use the preferred fallthrough macro Convert the uses of fallthrough comments to fallthrough macro. Please see commit 294f69e662d1 ("compiler_attributes.h: Add 'fallthrough' pseudo keyword for switch/case use") for detail. 
Signed-off-by: Hongxiang Lou Signed-off-by: Miaohe Lin Signed-off-by: Anna Schumaker --- fs/nfs/super.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fs/nfs/super.c b/fs/nfs/super.c index 7a70287f21a2..d20326ee0475 100644 --- a/fs/nfs/super.c +++ b/fs/nfs/super.c @@ -889,7 +889,7 @@ static struct nfs_server *nfs_try_mount_request(struct fs_context *fc) default: if (rpcauth_get_gssinfo(flavor, &info) != 0) continue; - /* Fallthrough */ + fallthrough; } dfprintk(MOUNT, "NFS: attempting to use auth flavor %u\n", flavor); ctx->selected_flavor = flavor; From 68274f97aeb6ebcd74c391ddbff0b517b9b0ca0f Mon Sep 17 00:00:00 2001 From: Frank van der Linden Date: Thu, 27 Aug 2020 20:46:55 +0000 Subject: [PATCH 088/243] NFSv4.2: xattr cache: remove unused cache struct field The hash_lock field of the cache structure was a leftover of a previous iteration of the code. It is now unused, so remove it. Signed-off-by: Frank van der Linden Signed-off-by: Anna Schumaker --- fs/nfs/nfs42xattr.c | 1 - 1 file changed, 1 deletion(-) diff --git a/fs/nfs/nfs42xattr.c b/fs/nfs/nfs42xattr.c index 86777996cfec..22396a7eebe1 100644 --- a/fs/nfs/nfs42xattr.c +++ b/fs/nfs/nfs42xattr.c @@ -67,7 +67,6 @@ struct nfs4_xattr_bucket { struct nfs4_xattr_cache { struct kref ref; - spinlock_t hash_lock; /* protects hashtable and lru */ struct nfs4_xattr_bucket buckets[NFS4_XATTR_HASH_SIZE]; struct list_head lru; struct list_head dispose; From c0a1d129d3e01751d410343cb8e4a694716ca825 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Fri, 18 Sep 2020 15:29:58 -0400 Subject: [PATCH 089/243] pNFS/flexfiles: Ensure we initialise the mirror bsizes correctly on read While it is true that reading from an unmirrored source always uses index 0, that is no longer true for mirrored sources when we fail over. 
Fixes: 563c53e73b8b ("NFS: Fix flexfiles read failover") Signed-off-by: Trond Myklebust Signed-off-by: Anna Schumaker --- fs/nfs/flexfilelayout/flexfilelayout.c | 11 ++++++----- 1 file changed, 6 insertions(+), 5 deletions(-) diff --git a/fs/nfs/flexfilelayout/flexfilelayout.c b/fs/nfs/flexfilelayout/flexfilelayout.c index ff8965d1a4d4..1edeebd51937 100644 --- a/fs/nfs/flexfilelayout/flexfilelayout.c +++ b/fs/nfs/flexfilelayout/flexfilelayout.c @@ -838,6 +838,7 @@ ff_layout_pg_init_read(struct nfs_pageio_descriptor *pgio, struct nfs4_ff_layout_mirror *mirror; struct nfs4_pnfs_ds *ds; int ds_idx; + u32 i; retry: ff_layout_pg_check_layout(pgio, req); @@ -863,14 +864,14 @@ ff_layout_pg_init_read(struct nfs_pageio_descriptor *pgio, goto retry; } - mirror = FF_LAYOUT_COMP(pgio->pg_lseg, ds_idx); + for (i = 0; i < pgio->pg_mirror_count; i++) { + mirror = FF_LAYOUT_COMP(pgio->pg_lseg, i); + pgm = &pgio->pg_mirrors[i]; + pgm->pg_bsize = mirror->mirror_ds->ds_versions[0].rsize; + } pgio->pg_mirror_idx = ds_idx; - /* read always uses only one mirror - idx 0 for pgio layer */ - pgm = &pgio->pg_mirrors[0]; - pgm->pg_bsize = mirror->mirror_ds->ds_versions[0].rsize; - if (NFS_SERVER(pgio->pg_inode)->flags & (NFS_MOUNT_SOFT|NFS_MOUNT_SOFTERR)) pgio->pg_maxretrans = io_maxretrans; From c754e137f55e075d6b6ad9b866c32e9aad260a83 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Fri, 18 Sep 2020 15:29:59 -0400 Subject: [PATCH 090/243] pNFS/flexfiles: Be consistent about mirror index types A mirror index is always of type u32. 
Signed-off-by: Trond Myklebust Signed-off-by: Anna Schumaker --- fs/nfs/flexfilelayout/flexfilelayout.c | 34 +++++++++++++------------- include/linux/nfs_xdr.h | 4 +-- 2 files changed, 19 insertions(+), 19 deletions(-) diff --git a/fs/nfs/flexfilelayout/flexfilelayout.c b/fs/nfs/flexfilelayout/flexfilelayout.c index 1edeebd51937..a163533446fa 100644 --- a/fs/nfs/flexfilelayout/flexfilelayout.c +++ b/fs/nfs/flexfilelayout/flexfilelayout.c @@ -715,7 +715,7 @@ nfs4_ff_layout_stat_io_end_write(struct rpc_task *task, } static void -ff_layout_mark_ds_unreachable(struct pnfs_layout_segment *lseg, int idx) +ff_layout_mark_ds_unreachable(struct pnfs_layout_segment *lseg, u32 idx) { struct nfs4_deviceid_node *devid = FF_LAYOUT_DEVID_NODE(lseg, idx); @@ -724,7 +724,7 @@ ff_layout_mark_ds_unreachable(struct pnfs_layout_segment *lseg, int idx) } static void -ff_layout_mark_ds_reachable(struct pnfs_layout_segment *lseg, int idx) +ff_layout_mark_ds_reachable(struct pnfs_layout_segment *lseg, u32 idx) { struct nfs4_deviceid_node *devid = FF_LAYOUT_DEVID_NODE(lseg, idx); @@ -734,14 +734,14 @@ ff_layout_mark_ds_reachable(struct pnfs_layout_segment *lseg, int idx) static struct nfs4_pnfs_ds * ff_layout_choose_ds_for_read(struct pnfs_layout_segment *lseg, - int start_idx, int *best_idx, + u32 start_idx, u32 *best_idx, bool check_device) { struct nfs4_ff_layout_segment *fls = FF_LAYOUT_LSEG(lseg); struct nfs4_ff_layout_mirror *mirror; struct nfs4_pnfs_ds *ds; bool fail_return = false; - int idx; + u32 idx; /* mirrors are initially sorted by efficiency */ for (idx = start_idx; idx < fls->mirror_array_cnt; idx++) { @@ -766,21 +766,21 @@ ff_layout_choose_ds_for_read(struct pnfs_layout_segment *lseg, static struct nfs4_pnfs_ds * ff_layout_choose_any_ds_for_read(struct pnfs_layout_segment *lseg, - int start_idx, int *best_idx) + u32 start_idx, u32 *best_idx) { return ff_layout_choose_ds_for_read(lseg, start_idx, best_idx, false); } static struct nfs4_pnfs_ds * 
ff_layout_choose_valid_ds_for_read(struct pnfs_layout_segment *lseg, - int start_idx, int *best_idx) + u32 start_idx, u32 *best_idx) { return ff_layout_choose_ds_for_read(lseg, start_idx, best_idx, true); } static struct nfs4_pnfs_ds * ff_layout_choose_best_ds_for_read(struct pnfs_layout_segment *lseg, - int start_idx, int *best_idx) + u32 start_idx, u32 *best_idx) { struct nfs4_pnfs_ds *ds; @@ -791,7 +791,8 @@ ff_layout_choose_best_ds_for_read(struct pnfs_layout_segment *lseg, } static struct nfs4_pnfs_ds * -ff_layout_get_ds_for_read(struct nfs_pageio_descriptor *pgio, int *best_idx) +ff_layout_get_ds_for_read(struct nfs_pageio_descriptor *pgio, + u32 *best_idx) { struct pnfs_layout_segment *lseg = pgio->pg_lseg; struct nfs4_pnfs_ds *ds; @@ -837,8 +838,7 @@ ff_layout_pg_init_read(struct nfs_pageio_descriptor *pgio, struct nfs_pgio_mirror *pgm; struct nfs4_ff_layout_mirror *mirror; struct nfs4_pnfs_ds *ds; - int ds_idx; - u32 i; + u32 ds_idx, i; retry: ff_layout_pg_check_layout(pgio, req); @@ -895,7 +895,7 @@ ff_layout_pg_init_write(struct nfs_pageio_descriptor *pgio, struct nfs4_ff_layout_mirror *mirror; struct nfs_pgio_mirror *pgm; struct nfs4_pnfs_ds *ds; - int i; + u32 i; retry: ff_layout_pg_check_layout(pgio, req); @@ -1039,7 +1039,7 @@ static void ff_layout_reset_write(struct nfs_pgio_header *hdr, bool retry_pnfs) static void ff_layout_resend_pnfs_read(struct nfs_pgio_header *hdr) { u32 idx = hdr->pgio_mirror_idx + 1; - int new_idx = 0; + u32 new_idx = 0; if (ff_layout_choose_any_ds_for_read(hdr->lseg, idx + 1, &new_idx)) ff_layout_send_layouterror(hdr->lseg); @@ -1076,7 +1076,7 @@ static int ff_layout_async_handle_error_v4(struct rpc_task *task, struct nfs4_state *state, struct nfs_client *clp, struct pnfs_layout_segment *lseg, - int idx) + u32 idx) { struct pnfs_layout_hdr *lo = lseg->pls_layout; struct inode *inode = lo->plh_inode; @@ -1150,7 +1150,7 @@ static int ff_layout_async_handle_error_v4(struct rpc_task *task, /* Retry all errors through either 
pNFS or MDS except for -EJUKEBOX */ static int ff_layout_async_handle_error_v3(struct rpc_task *task, struct pnfs_layout_segment *lseg, - int idx) + u32 idx) { struct nfs4_deviceid_node *devid = FF_LAYOUT_DEVID_NODE(lseg, idx); @@ -1185,7 +1185,7 @@ static int ff_layout_async_handle_error(struct rpc_task *task, struct nfs4_state *state, struct nfs_client *clp, struct pnfs_layout_segment *lseg, - int idx) + u32 idx) { int vers = clp->cl_nfs_mod->rpc_vers->number; @@ -1212,7 +1212,7 @@ static int ff_layout_async_handle_error(struct rpc_task *task, } static void ff_layout_io_track_ds_error(struct pnfs_layout_segment *lseg, - int idx, u64 offset, u64 length, + u32 idx, u64 offset, u64 length, u32 *op_status, int opnum, int error) { struct nfs4_ff_layout_mirror *mirror; @@ -1810,7 +1810,7 @@ ff_layout_write_pagelist(struct nfs_pgio_header *hdr, int sync) loff_t offset = hdr->args.offset; int vers; struct nfs_fh *fh; - int idx = hdr->pgio_mirror_idx; + u32 idx = hdr->pgio_mirror_idx; mirror = FF_LAYOUT_COMP(lseg, idx); ds = nfs4_ff_layout_prepare_ds(lseg, mirror, true); diff --git a/include/linux/nfs_xdr.h b/include/linux/nfs_xdr.h index 9408f3252c8e..69cb46f7b8d2 100644 --- a/include/linux/nfs_xdr.h +++ b/include/linux/nfs_xdr.h @@ -1611,8 +1611,8 @@ struct nfs_pgio_header { __u64 mds_offset; /* Filelayout dense stripe */ struct nfs_page_array page_array; struct nfs_client *ds_clp; /* pNFS data server */ - int ds_commit_idx; /* ds index if ds_clp is set */ - int pgio_mirror_idx;/* mirror index in pgio layer */ + u32 ds_commit_idx; /* ds index if ds_clp is set */ + u32 pgio_mirror_idx;/* mirror index in pgio layer */ }; struct nfs_mds_commit_info { From ed38c33f1cc5a4e6da63f71879106acc0027e286 Mon Sep 17 00:00:00 2001 From: Julia Lawall Date: Sun, 20 Sep 2020 13:26:20 +0200 Subject: [PATCH 091/243] xprtrdma: drop double zeroing sg_init_table zeroes its first argument, so the allocation of that argument doesn't have to. 
The semantic patch that makes this change is as follows: (http://coccinelle.lip6.fr/) // @@ expression x,n,flags; @@ x = - kcalloc + kmalloc_array (n,sizeof(*x),flags) ... sg_init_table(x,n) // Signed-off-by: Julia Lawall Acked-by: Chuck Lever Signed-off-by: Anna Schumaker --- net/sunrpc/xprtrdma/frwr_ops.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/sunrpc/xprtrdma/frwr_ops.c b/net/sunrpc/xprtrdma/frwr_ops.c index 7f94c9a19fd3..44888f5badef 100644 --- a/net/sunrpc/xprtrdma/frwr_ops.c +++ b/net/sunrpc/xprtrdma/frwr_ops.c @@ -124,7 +124,7 @@ int frwr_mr_init(struct rpcrdma_xprt *r_xprt, struct rpcrdma_mr *mr) if (IS_ERR(frmr)) goto out_mr_err; - sg = kcalloc(depth, sizeof(*sg), GFP_NOFS); + sg = kmalloc_array(depth, sizeof(*sg), GFP_NOFS); if (!sg) goto out_list_err; From 965fe1ceea05de735f63d1c1f90f0310fc830a8d Mon Sep 17 00:00:00 2001 From: Bastian Krause Date: Thu, 17 Sep 2020 20:32:39 +0200 Subject: [PATCH 092/243] dt-bindings: rtc: let aux-voltage-chargeable supersede trickle-diode-disable Some RTCs can be equipped with a chargeable battery or supercap. Every RTC that allows this, and whose driver implements it, is charged by default. To disable this the trickle-diode-disable flag exists. If a driver did not support charging and some time later one wants to add that feature, there is currently no way to do it without breaking dt backwards compatibility. RTCs on boards without the trickle-diode-disable flag in their device tree would suddenly charge their battery/supercap which is a change in behavior. Change that by introducing aux-voltage-chargeable, not as a flag but as a uint32 enum allowing to set "do not charge" (0) or "charge" (1). This dt property is optional, so we can now distinguish these cases. Care must be taken to support the old behavior for device trees without aux-voltage-chargeable nonetheless to stay compatible.
Signed-off-by: Bastian Krause Signed-off-by: Alexandre Belloni Reviewed-by: Rob Herring Link: https://lore.kernel.org/r/20200917183246.19446-2-bst@pengutronix.de --- Documentation/devicetree/bindings/rtc/rtc.yaml | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/Documentation/devicetree/bindings/rtc/rtc.yaml b/Documentation/devicetree/bindings/rtc/rtc.yaml index ee237b2ed66a..6b8a11325691 100644 --- a/Documentation/devicetree/bindings/rtc/rtc.yaml +++ b/Documentation/devicetree/bindings/rtc/rtc.yaml @@ -17,6 +17,15 @@ properties: $nodename: pattern: "^rtc(@.*|-[0-9a-f])*$" + aux-voltage-chargeable: + $ref: /schemas/types.yaml#/definitions/uint32 + enum: [0, 1] + description: | + Tells whether the battery/supercap of the RTC (if any) is + chargeable or not: + 0: not chargeable + 1: chargeable + quartz-load-femtofarads: $ref: /schemas/types.yaml#/definitions/uint32 description: @@ -35,6 +44,7 @@ properties: description: Do not use internal trickle charger diode. Should be given if internal trickle charger diode should be disabled. + deprecated: true trickle-resistor-ohms: $ref: /schemas/types.yaml#/definitions/uint32 From c53cee48670cc623a42e49319a261798d8608b0e Mon Sep 17 00:00:00 2001 From: Bastian Krause Date: Thu, 17 Sep 2020 20:32:40 +0200 Subject: [PATCH 093/243] dt-bindings: rtc: ds1307: let aux-voltage-chargeable supersede trickle-diode-disable trickle-diode-disable is deprecated, so reflect that in the driver's binding and add the new aux-voltage-chargeable. 
Signed-off-by: Bastian Krause Signed-off-by: Alexandre Belloni Reviewed-by: Rob Herring Link: https://lore.kernel.org/r/20200917183246.19446-3-bst@pengutronix.de --- Documentation/devicetree/bindings/rtc/rtc-ds1307.txt | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) diff --git a/Documentation/devicetree/bindings/rtc/rtc-ds1307.txt b/Documentation/devicetree/bindings/rtc/rtc-ds1307.txt index 66f0a31ae9ce..08ea9734da80 100644 --- a/Documentation/devicetree/bindings/rtc/rtc-ds1307.txt +++ b/Documentation/devicetree/bindings/rtc/rtc-ds1307.txt @@ -31,9 +31,16 @@ Optional properties: Selected resistor for trickle charger Possible values are 250, 2000, 4000 Should be given if trickle charger should be enabled -- trickle-diode-disable : ds1339, ds1340 and ds 1388 only +- aux-voltage-chargeable: ds1339, ds1340 and ds1388 only + Tells whether the battery/supercap of the RTC (if any) is + chargeable or not. + Possible values are 0 (not chargeable), 1 (chargeable) + +Deprecated properties: +- trickle-diode-disable : ds1339, ds1340 and ds1388 only Do not use internal trickle charger diode Should be given if internal trickle charger diode should be disabled + (superseded by aux-voltage-chargeable) Example: ds1339: rtc@68 { From 40d58c9742286ef95ef8dc5885d4043224490c82 Mon Sep 17 00:00:00 2001 From: Bastian Krause Date: Thu, 17 Sep 2020 20:32:41 +0200 Subject: [PATCH 094/243] dt-bindings: rtc: ds1307: add rx8130 aux-voltage-chargeable support Epson's RX8130 was not charged before. A related patch will allow optional charging. 
Signed-off-by: Bastian Krause Signed-off-by: Alexandre Belloni Reviewed-by: Rob Herring Link: https://lore.kernel.org/r/20200917183246.19446-4-bst@pengutronix.de --- Documentation/devicetree/bindings/rtc/rtc-ds1307.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Documentation/devicetree/bindings/rtc/rtc-ds1307.txt b/Documentation/devicetree/bindings/rtc/rtc-ds1307.txt index 08ea9734da80..36f610bb051e 100644 --- a/Documentation/devicetree/bindings/rtc/rtc-ds1307.txt +++ b/Documentation/devicetree/bindings/rtc/rtc-ds1307.txt @@ -31,7 +31,7 @@ Optional properties: Selected resistor for trickle charger Possible values are 250, 2000, 4000 Should be given if trickle charger should be enabled -- aux-voltage-chargeable: ds1339, ds1340 and ds1388 only +- aux-voltage-chargeable: ds1339, ds1340, ds1388 and rx8130 only Tells whether the battery/supercap of the RTC (if any) is chargeable or not. Possible values are 0 (not chargeable), 1 (chargeable) From 462eb736db3db76899022e4e4db788a7b6efbe09 Mon Sep 17 00:00:00 2001 From: Bastian Krause Date: Thu, 17 Sep 2020 20:32:42 +0200 Subject: [PATCH 095/243] rtc: ds1307: apply DS13XX_TRICKLE_CHARGER_MAGIC only conditionally DS13XX_TRICKLE_CHARGER_MAGIC sets the trickle-charge select (TCS) bits (7..4). The datasheet of Maxim Integrated's DS1339 [1] for instance reads: "To prevent accidental enabling, only a pattern on 1010 enables the trickle charger. All other patterns disable the trickle charger." Since not all RTCs connected to a backup battery or supercap use these bits DS13XX_TRICKLE_CHARGER_MAGIC should not get applied for all charger setups unconditionally. Epson's RX8130 is such an example: Instead of TCS bits "SMPTSEL1", "SMPTSEL0", "CHGEN" and "INIEN" are expected as bit 7..4. DS1339 and DS1340 are currently the only RTCs in the ds1307 driver that apply DS13XX_TRICKLE_CHARGER_MAGIC to their setup register value. 
So apply DS13XX_TRICKLE_CHARGER_MAGIC in do_trickle_setup_ds1339() which is used by both RTCs. [1] https://datasheets.maximintegrated.com/en/ds/DS1339-DS1339U.pdf [2] https://support.epson.biz/td/api/doc_check.php?dl=app_RX8130CE Signed-off-by: Bastian Krause Signed-off-by: Alexandre Belloni Link: https://lore.kernel.org/r/20200917183246.19446-5-bst@pengutronix.de --- drivers/rtc/rtc-ds1307.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/rtc/rtc-ds1307.c b/drivers/rtc/rtc-ds1307.c index 8f4ddbaa2052..82f75a798705 100644 --- a/drivers/rtc/rtc-ds1307.c +++ b/drivers/rtc/rtc-ds1307.c @@ -512,6 +512,8 @@ static u8 do_trickle_setup_ds1339(struct ds1307 *ds1307, u32 ohms, bool diode) u8 setup = (diode) ? DS1307_TRICKLE_CHARGER_DIODE : DS1307_TRICKLE_CHARGER_NO_DIODE; + setup |= DS13XX_TRICKLE_CHARGER_MAGIC; + switch (ohms) { case 250: setup |= DS1307_TRICKLE_CHARGER_250_OHM; @@ -1763,7 +1765,6 @@ static int ds1307_probe(struct i2c_client *client, trickle_charger_setup = pdata->trickle_charger_setup; if (trickle_charger_setup && chip->trickle_charger_reg) { - trickle_charger_setup |= DS13XX_TRICKLE_CHARGER_MAGIC; dev_dbg(ds1307->dev, "writing trickle charger info 0x%x to 0x%x\n", trickle_charger_setup, chip->trickle_charger_reg); From 1b5b6af788ae59ee73e3f3230dbfa4f0e31d8d18 Mon Sep 17 00:00:00 2001 From: Bastian Krause Date: Thu, 17 Sep 2020 20:32:43 +0200 Subject: [PATCH 096/243] rtc: ds1307: introduce requires_trickle_resistor per chip Make trickle-resistor-ohms optional for charging setups that do not require specifying ROUT bits (specifying the resistor value between Vcc and Vbackup). In order to allow specifying that, introduce requires_trickle_resistor per chip. 
Signed-off-by: Bastian Krause Signed-off-by: Alexandre Belloni Link: https://lore.kernel.org/r/20200917183246.19446-6-bst@pengutronix.de --- drivers/rtc/rtc-ds1307.c | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/drivers/rtc/rtc-ds1307.c b/drivers/rtc/rtc-ds1307.c index 82f75a798705..64fa1318817c 100644 --- a/drivers/rtc/rtc-ds1307.c +++ b/drivers/rtc/rtc-ds1307.c @@ -191,6 +191,10 @@ struct chip_desc { u16 trickle_charger_reg; u8 (*do_trickle_setup)(struct ds1307 *, u32, bool); + /* Does the RTC require trickle-resistor-ohms to select the value of + * the resistor between Vcc and Vbackup? + */ + bool requires_trickle_resistor; }; static const struct chip_desc chips[last_ds_type]; @@ -986,6 +990,7 @@ static const struct chip_desc chips[last_ds_type] = { .bbsqi_bit = DS1339_BIT_BBSQI, .trickle_charger_reg = 0x10, .do_trickle_setup = &do_trickle_setup_ds1339, + .requires_trickle_resistor = true, }, [ds_1340] = { .century_reg = DS1307_REG_HOUR, @@ -993,6 +998,7 @@ static const struct chip_desc chips[last_ds_type] = { .century_bit = DS1340_BIT_CENTURY, .do_trickle_setup = &do_trickle_setup_ds1339, .trickle_charger_reg = 0x08, + .requires_trickle_resistor = true, }, [ds_1341] = { .century_reg = DS1307_REG_MONTH, @@ -1307,7 +1313,7 @@ static u8 ds1307_trickle_init(struct ds1307 *ds1307, return 0; if (device_property_read_u32(ds1307->dev, "trickle-resistor-ohms", - &ohms)) + &ohms) && chip->requires_trickle_resistor) return 0; if (device_property_read_bool(ds1307->dev, "trickle-diode-disable")) From 95a74cbb21a2431dd2fd8918fa26113629b6e13e Mon Sep 17 00:00:00 2001 From: Bastian Krause Date: Thu, 17 Sep 2020 20:32:44 +0200 Subject: [PATCH 097/243] rtc: ds1307: store previous charge default per chip Some RTC's batteries and supercaps were charged by default until now. In contrast other RTCs allow charging but the driver did not configure them to do so until now. These must not be charged by default to stay backwards compatible. 
In order to do that, store the charge default per chip. Signed-off-by: Bastian Krause Signed-off-by: Alexandre Belloni Link: https://lore.kernel.org/r/20200917183246.19446-7-bst@pengutronix.de --- drivers/rtc/rtc-ds1307.c | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) diff --git a/drivers/rtc/rtc-ds1307.c b/drivers/rtc/rtc-ds1307.c index 64fa1318817c..fdd6f9fda6f9 100644 --- a/drivers/rtc/rtc-ds1307.c +++ b/drivers/rtc/rtc-ds1307.c @@ -195,6 +195,11 @@ struct chip_desc { * the resistor between Vcc and Vbackup? */ bool requires_trickle_resistor; + /* Some RTC's batteries and supercaps were charged by default, others + * allow charging but were not configured previously to do so. + * Remember this behavior to stay backwards compatible. + */ + bool charge_default; }; static const struct chip_desc chips[last_ds_type]; @@ -991,6 +996,7 @@ static const struct chip_desc chips[last_ds_type] = { .trickle_charger_reg = 0x10, .do_trickle_setup = &do_trickle_setup_ds1339, .requires_trickle_resistor = true, + .charge_default = true, }, [ds_1340] = { .century_reg = DS1307_REG_HOUR, @@ -999,6 +1005,7 @@ static const struct chip_desc chips[last_ds_type] = { .do_trickle_setup = &do_trickle_setup_ds1339, .trickle_charger_reg = 0x08, .requires_trickle_resistor = true, + .charge_default = true, }, [ds_1341] = { .century_reg = DS1307_REG_MONTH, @@ -1307,7 +1314,7 @@ static u8 ds1307_trickle_init(struct ds1307 *ds1307, const struct chip_desc *chip) { u32 ohms; - bool diode = true; + bool diode = chip->charge_default; if (!chip->do_trickle_setup) return 0; From 0874734e09af5cc05439dbe2c8ff704f14d679f5 Mon Sep 17 00:00:00 2001 From: Bastian Krause Date: Thu, 17 Sep 2020 20:32:45 +0200 Subject: [PATCH 098/243] rtc: ds1307: consider aux-voltage-chargeable Prefer aux-voltage-chargeable over trickle-diode-disable and set diode accordingly. This is then passed to the chip's appropriate charge setup function. 
Signed-off-by: Bastian Krause Signed-off-by: Alexandre Belloni Link: https://lore.kernel.org/r/20200917183246.19446-8-bst@pengutronix.de --- drivers/rtc/rtc-ds1307.c | 23 +++++++++++++++++++++-- 1 file changed, 21 insertions(+), 2 deletions(-) diff --git a/drivers/rtc/rtc-ds1307.c b/drivers/rtc/rtc-ds1307.c index fdd6f9fda6f9..03e166d2b0f8 100644 --- a/drivers/rtc/rtc-ds1307.c +++ b/drivers/rtc/rtc-ds1307.c @@ -1313,7 +1313,7 @@ static int ds1307_nvram_write(void *priv, unsigned int offset, void *val, static u8 ds1307_trickle_init(struct ds1307 *ds1307, const struct chip_desc *chip) { - u32 ohms; + u32 ohms, chargeable; bool diode = chip->charge_default; if (!chip->do_trickle_setup) @@ -1323,8 +1323,27 @@ static u8 ds1307_trickle_init(struct ds1307 *ds1307, &ohms) && chip->requires_trickle_resistor) return 0; - if (device_property_read_bool(ds1307->dev, "trickle-diode-disable")) + /* aux-voltage-chargeable takes precedence over the deprecated + * trickle-diode-disable + */ + if (!device_property_read_u32(ds1307->dev, "aux-voltage-chargeable", + &chargeable)) { + switch (chargeable) { + case 0: + diode = false; + break; + case 1: + diode = true; + break; + default: + dev_warn(ds1307->dev, + "unsupported aux-voltage-chargeable value\n"); + break; + } + } else if (device_property_read_bool(ds1307->dev, + "trickle-diode-disable")) { diode = false; + } return chip->do_trickle_setup(ds1307, ohms, diode); } From 0026f1604c9ba1ae8108d4977da0366c283552bc Mon Sep 17 00:00:00 2001 From: Bastian Krause Date: Thu, 17 Sep 2020 20:32:46 +0200 Subject: [PATCH 099/243] rtc: ds1307: enable rx8130's backup battery, make it chargeable optionally The ds1307 charger infrastructure now allows to add a rx8130 charger setup that.. - does not depend on trickle-resistor-ohms - does not use DS13XX_TRICKLE_CHARGER_MAGIC trickle-charge select (TCS) bits - keeps previous no-charge behavior for device trees without aux-voltage-chargeable Make that happen. 
Signed-off-by: Bastian Krause Signed-off-by: Alexandre Belloni Link: https://lore.kernel.org/r/20200917183246.19446-9-bst@pengutronix.de --- drivers/rtc/rtc-ds1307.c | 15 +++++++++++++++ 1 file changed, 15 insertions(+) diff --git a/drivers/rtc/rtc-ds1307.c b/drivers/rtc/rtc-ds1307.c index 03e166d2b0f8..9f5f54ca039d 100644 --- a/drivers/rtc/rtc-ds1307.c +++ b/drivers/rtc/rtc-ds1307.c @@ -122,6 +122,9 @@ enum ds_type { #define RX8130_REG_FLAG_AF BIT(3) #define RX8130_REG_CONTROL0 0x1e #define RX8130_REG_CONTROL0_AIE BIT(3) +#define RX8130_REG_CONTROL1 0x1f +#define RX8130_REG_CONTROL1_INIEN BIT(4) +#define RX8130_REG_CONTROL1_CHGEN BIT(5) #define MCP794XX_REG_CONTROL 0x07 # define MCP794XX_BIT_ALM0_EN 0x10 @@ -541,6 +544,16 @@ static u8 do_trickle_setup_ds1339(struct ds1307 *ds1307, u32 ohms, bool diode) return setup; } +static u8 do_trickle_setup_rx8130(struct ds1307 *ds1307, u32 ohms, bool diode) +{ + /* make sure that the backup battery is enabled */ + u8 setup = RX8130_REG_CONTROL1_INIEN; + if (diode) + setup |= RX8130_REG_CONTROL1_CHGEN; + + return setup; +} + static irqreturn_t rx8130_irq(int irq, void *dev_id) { struct ds1307 *ds1307 = dev_id; @@ -1029,6 +1042,8 @@ static const struct chip_desc chips[last_ds_type] = { .offset = 0x10, .irq_handler = rx8130_irq, .rtc_ops = &rx8130_rtc_ops, + .trickle_charger_reg = RX8130_REG_CONTROL1, + .do_trickle_setup = &do_trickle_setup_rx8130, }, [m41t0] = { .rtc_ops = &m41txx_rtc_ops, From f434f9b7afca80e8abfe5d52b20fe34c39dd2c14 Mon Sep 17 00:00:00 2001 From: Jon Hunter Date: Thu, 24 Sep 2020 15:18:16 +0200 Subject: [PATCH 100/243] eeprom: at24: Initialise AT24 NVMEM ID field The AT24 EEPROM driver does not initialise the 'id' field of the nvmem_config structure and because the entire structure is not initialised, it ends up with a random value. This causes the NVMEM driver to append the device 'devid' value to name of the NVMEM device. 
Ideally for I2C devices such as the AT24 that already have a unique name, we would not bother to append the 'devid'. However, given that this has always been done for AT24 devices, we cannot remove the 'devid' as this will change the name of the userspace sysfs node for the NVMEM device. Nonetheless we should ensure that the 'id' field of the nvmem_config structure is initialised so that there is no chance of a random value causing problems in the future. Therefore, set the NVMEM config.id to NVMEM_DEVID_AUTO for AT24 EEPROMs so that the 'devid' is always appended. Signed-off-by: Jon Hunter Signed-off-by: Bartosz Golaszewski --- drivers/misc/eeprom/at24.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/misc/eeprom/at24.c b/drivers/misc/eeprom/at24.c index 00c8ac0677b4..2fde53dcfc97 100644 --- a/drivers/misc/eeprom/at24.c +++ b/drivers/misc/eeprom/at24.c @@ -716,6 +716,7 @@ static int at24_probe(struct i2c_client *client) nvmem_config.type = NVMEM_TYPE_EEPROM; nvmem_config.name = dev_name(dev); nvmem_config.dev = dev; + nvmem_config.id = NVMEM_DEVID_AUTO; nvmem_config.read_only = !writable; nvmem_config.root_only = !(flags & AT24_FLAG_IRUGO); nvmem_config.owner = THIS_MODULE; From 412b7a521c3094cc0d664dd20d114c717df89896 Mon Sep 17 00:00:00 2001 From: Jon Hunter Date: Wed, 16 Sep 2020 10:49:49 +0100 Subject: [PATCH 101/243] dt-bindings: eeprom: at24: Add label property for AT24 Add a label property for the AT24 EEPROM to allow a custom name to be used for identifying the EEPROM on a board. This is useful when there is more than one EEPROM present.
Signed-off-by: Jon Hunter Reviewed-by: Rob Herring Signed-off-by: Bartosz Golaszewski --- Documentation/devicetree/bindings/eeprom/at24.yaml | 3 +++ 1 file changed, 3 insertions(+) diff --git a/Documentation/devicetree/bindings/eeprom/at24.yaml b/Documentation/devicetree/bindings/eeprom/at24.yaml index 4cee72d53318..6edfa705b486 100644 --- a/Documentation/devicetree/bindings/eeprom/at24.yaml +++ b/Documentation/devicetree/bindings/eeprom/at24.yaml @@ -114,6 +114,9 @@ properties: - const: renesas,r1ex24128 - const: atmel,24c128 + label: + description: Descriptive name of the EEPROM. + reg: maxItems: 1 From 9f2664512711788e4e1d06f25a925eb7ac681582 Mon Sep 17 00:00:00 2001 From: Wang Qing Date: Thu, 17 Sep 2020 10:19:00 +0800 Subject: [PATCH 102/243] nfs: fix spelling typo in pnfs.c Change the comment typo: "manger" -> "manager". Signed-off-by: Wang Qing Signed-off-by: Anna Schumaker --- fs/nfs/pnfs.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fs/nfs/pnfs.c b/fs/nfs/pnfs.c index 71f7741126b6..0e50b9d45c32 100644 --- a/fs/nfs/pnfs.c +++ b/fs/nfs/pnfs.c @@ -902,7 +902,7 @@ pnfs_destroy_layouts_byclid(struct nfs_client *clp, } /* - * Called by the state manger to remove all layouts established under an + * Called by the state manager to remove all layouts established under an * expired lease. */ void From 76bd5c016ef49683d626a48748ef1764aaf8ba63 Mon Sep 17 00:00:00 2001 From: Olga Kornievskaia Date: Mon, 14 Sep 2020 17:05:08 -0400 Subject: [PATCH 103/243] NFSv4: make cache consistency bitmask dynamic Client uses static bitmask for GETATTR on CLOSE/WRITE/DELEGRETURN and ignores the fact that it might have some attributes marked invalid in its cache. Compared to v3 where all attributes are retrieved in postop attributes, v4's cache is frequently out of sync and leads to standalone GETATTRs being sent to the server. Instead, in addition to the minimum cache consistency attributes also check cache_validity and adjust the GETATTR request accordingly.
Signed-off-by: Olga Kornievskaia Signed-off-by: Anna Schumaker --- fs/nfs/nfs4proc.c | 45 ++++++++++++++++++++++++++++++++++++++--- include/linux/nfs_xdr.h | 6 +++--- 2 files changed, 45 insertions(+), 6 deletions(-) diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c index aca52e52538f..542961ffa529 100644 --- a/fs/nfs/nfs4proc.c +++ b/fs/nfs/nfs4proc.c @@ -107,6 +107,9 @@ static int nfs41_test_stateid(struct nfs_server *, nfs4_stateid *, static int nfs41_free_stateid(struct nfs_server *, const nfs4_stateid *, const struct cred *, bool); #endif +static void nfs4_bitmask_adjust(__u32 *bitmask, struct inode *inode, + struct nfs_server *server, + struct nfs4_label *label); #ifdef CONFIG_NFS_V4_SECURITY_LABEL static inline struct nfs4_label * @@ -3632,9 +3635,10 @@ static void nfs4_close_prepare(struct rpc_task *task, void *data) if (calldata->arg.fmode == 0 || calldata->arg.fmode == FMODE_READ) { /* Close-to-open cache consistency revalidation */ - if (!nfs4_have_delegation(inode, FMODE_READ)) + if (!nfs4_have_delegation(inode, FMODE_READ)) { calldata->arg.bitmask = NFS_SERVER(inode)->cache_consistency_bitmask; - else + nfs4_bitmask_adjust(calldata->arg.bitmask, inode, NFS_SERVER(inode), NULL); + } else calldata->arg.bitmask = NULL; } @@ -5360,6 +5364,38 @@ bool nfs4_write_need_cache_consistency_data(struct nfs_pgio_header *hdr) return nfs4_have_delegation(hdr->inode, FMODE_READ) == 0; } +static void nfs4_bitmask_adjust(__u32 *bitmask, struct inode *inode, + struct nfs_server *server, + struct nfs4_label *label) +{ + + unsigned long cache_validity = READ_ONCE(NFS_I(inode)->cache_validity); + + if ((cache_validity & NFS_INO_INVALID_DATA) || + (cache_validity & NFS_INO_REVAL_PAGECACHE) || + (cache_validity & NFS_INO_REVAL_FORCED) || + (cache_validity & NFS_INO_INVALID_OTHER)) + nfs4_bitmap_copy_adjust(bitmask, nfs4_bitmask(server, label), inode); + + if (cache_validity & NFS_INO_INVALID_ATIME) + bitmask[1] |= FATTR4_WORD1_TIME_ACCESS; + if (cache_validity & 
NFS_INO_INVALID_ACCESS) + bitmask[0] |= FATTR4_WORD1_MODE | FATTR4_WORD1_OWNER | + FATTR4_WORD1_OWNER_GROUP; + if (cache_validity & NFS_INO_INVALID_ACL) + bitmask[0] |= FATTR4_WORD0_ACL; + if (cache_validity & NFS_INO_INVALID_LABEL) + bitmask[2] |= FATTR4_WORD2_SECURITY_LABEL; + if (cache_validity & NFS_INO_INVALID_CTIME) + bitmask[0] |= FATTR4_WORD0_CHANGE; + if (cache_validity & NFS_INO_INVALID_MTIME) + bitmask[1] |= FATTR4_WORD1_TIME_MODIFY; + if (cache_validity & NFS_INO_INVALID_SIZE) + bitmask[0] |= FATTR4_WORD0_SIZE; + if (cache_validity & NFS_INO_INVALID_BLOCKS) + bitmask[1] |= FATTR4_WORD1_SPACE_USED; +} + static void nfs4_proc_write_setup(struct nfs_pgio_header *hdr, struct rpc_message *msg, struct rpc_clnt **clnt) @@ -5369,8 +5405,10 @@ static void nfs4_proc_write_setup(struct nfs_pgio_header *hdr, if (!nfs4_write_need_cache_consistency_data(hdr)) { hdr->args.bitmask = NULL; hdr->res.fattr = NULL; - } else + } else { hdr->args.bitmask = server->cache_consistency_bitmask; + nfs4_bitmask_adjust(hdr->args.bitmask, hdr->inode, server, NULL); + } if (!hdr->pgio_done_cb) hdr->pgio_done_cb = nfs4_write_done_cb; @@ -6406,6 +6444,7 @@ static int _nfs4_proc_delegreturn(struct inode *inode, const struct cred *cred, data->args.fhandle = &data->fh; data->args.stateid = &data->stateid; data->args.bitmask = server->cache_consistency_bitmask; + nfs4_bitmask_adjust(data->args.bitmask, inode, server, NULL); nfs_copy_fh(&data->fh, NFS_FH(inode)); nfs4_stateid_copy(&data->stateid, stateid); data->res.fattr = &data->fattr; diff --git a/include/linux/nfs_xdr.h b/include/linux/nfs_xdr.h index 69cb46f7b8d2..0599efd57eb9 100644 --- a/include/linux/nfs_xdr.h +++ b/include/linux/nfs_xdr.h @@ -525,7 +525,7 @@ struct nfs_closeargs { struct nfs_seqid * seqid; fmode_t fmode; u32 share_access; - const u32 * bitmask; + u32 * bitmask; struct nfs4_layoutreturn_args *lr_args; }; @@ -608,7 +608,7 @@ struct nfs4_delegreturnargs { struct nfs4_sequence_args seq_args; const struct nfs_fh 
*fhandle; const nfs4_stateid *stateid; - const u32 * bitmask; + u32 * bitmask; struct nfs4_layoutreturn_args *lr_args; }; @@ -648,7 +648,7 @@ struct nfs_pgio_args { union { unsigned int replen; /* used by read */ struct { - const u32 * bitmask; /* used by write */ + u32 * bitmask; /* used by write */ enum nfs3_stable_how stable; /* used by write */ }; }; From 61f764c307f6b2079b7af0d4fb7951402b824967 Mon Sep 17 00:00:00 2001 From: Jon Hunter Date: Thu, 24 Sep 2020 15:20:39 +0200 Subject: [PATCH 104/243] eeprom: at24: Support custom device names for AT24 EEPROMs By using the label property, a more descriptive name can be populated for AT24 EEPROMs NVMEM device. Update the AT24 driver to check to see if the label property is present and if so, use this as the name for NVMEM device. Please note that when the 'label' property is present for the AT24 EEPROM, we do not want the NVMEM driver to append the 'devid' to the name and so the nvmem_config.id is initialised to NVMEM_DEVID_NONE. Signed-off-by: Jon Hunter Signed-off-by: Bartosz Golaszewski --- drivers/misc/eeprom/at24.c | 22 +++++++++++++++++++++- 1 file changed, 21 insertions(+), 1 deletion(-) diff --git a/drivers/misc/eeprom/at24.c b/drivers/misc/eeprom/at24.c index 2fde53dcfc97..4aa96d8e78ef 100644 --- a/drivers/misc/eeprom/at24.c +++ b/drivers/misc/eeprom/at24.c @@ -713,8 +713,28 @@ static int at24_probe(struct i2c_client *client) return err; } + /* + * If the 'label' property is not present for the AT24 EEPROM, + * then nvmem_config.id is initialised to NVMEM_DEVID_AUTO, + * and this will append the 'devid' to the name of the NVMEM + * device. This is purely legacy and the AT24 driver has always + * defaulted to this. However, if the 'label' property is + * present then this means that the name is specified by the + * firmware and this name should be used verbatim and so it is + * not necessary to append the 'devid'. 
+ */ + if (device_property_present(dev, "label")) { + nvmem_config.id = NVMEM_DEVID_NONE; + err = device_property_read_string(dev, "label", + &nvmem_config.name); + if (err) + return err; + } else { + nvmem_config.id = NVMEM_DEVID_AUTO; + nvmem_config.name = dev_name(dev); + } + nvmem_config.type = NVMEM_TYPE_EEPROM; - nvmem_config.name = dev_name(dev); nvmem_config.dev = dev; nvmem_config.id = NVMEM_DEVID_AUTO; nvmem_config.read_only = !writable; From d9becc53b3ade81e234205e1983f2a0240974f89 Mon Sep 17 00:00:00 2001 From: Khalil Blaiech Date: Tue, 22 Sep 2020 18:49:38 -0400 Subject: [PATCH 105/243] dt-bindings: i2c: I2C binding for Mellanox BlueField SoC Add device tree bindings documentation for Mellanox BlueField I2C SMBus controller. Signed-off-by: Khalil Blaiech Signed-off-by: Wolfram Sang --- .../bindings/i2c/mellanox,i2c-mlxbf.txt | 42 +++++++++++++++++++ 1 file changed, 42 insertions(+) create mode 100644 Documentation/devicetree/bindings/i2c/mellanox,i2c-mlxbf.txt diff --git a/Documentation/devicetree/bindings/i2c/mellanox,i2c-mlxbf.txt b/Documentation/devicetree/bindings/i2c/mellanox,i2c-mlxbf.txt new file mode 100644 index 000000000000..566ea861aa00 --- /dev/null +++ b/Documentation/devicetree/bindings/i2c/mellanox,i2c-mlxbf.txt @@ -0,0 +1,42 @@ +Device tree configuration for the Mellanox I2C SMBus on BlueField SoCs + +Required Properties: + +- compatible : should be "mellanox,i2c-mlxbf1" or "mellanox,i2c-mlxbf2". + +- reg : address offset and length of the device registers. The + registers consist of the following set of resources: + 1) Smbus block registers. + 2) Cause master registers. + 3) Cause slave registers. + 4) Cause coalesce registers (if compatible isn't set + to "mellanox,i2c-mlxbf1"). + +- interrupts : interrupt number. + +Optional Properties: + +- clock-frequency : bus frequency used to configure timing registers; + allowed values are 100000, 400000 and 1000000; + those are expressed in Hz. Default is 100000. 
+ +Example: + +i2c@2804000 { + compatible = "mellanox,i2c-mlxbf1"; + reg = <0x02804000 0x800>, + <0x02801200 0x020>, + <0x02801260 0x020>; + interrupts = <57>; + clock-frequency = <100000>; +}; + +i2c@2808800 { + compatible = "mellanox,i2c-mlxbf2"; + reg = <0x02808800 0x600>, + <0x02808e00 0x020>, + <0x02808e20 0x020>, + <0x02808e40 0x010>; + interrupts = <57>; + clock-frequency = <400000>; +}; From b5b5b32081cd206baa6e58cca7f112d9723785d6 Mon Sep 17 00:00:00 2001 From: Khalil Blaiech Date: Tue, 22 Sep 2020 18:49:37 -0400 Subject: [PATCH 106/243] i2c: mlxbf: I2C SMBus driver for Mellanox BlueField SoC Add BlueField I2C driver to offer master and slave support for Mellanox BlueField SoCs. The driver implements an SMBus adapter and interfaces to multiple busses that can be probed using both ACPI and Device Tree infrastructures. The driver supports several SMBus operations to transfer data back and forth from/to various I2C devices. It is mainly intended to be consumed by userspace tools and utilities, such as i2c-tools and decode-dimms to collect memory module information. On the other hand, the driver has a slave function to support, among others, an IPMB interface that requires both master and slave functions to handle transfers between the BlueField SoC and a board management controllers (e.g., BMC). 
Signed-off-by: Khalil Blaiech Reviewed-by: Vadim Pasternak Signed-off-by: Wolfram Sang --- MAINTAINERS | 6 + drivers/i2c/busses/Kconfig | 13 + drivers/i2c/busses/Makefile | 1 + drivers/i2c/busses/i2c-mlxbf.c | 2506 ++++++++++++++++++++++++++++++++ 4 files changed, 2526 insertions(+) create mode 100644 drivers/i2c/busses/i2c-mlxbf.c diff --git a/MAINTAINERS b/MAINTAINERS index f0068bceeb61..8580de35179f 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -11071,6 +11071,12 @@ W: http://www.melfas.com F: Documentation/devicetree/bindings/input/touchscreen/melfas_mip4.txt F: drivers/input/touchscreen/melfas_mip4.c +MELLANOX BLUEFIELD I2C DRIVER +M: Khalil Blaiech +L: linux-i2c@vger.kernel.org +S: Supported +F: drivers/i2c/busses/i2c-mlxbf.c + MELLANOX ETHERNET DRIVER (mlx4_en) M: Tariq Toukan L: netdev@vger.kernel.org diff --git a/drivers/i2c/busses/Kconfig b/drivers/i2c/busses/Kconfig index 2077ed8de681..96685b273f63 100644 --- a/drivers/i2c/busses/Kconfig +++ b/drivers/i2c/busses/Kconfig @@ -730,6 +730,19 @@ config I2C_LPC2K This driver can also be built as a module. If so, the module will be called i2c-lpc2k. +config I2C_MLXBF + tristate "Mellanox BlueField I2C controller" + depends on ARM64 + help + Enabling this option will add I2C SMBus support for Mellanox BlueField + system. + + This driver can also be built as a module. If so, the module will be + called i2c-mlxbf. + + This driver implements an I2C SMBus host controller and enables both + master and slave functions. 
+ config I2C_MESON tristate "Amlogic Meson I2C controller" depends on ARCH_MESON || COMPILE_TEST diff --git a/drivers/i2c/busses/Makefile b/drivers/i2c/busses/Makefile index 19aff0e45cb5..683c49faca05 100644 --- a/drivers/i2c/busses/Makefile +++ b/drivers/i2c/busses/Makefile @@ -140,6 +140,7 @@ obj-$(CONFIG_I2C_BRCMSTB) += i2c-brcmstb.o obj-$(CONFIG_I2C_CROS_EC_TUNNEL) += i2c-cros-ec-tunnel.o obj-$(CONFIG_I2C_ELEKTOR) += i2c-elektor.o obj-$(CONFIG_I2C_ICY) += i2c-icy.o +obj-$(CONFIG_I2C_MLXBF) += i2c-mlxbf.o obj-$(CONFIG_I2C_MLXCPLD) += i2c-mlxcpld.o obj-$(CONFIG_I2C_OPAL) += i2c-opal.o obj-$(CONFIG_I2C_PCA_ISA) += i2c-pca-isa.o diff --git a/drivers/i2c/busses/i2c-mlxbf.c b/drivers/i2c/busses/i2c-mlxbf.c new file mode 100644 index 000000000000..ee59e0da082d --- /dev/null +++ b/drivers/i2c/busses/i2c-mlxbf.c @@ -0,0 +1,2506 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Mellanox BlueField I2C bus driver + * + * Copyright (C) 2020 Mellanox Technologies, Ltd. + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +/* Defines what functionality is present. */ +#define MLXBF_I2C_FUNC_SMBUS_BLOCK \ + (I2C_FUNC_SMBUS_BLOCK_DATA | I2C_FUNC_SMBUS_BLOCK_PROC_CALL) + +#define MLXBF_I2C_FUNC_SMBUS_DEFAULT \ + (I2C_FUNC_SMBUS_BYTE | I2C_FUNC_SMBUS_BYTE_DATA | \ + I2C_FUNC_SMBUS_WORD_DATA | I2C_FUNC_SMBUS_I2C_BLOCK | \ + I2C_FUNC_SMBUS_PROC_CALL) + +#define MLXBF_I2C_FUNC_ALL \ + (MLXBF_I2C_FUNC_SMBUS_DEFAULT | MLXBF_I2C_FUNC_SMBUS_BLOCK | \ + I2C_FUNC_SMBUS_QUICK | I2C_FUNC_SLAVE) + +#define MLXBF_I2C_SMBUS_MAX 3 + +/* Shared resources info in BlueField platforms. 
*/ + +#define MLXBF_I2C_COALESCE_TYU_ADDR 0x02801300 +#define MLXBF_I2C_COALESCE_TYU_SIZE 0x010 + +#define MLXBF_I2C_GPIO_TYU_ADDR 0x02802000 +#define MLXBF_I2C_GPIO_TYU_SIZE 0x100 + +#define MLXBF_I2C_COREPLL_TYU_ADDR 0x02800358 +#define MLXBF_I2C_COREPLL_TYU_SIZE 0x008 + +#define MLXBF_I2C_COREPLL_YU_ADDR 0x02800c30 +#define MLXBF_I2C_COREPLL_YU_SIZE 0x00c + +#define MLXBF_I2C_SHARED_RES_MAX 3 + +/* + * Note that the following SMBus, CAUSE, GPIO and PLL register addresses + * refer to their respective offsets relative to the corresponding + * memory-mapped region whose addresses are specified in either the DT or + * the ACPI tables or above. + */ + +/* + * SMBus Master core clock frequency. Timing configurations are + * strongly dependent on the core clock frequency of the SMBus + * Master. Default value is set to 400MHz. + */ +#define MLXBF_I2C_TYU_PLL_OUT_FREQ (400 * 1000 * 1000) +/* Reference clock for Bluefield 1 - 156 MHz. */ +#define MLXBF_I2C_TYU_PLL_IN_FREQ (156 * 1000 * 1000) +/* Reference clock for BlueField 2 - 200 MHz. */ +#define MLXBF_I2C_YU_PLL_IN_FREQ (200 * 1000 * 1000) + +/* Constant used to determine the PLL frequency. */ +#define MLNXBF_I2C_COREPLL_CONST 16384 + +/* PLL registers. */ +#define MLXBF_I2C_CORE_PLL_REG0 0x0 +#define MLXBF_I2C_CORE_PLL_REG1 0x4 +#define MLXBF_I2C_CORE_PLL_REG2 0x8 + +/* OR cause register. */ +#define MLXBF_I2C_CAUSE_OR_EVTEN0 0x14 +#define MLXBF_I2C_CAUSE_OR_CLEAR 0x18 + +/* Arbiter Cause Register. */ +#define MLXBF_I2C_CAUSE_ARBITER 0x1c + +/* + * Cause Status flags. Note that those bits might be considered + * as interrupt enabled bits. + */ + +/* Transaction ended with STOP. */ +#define MLXBF_I2C_CAUSE_TRANSACTION_ENDED BIT(0) +/* Master arbitration lost. */ +#define MLXBF_I2C_CAUSE_M_ARBITRATION_LOST BIT(1) +/* Unexpected start detected. */ +#define MLXBF_I2C_CAUSE_UNEXPECTED_START BIT(2) +/* Unexpected stop detected. */ +#define MLXBF_I2C_CAUSE_UNEXPECTED_STOP BIT(3) +/* Wait for transfer continuation. 
*/ +#define MLXBF_I2C_CAUSE_WAIT_FOR_FW_DATA BIT(4) +/* Failed to generate STOP. */ +#define MLXBF_I2C_CAUSE_PUT_STOP_FAILED BIT(5) +/* Failed to generate START. */ +#define MLXBF_I2C_CAUSE_PUT_START_FAILED BIT(6) +/* Clock toggle completed. */ +#define MLXBF_I2C_CAUSE_CLK_TOGGLE_DONE BIT(7) +/* Transfer timeout occurred. */ +#define MLXBF_I2C_CAUSE_M_FW_TIMEOUT BIT(8) +/* Master busy bit reset. */ +#define MLXBF_I2C_CAUSE_M_GW_BUSY_FALL BIT(9) + +#define MLXBF_I2C_CAUSE_MASTER_ARBITER_BITS_MASK GENMASK(9, 0) + +#define MLXBF_I2C_CAUSE_MASTER_STATUS_ERROR \ + (MLXBF_I2C_CAUSE_M_ARBITRATION_LOST | \ + MLXBF_I2C_CAUSE_UNEXPECTED_START | \ + MLXBF_I2C_CAUSE_UNEXPECTED_STOP | \ + MLXBF_I2C_CAUSE_PUT_STOP_FAILED | \ + MLXBF_I2C_CAUSE_PUT_START_FAILED | \ + MLXBF_I2C_CAUSE_CLK_TOGGLE_DONE | \ + MLXBF_I2C_CAUSE_M_FW_TIMEOUT) + +/* + * Slave cause status flags. Note that those bits might be considered + * as interrupt enabled bits. + */ + +/* Write transaction received successfully. */ +#define MLXBF_I2C_CAUSE_WRITE_SUCCESS BIT(0) +/* Read transaction received, waiting for response. */ +#define MLXBF_I2C_CAUSE_READ_WAIT_FW_RESPONSE BIT(13) +/* Slave busy bit reset. */ +#define MLXBF_I2C_CAUSE_S_GW_BUSY_FALL BIT(18) + +#define MLXBF_I2C_CAUSE_SLAVE_ARBITER_BITS_MASK GENMASK(20, 0) + +/* Cause coalesce registers. */ +#define MLXBF_I2C_CAUSE_COALESCE_0 0x00 +#define MLXBF_I2C_CAUSE_COALESCE_1 0x04 +#define MLXBF_I2C_CAUSE_COALESCE_2 0x08 + +#define MLXBF_I2C_CAUSE_TYU_SLAVE_BIT MLXBF_I2C_SMBUS_MAX +#define MLXBF_I2C_CAUSE_YU_SLAVE_BIT 1 + +/* Functional enable register. */ +#define MLXBF_I2C_GPIO_0_FUNC_EN_0 0x28 +/* Force OE enable register. */ +#define MLXBF_I2C_GPIO_0_FORCE_OE_EN 0x30 +/* + * Note that Smbus GWs are on GPIOs 30:25. Two pins are used to control + * SDA/SCL lines: + * + * SMBUS GW0 -> bits[26:25] + * SMBUS GW1 -> bits[28:27] + * SMBUS GW2 -> bits[30:29] + */ +#define MLXBF_I2C_GPIO_SMBUS_GW_PINS(num) (25 + ((num) << 1)) + +/* Note that gw_id can be 0,1 or 2. 
*/ +#define MLXBF_I2C_GPIO_SMBUS_GW_MASK(num) \ + (0xffffffff & (~(0x3 << MLXBF_I2C_GPIO_SMBUS_GW_PINS(num)))) + +#define MLXBF_I2C_GPIO_SMBUS_GW_RESET_PINS(num, val) \ + ((val) & MLXBF_I2C_GPIO_SMBUS_GW_MASK(num)) + +#define MLXBF_I2C_GPIO_SMBUS_GW_ASSERT_PINS(num, val) \ + ((val) | (0x3 << MLXBF_I2C_GPIO_SMBUS_GW_PINS(num))) + +/* SMBus timing parameters. */ +#define MLXBF_I2C_SMBUS_TIMER_SCL_LOW_SCL_HIGH 0x00 +#define MLXBF_I2C_SMBUS_TIMER_FALL_RISE_SPIKE 0x04 +#define MLXBF_I2C_SMBUS_TIMER_THOLD 0x08 +#define MLXBF_I2C_SMBUS_TIMER_TSETUP_START_STOP 0x0c +#define MLXBF_I2C_SMBUS_TIMER_TSETUP_DATA 0x10 +#define MLXBF_I2C_SMBUS_THIGH_MAX_TBUF 0x14 +#define MLXBF_I2C_SMBUS_SCL_LOW_TIMEOUT 0x18 + +enum { + MLXBF_I2C_TIMING_100KHZ = 100000, + MLXBF_I2C_TIMING_400KHZ = 400000, + MLXBF_I2C_TIMING_1000KHZ = 1000000, +}; + +/* + * Defines SMBus operating frequency and core clock frequency. + * According to ADB files, default values are compliant to 100KHz SMBus + * @ 400MHz core clock. The driver should be able to calculate core + * frequency based on PLL parameters. + */ +#define MLXBF_I2C_COREPLL_FREQ MLXBF_I2C_TYU_PLL_OUT_FREQ + +/* Core PLL TYU configuration. */ +#define MLXBF_I2C_COREPLL_CORE_F_TYU_MASK GENMASK(12, 0) +#define MLXBF_I2C_COREPLL_CORE_OD_TYU_MASK GENMASK(3, 0) +#define MLXBF_I2C_COREPLL_CORE_R_TYU_MASK GENMASK(5, 0) + +#define MLXBF_I2C_COREPLL_CORE_F_TYU_SHIFT 3 +#define MLXBF_I2C_COREPLL_CORE_OD_TYU_SHIFT 16 +#define MLXBF_I2C_COREPLL_CORE_R_TYU_SHIFT 20 + +/* Core PLL YU configuration. */ +#define MLXBF_I2C_COREPLL_CORE_F_YU_MASK GENMASK(25, 0) +#define MLXBF_I2C_COREPLL_CORE_OD_YU_MASK GENMASK(3, 0) +#define MLXBF_I2C_COREPLL_CORE_R_YU_MASK GENMASK(5, 0) + +#define MLXBF_I2C_COREPLL_CORE_F_YU_SHIFT 0 +#define MLXBF_I2C_COREPLL_CORE_OD_YU_SHIFT 1 +#define MLXBF_I2C_COREPLL_CORE_R_YU_SHIFT 26 + +/* Core PLL frequency. */ +static u64 mlxbf_i2c_corepll_frequency; + +/* SMBus Master GW. 
*/ +#define MLXBF_I2C_SMBUS_MASTER_GW 0x200 +/* Number of bytes received and sent. */ +#define MLXBF_I2C_SMBUS_RS_BYTES 0x300 +/* Packet error check (PEC) value. */ +#define MLXBF_I2C_SMBUS_MASTER_PEC 0x304 +/* Status bits (ACK/NACK/FW Timeout). */ +#define MLXBF_I2C_SMBUS_MASTER_STATUS 0x308 +/* SMbus Master Finite State Machine. */ +#define MLXBF_I2C_SMBUS_MASTER_FSM 0x310 + +/* + * When enabled, the master will issue a stop condition in case of + * timeout while waiting for FW response. + */ +#define MLXBF_I2C_SMBUS_EN_FW_TIMEOUT 0x31c + +/* SMBus master GW control bits offset in MLXBF_I2C_SMBUS_MASTER_GW[31:3]. */ +#define MLXBF_I2C_MASTER_LOCK_BIT BIT(31) /* Lock bit. */ +#define MLXBF_I2C_MASTER_BUSY_BIT BIT(30) /* Busy bit. */ +#define MLXBF_I2C_MASTER_START_BIT BIT(29) /* Control start. */ +#define MLXBF_I2C_MASTER_CTL_WRITE_BIT BIT(28) /* Control write phase. */ +#define MLXBF_I2C_MASTER_CTL_READ_BIT BIT(19) /* Control read phase. */ +#define MLXBF_I2C_MASTER_STOP_BIT BIT(3) /* Control stop. */ + +#define MLXBF_I2C_MASTER_ENABLE \ + (MLXBF_I2C_MASTER_LOCK_BIT | MLXBF_I2C_MASTER_BUSY_BIT | \ + MLXBF_I2C_MASTER_START_BIT | MLXBF_I2C_MASTER_STOP_BIT) + +#define MLXBF_I2C_MASTER_ENABLE_WRITE \ + (MLXBF_I2C_MASTER_ENABLE | MLXBF_I2C_MASTER_CTL_WRITE_BIT) + +#define MLXBF_I2C_MASTER_ENABLE_READ \ + (MLXBF_I2C_MASTER_ENABLE | MLXBF_I2C_MASTER_CTL_READ_BIT) + +#define MLXBF_I2C_MASTER_SLV_ADDR_SHIFT 12 /* Slave address shift. */ +#define MLXBF_I2C_MASTER_WRITE_SHIFT 21 /* Control write bytes shift. */ +#define MLXBF_I2C_MASTER_SEND_PEC_SHIFT 20 /* Send PEC byte shift. */ +#define MLXBF_I2C_MASTER_PARSE_EXP_SHIFT 11 /* Parse expected bytes shift. */ +#define MLXBF_I2C_MASTER_READ_SHIFT 4 /* Control read bytes shift. */ + +/* SMBus master GW Data descriptor. */ +#define MLXBF_I2C_MASTER_DATA_DESC_ADDR 0x280 +#define MLXBF_I2C_MASTER_DATA_DESC_SIZE 0x80 /* Size in bytes. */ + +/* Maximum bytes to read/write per SMBus transaction. 
*/ +#define MLXBF_I2C_MASTER_DATA_R_LENGTH MLXBF_I2C_MASTER_DATA_DESC_SIZE +#define MLXBF_I2C_MASTER_DATA_W_LENGTH (MLXBF_I2C_MASTER_DATA_DESC_SIZE - 1) + +/* All bytes were transmitted. */ +#define MLXBF_I2C_SMBUS_STATUS_BYTE_CNT_DONE BIT(0) +/* NACK received. */ +#define MLXBF_I2C_SMBUS_STATUS_NACK_RCV BIT(1) +/* Slave's byte count >128 bytes. */ +#define MLXBF_I2C_SMBUS_STATUS_READ_ERR BIT(2) +/* Timeout occurred. */ +#define MLXBF_I2C_SMBUS_STATUS_FW_TIMEOUT BIT(3) + +#define MLXBF_I2C_SMBUS_MASTER_STATUS_MASK GENMASK(3, 0) + +#define MLXBF_I2C_SMBUS_MASTER_STATUS_ERROR \ + (MLXBF_I2C_SMBUS_STATUS_NACK_RCV | \ + MLXBF_I2C_SMBUS_STATUS_READ_ERR | \ + MLXBF_I2C_SMBUS_STATUS_FW_TIMEOUT) + +#define MLXBF_I2C_SMBUS_MASTER_FSM_STOP_MASK BIT(31) +#define MLXBF_I2C_SMBUS_MASTER_FSM_PS_STATE_MASK BIT(15) + +/* SMBus slave GW. */ +#define MLXBF_I2C_SMBUS_SLAVE_GW 0x400 +/* Number of bytes received and sent from/to master. */ +#define MLXBF_I2C_SMBUS_SLAVE_RS_MASTER_BYTES 0x500 +/* Packet error check (PEC) value. */ +#define MLXBF_I2C_SMBUS_SLAVE_PEC 0x504 +/* SMBus slave Finite State Machine (FSM). */ +#define MLXBF_I2C_SMBUS_SLAVE_FSM 0x510 +/* + * Should be set when all raised causes handled, and cleared by HW on + * every new cause. + */ +#define MLXBF_I2C_SMBUS_SLAVE_READY 0x52c + +/* SMBus slave GW control bits offset in MLXBF_I2C_SMBUS_SLAVE_GW[31:19]. */ +#define MLXBF_I2C_SLAVE_BUSY_BIT BIT(30) /* Busy bit. */ +#define MLXBF_I2C_SLAVE_WRITE_BIT BIT(29) /* Control write enable. */ + +#define MLXBF_I2C_SLAVE_ENABLE \ + (MLXBF_I2C_SLAVE_BUSY_BIT | MLXBF_I2C_SLAVE_WRITE_BIT) + +#define MLXBF_I2C_SLAVE_WRITE_BYTES_SHIFT 22 /* Number of bytes to write. */ +#define MLXBF_I2C_SLAVE_SEND_PEC_SHIFT 21 /* Send PEC byte shift. */ + +/* SMBus slave GW Data descriptor. */ +#define MLXBF_I2C_SLAVE_DATA_DESC_ADDR 0x480 +#define MLXBF_I2C_SLAVE_DATA_DESC_SIZE 0x80 /* Size in bytes. */ + +/* SMbus slave configuration registers. 
*/ +#define MLXBF_I2C_SMBUS_SLAVE_ADDR_CFG 0x514 +#define MLXBF_I2C_SMBUS_SLAVE_ADDR_CNT 16 +#define MLXBF_I2C_SMBUS_SLAVE_ADDR_EN_BIT 7 +#define MLXBF_I2C_SMBUS_SLAVE_ADDR_MASK GENMASK(6, 0) + +#define MLXBF_I2C_SLAVE_ADDR_ENABLED(addr) \ + ((addr) & (1 << MLXBF_I2C_SMBUS_SLAVE_ADDR_EN_BIT)) + +/* + * Timeout is given in microseconds. Note also that timeout handling is not + * exact. + */ +#define MLXBF_I2C_SMBUS_TIMEOUT (300 * 1000) /* 300ms */ + +/* Encapsulates timing parameters. */ +struct mlxbf_i2c_timings { + u16 scl_high; /* Clock high period. */ + u16 scl_low; /* Clock low period. */ + u8 sda_rise; /* Data rise time. */ + u8 sda_fall; /* Data fall time. */ + u8 scl_rise; /* Clock rise time. */ + u8 scl_fall; /* Clock fall time. */ + u16 hold_start; /* Hold time after (REPEATED) START. */ + u16 hold_data; /* Data hold time. */ + u16 setup_start; /* REPEATED START condition setup time. */ + u16 setup_stop; /* STOP condition setup time. */ + u16 setup_data; /* Data setup time. */ + u16 pad; /* Padding. */ + u16 buf; /* Bus free time between STOP and START. */ + u16 thigh_max; /* Thigh max. */ + u32 timeout; /* Detect clock low timeout. */ +}; + +enum { + MLXBF_I2C_F_READ = BIT(0), + MLXBF_I2C_F_WRITE = BIT(1), + MLXBF_I2C_F_NORESTART = BIT(3), + MLXBF_I2C_F_SMBUS_OPERATION = BIT(4), + MLXBF_I2C_F_SMBUS_BLOCK = BIT(5), + MLXBF_I2C_F_SMBUS_PEC = BIT(6), + MLXBF_I2C_F_SMBUS_PROCESS_CALL = BIT(7), +}; + +struct mlxbf_i2c_smbus_operation { + u32 flags; + u32 length; /* Buffer length in bytes. */ + u8 *buffer; +}; + +#define MLXBF_I2C_SMBUS_OP_CNT_1 1 +#define MLXBF_I2C_SMBUS_OP_CNT_2 2 +#define MLXBF_I2C_SMBUS_OP_CNT_3 3 +#define MLXBF_I2C_SMBUS_MAX_OP_CNT MLXBF_I2C_SMBUS_OP_CNT_3 + +struct mlxbf_i2c_smbus_request { + u8 slave; + u8 operation_cnt; + struct mlxbf_i2c_smbus_operation operation[MLXBF_I2C_SMBUS_MAX_OP_CNT]; +}; + +struct mlxbf_i2c_resource { + void __iomem *io; + struct resource *params; + struct mutex *lock; /* Mutex to protect mlxbf_i2c_resource.
*/ + u8 type; +}; + +/* List of chip resources that are being accessed by the driver. */ +enum { + MLXBF_I2C_SMBUS_RES, + MLXBF_I2C_MST_CAUSE_RES, + MLXBF_I2C_SLV_CAUSE_RES, + MLXBF_I2C_COALESCE_RES, + MLXBF_I2C_COREPLL_RES, + MLXBF_I2C_GPIO_RES, + MLXBF_I2C_END_RES, +}; + +/* Helper macro to define I2C resource parameters. */ +#define MLXBF_I2C_RES_PARAMS(addr, size, str) \ + { \ + .start = (addr), \ + .end = (addr) + (size) - 1, \ + .name = (str) \ + } + +static struct resource mlxbf_i2c_coalesce_tyu_params = + MLXBF_I2C_RES_PARAMS(MLXBF_I2C_COALESCE_TYU_ADDR, + MLXBF_I2C_COALESCE_TYU_SIZE, + "COALESCE_MEM"); +static struct resource mlxbf_i2c_corepll_tyu_params = + MLXBF_I2C_RES_PARAMS(MLXBF_I2C_COREPLL_TYU_ADDR, + MLXBF_I2C_COREPLL_TYU_SIZE, + "COREPLL_MEM"); +static struct resource mlxbf_i2c_corepll_yu_params = + MLXBF_I2C_RES_PARAMS(MLXBF_I2C_COREPLL_YU_ADDR, + MLXBF_I2C_COREPLL_YU_SIZE, + "COREPLL_MEM"); +static struct resource mlxbf_i2c_gpio_tyu_params = + MLXBF_I2C_RES_PARAMS(MLXBF_I2C_GPIO_TYU_ADDR, + MLXBF_I2C_GPIO_TYU_SIZE, + "GPIO_MEM"); + +static struct mutex mlxbf_i2c_coalesce_lock; +static struct mutex mlxbf_i2c_corepll_lock; +static struct mutex mlxbf_i2c_gpio_lock; + +/* Mellanox BlueField chip type. */ +enum mlxbf_i2c_chip_type { + MLXBF_I2C_CHIP_TYPE_1, /* Mellanox BlueField-1 chip. */ + MLXBF_I2C_CHIP_TYPE_2, /* Mellanox BlueField-2 chip. */ +}; + +struct mlxbf_i2c_chip_info { + enum mlxbf_i2c_chip_type type; + /* Chip shared resources that are being used by the I2C controller. */ + struct mlxbf_i2c_resource *shared_res[MLXBF_I2C_SHARED_RES_MAX]; + + /* Callback to calculate the core PLL frequency.
*/ + u64 (*calculate_freq)(struct mlxbf_i2c_resource *corepll_res); +}; + +struct mlxbf_i2c_priv { + const struct mlxbf_i2c_chip_info *chip; + struct i2c_adapter adap; + struct mlxbf_i2c_resource *smbus; + struct mlxbf_i2c_resource *mst_cause; + struct mlxbf_i2c_resource *slv_cause; + struct mlxbf_i2c_resource *coalesce; + u64 frequency; /* Core frequency in Hz. */ + int bus; /* Physical bus identifier. */ + int irq; + struct i2c_client *slave; +}; + +static struct mlxbf_i2c_resource mlxbf_i2c_coalesce_res[] = { + [MLXBF_I2C_CHIP_TYPE_1] = { + .params = &mlxbf_i2c_coalesce_tyu_params, + .lock = &mlxbf_i2c_coalesce_lock, + .type = MLXBF_I2C_COALESCE_RES + }, + {} +}; + +static struct mlxbf_i2c_resource mlxbf_i2c_corepll_res[] = { + [MLXBF_I2C_CHIP_TYPE_1] = { + .params = &mlxbf_i2c_corepll_tyu_params, + .lock = &mlxbf_i2c_corepll_lock, + .type = MLXBF_I2C_COREPLL_RES + }, + [MLXBF_I2C_CHIP_TYPE_2] = { + .params = &mlxbf_i2c_corepll_yu_params, + .lock = &mlxbf_i2c_corepll_lock, + .type = MLXBF_I2C_COREPLL_RES, + } +}; + +static struct mlxbf_i2c_resource mlxbf_i2c_gpio_res[] = { + [MLXBF_I2C_CHIP_TYPE_1] = { + .params = &mlxbf_i2c_gpio_tyu_params, + .lock = &mlxbf_i2c_gpio_lock, + .type = MLXBF_I2C_GPIO_RES + }, + {} +}; + +static u8 mlxbf_i2c_bus_count; + +static struct mutex mlxbf_i2c_bus_lock; + +/* Polling frequency in microseconds. */ +#define MLXBF_I2C_POLL_FREQ_IN_USEC 200 + +#define MLXBF_I2C_SHIFT_0 0 +#define MLXBF_I2C_SHIFT_8 8 +#define MLXBF_I2C_SHIFT_16 16 +#define MLXBF_I2C_SHIFT_24 24 + +#define MLXBF_I2C_MASK_8 GENMASK(7, 0) +#define MLXBF_I2C_MASK_16 GENMASK(15, 0) + +#define MLXBF_I2C_FREQUENCY_1GHZ 1000000000 + +static void mlxbf_i2c_write(void __iomem *io, int reg, u32 val) +{ + writel(val, io + reg); +} + +static u32 mlxbf_i2c_read(void __iomem *io, int reg) +{ + return readl(io + reg); +} + +/* + * This function is used to read data from Master GW Data Descriptor. 
+ * Data bytes in the Master GW Data Descriptor are shifted left so the + * data starts at the MSB of the descriptor registers as set by the + * underlying hardware. TYU_READ_DATA enables byte swapping while + * reading data bytes, and MUST be called by the SMBus read routines + * to copy data from the 32 * 32-bit HW Data registers a.k.a Master GW + * Data Descriptor. + */ +static u32 mlxbf_i2c_read_data(void __iomem *io, int reg) +{ + return (u32)be32_to_cpu(mlxbf_i2c_read(io, reg)); +} + +/* + * This function is used to write data to the Master GW Data Descriptor. + * Data copied to the Master GW Data Descriptor MUST be shifted left so + * the data starts at the MSB of the descriptor registers as required by + * the underlying hardware. TYU_WRITE_DATA enables byte swapping when + * writing data bytes, and MUST be called by the SMBus write routines to + * copy data to the 32 * 32-bit HW Data registers a.k.a Master GW Data + * Descriptor. + */ +static void mlxbf_i2c_write_data(void __iomem *io, int reg, u32 val) +{ + mlxbf_i2c_write(io, reg, (u32)cpu_to_be32(val)); +} + +/* + * Function to poll a set of bits at a specific address; it checks whether + * the bits are equal to zero when eq_zero is set to 'true', and not equal + * to zero when eq_zero is set to 'false'. + * Note that the timeout is given in microseconds. + */ +static u32 mlxbf_smbus_poll(void __iomem *io, u32 addr, u32 mask, + bool eq_zero, u32 timeout) +{ + u32 bits; + + timeout = (timeout / MLXBF_I2C_POLL_FREQ_IN_USEC) + 1; + + do { + bits = mlxbf_i2c_read(io, addr) & mask; + if (eq_zero ? bits == 0 : bits != 0) + return eq_zero ? 1 : bits; + udelay(MLXBF_I2C_POLL_FREQ_IN_USEC); + } while (timeout-- != 0); + + return 0; +} + +/* + * SW must make sure that the SMBus Master GW is idle before starting + * a transaction. Accordingly, this function polls the Master FSM stop + * bit; it returns false when the bit is asserted, true if not. 
+ */ +static bool mlxbf_smbus_master_wait_for_idle(struct mlxbf_i2c_priv *priv) +{ + u32 mask = MLXBF_I2C_SMBUS_MASTER_FSM_STOP_MASK; + u32 addr = MLXBF_I2C_SMBUS_MASTER_FSM; + u32 timeout = MLXBF_I2C_SMBUS_TIMEOUT; + + if (mlxbf_smbus_poll(priv->smbus->io, addr, mask, true, timeout)) + return true; + + return false; +} + +static bool mlxbf_i2c_smbus_transaction_success(u32 master_status, + u32 cause_status) +{ + /* + * When transaction ended with STOP, all bytes were transmitted, + * and no NACK received, then the transaction ended successfully. + * On the other hand, when the GW is configured with the stop bit + * de-asserted then the SMBus expects the following GW configuration + * for transfer continuation. + */ + if ((cause_status & MLXBF_I2C_CAUSE_WAIT_FOR_FW_DATA) || + ((cause_status & MLXBF_I2C_CAUSE_TRANSACTION_ENDED) && + (master_status & MLXBF_I2C_SMBUS_STATUS_BYTE_CNT_DONE) && + !(master_status & MLXBF_I2C_SMBUS_STATUS_NACK_RCV))) + return true; + + return false; +} + +/* + * Poll SMBus master status and return transaction status, + * i.e. whether succeeded or failed. I2C and SMBus fault codes + * are returned as negative numbers from most calls, with zero + * or some positive number indicating a non-fault return. + */ +static int mlxbf_i2c_smbus_check_status(struct mlxbf_i2c_priv *priv) +{ + u32 master_status_bits; + u32 cause_status_bits; + + /* + * GW busy bit is raised by the driver and cleared by the HW + * when the transaction is completed. The busy bit is a good + * indicator of transaction status. So poll the busy bit, and + * then read the cause and master status bits to determine if + * errors occurred during the transaction. + */ + mlxbf_smbus_poll(priv->smbus->io, MLXBF_I2C_SMBUS_MASTER_GW, + MLXBF_I2C_MASTER_BUSY_BIT, true, + MLXBF_I2C_SMBUS_TIMEOUT); + + /* Read cause status bits. 
*/ + cause_status_bits = mlxbf_i2c_read(priv->mst_cause->io, + MLXBF_I2C_CAUSE_ARBITER); + cause_status_bits &= MLXBF_I2C_CAUSE_MASTER_ARBITER_BITS_MASK; + + /* + * Parse both Cause and Master GW bits, then return transaction status. + */ + + master_status_bits = mlxbf_i2c_read(priv->smbus->io, + MLXBF_I2C_SMBUS_MASTER_STATUS); + master_status_bits &= MLXBF_I2C_SMBUS_MASTER_STATUS_MASK; + + if (mlxbf_i2c_smbus_transaction_success(master_status_bits, + cause_status_bits)) + return 0; + + /* + * In case of timeout on GW busy, the ISR will clear busy bit but + * transaction ended bits cause will not be set so the transaction + * fails. Then, we must check Master GW status bits. + */ + if ((master_status_bits & MLXBF_I2C_SMBUS_MASTER_STATUS_ERROR) && + (cause_status_bits & (MLXBF_I2C_CAUSE_TRANSACTION_ENDED | + MLXBF_I2C_CAUSE_M_GW_BUSY_FALL))) + return -EIO; + + if (cause_status_bits & MLXBF_I2C_CAUSE_MASTER_STATUS_ERROR) + return -EAGAIN; + + return -ETIMEDOUT; +} + +static void mlxbf_i2c_smbus_write_data(struct mlxbf_i2c_priv *priv, + const u8 *data, u8 length, u32 addr) +{ + u8 offset, aligned_length; + u32 data32; + + aligned_length = round_up(length, 4); + + /* Copy data bytes from 4-byte aligned source buffer. 
*/ + for (offset = 0; offset < aligned_length; offset += sizeof(u32)) { + data32 = *((u32 *)(data + offset)); + mlxbf_i2c_write_data(priv->smbus->io, addr + offset, data32); + } +} + +static void mlxbf_i2c_smbus_read_data(struct mlxbf_i2c_priv *priv, + u8 *data, u8 length, u32 addr) +{ + u32 data32, mask; + u8 byte, offset; + + mask = sizeof(u32) - 1; + + for (offset = 0; offset < (length & ~mask); offset += sizeof(u32)) { + data32 = mlxbf_i2c_read_data(priv->smbus->io, addr + offset); + *((u32 *)(data + offset)) = data32; + } + + if (!(length & mask)) + return; + + data32 = mlxbf_i2c_read_data(priv->smbus->io, addr + offset); + + for (byte = 0; byte < (length & mask); byte++) { + data[offset + byte] = data32 & GENMASK(7, 0); + data32 = ror32(data32, MLXBF_I2C_SHIFT_8); + } +} + +static int mlxbf_i2c_smbus_enable(struct mlxbf_i2c_priv *priv, u8 slave, + u8 len, u8 block_en, u8 pec_en, bool read) +{ + u32 command; + + /* Set Master GW control word. */ + if (read) { + command = MLXBF_I2C_MASTER_ENABLE_READ; + command |= rol32(len, MLXBF_I2C_MASTER_READ_SHIFT); + } else { + command = MLXBF_I2C_MASTER_ENABLE_WRITE; + command |= rol32(len, MLXBF_I2C_MASTER_WRITE_SHIFT); + } + command |= rol32(slave, MLXBF_I2C_MASTER_SLV_ADDR_SHIFT); + command |= rol32(block_en, MLXBF_I2C_MASTER_PARSE_EXP_SHIFT); + command |= rol32(pec_en, MLXBF_I2C_MASTER_SEND_PEC_SHIFT); + + /* Clear status bits. */ + mlxbf_i2c_write(priv->smbus->io, MLXBF_I2C_SMBUS_MASTER_STATUS, 0x0); + /* Set the cause data. */ + mlxbf_i2c_write(priv->smbus->io, MLXBF_I2C_CAUSE_OR_CLEAR, ~0x0); + /* Zero PEC byte. */ + mlxbf_i2c_write(priv->smbus->io, MLXBF_I2C_SMBUS_MASTER_PEC, 0x0); + /* Zero byte count. */ + mlxbf_i2c_write(priv->smbus->io, MLXBF_I2C_SMBUS_RS_BYTES, 0x0); + + /* GW activation. */ + mlxbf_i2c_write(priv->smbus->io, MLXBF_I2C_SMBUS_MASTER_GW, command); + + /* + * Poll master status and check status bits. 
An ACK is sent when + * completing writing data to the bus (Master 'byte_count_done' bit + * is set to 1). + */ + return mlxbf_i2c_smbus_check_status(priv); +} + +static int +mlxbf_i2c_smbus_start_transaction(struct mlxbf_i2c_priv *priv, + struct mlxbf_i2c_smbus_request *request) +{ + u8 data_desc[MLXBF_I2C_MASTER_DATA_DESC_SIZE] = { 0 }; + u8 op_idx, data_idx, data_len, write_len, read_len; + struct mlxbf_i2c_smbus_operation *operation; + u8 read_en, write_en, block_en, pec_en; + u8 slave, flags, addr; + u8 *read_buf; + int ret = 0; + + if (request->operation_cnt > MLXBF_I2C_SMBUS_MAX_OP_CNT) + return -EINVAL; + + read_buf = NULL; + data_idx = 0; + read_en = 0; + write_en = 0; + write_len = 0; + read_len = 0; + block_en = 0; + pec_en = 0; + slave = request->slave & GENMASK(6, 0); + addr = slave << 1; + + /* First of all, check whether the HW is idle. */ + if (WARN_ON(!mlxbf_smbus_master_wait_for_idle(priv))) + return -EBUSY; + + /* Set first byte. */ + data_desc[data_idx++] = addr; + + for (op_idx = 0; op_idx < request->operation_cnt; op_idx++) { + operation = &request->operation[op_idx]; + flags = operation->flags; + + /* + * Note that read and write operations might be handled by a + * single command. If the MLXBF_I2C_F_SMBUS_OPERATION is set + * then write command byte and set the optional SMBus specific + * bits such as block_en and pec_en. These bits MUST be + * submitted by the first operation only. + */ + if (op_idx == 0 && flags & MLXBF_I2C_F_SMBUS_OPERATION) { + block_en = flags & MLXBF_I2C_F_SMBUS_BLOCK; + pec_en = flags & MLXBF_I2C_F_SMBUS_PEC; + } + + if (flags & MLXBF_I2C_F_WRITE) { + write_en = 1; + write_len += operation->length; + memcpy(data_desc + data_idx, + operation->buffer, operation->length); + data_idx += operation->length; + } + /* + * We assume that read operations are performed only once per + * SMBus transaction. *TBD* protect this statement so it won't + * be executed twice? or return an error if we try to read more + * than once? 
+ */ + if (flags & MLXBF_I2C_F_READ) { + read_en = 1; + /* Subtract 1 as required by HW. */ + read_len = operation->length - 1; + read_buf = operation->buffer; + } + } + + /* Set Master GW data descriptor. */ + data_len = write_len + 1; /* Add one byte of the slave address. */ + /* + * Note that data_len cannot be 0. Indeed, the slave address byte + * must be written to the data registers. + */ + mlxbf_i2c_smbus_write_data(priv, (const u8 *)data_desc, data_len, + MLXBF_I2C_MASTER_DATA_DESC_ADDR); + + if (write_en) { + ret = mlxbf_i2c_smbus_enable(priv, slave, write_len, block_en, + pec_en, 0); + if (ret) + return ret; + } + + if (read_en) { + /* Write slave address to Master GW data descriptor. */ + mlxbf_i2c_smbus_write_data(priv, (const u8 *)&addr, 1, + MLXBF_I2C_MASTER_DATA_DESC_ADDR); + ret = mlxbf_i2c_smbus_enable(priv, slave, read_len, block_en, + pec_en, 1); + if (!ret) { + /* Get Master GW data descriptor. */ + mlxbf_i2c_smbus_read_data(priv, data_desc, read_len + 1, + MLXBF_I2C_MASTER_DATA_DESC_ADDR); + + /* Get data from Master GW data descriptor. */ + memcpy(read_buf, data_desc, read_len + 1); + } + + /* + * After a read operation the SMBus FSM ps (present state) + * needs to be 'manually' reset. This should be removed in + * next tag integration. + */ + mlxbf_i2c_write(priv->smbus->io, MLXBF_I2C_SMBUS_MASTER_FSM, + MLXBF_I2C_SMBUS_MASTER_FSM_PS_STATE_MASK); + } + + return ret; +} + +/* I2C SMBus protocols. */ + +static void +mlxbf_i2c_smbus_quick_command(struct mlxbf_i2c_smbus_request *request, + u8 read) +{ + request->operation_cnt = MLXBF_I2C_SMBUS_OP_CNT_1; + + request->operation[0].length = 0; + request->operation[0].flags = MLXBF_I2C_F_WRITE; + request->operation[0].flags |= read ? 
MLXBF_I2C_F_READ : 0;
+}
+/*
+ * Byte transfer: a single operation on 'data'. The length is one byte,
+ * plus one more when 'pec_check' is set; the direction flag follows
+ * 'read' and the PEC flag follows 'pec_check'.
+ */
+static void mlxbf_i2c_smbus_byte_func(struct mlxbf_i2c_smbus_request *request,
+				      u8 *data, bool read, bool pec_check)
+{
+	request->operation_cnt = MLXBF_I2C_SMBUS_OP_CNT_1;
+
+	request->operation[0].length = 1;
+	request->operation[0].length += pec_check;
+
+	request->operation[0].flags = MLXBF_I2C_F_SMBUS_OPERATION;
+	request->operation[0].flags |= read ?
+				MLXBF_I2C_F_READ : MLXBF_I2C_F_WRITE;
+	request->operation[0].flags |= pec_check ? MLXBF_I2C_F_SMBUS_PEC : 0;
+
+	request->operation[0].buffer = data;
+}
+/*
+ * Byte-data transfer: operation 0 writes the one-byte 'command',
+ * operation 1 reads or writes 'data' (one byte, plus one more when
+ * 'pec_check' is set). The PEC flag is carried by operation 0 only.
+ */
+static void
+mlxbf_i2c_smbus_data_byte_func(struct mlxbf_i2c_smbus_request *request,
+			       u8 *command, u8 *data, bool read, bool pec_check)
+{
+	request->operation_cnt = MLXBF_I2C_SMBUS_OP_CNT_2;
+
+	request->operation[0].length = 1;
+	request->operation[0].flags =
+			MLXBF_I2C_F_SMBUS_OPERATION | MLXBF_I2C_F_WRITE;
+	request->operation[0].flags |= pec_check ? MLXBF_I2C_F_SMBUS_PEC : 0;
+	request->operation[0].buffer = command;
+
+	request->operation[1].length = 1;
+	request->operation[1].length += pec_check;
+	request->operation[1].flags = read ?
+				MLXBF_I2C_F_READ : MLXBF_I2C_F_WRITE;
+	request->operation[1].buffer = data;
+}
+/*
+ * Word-data transfer: same layout as the byte-data transfer, except that
+ * operation 1 carries two data bytes (plus one more when 'pec_check' is
+ * set).
+ */
+static void
+mlxbf_i2c_smbus_data_word_func(struct mlxbf_i2c_smbus_request *request,
+			       u8 *command, u8 *data, bool read, bool pec_check)
+{
+	request->operation_cnt = MLXBF_I2C_SMBUS_OP_CNT_2;
+
+	request->operation[0].length = 1;
+	request->operation[0].flags =
+			MLXBF_I2C_F_SMBUS_OPERATION | MLXBF_I2C_F_WRITE;
+	request->operation[0].flags |= pec_check ? MLXBF_I2C_F_SMBUS_PEC : 0;
+	request->operation[0].buffer = command;
+
+	request->operation[1].length = 2;
+	request->operation[1].length += pec_check;
+	request->operation[1].flags = read ?
+				MLXBF_I2C_F_READ : MLXBF_I2C_F_WRITE;
+	request->operation[1].buffer = data;
+}
+/*
+ * I2C block transfer: operation 0 writes 'command'; operation 1 reads or
+ * writes the bytes that follow data[0]. The data length is '*data_len'
+ * plus the optional PEC byte, capped at I2C_SMBUS_BLOCK_MAX. On return
+ * '*data_len' holds the length actually used and, for reads, the same
+ * value is also stored in data[0].
+ */
+static void
+mlxbf_i2c_smbus_i2c_block_func(struct mlxbf_i2c_smbus_request *request,
+			       u8 *command, u8 *data, u8 *data_len, bool read,
+			       bool pec_check)
+{
+	request->operation_cnt = MLXBF_I2C_SMBUS_OP_CNT_2;
+
+	request->operation[0].length = 1;
+	request->operation[0].flags =
+			MLXBF_I2C_F_SMBUS_OPERATION | MLXBF_I2C_F_WRITE;
+	request->operation[0].flags |= pec_check ? MLXBF_I2C_F_SMBUS_PEC : 0;
+	request->operation[0].buffer = command;
+
+	/*
+	 * As specified in the standard, the max number of bytes to read/write
+	 * per block operation is 32 bytes. In Golan code, the controller can
+	 * read up to 128 bytes and write up to 127 bytes.
+	 */
+	request->operation[1].length =
+	    (*data_len + pec_check > I2C_SMBUS_BLOCK_MAX) ?
+	    I2C_SMBUS_BLOCK_MAX : *data_len + pec_check;
+	request->operation[1].flags = read ?
+				MLXBF_I2C_F_READ : MLXBF_I2C_F_WRITE;
+	/*
+	 * Skip the first data byte, which corresponds to the number of bytes
+	 * to read/write.
+	 */
+	request->operation[1].buffer = data + 1;
+
+	*data_len = request->operation[1].length;
+
+	/* Set the number of bytes to read. This will be used by userspace. */
+	if (read)
+		data[0] = *data_len;
+}
+/*
+ * SMBus block transfer: identical in layout to the I2C block transfer,
+ * with MLXBF_I2C_F_SMBUS_BLOCK additionally set on operation 0.
+ */
+static void mlxbf_i2c_smbus_block_func(struct mlxbf_i2c_smbus_request *request,
+				       u8 *command, u8 *data, u8 *data_len,
+				       bool read, bool pec_check)
+{
+	request->operation_cnt = MLXBF_I2C_SMBUS_OP_CNT_2;
+
+	request->operation[0].length = 1;
+	request->operation[0].flags =
+			MLXBF_I2C_F_SMBUS_OPERATION | MLXBF_I2C_F_WRITE;
+	request->operation[0].flags |= MLXBF_I2C_F_SMBUS_BLOCK;
+	request->operation[0].flags |= pec_check ? MLXBF_I2C_F_SMBUS_PEC : 0;
+	request->operation[0].buffer = command;
+
+	/* Same clamping as for the I2C block transfer. */
+	request->operation[1].length =
+	    (*data_len + pec_check > I2C_SMBUS_BLOCK_MAX) ?
+	    I2C_SMBUS_BLOCK_MAX : *data_len + pec_check;
+	request->operation[1].flags = read ?
+				MLXBF_I2C_F_READ : MLXBF_I2C_F_WRITE;
+	request->operation[1].buffer = data + 1;
+
+	*data_len = request->operation[1].length;
+
+	/* Set the number of bytes to read. This will be used by userspace. */
+	if (read)
+		data[0] = *data_len;
+}
+/*
+ * Process call: three operations. Operation 0 writes 'command',
+ * operation 1 writes two bytes from 'data' and operation 2 reads three
+ * bytes back into the same 'data' buffer.
+ *
+ * NOTE(review): the read length is fixed at 3 whether or not 'pec_check'
+ * is set, and MLXBF_I2C_F_SMBUS_BLOCK is set on operation 0 -- confirm
+ * both are intended for a non-block process call.
+ */
+static void
+mlxbf_i2c_smbus_process_call_func(struct mlxbf_i2c_smbus_request *request,
+				  u8 *command, u8 *data, bool pec_check)
+{
+	request->operation_cnt = MLXBF_I2C_SMBUS_OP_CNT_3;
+
+	request->operation[0].length = 1;
+	request->operation[0].flags =
+			MLXBF_I2C_F_SMBUS_OPERATION | MLXBF_I2C_F_WRITE;
+	request->operation[0].flags |= MLXBF_I2C_F_SMBUS_BLOCK;
+	request->operation[0].flags |= pec_check ? MLXBF_I2C_F_SMBUS_PEC : 0;
+	request->operation[0].buffer = command;
+
+	request->operation[1].length = 2;
+	request->operation[1].flags = MLXBF_I2C_F_WRITE;
+	request->operation[1].buffer = data;
+
+	request->operation[2].length = 3;
+	request->operation[2].flags = MLXBF_I2C_F_READ;
+	request->operation[2].buffer = data;
+}
+/*
+ * Block process call: three operations. 'length' is '*data_len' plus the
+ * optional PEC byte, capped at I2C_SMBUS_BLOCK_MAX. Operation 1 writes
+ * 'length - pec_check' bytes from 'data', operation 2 reads 'length'
+ * bytes back into 'data', and '*data_len' is set to 'length'.
+ */
+static void
+mlxbf_i2c_smbus_blk_process_call_func(struct mlxbf_i2c_smbus_request *request,
+				      u8 *command, u8 *data, u8 *data_len,
+				      bool pec_check)
+{
+	u32 length;
+
+	request->operation_cnt = MLXBF_I2C_SMBUS_OP_CNT_3;
+
+	request->operation[0].length = 1;
+	request->operation[0].flags =
+			MLXBF_I2C_F_SMBUS_OPERATION | MLXBF_I2C_F_WRITE;
+	request->operation[0].flags |= MLXBF_I2C_F_SMBUS_BLOCK;
+	request->operation[0].flags |= (pec_check) ? MLXBF_I2C_F_SMBUS_PEC : 0;
+	request->operation[0].buffer = command;
+
+	length = (*data_len + pec_check > I2C_SMBUS_BLOCK_MAX) ?
+	    I2C_SMBUS_BLOCK_MAX : *data_len + pec_check;
+
+	request->operation[1].length = length - pec_check;
+	request->operation[1].flags = MLXBF_I2C_F_WRITE;
+	request->operation[1].buffer = data;
+
+	request->operation[2].length = length;
+	request->operation[2].flags = MLXBF_I2C_F_READ;
+	request->operation[2].buffer = data;
+
+	*data_len = length; /* including PEC byte. */
+}
+
+/* Initialization functions.
*/ + +static bool mlxbf_i2c_has_chip_type(struct mlxbf_i2c_priv *priv, u8 type) +{ + return priv->chip->type == type; +} + +static struct mlxbf_i2c_resource * +mlxbf_i2c_get_shared_resource(struct mlxbf_i2c_priv *priv, u8 type) +{ + const struct mlxbf_i2c_chip_info *chip = priv->chip; + struct mlxbf_i2c_resource *res; + u8 res_idx = 0; + + for (res_idx = 0; res_idx < MLXBF_I2C_SHARED_RES_MAX; res_idx++) { + res = chip->shared_res[res_idx]; + if (res && res->type == type) + return res; + } + + return NULL; +} + +static int mlxbf_i2c_init_resource(struct platform_device *pdev, + struct mlxbf_i2c_resource **res, + u8 type) +{ + struct mlxbf_i2c_resource *tmp_res; + struct device *dev = &pdev->dev; + + if (!res || *res || type >= MLXBF_I2C_END_RES) + return -EINVAL; + + tmp_res = devm_kzalloc(dev, sizeof(struct mlxbf_i2c_resource), + GFP_KERNEL); + if (!tmp_res) + return -ENOMEM; + + tmp_res->params = platform_get_resource(pdev, IORESOURCE_MEM, type); + if (!tmp_res->params) { + devm_kfree(dev, tmp_res); + return -EIO; + } + + tmp_res->io = devm_ioremap_resource(dev, tmp_res->params); + if (IS_ERR(tmp_res->io)) { + devm_kfree(dev, tmp_res); + return PTR_ERR(tmp_res->io); + } + + tmp_res->type = type; + + *res = tmp_res; + + return 0; +} + +static u32 mlxbf_i2c_get_ticks(struct mlxbf_i2c_priv *priv, u64 nanoseconds, + bool minimum) +{ + u64 frequency; + u32 ticks; + + /* + * Compute ticks as follow: + * + * Ticks + * Time = --------- x 10^9 => Ticks = Time x Frequency x 10^-9 + * Frequency + */ + frequency = priv->frequency; + ticks = (nanoseconds * frequency) / MLXBF_I2C_FREQUENCY_1GHZ; + /* + * The number of ticks is rounded down and if minimum is equal to 1 + * then add one tick. 
+	 */
+	if (minimum)
+		ticks++;
+
+	return ticks;
+}
+/*
+ * Build one timer register field: convert 'nsec' to ticks ('opt' is
+ * passed on as the 'minimum' argument), keep the bits selected by 'mask'
+ * and shift the result left by 'shift'.
+ */
+static u32 mlxbf_i2c_set_timer(struct mlxbf_i2c_priv *priv, u64 nsec, bool opt,
+			       u32 mask, u8 shift)
+{
+	u32 val = (mlxbf_i2c_get_ticks(priv, nsec, opt) & mask) << shift;
+
+	return val;
+}
+/*
+ * Program the SMBus timing registers from 'timings'. Each register is
+ * assembled from one to four fields produced by mlxbf_i2c_set_timer().
+ */
+static void mlxbf_i2c_set_timings(struct mlxbf_i2c_priv *priv,
+				  const struct mlxbf_i2c_timings *timings)
+{
+	u32 timer;
+
+	timer = mlxbf_i2c_set_timer(priv, timings->scl_high,
+				    false, MLXBF_I2C_MASK_16,
+				    MLXBF_I2C_SHIFT_0);
+	timer |= mlxbf_i2c_set_timer(priv, timings->scl_low,
+				     false, MLXBF_I2C_MASK_16,
+				     MLXBF_I2C_SHIFT_16);
+	mlxbf_i2c_write(priv->smbus->io, MLXBF_I2C_SMBUS_TIMER_SCL_LOW_SCL_HIGH,
+			  timer);
+
+	timer = mlxbf_i2c_set_timer(priv, timings->sda_rise, false,
+				    MLXBF_I2C_MASK_8, MLXBF_I2C_SHIFT_0);
+	timer |= mlxbf_i2c_set_timer(priv, timings->sda_fall, false,
+				     MLXBF_I2C_MASK_8, MLXBF_I2C_SHIFT_8);
+	timer |= mlxbf_i2c_set_timer(priv, timings->scl_rise, false,
+				     MLXBF_I2C_MASK_8, MLXBF_I2C_SHIFT_16);
+	timer |= mlxbf_i2c_set_timer(priv, timings->scl_fall, false,
+				     MLXBF_I2C_MASK_8, MLXBF_I2C_SHIFT_24);
+	mlxbf_i2c_write(priv->smbus->io, MLXBF_I2C_SMBUS_TIMER_FALL_RISE_SPIKE,
+			  timer);
+
+	timer = mlxbf_i2c_set_timer(priv, timings->hold_start, true,
+				    MLXBF_I2C_MASK_16, MLXBF_I2C_SHIFT_0);
+	timer |= mlxbf_i2c_set_timer(priv, timings->hold_data, true,
+				     MLXBF_I2C_MASK_16, MLXBF_I2C_SHIFT_16);
+	mlxbf_i2c_write(priv->smbus->io, MLXBF_I2C_SMBUS_TIMER_THOLD, timer);
+
+	timer = mlxbf_i2c_set_timer(priv, timings->setup_start, true,
+				    MLXBF_I2C_MASK_16, MLXBF_I2C_SHIFT_0);
+	timer |= mlxbf_i2c_set_timer(priv, timings->setup_stop, true,
+				     MLXBF_I2C_MASK_16, MLXBF_I2C_SHIFT_16);
+	mlxbf_i2c_write(priv->smbus->io,
+			  MLXBF_I2C_SMBUS_TIMER_TSETUP_START_STOP, timer);
+
+	timer = mlxbf_i2c_set_timer(priv, timings->setup_data, true,
+				    MLXBF_I2C_MASK_16, MLXBF_I2C_SHIFT_0);
+	mlxbf_i2c_write(priv->smbus->io, MLXBF_I2C_SMBUS_TIMER_TSETUP_DATA,
+			  timer);
+
+	timer = mlxbf_i2c_set_timer(priv, timings->buf, false,
MLXBF_I2C_MASK_16, MLXBF_I2C_SHIFT_0);
+	timer |= mlxbf_i2c_set_timer(priv, timings->thigh_max, false,
+				     MLXBF_I2C_MASK_16, MLXBF_I2C_SHIFT_16);
+	mlxbf_i2c_write(priv->smbus->io, MLXBF_I2C_SMBUS_THIGH_MAX_TBUF,
+			  timer);
+
+	/* The timeout is written as-is, without any tick conversion. */
+	timer = timings->timeout;
+	mlxbf_i2c_write(priv->smbus->io, MLXBF_I2C_SMBUS_SCL_LOW_TIMEOUT,
+			  timer);
+}
+/* Indices into the mlxbf_i2c_timings[] table below. */
+enum mlxbf_i2c_timings_config {
+	MLXBF_I2C_TIMING_CONFIG_100KHZ,
+	MLXBF_I2C_TIMING_CONFIG_400KHZ,
+	MLXBF_I2C_TIMING_CONFIG_1000KHZ,
+};
+
+/*
+ * Note that the mlxbf_i2c_timings->timeout value is not related to the
+ * bus frequency, it is impacted by the time it takes the driver to
+ * complete data transmission before transaction abort.
+ *
+ * All fields except 'timeout' are converted from nanoseconds to ticks
+ * by mlxbf_i2c_set_timings().
+ */
+static const struct mlxbf_i2c_timings mlxbf_i2c_timings[] = {
+	[MLXBF_I2C_TIMING_CONFIG_100KHZ] = {
+		.scl_high = 4810,
+		.scl_low = 5000,
+		.hold_start = 4000,
+		.setup_start = 4800,
+		.setup_stop = 4000,
+		.setup_data = 250,
+		.sda_rise = 50,
+		.sda_fall = 50,
+		.scl_rise = 50,
+		.scl_fall = 50,
+		.hold_data = 300,
+		.buf = 20000,
+		.thigh_max = 5000,
+		.timeout = 106500
+	},
+	[MLXBF_I2C_TIMING_CONFIG_400KHZ] = {
+		.scl_high = 1011,
+		.scl_low = 1300,
+		.hold_start = 600,
+		.setup_start = 700,
+		.setup_stop = 600,
+		.setup_data = 100,
+		.sda_rise = 50,
+		.sda_fall = 50,
+		.scl_rise = 50,
+		.scl_fall = 50,
+		.hold_data = 300,
+		.buf = 20000,
+		.thigh_max = 5000,
+		.timeout = 106500
+	},
+	[MLXBF_I2C_TIMING_CONFIG_1000KHZ] = {
+		.scl_high = 600,
+		.scl_low = 1300,
+		.hold_start = 600,
+		.setup_start = 600,
+		.setup_stop = 600,
+		.setup_data = 100,
+		.sda_rise = 50,
+		.sda_fall = 50,
+		.scl_rise = 50,
+		.scl_fall = 50,
+		.hold_data = 300,
+		.buf = 20000,
+		.thigh_max = 5000,
+		.timeout = 106500
+	}
+};
+/*
+ * Pick a timing configuration from the "clock-frequency" device property
+ * and program it into the hardware.
+ */
+static int mlxbf_i2c_init_timings(struct platform_device *pdev,
+				  struct mlxbf_i2c_priv *priv)
+{
+	enum mlxbf_i2c_timings_config config_idx;
+	struct device *dev = &pdev->dev;
+	u32 config_khz;
+
+	int ret;
+
+	ret = device_property_read_u32(dev, "clock-frequency", &config_khz);
+	if (ret
< 0) + config_khz = MLXBF_I2C_TIMING_100KHZ; + + switch (config_khz) { + default: + /* Default settings is 100 KHz. */ + pr_warn("Illegal value %d: defaulting to 100 KHz\n", + config_khz); + fallthrough; + case MLXBF_I2C_TIMING_100KHZ: + config_idx = MLXBF_I2C_TIMING_CONFIG_100KHZ; + break; + + case MLXBF_I2C_TIMING_400KHZ: + config_idx = MLXBF_I2C_TIMING_CONFIG_400KHZ; + break; + + case MLXBF_I2C_TIMING_1000KHZ: + config_idx = MLXBF_I2C_TIMING_CONFIG_1000KHZ; + break; + } + + mlxbf_i2c_set_timings(priv, &mlxbf_i2c_timings[config_idx]); + + return 0; +} + +static int mlxbf_i2c_get_gpio(struct platform_device *pdev, + struct mlxbf_i2c_priv *priv) +{ + struct mlxbf_i2c_resource *gpio_res; + struct device *dev = &pdev->dev; + struct resource *params; + resource_size_t size; + + gpio_res = mlxbf_i2c_get_shared_resource(priv, MLXBF_I2C_GPIO_RES); + if (!gpio_res) + return -EPERM; + + /* + * The GPIO region in TYU space is shared among I2C busses. + * This function MUST be serialized to avoid racing when + * claiming the memory region and/or setting up the GPIO. + */ + lockdep_assert_held(gpio_res->lock); + + /* Check whether the memory map exist. */ + if (gpio_res->io) + return 0; + + params = gpio_res->params; + size = resource_size(params); + + if (!devm_request_mem_region(dev, params->start, size, params->name)) + return -EFAULT; + + gpio_res->io = devm_ioremap(dev, params->start, size); + if (IS_ERR(gpio_res->io)) { + devm_release_mem_region(dev, params->start, size); + return PTR_ERR(gpio_res->io); + } + + return 0; +} + +static int mlxbf_i2c_release_gpio(struct platform_device *pdev, + struct mlxbf_i2c_priv *priv) +{ + struct mlxbf_i2c_resource *gpio_res; + struct device *dev = &pdev->dev; + struct resource *params; + + gpio_res = mlxbf_i2c_get_shared_resource(priv, MLXBF_I2C_GPIO_RES); + if (!gpio_res) + return 0; + + mutex_lock(gpio_res->lock); + + if (gpio_res->io) { + /* Release the GPIO resource. 
*/ + params = gpio_res->params; + devm_iounmap(dev, gpio_res->io); + devm_release_mem_region(dev, params->start, + resource_size(params)); + } + + mutex_unlock(gpio_res->lock); + + return 0; +} + +static int mlxbf_i2c_get_corepll(struct platform_device *pdev, + struct mlxbf_i2c_priv *priv) +{ + struct mlxbf_i2c_resource *corepll_res; + struct device *dev = &pdev->dev; + struct resource *params; + resource_size_t size; + + corepll_res = mlxbf_i2c_get_shared_resource(priv, + MLXBF_I2C_COREPLL_RES); + if (!corepll_res) + return -EPERM; + + /* + * The COREPLL region in TYU space is shared among I2C busses. + * This function MUST be serialized to avoid racing when + * claiming the memory region. + */ + lockdep_assert_held(corepll_res->lock); + + /* Check whether the memory map exist. */ + if (corepll_res->io) + return 0; + + params = corepll_res->params; + size = resource_size(params); + + if (!devm_request_mem_region(dev, params->start, size, params->name)) + return -EFAULT; + + corepll_res->io = devm_ioremap(dev, params->start, size); + if (IS_ERR(corepll_res->io)) { + devm_release_mem_region(dev, params->start, size); + return PTR_ERR(corepll_res->io); + } + + return 0; +} + +static int mlxbf_i2c_release_corepll(struct platform_device *pdev, + struct mlxbf_i2c_priv *priv) +{ + struct mlxbf_i2c_resource *corepll_res; + struct device *dev = &pdev->dev; + struct resource *params; + + corepll_res = mlxbf_i2c_get_shared_resource(priv, + MLXBF_I2C_COREPLL_RES); + + mutex_lock(corepll_res->lock); + + if (corepll_res->io) { + /* Release the CorePLL resource. 
*/ + params = corepll_res->params; + devm_iounmap(dev, corepll_res->io); + devm_release_mem_region(dev, params->start, + resource_size(params)); + } + + mutex_unlock(corepll_res->lock); + + return 0; +} + +static int mlxbf_i2c_init_master(struct platform_device *pdev, + struct mlxbf_i2c_priv *priv) +{ + struct mlxbf_i2c_resource *gpio_res; + struct device *dev = &pdev->dev; + u32 config_reg; + int ret; + + /* This configuration is only needed for BlueField 1. */ + if (!mlxbf_i2c_has_chip_type(priv, MLXBF_I2C_CHIP_TYPE_1)) + return 0; + + gpio_res = mlxbf_i2c_get_shared_resource(priv, MLXBF_I2C_GPIO_RES); + if (!gpio_res) + return -EPERM; + + /* + * The GPIO region in TYU space is shared among I2C busses. + * This function MUST be serialized to avoid racing when + * claiming the memory region and/or setting up the GPIO. + */ + + mutex_lock(gpio_res->lock); + + ret = mlxbf_i2c_get_gpio(pdev, priv); + if (ret < 0) { + dev_err(dev, "Failed to get gpio resource"); + mutex_unlock(gpio_res->lock); + return ret; + } + + /* + * TYU - Configuration for GPIO pins. Those pins must be asserted in + * MLXBF_I2C_GPIO_0_FUNC_EN_0, i.e. GPIO 0 is controlled by HW, and must + * be reset in MLXBF_I2C_GPIO_0_FORCE_OE_EN, i.e. GPIO_OE will be driven + * instead of HW_OE. + * For now, we do not reset the GPIO state when the driver is removed. + * First, it is not necessary to disable the bus since we are using + * the same busses. Then, some busses might be shared among Linux and + * platform firmware; disabling the bus might compromise the system + * functionality. 
+ */ + config_reg = mlxbf_i2c_read(gpio_res->io, + MLXBF_I2C_GPIO_0_FUNC_EN_0); + config_reg = MLXBF_I2C_GPIO_SMBUS_GW_ASSERT_PINS(priv->bus, + config_reg); + mlxbf_i2c_write(gpio_res->io, MLXBF_I2C_GPIO_0_FUNC_EN_0, + config_reg); + + config_reg = mlxbf_i2c_read(gpio_res->io, + MLXBF_I2C_GPIO_0_FORCE_OE_EN); + config_reg = MLXBF_I2C_GPIO_SMBUS_GW_RESET_PINS(priv->bus, + config_reg); + mlxbf_i2c_write(gpio_res->io, MLXBF_I2C_GPIO_0_FORCE_OE_EN, + config_reg); + + mutex_unlock(gpio_res->lock); + + return 0; +} + +static u64 mlxbf_calculate_freq_from_tyu(struct mlxbf_i2c_resource *corepll_res) +{ + u64 core_frequency, pad_frequency; + u8 core_od, core_r; + u32 corepll_val; + u16 core_f; + + pad_frequency = MLXBF_I2C_TYU_PLL_IN_FREQ; + + corepll_val = mlxbf_i2c_read(corepll_res->io, + MLXBF_I2C_CORE_PLL_REG1); + + /* Get Core PLL configuration bits. */ + core_f = rol32(corepll_val, MLXBF_I2C_COREPLL_CORE_F_TYU_SHIFT) & + MLXBF_I2C_COREPLL_CORE_F_TYU_MASK; + core_od = rol32(corepll_val, MLXBF_I2C_COREPLL_CORE_OD_TYU_SHIFT) & + MLXBF_I2C_COREPLL_CORE_OD_TYU_MASK; + core_r = rol32(corepll_val, MLXBF_I2C_COREPLL_CORE_R_TYU_SHIFT) & + MLXBF_I2C_COREPLL_CORE_R_TYU_MASK; + + /* + * Compute PLL output frequency as follow: + * + * CORE_F + 1 + * PLL_OUT_FREQ = PLL_IN_FREQ * ---------------------------- + * (CORE_R + 1) * (CORE_OD + 1) + * + * Where PLL_OUT_FREQ and PLL_IN_FREQ refer to CoreFrequency + * and PadFrequency, respectively. 
+ */ + core_frequency = pad_frequency * (++core_f); + core_frequency /= (++core_r) * (++core_od); + + return core_frequency; +} + +static u64 mlxbf_calculate_freq_from_yu(struct mlxbf_i2c_resource *corepll_res) +{ + u32 corepll_reg1_val, corepll_reg2_val; + u64 corepll_frequency, pad_frequency; + u8 core_od, core_r; + u32 core_f; + + pad_frequency = MLXBF_I2C_YU_PLL_IN_FREQ; + + corepll_reg1_val = mlxbf_i2c_read(corepll_res->io, + MLXBF_I2C_CORE_PLL_REG1); + corepll_reg2_val = mlxbf_i2c_read(corepll_res->io, + MLXBF_I2C_CORE_PLL_REG2); + + /* Get Core PLL configuration bits */ + core_f = rol32(corepll_reg1_val, MLXBF_I2C_COREPLL_CORE_F_YU_SHIFT) & + MLXBF_I2C_COREPLL_CORE_F_YU_MASK; + core_r = rol32(corepll_reg1_val, MLXBF_I2C_COREPLL_CORE_R_YU_SHIFT) & + MLXBF_I2C_COREPLL_CORE_R_YU_MASK; + core_od = rol32(corepll_reg2_val, MLXBF_I2C_COREPLL_CORE_OD_YU_SHIFT) & + MLXBF_I2C_COREPLL_CORE_OD_YU_MASK; + + /* + * Compute PLL output frequency as follow: + * + * CORE_F / 16384 + * PLL_OUT_FREQ = PLL_IN_FREQ * ---------------------------- + * (CORE_R + 1) * (CORE_OD + 1) + * + * Where PLL_OUT_FREQ and PLL_IN_FREQ refer to CoreFrequency + * and PadFrequency, respectively. + */ + corepll_frequency = (pad_frequency * core_f) / MLNXBF_I2C_COREPLL_CONST; + corepll_frequency /= (++core_r) * (++core_od); + + return corepll_frequency; +} + +static int mlxbf_i2c_calculate_corepll_freq(struct platform_device *pdev, + struct mlxbf_i2c_priv *priv) +{ + const struct mlxbf_i2c_chip_info *chip = priv->chip; + struct mlxbf_i2c_resource *corepll_res; + struct device *dev = &pdev->dev; + u64 *freq = &priv->frequency; + int ret; + + corepll_res = mlxbf_i2c_get_shared_resource(priv, + MLXBF_I2C_COREPLL_RES); + if (!corepll_res) + return -EPERM; + + /* + * First, check whether the TYU core Clock frequency is set. + * The TYU core frequency is the same for all I2C busses; when + * the first device gets probed the frequency is determined and + * stored into a globally visible variable. 
So, first of all, + * check whether the frequency is already set. Here, we assume + * that the frequency is expected to be greater than 0. + */ + mutex_lock(corepll_res->lock); + if (!mlxbf_i2c_corepll_frequency) { + if (!chip->calculate_freq) { + mutex_unlock(corepll_res->lock); + return -EPERM; + } + + ret = mlxbf_i2c_get_corepll(pdev, priv); + if (ret < 0) { + dev_err(dev, "Failed to get corePLL resource"); + mutex_unlock(corepll_res->lock); + return ret; + } + + mlxbf_i2c_corepll_frequency = chip->calculate_freq(corepll_res); + } + mutex_unlock(corepll_res->lock); + + *freq = mlxbf_i2c_corepll_frequency; + + return 0; +} + +static int mlxbf_slave_enable(struct mlxbf_i2c_priv *priv, u8 addr) +{ + u32 slave_reg, slave_reg_tmp, slave_reg_avail, slave_addr_mask; + u8 reg, reg_cnt, byte, addr_tmp, reg_avail, byte_avail; + bool avail, disabled; + + disabled = false; + avail = false; + + if (!priv) + return -EPERM; + + reg_cnt = MLXBF_I2C_SMBUS_SLAVE_ADDR_CNT >> 2; + slave_addr_mask = MLXBF_I2C_SMBUS_SLAVE_ADDR_MASK; + + /* + * Read the slave registers. There are 4 * 32-bit slave registers. + * Each slave register can hold up to 4 * 8-bit slave configuration + * (7-bit address, 1 status bit (1 if enabled, 0 if not)). + */ + for (reg = 0; reg < reg_cnt; reg++) { + slave_reg = mlxbf_i2c_read(priv->smbus->io, + MLXBF_I2C_SMBUS_SLAVE_ADDR_CFG + reg * 0x4); + /* + * Each register holds 4 slave addresses. So, we have to keep + * the byte order consistent with the value read in order to + * update the register correctly, if needed. + */ + slave_reg_tmp = slave_reg; + for (byte = 0; byte < 4; byte++) { + addr_tmp = slave_reg_tmp & GENMASK(7, 0); + + /* + * Mark the first available slave address slot, i.e. its + * enabled bit should be unset. This slot might be used + * later on to register our slave. 
+ */ + if (!avail && !MLXBF_I2C_SLAVE_ADDR_ENABLED(addr_tmp)) { + avail = true; + reg_avail = reg; + byte_avail = byte; + slave_reg_avail = slave_reg; + } + + /* + * Parse slave address bytes and check whether the + * slave address already exists and it's enabled, + * i.e. most significant bit is set. + */ + if ((addr_tmp & slave_addr_mask) == addr) { + if (MLXBF_I2C_SLAVE_ADDR_ENABLED(addr_tmp)) + return 0; + disabled = true; + break; + } + + /* Parse next byte. */ + slave_reg_tmp >>= 8; + } + + /* Exit the loop if the slave address is found. */ + if (disabled) + break; + } + + if (!avail && !disabled) + return -EINVAL; /* No room for a new slave address. */ + + if (avail && !disabled) { + reg = reg_avail; + byte = byte_avail; + /* Set the slave address. */ + slave_reg_avail &= ~(slave_addr_mask << (byte * 8)); + slave_reg_avail |= addr << (byte * 8); + slave_reg = slave_reg_avail; + } + + /* Enable the slave address and update the register. */ + slave_reg |= (1 << MLXBF_I2C_SMBUS_SLAVE_ADDR_EN_BIT) << (byte * 8); + mlxbf_i2c_write(priv->smbus->io, + MLXBF_I2C_SMBUS_SLAVE_ADDR_CFG + reg * 0x4, slave_reg); + + return 0; +} + +static int mlxbf_slave_disable(struct mlxbf_i2c_priv *priv) +{ + u32 slave_reg, slave_reg_tmp, slave_addr_mask; + u8 addr, addr_tmp, reg, reg_cnt, slave_byte; + struct i2c_client *client = priv->slave; + bool exist; + + exist = false; + + addr = client->addr; + reg_cnt = MLXBF_I2C_SMBUS_SLAVE_ADDR_CNT >> 2; + slave_addr_mask = MLXBF_I2C_SMBUS_SLAVE_ADDR_MASK; + + /* + * Read the slave registers. There are 4 * 32-bit slave registers. + * Each slave register can hold up to 4 * 8-bit slave configuration + * (7-bit address, 1 status bit (1 if enabled, 0 if not)). + */ + for (reg = 0; reg < reg_cnt; reg++) { + slave_reg = mlxbf_i2c_read(priv->smbus->io, + MLXBF_I2C_SMBUS_SLAVE_ADDR_CFG + reg * 0x4); + + /* Check whether the address slots are empty. */ + if (slave_reg == 0) + continue; + + /* + * Each register holds 4 slave addresses. 
So, we have to keep
+		 * the byte order consistent with the value read in order to
+		 * update the register correctly, if needed.
+		 */
+		slave_reg_tmp = slave_reg;
+		slave_byte = 0;
+		/* Walk the slots from the lowest byte until none are left. */
+		while (slave_reg_tmp != 0) {
+			addr_tmp = slave_reg_tmp & slave_addr_mask;
+			/*
+			 * Parse slave address bytes and check whether the
+			 * slave address already exists.
+			 */
+			if (addr_tmp == addr) {
+				exist = true;
+				break;
+			}
+
+			/* Parse next byte. */
+			slave_reg_tmp >>= 8;
+			slave_byte += 1;
+		}
+
+		/* Exit the loop if the slave address is found. */
+		if (exist)
+			break;
+	}
+
+	if (!exist)
+		return 0; /* Slave is not registered, nothing to do. */
+
+	/* Cleanup the slave address slot. */
+	slave_reg &= ~(GENMASK(7, 0) << (slave_byte * 8));
+	mlxbf_i2c_write(priv->smbus->io,
+			MLXBF_I2C_SMBUS_SLAVE_ADDR_CFG + reg * 0x4, slave_reg);
+
+	return 0;
+}
+/*
+ * Make the cause coalesce registers available through priv->coalesce.
+ *
+ * On MLXBF_I2C_CHIP_TYPE_1 the region is a shared resource that is
+ * claimed and mapped on first use; on other chips it is a per-device
+ * resource set up by mlxbf_i2c_init_resource().
+ */
+static int mlxbf_i2c_init_coalesce(struct platform_device *pdev,
+				   struct mlxbf_i2c_priv *priv)
+{
+	struct mlxbf_i2c_resource *coalesce_res;
+	struct resource *params;
+	resource_size_t size;
+	int ret = 0;
+
+	/*
+	 * Unlike BlueField-1 platform, the coalesce registers is a dedicated
+	 * resource in the next generations of BlueField.
+	 */
+	if (mlxbf_i2c_has_chip_type(priv, MLXBF_I2C_CHIP_TYPE_1)) {
+		coalesce_res = mlxbf_i2c_get_shared_resource(priv,
+						MLXBF_I2C_COALESCE_RES);
+		if (!coalesce_res)
+			return -EPERM;
+
+		/*
+		 * The Cause Coalesce group in TYU space is shared among
+		 * I2C busses. This function MUST be serialized to avoid
+		 * racing when claiming the memory region.
+		 *
+		 * NOTE(review): the assertion below names
+		 * mlxbf_i2c_gpio_res->lock rather than coalesce_res->lock --
+		 * confirm this is the lock callers actually hold.
+		 */
+		lockdep_assert_held(mlxbf_i2c_gpio_res->lock);
+
+		/* Check whether the memory map exist.
*/ + if (coalesce_res->io) { + priv->coalesce = coalesce_res; + return 0; + } + + params = coalesce_res->params; + size = resource_size(params); + + if (!request_mem_region(params->start, size, params->name)) + return -EFAULT; + + coalesce_res->io = ioremap(params->start, size); + if (IS_ERR(coalesce_res->io)) { + release_mem_region(params->start, size); + return PTR_ERR(coalesce_res->io); + } + + priv->coalesce = coalesce_res; + + } else { + ret = mlxbf_i2c_init_resource(pdev, &priv->coalesce, + MLXBF_I2C_COALESCE_RES); + } + + return ret; +} + +static int mlxbf_i2c_release_coalesce(struct platform_device *pdev, + struct mlxbf_i2c_priv *priv) +{ + struct mlxbf_i2c_resource *coalesce_res; + struct device *dev = &pdev->dev; + struct resource *params; + resource_size_t size; + + coalesce_res = priv->coalesce; + + if (coalesce_res->io) { + params = coalesce_res->params; + size = resource_size(params); + if (mlxbf_i2c_has_chip_type(priv, MLXBF_I2C_CHIP_TYPE_1)) { + mutex_lock(coalesce_res->lock); + iounmap(coalesce_res->io); + release_mem_region(params->start, size); + mutex_unlock(coalesce_res->lock); + } else { + devm_release_mem_region(dev, params->start, size); + } + } + + return 0; +} + +static int mlxbf_i2c_init_slave(struct platform_device *pdev, + struct mlxbf_i2c_priv *priv) +{ + struct device *dev = &pdev->dev; + u32 int_reg; + int ret; + + /* Reset FSM. */ + mlxbf_i2c_write(priv->smbus->io, MLXBF_I2C_SMBUS_SLAVE_FSM, 0); + + /* + * Enable slave cause interrupt bits. Drive + * MLXBF_I2C_CAUSE_READ_WAIT_FW_RESPONSE and + * MLXBF_I2C_CAUSE_WRITE_SUCCESS, these are enabled when an external + * masters issue a Read and Write, respectively. But, clear all + * interrupts first. 
+ */ + mlxbf_i2c_write(priv->slv_cause->io, + MLXBF_I2C_CAUSE_OR_CLEAR, ~0); + int_reg = MLXBF_I2C_CAUSE_READ_WAIT_FW_RESPONSE; + int_reg |= MLXBF_I2C_CAUSE_WRITE_SUCCESS; + mlxbf_i2c_write(priv->slv_cause->io, + MLXBF_I2C_CAUSE_OR_EVTEN0, int_reg); + + /* Finally, set the 'ready' bit to start handling transactions. */ + mlxbf_i2c_write(priv->smbus->io, MLXBF_I2C_SMBUS_SLAVE_READY, 0x1); + + /* Initialize the cause coalesce resource. */ + ret = mlxbf_i2c_init_coalesce(pdev, priv); + if (ret < 0) { + dev_err(dev, "failed to initialize cause coalesce\n"); + return ret; + } + + return 0; +} + +static bool mlxbf_i2c_has_coalesce(struct mlxbf_i2c_priv *priv, bool *read, + bool *write) +{ + const struct mlxbf_i2c_chip_info *chip = priv->chip; + u32 coalesce0_reg, cause_reg; + u8 slave_shift, is_set; + + *write = false; + *read = false; + + slave_shift = chip->type != MLXBF_I2C_CHIP_TYPE_1 ? + MLXBF_I2C_CAUSE_YU_SLAVE_BIT : + priv->bus + MLXBF_I2C_CAUSE_TYU_SLAVE_BIT; + + coalesce0_reg = mlxbf_i2c_read(priv->coalesce->io, + MLXBF_I2C_CAUSE_COALESCE_0); + is_set = coalesce0_reg & (1 << slave_shift); + + if (!is_set) + return false; + + /* Check the source of the interrupt, i.e. whether a Read or Write. */ + cause_reg = mlxbf_i2c_read(priv->slv_cause->io, + MLXBF_I2C_CAUSE_ARBITER); + if (cause_reg & MLXBF_I2C_CAUSE_READ_WAIT_FW_RESPONSE) + *read = true; + else if (cause_reg & MLXBF_I2C_CAUSE_WRITE_SUCCESS) + *write = true; + + /* Clear cause bits. */ + mlxbf_i2c_write(priv->slv_cause->io, MLXBF_I2C_CAUSE_OR_CLEAR, ~0x0); + + return true; +} + +static bool mlxbf_smbus_slave_wait_for_idle(struct mlxbf_i2c_priv *priv, + u32 timeout) +{ + u32 mask = MLXBF_I2C_CAUSE_S_GW_BUSY_FALL; + u32 addr = MLXBF_I2C_CAUSE_ARBITER; + + if (mlxbf_smbus_poll(priv->slv_cause->io, addr, mask, false, timeout)) + return true; + + return false; +} + +/* Send byte to 'external' smbus master. 
*/
+static int mlxbf_smbus_irq_send(struct mlxbf_i2c_priv *priv, u8 recv_bytes)
+{
+	u8 data_desc[MLXBF_I2C_SLAVE_DATA_DESC_SIZE] = { 0 };
+	u8 write_size, pec_en, addr, byte, value, byte_cnt, desc_size;
+	struct i2c_client *slave = priv->slave;
+	u32 control32, data32;
+	int ret;
+
+	/* No slave backend is registered on this bus. */
+	if (!slave)
+		return -EINVAL;
+
+	addr = 0;
+	byte = 0;
+	desc_size = MLXBF_I2C_SLAVE_DATA_DESC_SIZE;
+
+	/*
+	 * Read bytes received from the external master. These bytes should
+	 * be located in the first data descriptor register of the slave GW.
+	 * These bytes are the slave address byte and the internal register
+	 * address, if supplied.
+	 */
+	if (recv_bytes > 0) {
+		data32 = mlxbf_i2c_read_data(priv->smbus->io,
+					     MLXBF_I2C_SLAVE_DATA_DESC_ADDR);
+
+		/*
+		 * Parse the received bytes. Only counts of 1 and 2 are
+		 * decoded; for any other count 'addr' and 'byte' keep their
+		 * initial value of 0.
+		 */
+		switch (recv_bytes) {
+		case 2:
+			byte = (data32 >> 8) & GENMASK(7, 0);
+			fallthrough;
+		case 1:
+			addr = (data32 & GENMASK(7, 0)) >> 1;
+		}
+
+		/* Check whether it's our slave address. */
+		if (slave->addr != addr)
+			return -EINVAL;
+	}
+
+	/*
+	 * I2C read transactions may start by a WRITE followed by a READ.
+	 * Indeed, most slave devices would expect the internal address
+	 * following the slave address byte. So, write that byte first,
+	 * and then, send the requested data bytes to the master.
+	 */
+	if (recv_bytes > 1) {
+		i2c_slave_event(slave, I2C_SLAVE_WRITE_REQUESTED, &value);
+		value = byte;
+		ret = i2c_slave_event(slave, I2C_SLAVE_WRITE_RECEIVED,
+				      &value);
+		i2c_slave_event(slave, I2C_SLAVE_STOP, &value);
+
+		if (ret < 0)
+			return ret;
+	}
+
+	/*
+	 * Now, send data to the master; currently, the driver supports
+	 * READ_BYTE, READ_WORD and BLOCK READ protocols. Note that the
+	 * hardware can send up to 128 bytes per transfer. That is the
+	 * size of its data registers.
+	 */
+	i2c_slave_event(slave, I2C_SLAVE_READ_REQUESTED, &value);
+
+	/* Fill the whole descriptor, one backend byte per iteration. */
+	for (byte_cnt = 0; byte_cnt < desc_size; byte_cnt++) {
+		data_desc[byte_cnt] = value;
+		i2c_slave_event(slave, I2C_SLAVE_READ_PROCESSED, &value);
+	}
+
+	/* Send a stop condition to the backend. */
+	i2c_slave_event(slave, I2C_SLAVE_STOP, &value);
+
+	/* Handle the actual transfer. */
+
+	/*
+	 * Set the number of bytes to write to master: the byte count minus
+	 * one, limited to 7 bits.
+	 */
+	write_size = (byte_cnt - 1) & 0x7f;
+
+	/* Write data to Slave GW data descriptor. */
+	mlxbf_i2c_smbus_write_data(priv, data_desc, byte_cnt,
+				   MLXBF_I2C_SLAVE_DATA_DESC_ADDR);
+
+	pec_en = 0; /* Disable PEC since it is not supported. */
+
+	/* Prepare control word. */
+	control32 = MLXBF_I2C_SLAVE_ENABLE;
+	control32 |= rol32(write_size, MLXBF_I2C_SLAVE_WRITE_BYTES_SHIFT);
+	control32 |= rol32(pec_en, MLXBF_I2C_SLAVE_SEND_PEC_SHIFT);
+
+	mlxbf_i2c_write(priv->smbus->io, MLXBF_I2C_SMBUS_SLAVE_GW, control32);
+
+	/*
+	 * Wait until the transfer is completed; the driver will wait
+	 * until the GW is idle, a cause will rise on fall of GW busy.
+	 * The outcome of the wait is not checked.
+	 */
+	mlxbf_smbus_slave_wait_for_idle(priv, MLXBF_I2C_SMBUS_TIMEOUT);
+
+	/* Release the Slave GW. */
+	mlxbf_i2c_write(priv->smbus->io,
+			MLXBF_I2C_SMBUS_SLAVE_RS_MASTER_BYTES, 0x0);
+	mlxbf_i2c_write(priv->smbus->io, MLXBF_I2C_SMBUS_SLAVE_PEC, 0x0);
+	mlxbf_i2c_write(priv->smbus->io, MLXBF_I2C_SMBUS_SLAVE_READY, 0x1);
+
+	return 0;
+}
+
+/*
+ * Receive bytes from 'external' smbus master.
+ *
+ * 'recv_bytes' bytes are read from the slave data descriptor; the first
+ * one is the address byte and the remaining ones are handed to the slave
+ * backend one at a time.
+ */
+static int mlxbf_smbus_irq_recv(struct mlxbf_i2c_priv *priv, u8 recv_bytes)
+{
+	u8 data_desc[MLXBF_I2C_SLAVE_DATA_DESC_SIZE] = { 0 };
+	struct i2c_client *slave = priv->slave;
+	u8 value, byte, addr;
+	int ret = 0;
+
+	if (!slave)
+		return -EINVAL;
+
+	/* Read data from Slave GW data descriptor. */
+	mlxbf_i2c_smbus_read_data(priv, data_desc, recv_bytes,
+				  MLXBF_I2C_SLAVE_DATA_DESC_ADDR);
+
+	/* Check whether its our slave address.
*/ + addr = data_desc[0] >> 1; + if (slave->addr != addr) + return -EINVAL; + + /* + * Notify the slave backend; another I2C master wants to write data + * to us. This event is sent once the slave address and the write bit + * is detected. + */ + i2c_slave_event(slave, I2C_SLAVE_WRITE_REQUESTED, &value); + + /* Send the received data to the slave backend. */ + for (byte = 1; byte < recv_bytes; byte++) { + value = data_desc[byte]; + ret = i2c_slave_event(slave, I2C_SLAVE_WRITE_RECEIVED, + &value); + if (ret < 0) + break; + } + + /* Send a stop condition to the backend. */ + i2c_slave_event(slave, I2C_SLAVE_STOP, &value); + + /* Release the Slave GW. */ + mlxbf_i2c_write(priv->smbus->io, + MLXBF_I2C_SMBUS_SLAVE_RS_MASTER_BYTES, 0x0); + mlxbf_i2c_write(priv->smbus->io, MLXBF_I2C_SMBUS_SLAVE_PEC, 0x0); + mlxbf_i2c_write(priv->smbus->io, MLXBF_I2C_SMBUS_SLAVE_READY, 0x1); + + return ret; +} + +static irqreturn_t mlxbf_smbus_irq(int irq, void *ptr) +{ + struct mlxbf_i2c_priv *priv = ptr; + bool read, write, irq_is_set; + u32 rw_bytes_reg; + u8 recv_bytes; + + /* + * Read TYU interrupt register and determine the source of the + * interrupt. Based on the source of the interrupt one of the + * following actions are performed: + * - Receive data and send response to master. + * - Send data and release slave GW. + * + * Handle read/write transaction only. CRmaster and Iarp requests + * are ignored for now. + */ + irq_is_set = mlxbf_i2c_has_coalesce(priv, &read, &write); + if (!irq_is_set || (!read && !write)) { + /* Nothing to do here, interrupt was not from this device. */ + return IRQ_NONE; + } + + /* + * The MLXBF_I2C_SMBUS_SLAVE_RS_MASTER_BYTES includes the number of + * bytes from/to master. These are defined by 8-bits each. If the lower + * 8 bits are set, then the master expect to read N bytes from the + * slave, if the higher 8 bits are sent then the slave expect N bytes + * from the master. 
+ */ + rw_bytes_reg = mlxbf_i2c_read(priv->smbus->io, + MLXBF_I2C_SMBUS_SLAVE_RS_MASTER_BYTES); + recv_bytes = (rw_bytes_reg >> 8) & GENMASK(7, 0); + + /* + * For now, the slave supports 128 bytes transfer. Discard remaining + * data bytes if the master wrote more than + * MLXBF_I2C_SLAVE_DATA_DESC_SIZE, i.e, the actual size of the slave + * data descriptor. + * + * Note that we will never expect to transfer more than 128 bytes; as + * specified in the SMBus standard, block transactions cannot exceed + * 32 bytes. + */ + recv_bytes = recv_bytes > MLXBF_I2C_SLAVE_DATA_DESC_SIZE ? + MLXBF_I2C_SLAVE_DATA_DESC_SIZE : recv_bytes; + + if (read) + mlxbf_smbus_irq_send(priv, recv_bytes); + else + mlxbf_smbus_irq_recv(priv, recv_bytes); + + return IRQ_HANDLED; +} + +/* Return negative errno on error. */ +static s32 mlxbf_i2c_smbus_xfer(struct i2c_adapter *adap, u16 addr, + unsigned short flags, char read_write, + u8 command, int size, + union i2c_smbus_data *data) +{ + struct mlxbf_i2c_smbus_request request = { 0 }; + struct mlxbf_i2c_priv *priv; + bool read, pec; + u8 byte_cnt; + + request.slave = addr; + + read = (read_write == I2C_SMBUS_READ); + pec = flags & I2C_FUNC_SMBUS_PEC; + + switch (size) { + case I2C_SMBUS_QUICK: + mlxbf_i2c_smbus_quick_command(&request, read); + dev_dbg(&adap->dev, "smbus quick, slave 0x%02x\n", addr); + break; + + case I2C_SMBUS_BYTE: + mlxbf_i2c_smbus_byte_func(&request, + read ? &data->byte : &command, read, + pec); + dev_dbg(&adap->dev, "smbus %s byte, slave 0x%02x.\n", + read ? "read" : "write", addr); + break; + + case I2C_SMBUS_BYTE_DATA: + mlxbf_i2c_smbus_data_byte_func(&request, &command, &data->byte, + read, pec); + dev_dbg(&adap->dev, "smbus %s byte data at 0x%02x, slave 0x%02x.\n", + read ? "read" : "write", command, addr); + break; + + case I2C_SMBUS_WORD_DATA: + mlxbf_i2c_smbus_data_word_func(&request, &command, + (u8 *)&data->word, read, pec); + dev_dbg(&adap->dev, "smbus %s word data at 0x%02x, slave 0x%02x.\n", + read ? 
"read" : "write", command, addr); + break; + + case I2C_SMBUS_I2C_BLOCK_DATA: + byte_cnt = data->block[0]; + mlxbf_i2c_smbus_i2c_block_func(&request, &command, data->block, + &byte_cnt, read, pec); + dev_dbg(&adap->dev, "i2c %s block data, %d bytes at 0x%02x, slave 0x%02x.\n", + read ? "read" : "write", byte_cnt, command, addr); + break; + + case I2C_SMBUS_BLOCK_DATA: + byte_cnt = read ? I2C_SMBUS_BLOCK_MAX : data->block[0]; + mlxbf_i2c_smbus_block_func(&request, &command, data->block, + &byte_cnt, read, pec); + dev_dbg(&adap->dev, "smbus %s block data, %d bytes at 0x%02x, slave 0x%02x.\n", + read ? "read" : "write", byte_cnt, command, addr); + break; + + case I2C_FUNC_SMBUS_PROC_CALL: + mlxbf_i2c_smbus_process_call_func(&request, &command, + (u8 *)&data->word, pec); + dev_dbg(&adap->dev, "process call, wr/rd at 0x%02x, slave 0x%02x.\n", + command, addr); + break; + + case I2C_FUNC_SMBUS_BLOCK_PROC_CALL: + byte_cnt = data->block[0]; + mlxbf_i2c_smbus_blk_process_call_func(&request, &command, + data->block, &byte_cnt, + pec); + dev_dbg(&adap->dev, "block process call, wr/rd %d bytes, slave 0x%02x.\n", + byte_cnt, addr); + break; + + default: + dev_dbg(&adap->dev, "Unsupported I2C/SMBus command %d\n", + size); + return -EOPNOTSUPP; + } + + priv = i2c_get_adapdata(adap); + + return mlxbf_i2c_smbus_start_transaction(priv, &request); +} + +static int mlxbf_i2c_reg_slave(struct i2c_client *slave) +{ + struct mlxbf_i2c_priv *priv = i2c_get_adapdata(slave->adapter); + int ret; + + if (priv->slave) + return -EBUSY; + + /* + * Do not support ten bit chip address and do not use Packet Error + * Checking (PEC). 
+ */ + if (slave->flags & (I2C_CLIENT_TEN | I2C_CLIENT_PEC)) + return -EAFNOSUPPORT; + + ret = mlxbf_slave_enable(priv, slave->addr); + if (ret < 0) + return ret; + + priv->slave = slave; + + return 0; +} + +static int mlxbf_i2c_unreg_slave(struct i2c_client *slave) +{ + struct mlxbf_i2c_priv *priv = i2c_get_adapdata(slave->adapter); + int ret; + + WARN_ON(!priv->slave); + + /* Unregister slave, i.e. disable the slave address in hardware. */ + ret = mlxbf_slave_disable(priv); + if (ret < 0) + return ret; + + priv->slave = NULL; + + return 0; +} + +static u32 mlxbf_i2c_functionality(struct i2c_adapter *adap) +{ + return MLXBF_I2C_FUNC_ALL; +} + +static struct mlxbf_i2c_chip_info mlxbf_i2c_chip[] = { + [MLXBF_I2C_CHIP_TYPE_1] = { + .type = MLXBF_I2C_CHIP_TYPE_1, + .shared_res = { + [0] = &mlxbf_i2c_coalesce_res[MLXBF_I2C_CHIP_TYPE_1], + [1] = &mlxbf_i2c_corepll_res[MLXBF_I2C_CHIP_TYPE_1], + [2] = &mlxbf_i2c_gpio_res[MLXBF_I2C_CHIP_TYPE_1] + }, + .calculate_freq = mlxbf_calculate_freq_from_tyu + }, + [MLXBF_I2C_CHIP_TYPE_2] = { + .type = MLXBF_I2C_CHIP_TYPE_2, + .shared_res = { + [0] = &mlxbf_i2c_corepll_res[MLXBF_I2C_CHIP_TYPE_2] + }, + .calculate_freq = mlxbf_calculate_freq_from_yu + } +}; + +static const struct i2c_algorithm mlxbf_i2c_algo = { + .smbus_xfer = mlxbf_i2c_smbus_xfer, + .functionality = mlxbf_i2c_functionality, + .reg_slave = mlxbf_i2c_reg_slave, + .unreg_slave = mlxbf_i2c_unreg_slave, +}; + +static struct i2c_adapter_quirks mlxbf_i2c_quirks = { + .max_read_len = MLXBF_I2C_MASTER_DATA_R_LENGTH, + .max_write_len = MLXBF_I2C_MASTER_DATA_W_LENGTH, +}; + +static const struct of_device_id mlxbf_i2c_dt_ids[] = { + { + .compatible = "mellanox,i2c-mlxbf1", + .data = &mlxbf_i2c_chip[MLXBF_I2C_CHIP_TYPE_1] + }, + { + .compatible = "mellanox,i2c-mlxbf2", + .data = &mlxbf_i2c_chip[MLXBF_I2C_CHIP_TYPE_2] + }, + {}, +}; + +MODULE_DEVICE_TABLE(of, mlxbf_i2c_dt_ids); + +static const struct acpi_device_id mlxbf_i2c_acpi_ids[] = { + { "MLNXBF03", 
(kernel_ulong_t)&mlxbf_i2c_chip[MLXBF_I2C_CHIP_TYPE_1] }, + { "MLNXBF23", (kernel_ulong_t)&mlxbf_i2c_chip[MLXBF_I2C_CHIP_TYPE_2] }, + {}, +}; + +MODULE_DEVICE_TABLE(acpi, mlxbf_i2c_acpi_ids); + +static int mlxbf_i2c_acpi_probe(struct device *dev, struct mlxbf_i2c_priv *priv) +{ + const struct acpi_device_id *aid; + struct acpi_device *adev; + unsigned long bus_id = 0; + const char *uid; + int ret; + + if (acpi_disabled) + return -ENOENT; + + adev = ACPI_COMPANION(dev); + if (!adev) + return -ENXIO; + + aid = acpi_match_device(mlxbf_i2c_acpi_ids, dev); + if (!aid) + return -ENODEV; + + priv->chip = (struct mlxbf_i2c_chip_info *)aid->driver_data; + + uid = acpi_device_uid(adev); + if (!uid || !(*uid)) { + dev_err(dev, "Cannot retrieve UID\n"); + return -ENODEV; + } + + ret = kstrtoul(uid, 0, &bus_id); + if (!ret) + priv->bus = bus_id; + + return ret; +} + +static int mlxbf_i2c_of_probe(struct device *dev, struct mlxbf_i2c_priv *priv) +{ + const struct of_device_id *oid; + int bus_id = -1; + + if (IS_ENABLED(CONFIG_OF) && dev->of_node) { + oid = of_match_node(mlxbf_i2c_dt_ids, dev->of_node); + if (!oid) + return -ENODEV; + + priv->chip = oid->data; + + bus_id = of_alias_get_id(dev->of_node, "i2c"); + if (bus_id >= 0) + priv->bus = bus_id; + } + + if (bus_id < 0) { + dev_err(dev, "Cannot get bus id"); + return bus_id; + } + + return 0; +} + +static int mlxbf_i2c_probe(struct platform_device *pdev) +{ + struct device *dev = &pdev->dev; + struct mlxbf_i2c_priv *priv; + struct i2c_adapter *adap; + int irq, ret; + + priv = devm_kzalloc(dev, sizeof(struct mlxbf_i2c_priv), GFP_KERNEL); + if (!priv) + return -ENOMEM; + + ret = mlxbf_i2c_acpi_probe(dev, priv); + if (ret < 0 && ret != -ENOENT && ret != -ENXIO) + ret = mlxbf_i2c_of_probe(dev, priv); + + if (ret < 0) + return ret; + + ret = mlxbf_i2c_init_resource(pdev, &priv->smbus, + MLXBF_I2C_SMBUS_RES); + if (ret < 0) { + dev_err(dev, "Cannot fetch smbus resource info"); + return ret; + } + + ret = 
mlxbf_i2c_init_resource(pdev, &priv->mst_cause, + MLXBF_I2C_MST_CAUSE_RES); + if (ret < 0) { + dev_err(dev, "Cannot fetch cause master resource info"); + return ret; + } + + ret = mlxbf_i2c_init_resource(pdev, &priv->slv_cause, + MLXBF_I2C_SLV_CAUSE_RES); + if (ret < 0) { + dev_err(dev, "Cannot fetch cause slave resource info"); + return ret; + } + + adap = &priv->adap; + adap->owner = THIS_MODULE; + adap->class = I2C_CLASS_HWMON; + adap->algo = &mlxbf_i2c_algo; + adap->quirks = &mlxbf_i2c_quirks; + adap->dev.parent = dev; + adap->dev.of_node = dev->of_node; + adap->nr = priv->bus; + + snprintf(adap->name, sizeof(adap->name), "i2c%d", adap->nr); + i2c_set_adapdata(adap, priv); + + /* Read Core PLL frequency. */ + ret = mlxbf_i2c_calculate_corepll_freq(pdev, priv); + if (ret < 0) { + dev_err(dev, "cannot get core clock frequency\n"); + /* Set to default value. */ + priv->frequency = MLXBF_I2C_COREPLL_FREQ; + } + + /* + * Initialize master. + * Note that a physical bus might be shared among Linux and firmware + * (e.g., ATF). Thus, the bus should be initialized and ready and + * bus initialization would be unnecessary. This requires additional + * knowledge about physical busses. But, since an extra initialization + * does not really hurt, then keep the code as is. 
+ */ + ret = mlxbf_i2c_init_master(pdev, priv); + if (ret < 0) { + dev_err(dev, "failed to initialize smbus master %d", + priv->bus); + return ret; + } + + mlxbf_i2c_init_timings(pdev, priv); + + mlxbf_i2c_init_slave(pdev, priv); + + irq = platform_get_irq(pdev, 0); + ret = devm_request_irq(dev, irq, mlxbf_smbus_irq, + IRQF_ONESHOT | IRQF_SHARED | IRQF_PROBE_SHARED, + dev_name(dev), priv); + if (ret < 0) { + dev_err(dev, "Cannot get irq %d\n", irq); + return ret; + } + + priv->irq = irq; + + platform_set_drvdata(pdev, priv); + + ret = i2c_add_numbered_adapter(adap); + if (ret < 0) + return ret; + + mutex_lock(&mlxbf_i2c_bus_lock); + mlxbf_i2c_bus_count++; + mutex_unlock(&mlxbf_i2c_bus_lock); + + return 0; +} + +static int mlxbf_i2c_remove(struct platform_device *pdev) +{ + struct mlxbf_i2c_priv *priv = platform_get_drvdata(pdev); + struct device *dev = &pdev->dev; + struct resource *params; + + params = priv->smbus->params; + devm_release_mem_region(dev, params->start, resource_size(params)); + + params = priv->mst_cause->params; + devm_release_mem_region(dev, params->start, resource_size(params)); + + params = priv->slv_cause->params; + devm_release_mem_region(dev, params->start, resource_size(params)); + + /* + * Release shared resources. This should be done when releasing + * the I2C controller. 
+ */ + mutex_lock(&mlxbf_i2c_bus_lock); + if (--mlxbf_i2c_bus_count == 0) { + mlxbf_i2c_release_coalesce(pdev, priv); + mlxbf_i2c_release_corepll(pdev, priv); + mlxbf_i2c_release_gpio(pdev, priv); + } + mutex_unlock(&mlxbf_i2c_bus_lock); + + devm_free_irq(dev, priv->irq, priv); + + i2c_del_adapter(&priv->adap); + + return 0; +} + +static struct platform_driver mlxbf_i2c_driver = { + .probe = mlxbf_i2c_probe, + .remove = mlxbf_i2c_remove, + .driver = { + .name = "i2c-mlxbf", + .of_match_table = mlxbf_i2c_dt_ids, + .acpi_match_table = ACPI_PTR(mlxbf_i2c_acpi_ids), + }, +}; + +static int __init mlxbf_i2c_init(void) +{ + mutex_init(&mlxbf_i2c_coalesce_lock); + mutex_init(&mlxbf_i2c_corepll_lock); + mutex_init(&mlxbf_i2c_gpio_lock); + + mutex_init(&mlxbf_i2c_bus_lock); + + return platform_driver_register(&mlxbf_i2c_driver); +} +module_init(mlxbf_i2c_init); + +static void __exit mlxbf_i2c_exit(void) +{ + platform_driver_unregister(&mlxbf_i2c_driver); + + mutex_destroy(&mlxbf_i2c_bus_lock); + + mutex_destroy(&mlxbf_i2c_gpio_lock); + mutex_destroy(&mlxbf_i2c_corepll_lock); + mutex_destroy(&mlxbf_i2c_coalesce_lock); +} +module_exit(mlxbf_i2c_exit); + +MODULE_DESCRIPTION("Mellanox BlueField I2C bus driver"); +MODULE_AUTHOR("Khalil Blaiech "); +MODULE_LICENSE("GPL v2"); From 91a73027f28748e8d84549bdc3302254fba80344 Mon Sep 17 00:00:00 2001 From: Krzysztof Kozlowski Date: Wed, 2 Sep 2020 17:06:41 +0200 Subject: [PATCH 107/243] i2c: rk3x: Simplify with dev_err_probe() Common pattern of handling deferred probe can be simplified with dev_err_probe(). Less code and the error value gets printed. 
Signed-off-by: Krzysztof Kozlowski Reviewed-by: Heiko Stuebner Signed-off-by: Wolfram Sang --- drivers/i2c/busses/i2c-rk3x.c | 19 +++++++------------ 1 file changed, 7 insertions(+), 12 deletions(-) diff --git a/drivers/i2c/busses/i2c-rk3x.c b/drivers/i2c/busses/i2c-rk3x.c index 8e3cc85d1921..819ab4ee517e 100644 --- a/drivers/i2c/busses/i2c-rk3x.c +++ b/drivers/i2c/busses/i2c-rk3x.c @@ -1312,18 +1312,13 @@ static int rk3x_i2c_probe(struct platform_device *pdev) i2c->pclk = devm_clk_get(&pdev->dev, "pclk"); } - if (IS_ERR(i2c->clk)) { - ret = PTR_ERR(i2c->clk); - if (ret != -EPROBE_DEFER) - dev_err(&pdev->dev, "Can't get bus clk: %d\n", ret); - return ret; - } - if (IS_ERR(i2c->pclk)) { - ret = PTR_ERR(i2c->pclk); - if (ret != -EPROBE_DEFER) - dev_err(&pdev->dev, "Can't get periph clk: %d\n", ret); - return ret; - } + if (IS_ERR(i2c->clk)) + return dev_err_probe(&pdev->dev, PTR_ERR(i2c->clk), + "Can't get bus clk\n"); + + if (IS_ERR(i2c->pclk)) + return dev_err_probe(&pdev->dev, PTR_ERR(i2c->pclk), + "Can't get periph clk\n"); ret = clk_prepare(i2c->clk); if (ret < 0) { From e50e4f0b85be308a01b830c5fbdffc657e1a6dd0 Mon Sep 17 00:00:00 2001 From: Krzysztof Kozlowski Date: Sun, 20 Sep 2020 23:12:38 +0200 Subject: [PATCH 108/243] i2c: imx: Fix external abort on interrupt in exit paths If interrupt comes late, during probe error path or device remove (could be triggered with CONFIG_DEBUG_SHIRQ), the interrupt handler i2c_imx_isr() will access registers with the clock being disabled. 
This leads to an external abort on non-linefetch on Toradex Colibri VF50 module (with Vybrid VF5xx): Unhandled fault: external abort on non-linefetch (0x1008) at 0x8882d003 Internal error: : 1008 [#1] ARM Modules linked in: CPU: 0 PID: 1 Comm: swapper Not tainted 5.7.0 #607 Hardware name: Freescale Vybrid VF5xx/VF6xx (Device Tree) (i2c_imx_isr) from [<8017009c>] (free_irq+0x25c/0x3b0) (free_irq) from [<805844ec>] (release_nodes+0x178/0x284) (release_nodes) from [<80580030>] (really_probe+0x10c/0x348) (really_probe) from [<80580380>] (driver_probe_device+0x60/0x170) (driver_probe_device) from [<80580630>] (device_driver_attach+0x58/0x60) (device_driver_attach) from [<805806bc>] (__driver_attach+0x84/0xc0) (__driver_attach) from [<8057e228>] (bus_for_each_dev+0x68/0xb4) (bus_for_each_dev) from [<8057f3ec>] (bus_add_driver+0x144/0x1ec) (bus_add_driver) from [<80581320>] (driver_register+0x78/0x110) (driver_register) from [<8010213c>] (do_one_initcall+0xa8/0x2f4) (do_one_initcall) from [<80c0100c>] (kernel_init_freeable+0x178/0x1dc) (kernel_init_freeable) from [<80807048>] (kernel_init+0x8/0x110) (kernel_init) from [<80100114>] (ret_from_fork+0x14/0x20) Additionally, the i2c_imx_isr() could wake up the wait queue (imx_i2c_struct->queue) before its initialization happens. The resource-managed framework should not be used for interrupt handling, because the resource will be released too late - after disabling clocks. The interrupt handler is not prepared for such a case.
Fixes: 1c4b6c3bcf30 ("i2c: imx: implement bus recovery") Cc: Signed-off-by: Krzysztof Kozlowski Acked-by: Oleksij Rempel Signed-off-by: Wolfram Sang --- drivers/i2c/busses/i2c-imx.c | 24 +++++++++++++----------- 1 file changed, 13 insertions(+), 11 deletions(-) diff --git a/drivers/i2c/busses/i2c-imx.c b/drivers/i2c/busses/i2c-imx.c index 63f4367c312b..c98529c76348 100644 --- a/drivers/i2c/busses/i2c-imx.c +++ b/drivers/i2c/busses/i2c-imx.c @@ -1169,14 +1169,6 @@ static int i2c_imx_probe(struct platform_device *pdev) return ret; } - /* Request IRQ */ - ret = devm_request_irq(&pdev->dev, irq, i2c_imx_isr, IRQF_SHARED, - pdev->name, i2c_imx); - if (ret) { - dev_err(&pdev->dev, "can't claim irq %d\n", irq); - goto clk_disable; - } - /* Init queue */ init_waitqueue_head(&i2c_imx->queue); @@ -1195,6 +1187,14 @@ static int i2c_imx_probe(struct platform_device *pdev) if (ret < 0) goto rpm_disable; + /* Request IRQ */ + ret = request_threaded_irq(irq, i2c_imx_isr, NULL, IRQF_SHARED, + pdev->name, i2c_imx); + if (ret) { + dev_err(&pdev->dev, "can't claim irq %d\n", irq); + goto rpm_disable; + } + /* Set up clock divider */ i2c_imx->bitrate = I2C_MAX_STANDARD_MODE_FREQ; ret = of_property_read_u32(pdev->dev.of_node, @@ -1237,13 +1237,12 @@ static int i2c_imx_probe(struct platform_device *pdev) clk_notifier_unregister: clk_notifier_unregister(i2c_imx->clk, &i2c_imx->clk_change_nb); + free_irq(irq, i2c_imx); rpm_disable: pm_runtime_put_noidle(&pdev->dev); pm_runtime_disable(&pdev->dev); pm_runtime_set_suspended(&pdev->dev); pm_runtime_dont_use_autosuspend(&pdev->dev); - -clk_disable: clk_disable_unprepare(i2c_imx->clk); return ret; } @@ -1251,7 +1250,7 @@ static int i2c_imx_probe(struct platform_device *pdev) static int i2c_imx_remove(struct platform_device *pdev) { struct imx_i2c_struct *i2c_imx = platform_get_drvdata(pdev); - int ret; + int irq, ret; ret = pm_runtime_get_sync(&pdev->dev); if (ret < 0) @@ -1271,6 +1270,9 @@ static int i2c_imx_remove(struct platform_device 
*pdev) imx_i2c_write_reg(0, i2c_imx, IMX_I2C_I2SR); clk_notifier_unregister(i2c_imx->clk, &i2c_imx->clk_change_nb); + irq = platform_get_irq(pdev, 0); + if (irq >= 0) + free_irq(irq, i2c_imx); clk_disable_unprepare(i2c_imx->clk); pm_runtime_put_noidle(&pdev->dev); From 41c38c272e2cc021c833030c3f5a97945ae2b773 Mon Sep 17 00:00:00 2001 From: Krzysztof Kozlowski Date: Thu, 17 Sep 2020 21:13:18 +0200 Subject: [PATCH 109/243] dt-bindings: i2c: imx-lpi2c: Add properties and use unevaluatedProperties Additional properties actually might appear (e.g. power-domains) so describe all typical properties, reference generic i2c schema and use unevaluatedProperties to fix dtbs_check warnings like: arch/arm64/boot/dts/freescale/imx8qxp-ai_ml.dt.yaml: i2c@5a800000: 'assigned-clock-rates', 'assigned-clocks', 'clock-names', 'power-domains' do not match any of the regexes: 'pinctrl-[0-9]+' arch/arm64/boot/dts/freescale/imx8qxp-colibri-eval-v3.dt.yaml: i2c@5a800000: 'touchscreen@2c' does not match any of the regexes: 'pinctrl-[0-9]+' Signed-off-by: Krzysztof Kozlowski Reviewed-by: Rob Herring Signed-off-by: Wolfram Sang --- .../devicetree/bindings/i2c/i2c-imx-lpi2c.yaml | 18 ++++++++++++++++-- 1 file changed, 16 insertions(+), 2 deletions(-) diff --git a/Documentation/devicetree/bindings/i2c/i2c-imx-lpi2c.yaml b/Documentation/devicetree/bindings/i2c/i2c-imx-lpi2c.yaml index ac0bc5dd64d6..bf68489eecd1 100644 --- a/Documentation/devicetree/bindings/i2c/i2c-imx-lpi2c.yaml +++ b/Documentation/devicetree/bindings/i2c/i2c-imx-lpi2c.yaml @@ -9,6 +9,9 @@ title: Freescale Low Power Inter IC (LPI2C) for i.MX maintainers: - Anson Huang +allOf: + - $ref: /schemas/i2c/i2c-controller.yaml# + properties: compatible: enum: @@ -22,23 +25,34 @@ properties: interrupts: maxItems: 1 + assigned-clock-parents: true + assigned-clock-rates: true + assigned-clocks: true + clock-frequency: true + + clock-names: + maxItems: 1 + clocks: maxItems: 1 + power-domains: + maxItems: 1 + required: - compatible - reg - 
interrupts - clocks -additionalProperties: false +unevaluatedProperties: false examples: - | #include #include - lpi2c7@40a50000 { + i2c@40a50000 { compatible = "fsl,imx7ulp-lpi2c"; reg = <0x40A50000 0x10000>; interrupt-parent = <&intc>; From 755f6292e3fae4a08ce3d238c9508b84a73ac47d Mon Sep 17 00:00:00 2001 From: Krzysztof Kozlowski Date: Thu, 17 Sep 2020 21:13:19 +0200 Subject: [PATCH 110/243] dt-bindings: i2c: imx-lpi2c: Fix i.MX 8QXP compatible matching The i.MX 8QXP DTSes use two compatibles so update the binding to fix dtbs_check warnings like: arch/arm64/boot/dts/freescale/imx8qxp-mek.dt.yaml: i2c@5a820000: compatible: ['fsl,imx8qxp-lpi2c', 'fsl,imx7ulp-lpi2c'] is too long Signed-off-by: Krzysztof Kozlowski Reviewed-by: Rob Herring Signed-off-by: Wolfram Sang --- .../devicetree/bindings/i2c/i2c-imx-lpi2c.yaml | 11 +++++++---- 1 file changed, 7 insertions(+), 4 deletions(-) diff --git a/Documentation/devicetree/bindings/i2c/i2c-imx-lpi2c.yaml b/Documentation/devicetree/bindings/i2c/i2c-imx-lpi2c.yaml index bf68489eecd1..29b9447f3b84 100644 --- a/Documentation/devicetree/bindings/i2c/i2c-imx-lpi2c.yaml +++ b/Documentation/devicetree/bindings/i2c/i2c-imx-lpi2c.yaml @@ -14,10 +14,13 @@ allOf: properties: compatible: - enum: - - fsl,imx7ulp-lpi2c - - fsl,imx8qxp-lpi2c - - fsl,imx8qm-lpi2c + oneOf: + - enum: + - fsl,imx7ulp-lpi2c + - fsl,imx8qm-lpi2c + - items: + - const: fsl,imx8qxp-lpi2c + - const: fsl,imx7ulp-lpi2c reg: maxItems: 1 From 37859693f83f9325559d8ec980310b3a13fa5924 Mon Sep 17 00:00:00 2001 From: Krzysztof Kozlowski Date: Thu, 17 Sep 2020 21:13:20 +0200 Subject: [PATCH 111/243] dt-bindings: i2c: imx: Add properties and use unevaluatedProperties Additional properties actually might appear (e.g. 
power-domains) so describe all typical properties, reference generic i2c schema and use unevaluatedProperties to fix dtbs_check warnings like: arch/arm64/boot/dts/freescale/imx8mn-evk.dt.yaml: i2c@30a20000: '#address-cells', '#size-cells', 'pmic@25' do not match any of the regexes: 'pinctrl-[0-9]+' Signed-off-by: Krzysztof Kozlowski Reviewed-by: Rob Herring Signed-off-by: Wolfram Sang --- Documentation/devicetree/bindings/i2c/i2c-imx.yaml | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/Documentation/devicetree/bindings/i2c/i2c-imx.yaml b/Documentation/devicetree/bindings/i2c/i2c-imx.yaml index 810536953177..a0c87307ca07 100644 --- a/Documentation/devicetree/bindings/i2c/i2c-imx.yaml +++ b/Documentation/devicetree/bindings/i2c/i2c-imx.yaml @@ -9,6 +9,9 @@ title: Freescale Inter IC (I2C) and High Speed Inter IC (HS-I2C) for i.MX maintainers: - Wolfram Sang +allOf: + - $ref: /schemas/i2c/i2c-controller.yaml# + properties: compatible: oneOf: @@ -75,7 +78,7 @@ required: - interrupts - clocks -additionalProperties: false +unevaluatedProperties: false examples: - | From d0fa235c0a6131925ff1e9839f1135c693e73dfa Mon Sep 17 00:00:00 2001 From: Krzysztof Kozlowski Date: Thu, 17 Sep 2020 21:13:21 +0200 Subject: [PATCH 112/243] dt-bindings: i2c: imx: Fix i.MX 7 compatible matching The i.MX 7 DTSes use two compatibles so update the binding to fix dtbs_check warnings like: arch/arm/boot/dts/imx7d-cl-som-imx7.dt.yaml: i2c@30a20000: compatible: ['fsl,imx7d-i2c', 'fsl,imx21-i2c'] is not valid under any of the given schemas (Possible causes of the failure): arch/arm/boot/dts/imx7d-cl-som-imx7.dt.yaml: i2c@30a20000: compatible: ['fsl,imx7d-i2c', 'fsl,imx21-i2c'] is too long Signed-off-by: Krzysztof Kozlowski Reviewed-by: Rob Herring Signed-off-by: Wolfram Sang --- Documentation/devicetree/bindings/i2c/i2c-imx.yaml | 3 +++ 1 file changed, 3 insertions(+) diff --git a/Documentation/devicetree/bindings/i2c/i2c-imx.yaml 
b/Documentation/devicetree/bindings/i2c/i2c-imx.yaml index a0c87307ca07..f23966b0d6c6 100644 --- a/Documentation/devicetree/bindings/i2c/i2c-imx.yaml +++ b/Documentation/devicetree/bindings/i2c/i2c-imx.yaml @@ -21,6 +21,9 @@ properties: - items: - const: fsl,imx35-i2c - const: fsl,imx1-i2c + - items: + - const: fsl,imx7d-i2c + - const: fsl,imx21-i2c - items: - enum: - fsl,imx25-i2c From 332fdaebb64e6fe6fdd1b0463c88bafec0faa199 Mon Sep 17 00:00:00 2001 From: Jarkko Nikula Date: Thu, 24 Sep 2020 16:52:17 +0300 Subject: [PATCH 113/243] i2c: i801: Add support for Intel Alder Lake PCH-S Add PCI ID of SMBus controller on Intel Alder Lake PCH-S Signed-off-by: Jarkko Nikula Reviewed-by: Jean Delvare Signed-off-by: Wolfram Sang --- Documentation/i2c/busses/i2c-i801.rst | 1 + drivers/i2c/busses/Kconfig | 1 + drivers/i2c/busses/i2c-i801.c | 4 ++++ 3 files changed, 6 insertions(+) diff --git a/Documentation/i2c/busses/i2c-i801.rst b/Documentation/i2c/busses/i2c-i801.rst index faf32330c335..42bbdd6e7fd8 100644 --- a/Documentation/i2c/busses/i2c-i801.rst +++ b/Documentation/i2c/busses/i2c-i801.rst @@ -44,6 +44,7 @@ Supported adapters: * Intel Tiger Lake (PCH) * Intel Jasper Lake (SOC) * Intel Emmitsburg (PCH) + * Intel Alder Lake (PCH) Datasheets: Publicly available at the Intel website diff --git a/drivers/i2c/busses/Kconfig b/drivers/i2c/busses/Kconfig index 96685b273f63..a4f473ef4e5c 100644 --- a/drivers/i2c/busses/Kconfig +++ b/drivers/i2c/busses/Kconfig @@ -147,6 +147,7 @@ config I2C_I801 Tiger Lake (PCH) Jasper Lake (SOC) Emmitsburg (PCH) + Alder Lake (PCH) This driver can also be built as a module. If so, the module will be called i2c-i801. 
diff --git a/drivers/i2c/busses/i2c-i801.c b/drivers/i2c/busses/i2c-i801.c index efab1e71ad6a..dbcaf1d0cd37 100644 --- a/drivers/i2c/busses/i2c-i801.c +++ b/drivers/i2c/busses/i2c-i801.c @@ -71,6 +71,7 @@ * Tiger Lake-H (PCH) 0x43a3 32 hard yes yes yes * Jasper Lake (SOC) 0x4da3 32 hard yes yes yes * Comet Lake-V (PCH) 0xa3a3 32 hard yes yes yes + * Alder Lake-S (PCH) 0x7aa3 32 hard yes yes yes * * Features supported by this driver: * Software PEC no @@ -228,6 +229,7 @@ #define PCI_DEVICE_ID_INTEL_ELKHART_LAKE_SMBUS 0x4b23 #define PCI_DEVICE_ID_INTEL_JASPER_LAKE_SMBUS 0x4da3 #define PCI_DEVICE_ID_INTEL_BROXTON_SMBUS 0x5ad4 +#define PCI_DEVICE_ID_INTEL_ALDER_LAKE_S_SMBUS 0x7aa3 #define PCI_DEVICE_ID_INTEL_LYNXPOINT_SMBUS 0x8c22 #define PCI_DEVICE_ID_INTEL_WILDCATPOINT_SMBUS 0x8ca2 #define PCI_DEVICE_ID_INTEL_WELLSBURG_SMBUS 0x8d22 @@ -1081,6 +1083,7 @@ static const struct pci_device_id i801_ids[] = { { PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_TIGERLAKE_LP_SMBUS) }, { PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_TIGERLAKE_H_SMBUS) }, { PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_JASPER_LAKE_SMBUS) }, + { PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_ALDER_LAKE_S_SMBUS) }, { 0, } }; @@ -1758,6 +1761,7 @@ static int i801_probe(struct pci_dev *dev, const struct pci_device_id *id) case PCI_DEVICE_ID_INTEL_TIGERLAKE_H_SMBUS: case PCI_DEVICE_ID_INTEL_JASPER_LAKE_SMBUS: case PCI_DEVICE_ID_INTEL_EBG_SMBUS: + case PCI_DEVICE_ID_INTEL_ALDER_LAKE_S_SMBUS: priv->features |= FEATURE_BLOCK_PROC; priv->features |= FEATURE_I2C_BLOCK_READ; priv->features |= FEATURE_IRQ; From 795ee9b8a1076915c3b08eb73cf9fd4da9c52ae6 Mon Sep 17 00:00:00 2001 From: Wang ShaoBo Date: Fri, 18 Sep 2020 16:25:08 +0800 Subject: [PATCH 114/243] i2c: efm32: Use devm_platform_get_and_ioremap_resource() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Make use of devm_platform_get_and_ioremap_resource() provided by driver core platform 
instead of duplicated analogue. dev_err() is removed because it has been done in devm_ioremap_resource(). Signed-off-by: Wang ShaoBo Acked-by: Uwe Kleine-König Signed-off-by: Wolfram Sang --- drivers/i2c/busses/i2c-efm32.c | 12 +++--------- 1 file changed, 3 insertions(+), 9 deletions(-) diff --git a/drivers/i2c/busses/i2c-efm32.c b/drivers/i2c/busses/i2c-efm32.c index 838ce0947191..f6e13ceeb2b3 100644 --- a/drivers/i2c/busses/i2c-efm32.c +++ b/drivers/i2c/busses/i2c-efm32.c @@ -332,21 +332,15 @@ static int efm32_i2c_probe(struct platform_device *pdev) return ret; } - res = platform_get_resource(pdev, IORESOURCE_MEM, 0); - if (!res) { - dev_err(&pdev->dev, "failed to determine base address\n"); - return -ENODEV; - } + ddata->base = devm_platform_get_and_ioremap_resource(pdev, 0, &res); + if (IS_ERR(ddata->base)) + return PTR_ERR(ddata->base); if (resource_size(res) < 0x42) { dev_err(&pdev->dev, "memory resource too small\n"); return -EINVAL; } - ddata->base = devm_ioremap_resource(&pdev->dev, res); - if (IS_ERR(ddata->base)) - return PTR_ERR(ddata->base); - ret = platform_get_irq(pdev, 0); if (ret <= 0) { if (!ret) From 06856269d43ab3ce6609c90a6e42bcd9564eaa02 Mon Sep 17 00:00:00 2001 From: Barry Song Date: Wed, 9 Sep 2020 16:10:01 +1200 Subject: [PATCH 115/243] i2c: busses: replace spin_lock_irqsave by spin_lock in hard IRQ The code already runs in an irq-disabled context since it is a hard IRQ handler, so there is no need to disable interrupts again.
Signed-off-by: Barry Song Reviewed-by: Akash Asthana Reviewed-by: Mukesh Kumar Savaliya Signed-off-by: Wolfram Sang --- drivers/i2c/busses/i2c-mv64xxx.c | 5 ++--- drivers/i2c/busses/i2c-owl.c | 5 ++--- drivers/i2c/busses/i2c-qcom-geni.c | 5 ++--- 3 files changed, 6 insertions(+), 9 deletions(-) diff --git a/drivers/i2c/busses/i2c-mv64xxx.c b/drivers/i2c/busses/i2c-mv64xxx.c index 8d9d4ffdcd24..e0e45fc19b8f 100644 --- a/drivers/i2c/busses/i2c-mv64xxx.c +++ b/drivers/i2c/busses/i2c-mv64xxx.c @@ -496,11 +496,10 @@ static irqreturn_t mv64xxx_i2c_intr(int irq, void *dev_id) { struct mv64xxx_i2c_data *drv_data = dev_id; - unsigned long flags; u32 status; irqreturn_t rc = IRQ_NONE; - spin_lock_irqsave(&drv_data->lock, flags); + spin_lock(&drv_data->lock); if (drv_data->offload_enabled) rc = mv64xxx_i2c_intr_offload(drv_data); @@ -517,7 +516,7 @@ mv64xxx_i2c_intr(int irq, void *dev_id) rc = IRQ_HANDLED; } - spin_unlock_irqrestore(&drv_data->lock, flags); + spin_unlock(&drv_data->lock); return rc; } diff --git a/drivers/i2c/busses/i2c-owl.c b/drivers/i2c/busses/i2c-owl.c index 672f1f239bd6..618d3013d0b6 100644 --- a/drivers/i2c/busses/i2c-owl.c +++ b/drivers/i2c/busses/i2c-owl.c @@ -165,10 +165,9 @@ static irqreturn_t owl_i2c_interrupt(int irq, void *_dev) { struct owl_i2c_dev *i2c_dev = _dev; struct i2c_msg *msg = i2c_dev->msg; - unsigned long flags; unsigned int stat, fifostat; - spin_lock_irqsave(&i2c_dev->lock, flags); + spin_lock(&i2c_dev->lock); i2c_dev->err = 0; @@ -208,7 +207,7 @@ static irqreturn_t owl_i2c_interrupt(int irq, void *_dev) OWL_I2C_STAT_IRQP, true); complete_all(&i2c_dev->msg_complete); - spin_unlock_irqrestore(&i2c_dev->lock, flags); + spin_unlock(&i2c_dev->lock); return IRQ_HANDLED; } diff --git a/drivers/i2c/busses/i2c-qcom-geni.c b/drivers/i2c/busses/i2c-qcom-geni.c index dead5db3315a..8b4c35f47a70 100644 --- a/drivers/i2c/busses/i2c-qcom-geni.c +++ b/drivers/i2c/busses/i2c-qcom-geni.c @@ -210,9 +210,8 @@ static irqreturn_t geni_i2c_irq(int irq, 
void *dev) u32 dma; u32 val; struct i2c_msg *cur; - unsigned long flags; - spin_lock_irqsave(&gi2c->lock, flags); + spin_lock(&gi2c->lock); m_stat = readl_relaxed(base + SE_GENI_M_IRQ_STATUS); rx_st = readl_relaxed(base + SE_GENI_RX_FIFO_STATUS); dm_tx_st = readl_relaxed(base + SE_DMA_TX_IRQ_STAT); @@ -294,7 +293,7 @@ static irqreturn_t geni_i2c_irq(int irq, void *dev) dm_rx_st & RX_DMA_DONE || dm_rx_st & RX_RESET_DONE) complete(&gi2c->done); - spin_unlock_irqrestore(&gi2c->lock, flags); + spin_unlock(&gi2c->lock); return IRQ_HANDLED; } From ba1bcafb29571f525bf563972e4241998db74e98 Mon Sep 17 00:00:00 2001 From: Bartosz Golaszewski Date: Thu, 17 Sep 2020 13:46:56 +0200 Subject: [PATCH 116/243] rtc: rx8010: rename rx8010_init_client() to rx8010_init() Since the switch to using regmap this function no longer takes the I2C client struct as argument nor do we even interact with the client anywhere other than when creating the regmap. Rename it to a less misleading name: "rx8010_init()". Signed-off-by: Bartosz Golaszewski Signed-off-by: Alexandre Belloni Link: https://lore.kernel.org/r/20200917114656.9036-1-brgl@bgdev.pl --- drivers/rtc/rtc-rx8010.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/rtc/rtc-rx8010.c b/drivers/rtc/rtc-rx8010.c index 01e9017d4025..dca41a2a39b2 100644 --- a/drivers/rtc/rtc-rx8010.c +++ b/drivers/rtc/rtc-rx8010.c @@ -169,7 +169,7 @@ static int rx8010_set_time(struct device *dev, struct rtc_time *dt) return 0; } -static int rx8010_init_client(struct device *dev) +static int rx8010_init(struct device *dev) { struct rx8010_data *rx8010 = dev_get_drvdata(dev); u8 ctrl[2]; @@ -391,7 +391,7 @@ static int rx8010_probe(struct i2c_client *client) if (IS_ERR(rx8010->regs)) return PTR_ERR(rx8010->regs); - err = rx8010_init_client(dev); + err = rx8010_init(dev); if (err) return err; From 9ce42e8e0323d39fad01f3d17c35dd16d91c4f46 Mon Sep 17 00:00:00 2001 From: Liu Shixin Date: Sat, 19 Sep 2020 18:08:56 +0800 Subject: [PATCH 
117/243] rtc: meson: simplify the return expression of meson_vrtc_probe Simplify the return expression. Signed-off-by: Liu Shixin Signed-off-by: Alexandre Belloni Acked-by: Kevin Hilman Link: https://lore.kernel.org/r/20200919100856.1639319-1-liushixin2@huawei.com --- drivers/rtc/rtc-meson-vrtc.c | 7 +------ 1 file changed, 1 insertion(+), 6 deletions(-) diff --git a/drivers/rtc/rtc-meson-vrtc.c b/drivers/rtc/rtc-meson-vrtc.c index 89e5ba0dae69..e6bd0808a092 100644 --- a/drivers/rtc/rtc-meson-vrtc.c +++ b/drivers/rtc/rtc-meson-vrtc.c @@ -65,7 +65,6 @@ static const struct rtc_class_ops meson_vrtc_ops = { static int meson_vrtc_probe(struct platform_device *pdev) { struct meson_vrtc_data *vrtc; - int ret; vrtc = devm_kzalloc(&pdev->dev, sizeof(*vrtc), GFP_KERNEL); if (!vrtc) @@ -84,11 +83,7 @@ static int meson_vrtc_probe(struct platform_device *pdev) return PTR_ERR(vrtc->rtc); vrtc->rtc->ops = &meson_vrtc_ops; - ret = rtc_register_device(vrtc->rtc); - if (ret) - return ret; - - return 0; + return rtc_register_device(vrtc->rtc); } static int __maybe_unused meson_vrtc_suspend(struct device *dev) From 179b4bcc4c0cf62e3737c718e05f5a69b3e9041c Mon Sep 17 00:00:00 2001 From: Liu Shixin Date: Mon, 21 Sep 2020 16:24:49 +0800 Subject: [PATCH 118/243] rtc: rv8803: simplify the return expression of rv8803_nvram_write Simplify the return expression. 
Signed-off-by: Liu Shixin Signed-off-by: Alexandre Belloni Link: https://lore.kernel.org/r/20200921082449.2591981-1-liushixin2@huawei.com --- drivers/rtc/rtc-rv8803.c | 8 +------- 1 file changed, 1 insertion(+), 7 deletions(-) diff --git a/drivers/rtc/rtc-rv8803.c b/drivers/rtc/rtc-rv8803.c index 93c3a6b627bd..c6d8e3425688 100644 --- a/drivers/rtc/rtc-rv8803.c +++ b/drivers/rtc/rtc-rv8803.c @@ -454,13 +454,7 @@ static int rv8803_ioctl(struct device *dev, unsigned int cmd, unsigned long arg) static int rv8803_nvram_write(void *priv, unsigned int offset, void *val, size_t bytes) { - int ret; - - ret = rv8803_write_reg(priv, RV8803_RAM, *(u8 *)val); - if (ret) - return ret; - - return 0; + return rv8803_write_reg(priv, RV8803_RAM, *(u8 *)val); } static int rv8803_nvram_read(void *priv, unsigned int offset, From fb08334bb38f56d308d3e15b47bca67529cffc87 Mon Sep 17 00:00:00 2001 From: Nick Desaulniers Date: Thu, 17 Sep 2020 14:45:45 -0700 Subject: [PATCH 119/243] nfs: remove incorrect fallthrough label There is no case after the default from which to fallthrough to. Clang will error in this case (unhelpfully without context, see link below) and GCC will with -Wswitch-unreachable. The previous commit should have just replaced the comment with a break statement. If we consider implicit fallthrough to be a design mistake of C, then all case statements should be terminated with one of the following statements: * break * continue * return * fallthrough * goto * (call of function with __attribute__(__noreturn__)) Fixes: 2a1390c95a69 ("nfs: Convert to use the preferred fallthrough macro") Link: https://bugs.llvm.org/show_bug.cgi?id=47539 Acked-by: Gustavo A. R. Silva Reviewed-by: Gustavo A. R. 
Silva Reviewed-by: Miaohe Lin Reviewed-by: Nathan Chancellor Suggested-by: Joe Perches Signed-off-by: Nick Desaulniers Signed-off-by: Anna Schumaker --- fs/nfs/super.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fs/nfs/super.c b/fs/nfs/super.c index d20326ee0475..eb2401079b04 100644 --- a/fs/nfs/super.c +++ b/fs/nfs/super.c @@ -889,7 +889,7 @@ static struct nfs_server *nfs_try_mount_request(struct fs_context *fc) default: if (rpcauth_get_gssinfo(flavor, &info) != 0) continue; - fallthrough; + break; } dfprintk(MOUNT, "NFS: attempting to use auth flavor %u\n", flavor); ctx->selected_flavor = flavor; From b4868b44c5628995fdd8ef2e24dda73cef963a75 Mon Sep 17 00:00:00 2001 From: Benjamin Coddington Date: Fri, 25 Sep 2020 15:48:39 -0400 Subject: [PATCH 120/243] NFSv4: Wait for stateid updates after CLOSE/OPEN_DOWNGRADE Since commit 0e0cb35b417f ("NFSv4: Handle NFS4ERR_OLD_STATEID in CLOSE/OPEN_DOWNGRADE") the following livelock may occur if a CLOSE races with the update of the nfs_state: Process 1 Process 2 Server ========= ========= ======== OPEN file OPEN file Reply OPEN (1) Reply OPEN (2) Update state (1) CLOSE file (1) Reply OLD_STATEID (1) CLOSE file (2) Reply CLOSE (-1) Update state (2) wait for state change OPEN file wake CLOSE file OPEN file wake CLOSE file ... ... We can avoid this situation by not issuing an immediate retry with a bumped seqid when CLOSE/OPEN_DOWNGRADE receives NFS4ERR_OLD_STATEID. Instead, take the same approach used by OPEN and wait at least 5 seconds for outstanding stateid updates to complete if we can detect that we're out of sequence. Note that after this change it is still possible (though unlikely) that CLOSE waits a full 5 seconds, bumps the seqid, and retries -- and that attempt races with another OPEN at the same time. 
In order to avoid this race (which would result in the livelock), update nfs_need_update_open_stateid() to handle the case where: - the state is NFS_OPEN_STATE, and - the stateid doesn't match the current open stateid Finally, nfs_need_update_open_stateid() is modified to be idempotent and renamed to better suit the purpose of signaling that the stateid passed is the next stateid in sequence. Fixes: 0e0cb35b417f ("NFSv4: Handle NFS4ERR_OLD_STATEID in CLOSE/OPEN_DOWNGRADE") Cc: stable@vger.kernel.org # v5.4+ Signed-off-by: Benjamin Coddington Signed-off-by: Anna Schumaker --- fs/nfs/nfs4_fs.h | 8 +++++ fs/nfs/nfs4proc.c | 81 +++++++++++++++++++++++++++------------------- fs/nfs/nfs4trace.h | 1 + 3 files changed, 56 insertions(+), 34 deletions(-) diff --git a/fs/nfs/nfs4_fs.h b/fs/nfs/nfs4_fs.h index 0c9505dc852c..065cb04222a1 100644 --- a/fs/nfs/nfs4_fs.h +++ b/fs/nfs/nfs4_fs.h @@ -599,6 +599,14 @@ static inline bool nfs4_stateid_is_newer(const nfs4_stateid *s1, const nfs4_stat return (s32)(be32_to_cpu(s1->seqid) - be32_to_cpu(s2->seqid)) > 0; } +static inline bool nfs4_stateid_is_next(const nfs4_stateid *s1, const nfs4_stateid *s2) +{ + u32 seq1 = be32_to_cpu(s1->seqid); + u32 seq2 = be32_to_cpu(s2->seqid); + + return seq2 == seq1 + 1U || (seq2 == 1U && seq1 == 0xffffffffU); +} + static inline bool nfs4_stateid_match_or_older(const nfs4_stateid *dst, const nfs4_stateid *src) { return nfs4_stateid_match_other(dst, src) && diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c index 542961ffa529..f7ef2ca699a5 100644 --- a/fs/nfs/nfs4proc.c +++ b/fs/nfs/nfs4proc.c @@ -1550,19 +1550,6 @@ static void nfs_state_log_update_open_stateid(struct nfs4_state *state) wake_up_all(&state->waitq); } -static void nfs_state_log_out_of_order_open_stateid(struct nfs4_state *state, - const nfs4_stateid *stateid) -{ - u32 state_seqid = be32_to_cpu(state->open_stateid.seqid); - u32 stateid_seqid = be32_to_cpu(stateid->seqid); - - if (stateid_seqid == state_seqid + 1U || - (stateid_seqid == 
1U && state_seqid == 0xffffffffU)) - nfs_state_log_update_open_stateid(state); - else - set_bit(NFS_STATE_CHANGE_WAIT, &state->flags); -} - static void nfs_test_and_clear_all_open_stateid(struct nfs4_state *state) { struct nfs_client *clp = state->owner->so_server->nfs_client; @@ -1588,21 +1575,19 @@ static void nfs_test_and_clear_all_open_stateid(struct nfs4_state *state) * i.e. The stateid seqids have to be initialised to 1, and * are then incremented on every state transition. */ -static bool nfs_need_update_open_stateid(struct nfs4_state *state, +static bool nfs_stateid_is_sequential(struct nfs4_state *state, const nfs4_stateid *stateid) { - if (test_bit(NFS_OPEN_STATE, &state->flags) == 0 || - !nfs4_stateid_match_other(stateid, &state->open_stateid)) { + if (test_bit(NFS_OPEN_STATE, &state->flags)) { + /* The common case - we're updating to a new sequence number */ + if (nfs4_stateid_match_other(stateid, &state->open_stateid) && + nfs4_stateid_is_next(&state->open_stateid, stateid)) { + return true; + } + } else { + /* This is the first OPEN in this generation */ if (stateid->seqid == cpu_to_be32(1)) - nfs_state_log_update_open_stateid(state); - else - set_bit(NFS_STATE_CHANGE_WAIT, &state->flags); - return true; - } - - if (nfs4_stateid_is_newer(stateid, &state->open_stateid)) { - nfs_state_log_out_of_order_open_stateid(state, stateid); - return true; + return true; } return false; } @@ -1676,16 +1661,16 @@ static void nfs_set_open_stateid_locked(struct nfs4_state *state, int status = 0; for (;;) { - if (!nfs_need_update_open_stateid(state, stateid)) - return; - if (!test_bit(NFS_STATE_CHANGE_WAIT, &state->flags)) + if (nfs_stateid_is_sequential(state, stateid)) break; + if (status) break; /* Rely on seqids for serialisation with NFSv4.0 */ if (!nfs4_has_session(NFS_SERVER(state->inode)->nfs_client)) break; + set_bit(NFS_STATE_CHANGE_WAIT, &state->flags); prepare_to_wait(&state->waitq, &wait, TASK_KILLABLE); /* * Ensure we process the state changes in the 
same order @@ -1696,6 +1681,7 @@ static void nfs_set_open_stateid_locked(struct nfs4_state *state, spin_unlock(&state->owner->so_lock); rcu_read_unlock(); trace_nfs4_open_stateid_update_wait(state->inode, stateid, 0); + if (!signal_pending(current)) { if (schedule_timeout(5*HZ) == 0) status = -EAGAIN; @@ -3438,7 +3424,8 @@ static bool nfs4_refresh_open_old_stateid(nfs4_stateid *dst, __be32 seqid_open; u32 dst_seqid; bool ret; - int seq; + int seq, status = -EAGAIN; + DEFINE_WAIT(wait); for (;;) { ret = false; @@ -3450,15 +3437,41 @@ static bool nfs4_refresh_open_old_stateid(nfs4_stateid *dst, continue; break; } + + write_seqlock(&state->seqlock); seqid_open = state->open_stateid.seqid; - if (read_seqretry(&state->seqlock, seq)) - continue; dst_seqid = be32_to_cpu(dst->seqid); - if ((s32)(dst_seqid - be32_to_cpu(seqid_open)) >= 0) - dst->seqid = cpu_to_be32(dst_seqid + 1); - else + + /* Did another OPEN bump the state's seqid? try again: */ + if ((s32)(be32_to_cpu(seqid_open) - dst_seqid) > 0) { dst->seqid = seqid_open; + write_sequnlock(&state->seqlock); + ret = true; + break; + } + + /* server says we're behind but we haven't seen the update yet */ + set_bit(NFS_STATE_CHANGE_WAIT, &state->flags); + prepare_to_wait(&state->waitq, &wait, TASK_KILLABLE); + write_sequnlock(&state->seqlock); + trace_nfs4_close_stateid_update_wait(state->inode, dst, 0); + + if (signal_pending(current)) + status = -EINTR; + else + if (schedule_timeout(5*HZ) != 0) + status = 0; + + finish_wait(&state->waitq, &wait); + + if (!status) + continue; + if (status == -EINTR) + break; + + /* we slept the whole 5 seconds, we must have lost a seqid */ + dst->seqid = cpu_to_be32(dst_seqid + 1); ret = true; break; } diff --git a/fs/nfs/nfs4trace.h b/fs/nfs/nfs4trace.h index b4f852d4d099..484c1da96dea 100644 --- a/fs/nfs/nfs4trace.h +++ b/fs/nfs/nfs4trace.h @@ -1511,6 +1511,7 @@ DEFINE_NFS4_INODE_STATEID_EVENT(nfs4_setattr); DEFINE_NFS4_INODE_STATEID_EVENT(nfs4_delegreturn); 
DEFINE_NFS4_INODE_STATEID_EVENT(nfs4_open_stateid_update); DEFINE_NFS4_INODE_STATEID_EVENT(nfs4_open_stateid_update_wait); +DEFINE_NFS4_INODE_STATEID_EVENT(nfs4_close_stateid_update_wait); DECLARE_EVENT_CLASS(nfs4_getattr_event, TP_PROTO( From 5904c16d2210b967caf66b04a0c26cfa6a7a0328 Mon Sep 17 00:00:00 2001 From: Yang Shi Date: Sun, 27 Sep 2020 04:42:20 -0700 Subject: [PATCH 121/243] fs: nfs: return per memcg count for xattr shrinkers The list_lru_count() returns the per node count, but the new xattr shrinkers are memcg aware, so the shrinkers should return per memcg count by calling list_lru_shrink_count() instead. Otherwise over-shrink might be experienced. The problem was spotted by visual code inspection. Cc: Trond Myklebust Cc: Anna Schumaker Cc: Frank van der Linden Signed-off-by: Yang Shi Reviewed-by: Frank van der Linden Signed-off-by: Anna Schumaker --- fs/nfs/nfs42xattr.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/fs/nfs/nfs42xattr.c b/fs/nfs/nfs42xattr.c index 22396a7eebe1..b51424ff8159 100644 --- a/fs/nfs/nfs42xattr.c +++ b/fs/nfs/nfs42xattr.c @@ -881,7 +881,7 @@ nfs4_xattr_cache_count(struct shrinker *shrink, struct shrink_control *sc) { unsigned long count; - count = list_lru_count(&nfs4_xattr_cache_lru); + count = list_lru_shrink_count(&nfs4_xattr_cache_lru, sc); return vfs_pressure_ratio(count); } @@ -975,7 +975,7 @@ nfs4_xattr_entry_count(struct shrinker *shrink, struct shrink_control *sc) lru = (shrink == &nfs4_xattr_large_entry_shrinker) ? 
&nfs4_xattr_large_entry_lru : &nfs4_xattr_entry_lru; - count = list_lru_count(lru); + count = list_lru_shrink_count(lru, sc); return vfs_pressure_ratio(count); } From 35baff672f80afe569905e3873d961e1ed2d64a1 Mon Sep 17 00:00:00 2001 From: Wolfram Sang Date: Mon, 28 Sep 2020 13:06:47 +0200 Subject: [PATCH 122/243] i2c: testunit: improve documentation Mention that new CMDs will be NACKed while the old one is still on-going, that the I2C address parameter of READ_BYTES is 7 bit only, and reword one paragraph to be more precise. Signed-off-by: Wolfram Sang Signed-off-by: Wolfram Sang --- Documentation/i2c/slave-testunit-backend.rst | 13 ++++++++----- 1 file changed, 8 insertions(+), 5 deletions(-) diff --git a/Documentation/i2c/slave-testunit-backend.rst b/Documentation/i2c/slave-testunit-backend.rst index f537c62a8a83..2c38e64f0bac 100644 --- a/Documentation/i2c/slave-testunit-backend.rst +++ b/Documentation/i2c/slave-testunit-backend.rst @@ -20,10 +20,10 @@ Instantiating the device is regular. Example for bus 0, address 0x30: # echo "slave-testunit 0x1030" > /sys/bus/i2c/devices/i2c-0/new_device -After that, you will have a write-only device listening. Reads will return an -8-bit version number. The device consists of 4 8-bit registers and all must be -written to start a testcase, i.e. you must always write 4 bytes to the device. -The registers are: +After that, you will have a write-only device listening. Reads will just return +an 8-bit version number of the testunit. When writing, the device consists of 4 +8-bit registers and all must be written to start a testcase, i.e. you must +always write 4 bytes to the device. The registers are: 0x00 CMD - which test to trigger 0x01 DATAL - configuration byte 1 for the test @@ -35,6 +35,9 @@ Using 'i2cset' from the i2c-tools package, the generic command looks like: # i2cset -y i DELAY is a generic parameter which will delay the execution of the test in CMD. 
+While a command is running (including the delay), new commands will not be +acknowledged. You need to wait until the old one is completed. + The commands are described in the following section. An invalid command will result in the transfer not being acknowledged. @@ -44,7 +47,7 @@ Commands 0x00 NOOP (reserved for future use) 0x01 READ_BYTES (also needs master mode) - DATAL - address to read data from + DATAL - address to read data from (lower 7 bits, highest bit currently unused) DATAH - number of bytes to read This is useful to test if your bus master driver is handling multi-master From 900aed24d3e45353e22f7fc00d6826b87c55761a Mon Sep 17 00:00:00 2001 From: Dmitry Osipenko Date: Wed, 30 Sep 2020 01:18:44 +0300 Subject: [PATCH 123/243] i2c: tegra: Make tegra_i2c_flush_fifos() usable in atomic transfer MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The tegra_i2c_flush_fifos() shouldn't sleep in atomic transfer and jiffies are not updating if interrupts are disabled. Let's switch to use iopoll API helpers for register-polling. The iopoll API provides helpers for both atomic and non-atomic cases. Note that this patch doesn't fix any known problem because normally FIFO is flushed at the time of starting a new transfer. 
Reviewed-by: Michał Mirosław Reviewed-by: Andy Shevchenko Tested-by: Thierry Reding Signed-off-by: Dmitry Osipenko Signed-off-by: Wolfram Sang --- drivers/i2c/busses/i2c-tegra.c | 25 ++++++++++++++++--------- 1 file changed, 16 insertions(+), 9 deletions(-) diff --git a/drivers/i2c/busses/i2c-tegra.c b/drivers/i2c/busses/i2c-tegra.c index 00d3e4d7a01e..ab88cdd70376 100644 --- a/drivers/i2c/busses/i2c-tegra.c +++ b/drivers/i2c/busses/i2c-tegra.c @@ -470,9 +470,9 @@ static int tegra_i2c_init_dma(struct tegra_i2c_dev *i2c_dev) static int tegra_i2c_flush_fifos(struct tegra_i2c_dev *i2c_dev) { - unsigned long timeout = jiffies + HZ; - unsigned int offset; - u32 mask, val; + u32 mask, val, offset, reg_offset; + void __iomem *addr; + int err; if (i2c_dev->hw->has_mst_fifo) { mask = I2C_MST_FIFO_CONTROL_TX_FLUSH | @@ -488,12 +488,19 @@ static int tegra_i2c_flush_fifos(struct tegra_i2c_dev *i2c_dev) val |= mask; i2c_writel(i2c_dev, val, offset); - while (i2c_readl(i2c_dev, offset) & mask) { - if (time_after(jiffies, timeout)) { - dev_warn(i2c_dev->dev, "timeout waiting for fifo flush\n"); - return -ETIMEDOUT; - } - usleep_range(1000, 2000); + reg_offset = tegra_i2c_reg_addr(i2c_dev, offset); + addr = i2c_dev->base + reg_offset; + + if (i2c_dev->is_curr_atomic_xfer) + err = readl_relaxed_poll_timeout_atomic(addr, val, !(val & mask), + 1000, 1000000); + else + err = readl_relaxed_poll_timeout(addr, val, !(val & mask), + 1000, 1000000); + + if (err) { + dev_err(i2c_dev->dev, "failed to flush FIFO\n"); + return err; } return 0; } From e4fc2efbc310cfb665eed558da42ac18084d20bc Mon Sep 17 00:00:00 2001 From: Dmitry Osipenko Date: Wed, 30 Sep 2020 01:18:45 +0300 Subject: [PATCH 124/243] i2c: tegra: Add missing pm_runtime_put() The pm_runtime_get_sync() always bumps refcount regardless of whether it succeeds or fails. Hence driver is responsible for restoring of the RPM refcounting. 
This patch adds missing RPM puts which restore refcounting in a case of pm_runtime_get_sync() error. Suggested-by: Andy Shevchenko Reviewed-by: Andy Shevchenko Reviewed-by: Thierry Reding Tested-by: Thierry Reding Signed-off-by: Dmitry Osipenko Signed-off-by: Wolfram Sang --- drivers/i2c/busses/i2c-tegra.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/i2c/busses/i2c-tegra.c b/drivers/i2c/busses/i2c-tegra.c index ab88cdd70376..4e7d0eec0dd3 100644 --- a/drivers/i2c/busses/i2c-tegra.c +++ b/drivers/i2c/busses/i2c-tegra.c @@ -1375,6 +1375,7 @@ static int tegra_i2c_xfer(struct i2c_adapter *adap, struct i2c_msg msgs[], ret = pm_runtime_get_sync(i2c_dev->dev); if (ret < 0) { dev_err(i2c_dev->dev, "runtime resume failed %d\n", ret); + pm_runtime_put_noidle(i2c_dev->dev); return ret; } @@ -1786,7 +1787,7 @@ static int tegra_i2c_probe(struct platform_device *pdev) ret = pm_runtime_get_sync(i2c_dev->dev); if (ret < 0) { dev_err(&pdev->dev, "runtime resume failed\n"); - goto disable_rpm; + goto put_rpm; } } @@ -1851,7 +1852,6 @@ static int tegra_i2c_probe(struct platform_device *pdev) else tegra_i2c_runtime_suspend(&pdev->dev); -disable_rpm: if (pm_runtime_enabled(&pdev->dev)) pm_runtime_disable(&pdev->dev); From c5418d4cc9f028f3f0fc61c62c781e020113108f Mon Sep 17 00:00:00 2001 From: Dmitry Osipenko Date: Wed, 30 Sep 2020 01:18:46 +0300 Subject: [PATCH 125/243] i2c: tegra: Handle potential error of tegra_i2c_flush_fifos() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Technically the tegra_i2c_flush_fifos() may fail and transfer should be aborted in this case, but this shouldn't ever happen in practice unless there is a bug somewhere in the driver. Let's add the error check just for completeness. 
Reviewed-by: Michał Mirosław Reviewed-by: Andy Shevchenko Reviewed-by: Thierry Reding Tested-by: Thierry Reding Signed-off-by: Dmitry Osipenko Signed-off-by: Wolfram Sang --- drivers/i2c/busses/i2c-tegra.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/drivers/i2c/busses/i2c-tegra.c b/drivers/i2c/busses/i2c-tegra.c index 4e7d0eec0dd3..88d6e7bb14a2 100644 --- a/drivers/i2c/busses/i2c-tegra.c +++ b/drivers/i2c/busses/i2c-tegra.c @@ -1177,7 +1177,9 @@ static int tegra_i2c_xfer_msg(struct tegra_i2c_dev *i2c_dev, bool dma; u16 xfer_time = 100; - tegra_i2c_flush_fifos(i2c_dev); + err = tegra_i2c_flush_fifos(i2c_dev); + if (err) + return err; i2c_dev->msg_buf = msg->buf; i2c_dev->msg_buf_remaining = msg->len; From ed022e5dd3a044b95a68d685d2d2871ce31351df Mon Sep 17 00:00:00 2001 From: Dmitry Osipenko Date: Wed, 30 Sep 2020 01:18:47 +0300 Subject: [PATCH 126/243] i2c: tegra: Mask interrupt in tegra_i2c_issue_bus_clear() The tegra_i2c_issue_bus_clear() may fail and in this case BUS_CLR_DONE stays unmasked. Hence let's mask it for consistency. This patch doesn't fix any known problems. 
Reviewed-by: Andy Shevchenko Reviewed-by: Thierry Reding Tested-by: Thierry Reding Signed-off-by: Dmitry Osipenko Signed-off-by: Wolfram Sang --- drivers/i2c/busses/i2c-tegra.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/i2c/busses/i2c-tegra.c b/drivers/i2c/busses/i2c-tegra.c index 88d6e7bb14a2..1d1ce266255a 100644 --- a/drivers/i2c/busses/i2c-tegra.c +++ b/drivers/i2c/busses/i2c-tegra.c @@ -1149,6 +1149,8 @@ static int tegra_i2c_issue_bus_clear(struct i2c_adapter *adap) time_left = tegra_i2c_wait_completion_timeout( i2c_dev, &i2c_dev->msg_complete, 50); + tegra_i2c_mask_irq(i2c_dev, I2C_INT_BUS_CLR_DONE); + if (time_left == 0) { dev_err(i2c_dev->dev, "timed out for bus clear\n"); return -ETIMEDOUT; From 8548a75f3e8d8d39c1f4ceca04c4febfa8eaa127 Mon Sep 17 00:00:00 2001 From: Dmitry Osipenko Date: Wed, 30 Sep 2020 01:18:48 +0300 Subject: [PATCH 127/243] i2c: tegra: Initialize div-clk rate unconditionally MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit It doesn't make sense to conditionalize the div-clk rate changes because rate is fixed and it won't ever change once it's set at the driver's probe time. All further changes are NO-OPs because CCF caches rate and skips rate-change if rate is unchanged. 
Reviewed-by: Michał Mirosław Reviewed-by: Andy Shevchenko Acked-by: Thierry Reding Tested-by: Thierry Reding Signed-off-by: Dmitry Osipenko Signed-off-by: Wolfram Sang --- drivers/i2c/busses/i2c-tegra.c | 34 ++++++++++++++++------------------ 1 file changed, 16 insertions(+), 18 deletions(-) diff --git a/drivers/i2c/busses/i2c-tegra.c b/drivers/i2c/busses/i2c-tegra.c index 1d1ce266255a..720a75439e91 100644 --- a/drivers/i2c/busses/i2c-tegra.c +++ b/drivers/i2c/busses/i2c-tegra.c @@ -293,7 +293,7 @@ struct tegra_i2c_dev { bool is_curr_atomic_xfer; }; -static int tegra_i2c_init(struct tegra_i2c_dev *i2c_dev, bool clk_reinit); +static int tegra_i2c_init(struct tegra_i2c_dev *i2c_dev); static void dvc_writel(struct tegra_i2c_dev *i2c_dev, u32 val, unsigned long reg) @@ -691,7 +691,7 @@ static int __maybe_unused tegra_i2c_runtime_resume(struct device *dev) * domain ON. */ if (i2c_dev->is_vi) { - ret = tegra_i2c_init(i2c_dev, true); + ret = tegra_i2c_init(i2c_dev); if (ret) goto disable_div_clk; } @@ -778,7 +778,7 @@ static void tegra_i2c_vi_init(struct tegra_i2c_dev *i2c_dev) i2c_writel(i2c_dev, 0x0, I2C_TLOW_SEXT); } -static int tegra_i2c_init(struct tegra_i2c_dev *i2c_dev, bool clk_reinit) +static int tegra_i2c_init(struct tegra_i2c_dev *i2c_dev) { u32 val; int err; @@ -836,16 +836,14 @@ static int tegra_i2c_init(struct tegra_i2c_dev *i2c_dev, bool clk_reinit) if (i2c_dev->hw->has_interface_timing_reg && tsu_thd) i2c_writel(i2c_dev, tsu_thd, I2C_INTERFACE_TIMING_1); - if (!clk_reinit) { - clk_multiplier = (tlow + thigh + 2); - clk_multiplier *= (i2c_dev->clk_divisor_non_hs_mode + 1); - err = clk_set_rate(i2c_dev->div_clk, - i2c_dev->bus_clk_rate * clk_multiplier); - if (err) { - dev_err(i2c_dev->dev, - "failed changing clock rate: %d\n", err); - return err; - } + clk_multiplier = tlow + thigh + 2; + clk_multiplier *= i2c_dev->clk_divisor_non_hs_mode + 1; + + err = clk_set_rate(i2c_dev->div_clk, + i2c_dev->bus_clk_rate * clk_multiplier); + if (err) { + 
dev_err(i2c_dev->dev, "failed to set div-clk rate: %d\n", err); + return err; } if (!i2c_dev->is_dvc && !i2c_dev->is_vi) { @@ -1319,7 +1317,7 @@ static int tegra_i2c_xfer_msg(struct tegra_i2c_dev *i2c_dev, if (!time_left && !completion_done(&i2c_dev->dma_complete)) { dev_err(i2c_dev->dev, "DMA transfer timeout\n"); - tegra_i2c_init(i2c_dev, true); + tegra_i2c_init(i2c_dev); return -ETIMEDOUT; } @@ -1340,7 +1338,7 @@ static int tegra_i2c_xfer_msg(struct tegra_i2c_dev *i2c_dev, if (time_left == 0) { dev_err(i2c_dev->dev, "i2c transfer timed out\n"); - tegra_i2c_init(i2c_dev, true); + tegra_i2c_init(i2c_dev); return -ETIMEDOUT; } @@ -1352,7 +1350,7 @@ static int tegra_i2c_xfer_msg(struct tegra_i2c_dev *i2c_dev, if (likely(i2c_dev->msg_err == I2C_ERR_NONE)) return 0; - tegra_i2c_init(i2c_dev, true); + tegra_i2c_init(i2c_dev); /* start recovery upon arbitration loss in single master mode */ if (i2c_dev->msg_err == I2C_ERR_ARBITRATION_LOST) { if (!i2c_dev->is_multimaster_mode) @@ -1811,7 +1809,7 @@ static int tegra_i2c_probe(struct platform_device *pdev) if (ret < 0) goto disable_div_clk; - ret = tegra_i2c_init(i2c_dev, false); + ret = tegra_i2c_init(i2c_dev); if (ret) { dev_err(&pdev->dev, "Failed to initialize i2c controller\n"); goto release_dma; @@ -1918,7 +1916,7 @@ static int __maybe_unused tegra_i2c_resume(struct device *dev) if (err) return err; - err = tegra_i2c_init(i2c_dev, false); + err = tegra_i2c_init(i2c_dev); if (err) return err; From 8ff727316a0930e308032fc2814d57fdaa30047f Mon Sep 17 00:00:00 2001 From: Dmitry Osipenko Date: Wed, 30 Sep 2020 01:18:49 +0300 Subject: [PATCH 128/243] i2c: tegra: Remove i2c_dev.clk_divisor_non_hs_mode member MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The "non_hs_mode" divisor value is fixed, thus there is no need to have the variable i2c_dev.clk_divisor_non_hs_mode struct member. 
Let's remove it and move the mode selection into tegra_i2c_init() where it can be united with the timing selection. Reviewed-by: Michał Mirosław Reviewed-by: Andy Shevchenko Reviewed-by: Thierry Reding Tested-by: Thierry Reding Signed-off-by: Dmitry Osipenko Signed-off-by: Wolfram Sang --- drivers/i2c/busses/i2c-tegra.c | 46 ++++++++++++++++------------------ 1 file changed, 21 insertions(+), 25 deletions(-) diff --git a/drivers/i2c/busses/i2c-tegra.c b/drivers/i2c/busses/i2c-tegra.c index 720a75439e91..85ed0e02d48c 100644 --- a/drivers/i2c/busses/i2c-tegra.c +++ b/drivers/i2c/busses/i2c-tegra.c @@ -250,7 +250,6 @@ struct tegra_i2c_hw_feature { * @msg_buf_remaining: size of unsent data in the message buffer * @msg_read: identifies read transfers * @bus_clk_rate: current I2C bus clock rate - * @clk_divisor_non_hs_mode: clock divider for non-high-speed modes * @is_multimaster_mode: track if I2C controller is in multi-master mode * @tx_dma_chan: DMA transmit channel * @rx_dma_chan: DMA receive channel @@ -281,7 +280,6 @@ struct tegra_i2c_dev { size_t msg_buf_remaining; int msg_read; u32 bus_clk_rate; - u16 clk_divisor_non_hs_mode; bool is_multimaster_mode; struct dma_chan *tx_dma_chan; struct dma_chan *rx_dma_chan; @@ -783,6 +781,7 @@ static int tegra_i2c_init(struct tegra_i2c_dev *i2c_dev) u32 val; int err; u32 clk_divisor, clk_multiplier; + u32 non_hs_mode; u32 tsu_thd; u8 tlow, thigh; @@ -805,24 +804,33 @@ static int tegra_i2c_init(struct tegra_i2c_dev *i2c_dev) if (i2c_dev->is_vi) tegra_i2c_vi_init(i2c_dev); - /* Make sure clock divisor programmed correctly */ - clk_divisor = FIELD_PREP(I2C_CLK_DIVISOR_HSMODE, - i2c_dev->hw->clk_divisor_hs_mode) | - FIELD_PREP(I2C_CLK_DIVISOR_STD_FAST_MODE, - i2c_dev->clk_divisor_non_hs_mode); - i2c_writel(i2c_dev, clk_divisor, I2C_CLK_DIVISOR); - - if (i2c_dev->bus_clk_rate > I2C_MAX_STANDARD_MODE_FREQ && - i2c_dev->bus_clk_rate <= I2C_MAX_FAST_MODE_PLUS_FREQ) { + switch (i2c_dev->bus_clk_rate) { + case 
I2C_MAX_STANDARD_MODE_FREQ + 1 ... I2C_MAX_FAST_MODE_PLUS_FREQ: + default: tlow = i2c_dev->hw->tlow_fast_fastplus_mode; thigh = i2c_dev->hw->thigh_fast_fastplus_mode; tsu_thd = i2c_dev->hw->setup_hold_time_fast_fast_plus_mode; - } else { + + if (i2c_dev->bus_clk_rate > I2C_MAX_FAST_MODE_FREQ) + non_hs_mode = i2c_dev->hw->clk_divisor_fast_plus_mode; + else + non_hs_mode = i2c_dev->hw->clk_divisor_fast_mode; + break; + + case 0 ... I2C_MAX_STANDARD_MODE_FREQ: tlow = i2c_dev->hw->tlow_std_mode; thigh = i2c_dev->hw->thigh_std_mode; tsu_thd = i2c_dev->hw->setup_hold_time_std_mode; + non_hs_mode = i2c_dev->hw->clk_divisor_std_mode; + break; } + /* Make sure clock divisor programmed correctly */ + clk_divisor = FIELD_PREP(I2C_CLK_DIVISOR_HSMODE, + i2c_dev->hw->clk_divisor_hs_mode) | + FIELD_PREP(I2C_CLK_DIVISOR_STD_FAST_MODE, non_hs_mode); + i2c_writel(i2c_dev, clk_divisor, I2C_CLK_DIVISOR); + if (i2c_dev->hw->has_interface_timing_reg) { val = FIELD_PREP(I2C_INTERFACE_TIMING_THIGH, thigh) | FIELD_PREP(I2C_INTERFACE_TIMING_TLOW, tlow); @@ -837,7 +845,7 @@ static int tegra_i2c_init(struct tegra_i2c_dev *i2c_dev) i2c_writel(i2c_dev, tsu_thd, I2C_INTERFACE_TIMING_1); clk_multiplier = tlow + thigh + 2; - clk_multiplier *= i2c_dev->clk_divisor_non_hs_mode + 1; + clk_multiplier *= non_hs_mode + 1; err = clk_set_rate(i2c_dev->div_clk, i2c_dev->bus_clk_rate * clk_multiplier); @@ -1751,18 +1759,6 @@ static int tegra_i2c_probe(struct platform_device *pdev) goto unprepare_fast_clk; } - if (i2c_dev->bus_clk_rate > I2C_MAX_FAST_MODE_FREQ && - i2c_dev->bus_clk_rate <= I2C_MAX_FAST_MODE_PLUS_FREQ) - i2c_dev->clk_divisor_non_hs_mode = - i2c_dev->hw->clk_divisor_fast_plus_mode; - else if (i2c_dev->bus_clk_rate > I2C_MAX_STANDARD_MODE_FREQ && - i2c_dev->bus_clk_rate <= I2C_MAX_FAST_MODE_FREQ) - i2c_dev->clk_divisor_non_hs_mode = - i2c_dev->hw->clk_divisor_fast_mode; - else - i2c_dev->clk_divisor_non_hs_mode = - i2c_dev->hw->clk_divisor_std_mode; - ret = clk_prepare(i2c_dev->div_clk); if 
(ret < 0) { dev_err(i2c_dev->dev, "Clock prepare failed %d\n", ret); From 76bc845a78c5e6e8741930ea3c66e7d99ae3b8b5 Mon Sep 17 00:00:00 2001 From: Dmitry Osipenko Date: Wed, 30 Sep 2020 01:18:50 +0300 Subject: [PATCH 129/243] i2c: tegra: Runtime PM always available on Tegra MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The runtime PM is guaranteed to be always available on Tegra after commit 40b2bb1b132a ("ARM: tegra: enforce PM requirement"). Hence let's remove all the RPM-availability checking and handling from the code. Reviewed-by: Michał Mirosław Reviewed-by: Andy Shevchenko Reviewed-by: Thierry Reding Tested-by: Thierry Reding Signed-off-by: Dmitry Osipenko Signed-off-by: Wolfram Sang --- drivers/i2c/busses/i2c-tegra.c | 29 ++++++----------------------- 1 file changed, 6 insertions(+), 23 deletions(-) diff --git a/drivers/i2c/busses/i2c-tegra.c b/drivers/i2c/busses/i2c-tegra.c index 85ed0e02d48c..a52c72135390 100644 --- a/drivers/i2c/busses/i2c-tegra.c +++ b/drivers/i2c/busses/i2c-tegra.c @@ -1775,18 +1775,10 @@ static int tegra_i2c_probe(struct platform_device *pdev) if (!i2c_dev->is_vi) pm_runtime_irq_safe(&pdev->dev); pm_runtime_enable(&pdev->dev); - if (!pm_runtime_enabled(&pdev->dev)) { - ret = tegra_i2c_runtime_resume(&pdev->dev); - if (ret < 0) { - dev_err(&pdev->dev, "runtime resume failed\n"); - goto unprepare_div_clk; - } - } else { - ret = pm_runtime_get_sync(i2c_dev->dev); - if (ret < 0) { - dev_err(&pdev->dev, "runtime resume failed\n"); - goto put_rpm; - } + ret = pm_runtime_get_sync(i2c_dev->dev); + if (ret < 0) { + dev_err(dev, "runtime resume failed\n"); + goto put_rpm; } if (i2c_dev->is_multimaster_mode) { @@ -1845,15 +1837,8 @@ static int tegra_i2c_probe(struct platform_device *pdev) clk_disable(i2c_dev->div_clk); put_rpm: - if (pm_runtime_enabled(&pdev->dev)) - pm_runtime_put_sync(&pdev->dev); - else - tegra_i2c_runtime_suspend(&pdev->dev); - - if (pm_runtime_enabled(&pdev->dev)) - 
pm_runtime_disable(&pdev->dev); - -unprepare_div_clk: + pm_runtime_put_sync(&pdev->dev); + pm_runtime_disable(&pdev->dev); clk_unprepare(i2c_dev->div_clk); unprepare_slow_clk: @@ -1875,8 +1860,6 @@ static int tegra_i2c_remove(struct platform_device *pdev) clk_disable(i2c_dev->div_clk); pm_runtime_disable(&pdev->dev); - if (!pm_runtime_status_suspended(&pdev->dev)) - tegra_i2c_runtime_suspend(&pdev->dev); clk_unprepare(i2c_dev->div_clk); clk_unprepare(i2c_dev->slow_clk); From d3dfd625ffb0632e2eb1edaf343df6c23bc6fd1e Mon Sep 17 00:00:00 2001 From: Dmitry Osipenko Date: Wed, 30 Sep 2020 01:18:51 +0300 Subject: [PATCH 130/243] i2c: tegra: Remove error message used for devm_request_irq() failure MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The error message prints number of vIRQ, which isn't useful information. In practice devm_request_irq() never fails, hence let's remove the bogus message in order to make code cleaner. Reviewed-by: Michał Mirosław Reviewed-by: Andy Shevchenko Reviewed-by: Thierry Reding Tested-by: Thierry Reding Signed-off-by: Dmitry Osipenko Signed-off-by: Wolfram Sang --- drivers/i2c/busses/i2c-tegra.c | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/drivers/i2c/busses/i2c-tegra.c b/drivers/i2c/busses/i2c-tegra.c index a52c72135390..b813c0976c10 100644 --- a/drivers/i2c/busses/i2c-tegra.c +++ b/drivers/i2c/busses/i2c-tegra.c @@ -1807,10 +1807,8 @@ static int tegra_i2c_probe(struct platform_device *pdev) ret = devm_request_irq(&pdev->dev, i2c_dev->irq, tegra_i2c_isr, IRQF_NO_SUSPEND, dev_name(&pdev->dev), i2c_dev); - if (ret) { - dev_err(&pdev->dev, "Failed to request irq %i\n", i2c_dev->irq); + if (ret) goto release_dma; - } i2c_set_adapdata(&i2c_dev->adapter, i2c_dev); i2c_dev->adapter.owner = THIS_MODULE; From 85934909678993827fe538d994d6e3da91f6e046 Mon Sep 17 00:00:00 2001 From: Dmitry Osipenko Date: Wed, 30 Sep 2020 01:18:52 +0300 Subject: [PATCH 131/243] i2c: tegra: Use
reset_control_reset() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Use a single reset_control_reset() instead of assert/deassert couple in order to make code cleaner a tad. Note that the reset_control_reset() uses 1 microsecond delay instead of 2 that was used previously, but this shouldn't matter because one microsecond is a default reset time for most of Tegra peripherals and TRM doesn't mention anything special in regards to I2C controller's reset propagation time. In addition don't ignore potential error of the reset control by emitting a noisy warning if it fails, which will indicate an existence of a severe problem, while still allowing the machine to boot up. Reviewed-by: Michał Mirosław Reviewed-by: Andy Shevchenko Acked-by: Thierry Reding Tested-by: Thierry Reding Signed-off-by: Dmitry Osipenko Signed-off-by: Wolfram Sang --- drivers/i2c/busses/i2c-tegra.c | 13 ++++++++++--- 1 file changed, 10 insertions(+), 3 deletions(-) diff --git a/drivers/i2c/busses/i2c-tegra.c b/drivers/i2c/busses/i2c-tegra.c index b813c0976c10..90ba2f5327c5 100644 --- a/drivers/i2c/busses/i2c-tegra.c +++ b/drivers/i2c/busses/i2c-tegra.c @@ -785,9 +785,16 @@ static int tegra_i2c_init(struct tegra_i2c_dev *i2c_dev) u32 tsu_thd; u8 tlow, thigh; - reset_control_assert(i2c_dev->rst); - udelay(2); - reset_control_deassert(i2c_dev->rst); + /* + * The reset shouldn't ever fail in practice. The failure will be a + * sign of a severe problem that needs to be resolved. Still we don't + * want to fail the initialization completely because this may break + * kernel boot up since voltage regulators use I2C. Hence, we will + * emit a noisy warning on error, which won't stay unnoticed and + * won't hose machine entirely.
+ */ + err = reset_control_reset(i2c_dev->rst); + WARN_ON_ONCE(err); if (i2c_dev->is_dvc) tegra_dvc_init(i2c_dev); From f7d62a11858ccb6b620ff28e3c9bc639ddd9a1cb Mon Sep 17 00:00:00 2001 From: Dmitry Osipenko Date: Wed, 30 Sep 2020 01:18:53 +0300 Subject: [PATCH 132/243] i2c: tegra: Use devm_platform_get_and_ioremap_resource() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Driver now uses devm_platform_get_and_ioremap_resource() which replaces the typical boilerplate code and makes code cleaner. Reviewed-by: Michał Mirosław Reviewed-by: Andy Shevchenko Acked-by: Thierry Reding Tested-by: Thierry Reding Signed-off-by: Dmitry Osipenko Signed-off-by: Wolfram Sang --- drivers/i2c/busses/i2c-tegra.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/drivers/i2c/busses/i2c-tegra.c b/drivers/i2c/busses/i2c-tegra.c index 90ba2f5327c5..c2bbdf92b11f 100644 --- a/drivers/i2c/busses/i2c-tegra.c +++ b/drivers/i2c/busses/i2c-tegra.c @@ -1678,12 +1678,12 @@ static int tegra_i2c_probe(struct platform_device *pdev) int irq; int ret; - res = platform_get_resource(pdev, IORESOURCE_MEM, 0); - base_phys = res->start; - base = devm_ioremap_resource(&pdev->dev, res); + base = devm_platform_get_and_ioremap_resource(pdev, 0, &res); if (IS_ERR(base)) return PTR_ERR(base); + base_phys = res->start; + res = platform_get_resource(pdev, IORESOURCE_IRQ, 0); if (!res) { dev_err(&pdev->dev, "no irq resource\n"); From 20b9a6c3ef2cb41de1bde1a89cbbb2e4abe58467 Mon Sep 17 00:00:00 2001 From: Dmitry Osipenko Date: Wed, 30 Sep 2020 01:18:54 +0300 Subject: [PATCH 133/243] i2c: tegra: Use platform_get_irq() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Use common helper for retrieval of the interrupt number in order to make code cleaner. Note that platform_get_irq() prints error message by itself.
Reviewed-by: Michał Mirosław Reviewed-by: Andy Shevchenko Acked-by: Thierry Reding Tested-by: Thierry Reding Signed-off-by: Dmitry Osipenko Signed-off-by: Wolfram Sang --- drivers/i2c/busses/i2c-tegra.c | 9 +++------ 1 file changed, 3 insertions(+), 6 deletions(-) diff --git a/drivers/i2c/busses/i2c-tegra.c b/drivers/i2c/busses/i2c-tegra.c index c2bbdf92b11f..505b5d37077d 100644 --- a/drivers/i2c/busses/i2c-tegra.c +++ b/drivers/i2c/busses/i2c-tegra.c @@ -1684,12 +1684,9 @@ static int tegra_i2c_probe(struct platform_device *pdev) base_phys = res->start; - res = platform_get_resource(pdev, IORESOURCE_IRQ, 0); - if (!res) { - dev_err(&pdev->dev, "no irq resource\n"); - return -EINVAL; - } - irq = res->start; + irq = platform_get_irq(pdev, 0); + if (irq < 0) + return irq; div_clk = devm_clk_get(&pdev->dev, "div-clk"); if (IS_ERR(div_clk)) { From 586a97d907ac348c96bae1b994e59ca6b8643fb6 Mon Sep 17 00:00:00 2001 From: Dmitry Osipenko Date: Wed, 30 Sep 2020 01:18:55 +0300 Subject: [PATCH 134/243] i2c: tegra: Use clk-bulk helpers Use clk-bulk helpers and factor out clocks initialization into separate function in order to make code cleaner. The clocks initialization is now performed after reset-control initialization in order to avoid noisy -EPROBE_DEFER errors on T186+ from the clk-bulk helper which doesn't silence this error code. Hence reset_control_get() now may return -EPROBE_DEFER on newer Tegra SoCs because they use BPMP driver that provides reset controls and BPMP doesn't come up early during boot. Previously rst was protected by the clocks retrieval and now this patch makes dev_err_probe() to be used for the rst error handling.
Suggested-by: Andy Shevchenko Reviewed-by: Andy Shevchenko Signed-off-by: Dmitry Osipenko Signed-off-by: Wolfram Sang --- drivers/i2c/busses/i2c-tegra.c | 188 ++++++++++++--------------------- 1 file changed, 68 insertions(+), 120 deletions(-) diff --git a/drivers/i2c/busses/i2c-tegra.c b/drivers/i2c/busses/i2c-tegra.c index 505b5d37077d..b389cd0ce23a 100644 --- a/drivers/i2c/busses/i2c-tegra.c +++ b/drivers/i2c/busses/i2c-tegra.c @@ -165,9 +165,6 @@ enum msg_end_type { * @has_continue_xfer_support: Continue transfer supports. * @has_per_pkt_xfer_complete_irq: Has enable/disable capability for transfer * complete interrupt per packet basis. - * @has_single_clk_source: The I2C controller has single clock source. Tegra30 - * and earlier SoCs have two clock sources i.e. div-clk and - * fast-clk. * @has_config_load_reg: Has the config load register to load the new * configuration. * @clk_divisor_hs_mode: Clock divisor in HS mode. @@ -208,7 +205,6 @@ enum msg_end_type { struct tegra_i2c_hw_feature { bool has_continue_xfer_support; bool has_per_pkt_xfer_complete_irq; - bool has_single_clk_source; bool has_config_load_reg; int clk_divisor_hs_mode; int clk_divisor_std_mode; @@ -236,7 +232,8 @@ struct tegra_i2c_hw_feature { * @hw: Tegra I2C HW feature * @adapter: core I2C layer adapter information * @div_clk: clock reference for div clock of I2C controller - * @fast_clk: clock reference for fast clock of I2C controller + * @clocks: array of I2C controller clocks + * @nclocks: number of clocks in the array * @rst: reset control for the I2C controller * @base: ioremapped registers cookie * @base_phys: physical base address of the I2C controller @@ -265,8 +262,8 @@ struct tegra_i2c_dev { const struct tegra_i2c_hw_feature *hw; struct i2c_adapter adapter; struct clk *div_clk; - struct clk *fast_clk; - struct clk *slow_clk; + struct clk_bulk_data clocks[2]; + unsigned int nclocks; struct reset_control *rst; void __iomem *base; phys_addr_t base_phys; @@ -662,25 +659,9 @@ static 
int __maybe_unused tegra_i2c_runtime_resume(struct device *dev) if (ret) return ret; - ret = clk_enable(i2c_dev->fast_clk); - if (ret < 0) { - dev_err(i2c_dev->dev, - "Enabling fast clk failed, err %d\n", ret); + ret = clk_bulk_enable(i2c_dev->nclocks, i2c_dev->clocks); + if (ret) return ret; - } - - ret = clk_enable(i2c_dev->slow_clk); - if (ret < 0) { - dev_err(dev, "failed to enable slow clock: %d\n", ret); - goto disable_fast_clk; - } - - ret = clk_enable(i2c_dev->div_clk); - if (ret < 0) { - dev_err(i2c_dev->dev, - "Enabling div clk failed, err %d\n", ret); - goto disable_slow_clk; - } /* * VI I2C device is attached to VE power domain which goes through @@ -691,17 +672,14 @@ static int __maybe_unused tegra_i2c_runtime_resume(struct device *dev) if (i2c_dev->is_vi) { ret = tegra_i2c_init(i2c_dev); if (ret) - goto disable_div_clk; + goto disable_clocks; } return 0; -disable_div_clk: - clk_disable(i2c_dev->div_clk); -disable_slow_clk: - clk_disable(i2c_dev->slow_clk); -disable_fast_clk: - clk_disable(i2c_dev->fast_clk); +disable_clocks: + clk_bulk_disable(i2c_dev->nclocks, i2c_dev->clocks); + return ret; } @@ -709,9 +687,7 @@ static int __maybe_unused tegra_i2c_runtime_suspend(struct device *dev) { struct tegra_i2c_dev *i2c_dev = dev_get_drvdata(dev); - clk_disable(i2c_dev->div_clk); - clk_disable(i2c_dev->slow_clk); - clk_disable(i2c_dev->fast_clk); + clk_bulk_disable(i2c_dev->nclocks, i2c_dev->clocks); return pinctrl_pm_select_idle_state(i2c_dev->dev); } @@ -1479,7 +1455,6 @@ static struct i2c_bus_recovery_info tegra_i2c_recovery_info = { static const struct tegra_i2c_hw_feature tegra20_i2c_hw = { .has_continue_xfer_support = false, .has_per_pkt_xfer_complete_irq = false, - .has_single_clk_source = false, .clk_divisor_hs_mode = 3, .clk_divisor_std_mode = 0, .clk_divisor_fast_mode = 0, @@ -1504,7 +1479,6 @@ static const struct tegra_i2c_hw_feature tegra20_i2c_hw = { static const struct tegra_i2c_hw_feature tegra30_i2c_hw = { .has_continue_xfer_support = true, 
.has_per_pkt_xfer_complete_irq = false, - .has_single_clk_source = false, .clk_divisor_hs_mode = 3, .clk_divisor_std_mode = 0, .clk_divisor_fast_mode = 0, @@ -1529,7 +1503,6 @@ static const struct tegra_i2c_hw_feature tegra30_i2c_hw = { static const struct tegra_i2c_hw_feature tegra114_i2c_hw = { .has_continue_xfer_support = true, .has_per_pkt_xfer_complete_irq = true, - .has_single_clk_source = true, .clk_divisor_hs_mode = 1, .clk_divisor_std_mode = 0x19, .clk_divisor_fast_mode = 0x19, @@ -1554,7 +1527,6 @@ static const struct tegra_i2c_hw_feature tegra114_i2c_hw = { static const struct tegra_i2c_hw_feature tegra124_i2c_hw = { .has_continue_xfer_support = true, .has_per_pkt_xfer_complete_irq = true, - .has_single_clk_source = true, .clk_divisor_hs_mode = 1, .clk_divisor_std_mode = 0x19, .clk_divisor_fast_mode = 0x19, @@ -1579,7 +1551,6 @@ static const struct tegra_i2c_hw_feature tegra124_i2c_hw = { static const struct tegra_i2c_hw_feature tegra210_i2c_hw = { .has_continue_xfer_support = true, .has_per_pkt_xfer_complete_irq = true, - .has_single_clk_source = true, .clk_divisor_hs_mode = 1, .clk_divisor_std_mode = 0x19, .clk_divisor_fast_mode = 0x19, @@ -1604,7 +1575,6 @@ static const struct tegra_i2c_hw_feature tegra210_i2c_hw = { static const struct tegra_i2c_hw_feature tegra186_i2c_hw = { .has_continue_xfer_support = true, .has_per_pkt_xfer_complete_irq = true, - .has_single_clk_source = true, .clk_divisor_hs_mode = 1, .clk_divisor_std_mode = 0x16, .clk_divisor_fast_mode = 0x19, @@ -1629,7 +1599,6 @@ static const struct tegra_i2c_hw_feature tegra186_i2c_hw = { static const struct tegra_i2c_hw_feature tegra194_i2c_hw = { .has_continue_xfer_support = true, .has_per_pkt_xfer_complete_irq = true, - .has_single_clk_source = true, .clk_divisor_hs_mode = 1, .clk_divisor_std_mode = 0x4f, .clk_divisor_fast_mode = 0x3c, @@ -1666,13 +1635,59 @@ static const struct of_device_id tegra_i2c_of_match[] = { }; MODULE_DEVICE_TABLE(of, tegra_i2c_of_match); +static int 
tegra_i2c_init_clocks(struct tegra_i2c_dev *i2c_dev) +{ + int err; + + i2c_dev->clocks[i2c_dev->nclocks++].id = "div-clk"; + + if (i2c_dev->hw == &tegra20_i2c_hw || i2c_dev->hw == &tegra30_i2c_hw) + i2c_dev->clocks[i2c_dev->nclocks++].id = "fast-clk"; + + if (i2c_dev->is_vi) + i2c_dev->clocks[i2c_dev->nclocks++].id = "slow"; + + err = devm_clk_bulk_get(i2c_dev->dev, i2c_dev->nclocks, + i2c_dev->clocks); + if (err) + return err; + + err = clk_bulk_prepare(i2c_dev->nclocks, i2c_dev->clocks); + if (err) + return err; + + i2c_dev->div_clk = i2c_dev->clocks[0].clk; + + if (!i2c_dev->is_multimaster_mode) + return 0; + + err = clk_enable(i2c_dev->div_clk); + if (err) { + dev_err(i2c_dev->dev, "failed to enable div-clk: %d\n", err); + goto unprepare_clocks; + } + + return 0; + +unprepare_clocks: + clk_bulk_unprepare(i2c_dev->nclocks, i2c_dev->clocks); + + return err; +} + +static void tegra_i2c_release_clocks(struct tegra_i2c_dev *i2c_dev) +{ + if (i2c_dev->is_multimaster_mode) + clk_disable(i2c_dev->div_clk); + + clk_bulk_unprepare(i2c_dev->nclocks, i2c_dev->clocks); +} + static int tegra_i2c_probe(struct platform_device *pdev) { struct device *dev = &pdev->dev; struct tegra_i2c_dev *i2c_dev; struct resource *res; - struct clk *div_clk; - struct clk *fast_clk; void __iomem *base; phys_addr_t base_phys; int irq; @@ -1688,21 +1703,12 @@ static int tegra_i2c_probe(struct platform_device *pdev) if (irq < 0) return irq; - div_clk = devm_clk_get(&pdev->dev, "div-clk"); - if (IS_ERR(div_clk)) { - if (PTR_ERR(div_clk) != -EPROBE_DEFER) - dev_err(&pdev->dev, "missing controller clock\n"); - - return PTR_ERR(div_clk); - } - i2c_dev = devm_kzalloc(&pdev->dev, sizeof(*i2c_dev), GFP_KERNEL); if (!i2c_dev) return -ENOMEM; i2c_dev->base = base; i2c_dev->base_phys = base_phys; - i2c_dev->div_clk = div_clk; i2c_dev->adapter.algo = &tegra_i2c_algo; i2c_dev->adapter.retries = 1; i2c_dev->adapter.timeout = 6 * HZ; @@ -1712,12 +1718,17 @@ static int tegra_i2c_probe(struct platform_device 
*pdev) i2c_dev->rst = devm_reset_control_get_exclusive(&pdev->dev, "i2c"); if (IS_ERR(i2c_dev->rst)) { - dev_err(&pdev->dev, "missing controller reset\n"); + dev_err_probe(&pdev->dev, PTR_ERR(i2c_dev->rst), + "failed to get reset control\n"); return PTR_ERR(i2c_dev->rst); } tegra_i2c_parse_dt(i2c_dev); + ret = tegra_i2c_init_clocks(i2c_dev); + if (ret) + return ret; + i2c_dev->hw = of_device_get_match_data(&pdev->dev); i2c_dev->is_dvc = of_device_is_compatible(pdev->dev.of_node, "nvidia,tegra20-i2c-dvc"); @@ -1729,46 +1740,8 @@ static int tegra_i2c_probe(struct platform_device *pdev) init_completion(&i2c_dev->msg_complete); init_completion(&i2c_dev->dma_complete); - if (!i2c_dev->hw->has_single_clk_source) { - fast_clk = devm_clk_get(&pdev->dev, "fast-clk"); - if (IS_ERR(fast_clk)) { - dev_err(&pdev->dev, "missing fast clock\n"); - return PTR_ERR(fast_clk); - } - i2c_dev->fast_clk = fast_clk; - } - - if (i2c_dev->is_vi) { - i2c_dev->slow_clk = devm_clk_get(dev, "slow"); - if (IS_ERR(i2c_dev->slow_clk)) { - if (PTR_ERR(i2c_dev->slow_clk) != -EPROBE_DEFER) - dev_err(dev, "failed to get slow clock: %ld\n", - PTR_ERR(i2c_dev->slow_clk)); - - return PTR_ERR(i2c_dev->slow_clk); - } - } - platform_set_drvdata(pdev, i2c_dev); - ret = clk_prepare(i2c_dev->fast_clk); - if (ret < 0) { - dev_err(i2c_dev->dev, "Clock prepare failed %d\n", ret); - return ret; - } - - ret = clk_prepare(i2c_dev->slow_clk); - if (ret < 0) { - dev_err(dev, "failed to prepare slow clock: %d\n", ret); - goto unprepare_fast_clk; - } - - ret = clk_prepare(i2c_dev->div_clk); - if (ret < 0) { - dev_err(i2c_dev->dev, "Clock prepare failed %d\n", ret); - goto unprepare_slow_clk; - } - /* * VI I2C is in VE power domain which is not always on and not * an IRQ safe. 
So, IRQ safe device can't be attached to a non-IRQ @@ -1785,21 +1758,12 @@ static int tegra_i2c_probe(struct platform_device *pdev) goto put_rpm; } - if (i2c_dev->is_multimaster_mode) { - ret = clk_enable(i2c_dev->div_clk); - if (ret < 0) { - dev_err(i2c_dev->dev, "div_clk enable failed %d\n", - ret); - goto put_rpm; - } - } - if (i2c_dev->hw->supports_bus_clear) i2c_dev->adapter.bus_recovery_info = &tegra_i2c_recovery_info; ret = tegra_i2c_init_dma(i2c_dev); if (ret < 0) - goto disable_div_clk; + goto put_rpm; ret = tegra_i2c_init(i2c_dev); if (ret) { @@ -1834,20 +1798,10 @@ static int tegra_i2c_probe(struct platform_device *pdev) release_dma: tegra_i2c_release_dma(i2c_dev); -disable_div_clk: - if (i2c_dev->is_multimaster_mode) - clk_disable(i2c_dev->div_clk); - put_rpm: pm_runtime_put_sync(&pdev->dev); pm_runtime_disable(&pdev->dev); - clk_unprepare(i2c_dev->div_clk); - -unprepare_slow_clk: - clk_unprepare(i2c_dev->slow_clk); - -unprepare_fast_clk: - clk_unprepare(i2c_dev->fast_clk); + tegra_i2c_release_clocks(i2c_dev); return ret; } @@ -1858,16 +1812,10 @@ static int tegra_i2c_remove(struct platform_device *pdev) i2c_del_adapter(&i2c_dev->adapter); - if (i2c_dev->is_multimaster_mode) - clk_disable(i2c_dev->div_clk); - pm_runtime_disable(&pdev->dev); - clk_unprepare(i2c_dev->div_clk); - clk_unprepare(i2c_dev->slow_clk); - clk_unprepare(i2c_dev->fast_clk); - tegra_i2c_release_dma(i2c_dev); + tegra_i2c_release_clocks(i2c_dev); return 0; } From 6fe3e2015faaf85e4c81b86f733316820e4af249 Mon Sep 17 00:00:00 2001 From: Dmitry Osipenko Date: Wed, 30 Sep 2020 01:18:56 +0300 Subject: [PATCH 135/243] i2c: tegra: Move out all device-tree parsing into tegra_i2c_parse_dt() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Move out code related to device-tree parsing from the probe function into tegra_i2c_parse_dt() in order to make code more consistent. 
Reviewed-by: Michał Mirosław Reviewed-by: Andy Shevchenko Acked-by: Thierry Reding Tested-by: Thierry Reding Signed-off-by: Dmitry Osipenko Signed-off-by: Wolfram Sang --- drivers/i2c/busses/i2c-tegra.c | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) diff --git a/drivers/i2c/busses/i2c-tegra.c b/drivers/i2c/busses/i2c-tegra.c index b389cd0ce23a..9486fcba655c 100644 --- a/drivers/i2c/busses/i2c-tegra.c +++ b/drivers/i2c/busses/i2c-tegra.c @@ -1428,6 +1428,12 @@ static void tegra_i2c_parse_dt(struct tegra_i2c_dev *i2c_dev) multi_mode = of_property_read_bool(np, "multi-master"); i2c_dev->is_multimaster_mode = multi_mode; + + if (of_device_is_compatible(np, "nvidia,tegra20-i2c-dvc")) + i2c_dev->is_dvc = true; + + if (of_device_is_compatible(np, "nvidia,tegra210-i2c-vi")) + i2c_dev->is_vi = true; } static const struct i2c_algorithm tegra_i2c_algo = { @@ -1730,10 +1736,6 @@ static int tegra_i2c_probe(struct platform_device *pdev) return ret; i2c_dev->hw = of_device_get_match_data(&pdev->dev); - i2c_dev->is_dvc = of_device_is_compatible(pdev->dev.of_node, - "nvidia,tegra20-i2c-dvc"); - i2c_dev->is_vi = of_device_is_compatible(dev->of_node, - "nvidia,tegra210-i2c-vi"); i2c_dev->adapter.quirks = i2c_dev->hw->quirks; i2c_dev->dma_buf_size = i2c_dev->adapter.quirks->max_write_len + I2C_PACKET_HEADER_SIZE; From 55c52f16a017ca3b1546709f8cfa5a4c856926eb Mon Sep 17 00:00:00 2001 From: Dmitry Osipenko Date: Wed, 30 Sep 2020 01:18:57 +0300 Subject: [PATCH 136/243] i2c: tegra: Clean up probe function The driver's probe function code is a bit difficult to read. This patch reorders code of the probe function, forming groups of code that are easy to work with. The probe tear-down order now matches the driver-removal order.
Reviewed-by: Thierry Reding Reviewed-by: Andy Shevchenko Tested-by: Thierry Reding Signed-off-by: Dmitry Osipenko Signed-off-by: Wolfram Sang --- drivers/i2c/busses/i2c-tegra.c | 106 ++++++++++++++++----------------- 1 file changed, 52 insertions(+), 54 deletions(-) diff --git a/drivers/i2c/busses/i2c-tegra.c b/drivers/i2c/busses/i2c-tegra.c index 9486fcba655c..56981a5506ae 100644 --- a/drivers/i2c/busses/i2c-tegra.c +++ b/drivers/i2c/busses/i2c-tegra.c @@ -440,6 +440,9 @@ static int tegra_i2c_init_dma(struct tegra_i2c_dev *i2c_dev) i2c_dev->tx_dma_chan = chan; + i2c_dev->dma_buf_size = i2c_dev->hw->quirks->max_write_len + + I2C_PACKET_HEADER_SIZE; + dma_buf = dma_alloc_coherent(i2c_dev->dev, i2c_dev->dma_buf_size, &dma_phys, GFP_KERNEL | __GFP_NOWARN); if (!dma_buf) { @@ -1694,34 +1697,42 @@ static int tegra_i2c_probe(struct platform_device *pdev) struct device *dev = &pdev->dev; struct tegra_i2c_dev *i2c_dev; struct resource *res; - void __iomem *base; - phys_addr_t base_phys; - int irq; int ret; - base = devm_platform_get_and_ioremap_resource(pdev, 0, &res); - if (IS_ERR(base)) - return PTR_ERR(base); - - base_phys = res->start; - - irq = platform_get_irq(pdev, 0); - if (irq < 0) - return irq; - i2c_dev = devm_kzalloc(&pdev->dev, sizeof(*i2c_dev), GFP_KERNEL); if (!i2c_dev) return -ENOMEM; - i2c_dev->base = base; - i2c_dev->base_phys = base_phys; - i2c_dev->adapter.algo = &tegra_i2c_algo; - i2c_dev->adapter.retries = 1; - i2c_dev->adapter.timeout = 6 * HZ; - i2c_dev->irq = irq; + platform_set_drvdata(pdev, i2c_dev); + + init_completion(&i2c_dev->msg_complete); + init_completion(&i2c_dev->dma_complete); + + i2c_dev->hw = of_device_get_match_data(&pdev->dev); i2c_dev->cont_id = pdev->id; i2c_dev->dev = &pdev->dev; + i2c_dev->base = devm_platform_get_and_ioremap_resource(pdev, 0, &res); + if (IS_ERR(i2c_dev->base)) + return PTR_ERR(i2c_dev->base); + + i2c_dev->base_phys = res->start; + + ret = platform_get_irq(pdev, 0); + if (ret < 0) + return ret; + + i2c_dev->irq 
= ret; + + /* interrupt will be enabled during of transfer time */ + irq_set_status_flags(i2c_dev->irq, IRQ_NOAUTOEN); + + ret = devm_request_irq(&pdev->dev, i2c_dev->irq, tegra_i2c_isr, + IRQF_NO_SUSPEND, dev_name(&pdev->dev), + i2c_dev); + if (ret) + return ret; + i2c_dev->rst = devm_reset_control_get_exclusive(&pdev->dev, "i2c"); if (IS_ERR(i2c_dev->rst)) { dev_err_probe(&pdev->dev, PTR_ERR(i2c_dev->rst), @@ -1735,14 +1746,9 @@ static int tegra_i2c_probe(struct platform_device *pdev) if (ret) return ret; - i2c_dev->hw = of_device_get_match_data(&pdev->dev); - i2c_dev->adapter.quirks = i2c_dev->hw->quirks; - i2c_dev->dma_buf_size = i2c_dev->adapter.quirks->max_write_len + - I2C_PACKET_HEADER_SIZE; - init_completion(&i2c_dev->msg_complete); - init_completion(&i2c_dev->dma_complete); - - platform_set_drvdata(pdev, i2c_dev); + ret = tegra_i2c_init_dma(i2c_dev); + if (ret) + goto release_clocks; /* * VI I2C is in VE power domain which is not always on and not @@ -1760,49 +1766,41 @@ static int tegra_i2c_probe(struct platform_device *pdev) goto put_rpm; } + ret = tegra_i2c_init(i2c_dev); + if (ret) + goto put_rpm; + + i2c_set_adapdata(&i2c_dev->adapter, i2c_dev); + i2c_dev->adapter.dev.of_node = pdev->dev.of_node; + i2c_dev->adapter.dev.parent = &pdev->dev; + i2c_dev->adapter.retries = 1; + i2c_dev->adapter.timeout = 6 * HZ; + i2c_dev->adapter.quirks = i2c_dev->hw->quirks; + i2c_dev->adapter.owner = THIS_MODULE; + i2c_dev->adapter.class = I2C_CLASS_DEPRECATED; + i2c_dev->adapter.algo = &tegra_i2c_algo; + i2c_dev->adapter.nr = pdev->id; + if (i2c_dev->hw->supports_bus_clear) i2c_dev->adapter.bus_recovery_info = &tegra_i2c_recovery_info; - ret = tegra_i2c_init_dma(i2c_dev); - if (ret < 0) - goto put_rpm; - - ret = tegra_i2c_init(i2c_dev); - if (ret) { - dev_err(&pdev->dev, "Failed to initialize i2c controller\n"); - goto release_dma; - } - - irq_set_status_flags(i2c_dev->irq, IRQ_NOAUTOEN); - - ret = devm_request_irq(&pdev->dev, i2c_dev->irq, tegra_i2c_isr, - 
IRQF_NO_SUSPEND, dev_name(&pdev->dev), i2c_dev); - if (ret) - goto release_dma; - - i2c_set_adapdata(&i2c_dev->adapter, i2c_dev); - i2c_dev->adapter.owner = THIS_MODULE; - i2c_dev->adapter.class = I2C_CLASS_DEPRECATED; strlcpy(i2c_dev->adapter.name, dev_name(&pdev->dev), sizeof(i2c_dev->adapter.name)); - i2c_dev->adapter.dev.parent = &pdev->dev; - i2c_dev->adapter.nr = pdev->id; - i2c_dev->adapter.dev.of_node = pdev->dev.of_node; ret = i2c_add_numbered_adapter(&i2c_dev->adapter); if (ret) - goto release_dma; + goto put_rpm; pm_runtime_put(&pdev->dev); return 0; -release_dma: - tegra_i2c_release_dma(i2c_dev); - put_rpm: pm_runtime_put_sync(&pdev->dev); pm_runtime_disable(&pdev->dev); + + tegra_i2c_release_dma(i2c_dev); +release_clocks: tegra_i2c_release_clocks(i2c_dev); return ret; From df384fa58a321a88b22499a43cb5c5d0fb9f5364 Mon Sep 17 00:00:00 2001 From: Dmitry Osipenko Date: Wed, 30 Sep 2020 01:18:58 +0300 Subject: [PATCH 137/243] i2c: tegra: Reorder location of functions in the code MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Reorder location of functions in the code in order to have definition of functions closer to the place of the invocation. This change makes easier to navigate around the code and removes the need to have a prototype for tegra_i2c_init(). 
Reviewed-by: Michał Mirosław Reviewed-by: Andy Shevchenko Acked-by: Thierry Reding Tested-by: Thierry Reding Signed-off-by: Dmitry Osipenko Signed-off-by: Wolfram Sang --- drivers/i2c/busses/i2c-tegra.c | 544 ++++++++++++++++----------------- 1 file changed, 271 insertions(+), 273 deletions(-) diff --git a/drivers/i2c/busses/i2c-tegra.c b/drivers/i2c/busses/i2c-tegra.c index 56981a5506ae..990eeb832492 100644 --- a/drivers/i2c/busses/i2c-tegra.c +++ b/drivers/i2c/busses/i2c-tegra.c @@ -288,8 +288,6 @@ struct tegra_i2c_dev { bool is_curr_atomic_xfer; }; -static int tegra_i2c_init(struct tegra_i2c_dev *i2c_dev); - static void dvc_writel(struct tegra_i2c_dev *i2c_dev, u32 val, unsigned long reg) { @@ -466,6 +464,56 @@ static int tegra_i2c_init_dma(struct tegra_i2c_dev *i2c_dev) return err; } +/* + * One of the Tegra I2C blocks is inside the DVC (Digital Voltage Controller) + * block. This block is identical to the rest of the I2C blocks, except that + * it only supports master mode, it has registers moved around, and it needs + * some extra init to get it into I2C mode.
The register moves are handled + * by i2c_readl and i2c_writel + */ +static void tegra_dvc_init(struct tegra_i2c_dev *i2c_dev) +{ + u32 val; + + val = dvc_readl(i2c_dev, DVC_CTRL_REG3); + val |= DVC_CTRL_REG3_SW_PROG; + val |= DVC_CTRL_REG3_I2C_DONE_INTR_EN; + dvc_writel(i2c_dev, val, DVC_CTRL_REG3); + + val = dvc_readl(i2c_dev, DVC_CTRL_REG1); + val |= DVC_CTRL_REG1_INTR_EN; + dvc_writel(i2c_dev, val, DVC_CTRL_REG1); +} + +static void tegra_i2c_vi_init(struct tegra_i2c_dev *i2c_dev) +{ + u32 value; + + value = FIELD_PREP(I2C_INTERFACE_TIMING_THIGH, 2) | + FIELD_PREP(I2C_INTERFACE_TIMING_TLOW, 4); + i2c_writel(i2c_dev, value, I2C_INTERFACE_TIMING_0); + + value = FIELD_PREP(I2C_INTERFACE_TIMING_TBUF, 4) | + FIELD_PREP(I2C_INTERFACE_TIMING_TSU_STO, 7) | + FIELD_PREP(I2C_INTERFACE_TIMING_THD_STA, 4) | + FIELD_PREP(I2C_INTERFACE_TIMING_TSU_STA, 4); + i2c_writel(i2c_dev, value, I2C_INTERFACE_TIMING_1); + + value = FIELD_PREP(I2C_HS_INTERFACE_TIMING_THIGH, 3) | + FIELD_PREP(I2C_HS_INTERFACE_TIMING_TLOW, 8); + i2c_writel(i2c_dev, value, I2C_HS_INTERFACE_TIMING_0); + + value = FIELD_PREP(I2C_HS_INTERFACE_TIMING_TSU_STO, 11) | + FIELD_PREP(I2C_HS_INTERFACE_TIMING_THD_STA, 11) | + FIELD_PREP(I2C_HS_INTERFACE_TIMING_TSU_STA, 11); + i2c_writel(i2c_dev, value, I2C_HS_INTERFACE_TIMING_1); + + value = FIELD_PREP(I2C_BC_SCLK_THRESHOLD, 9) | I2C_BC_STOP_COND; + i2c_writel(i2c_dev, value, I2C_BUS_CLEAR_CNFG); + + i2c_writel(i2c_dev, 0x0, I2C_TLOW_SEXT); +} + static int tegra_i2c_flush_fifos(struct tegra_i2c_dev *i2c_dev) { u32 mask, val, offset, reg_offset; @@ -503,6 +551,164 @@ static int tegra_i2c_flush_fifos(struct tegra_i2c_dev *i2c_dev) return 0; } +static int tegra_i2c_wait_for_config_load(struct tegra_i2c_dev *i2c_dev) +{ + unsigned long reg_offset; + void __iomem *addr; + u32 val; + int err; + + if (i2c_dev->hw->has_config_load_reg) { + reg_offset = tegra_i2c_reg_addr(i2c_dev, I2C_CONFIG_LOAD); + addr = i2c_dev->base + reg_offset; + i2c_writel(i2c_dev, I2C_MSTR_CONFIG_LOAD, 
I2C_CONFIG_LOAD); + + if (i2c_dev->is_curr_atomic_xfer) + err = readl_relaxed_poll_timeout_atomic( + addr, val, val == 0, 1000, + I2C_CONFIG_LOAD_TIMEOUT); + else + err = readl_relaxed_poll_timeout( + addr, val, val == 0, 1000, + I2C_CONFIG_LOAD_TIMEOUT); + + if (err) { + dev_warn(i2c_dev->dev, + "timeout waiting for config load\n"); + return err; + } + } + + return 0; +} + +static int tegra_i2c_init(struct tegra_i2c_dev *i2c_dev) +{ + u32 val; + int err; + u32 clk_divisor, clk_multiplier; + u32 non_hs_mode; + u32 tsu_thd; + u8 tlow, thigh; + + /* + * The reset shouldn't ever fail in practice. The failure will be a + * sign of a severe problem that needs to be resolved. Still we don't + * want to fail the initialization completely because this may break + * kernel boot up since voltage regulators use I2C. Hence, we will + * emit a noisy warning on error, which won't stay unnoticed and + * won't hose machine entirely. + */ + err = reset_control_reset(i2c_dev->rst); + WARN_ON_ONCE(err); + + if (i2c_dev->is_dvc) + tegra_dvc_init(i2c_dev); + + val = I2C_CNFG_NEW_MASTER_FSM | I2C_CNFG_PACKET_MODE_EN | + FIELD_PREP(I2C_CNFG_DEBOUNCE_CNT, 2); + + if (i2c_dev->hw->has_multi_master_mode) + val |= I2C_CNFG_MULTI_MASTER_MODE; + + i2c_writel(i2c_dev, val, I2C_CNFG); + i2c_writel(i2c_dev, 0, I2C_INT_MASK); + + if (i2c_dev->is_vi) + tegra_i2c_vi_init(i2c_dev); + + switch (i2c_dev->bus_clk_rate) { + case I2C_MAX_STANDARD_MODE_FREQ + 1 ... I2C_MAX_FAST_MODE_PLUS_FREQ: + default: + tlow = i2c_dev->hw->tlow_fast_fastplus_mode; + thigh = i2c_dev->hw->thigh_fast_fastplus_mode; + tsu_thd = i2c_dev->hw->setup_hold_time_fast_fast_plus_mode; + + if (i2c_dev->bus_clk_rate > I2C_MAX_FAST_MODE_FREQ) + non_hs_mode = i2c_dev->hw->clk_divisor_fast_plus_mode; + else + non_hs_mode = i2c_dev->hw->clk_divisor_fast_mode; + break; + + case 0 ... 
I2C_MAX_STANDARD_MODE_FREQ: + tlow = i2c_dev->hw->tlow_std_mode; + thigh = i2c_dev->hw->thigh_std_mode; + tsu_thd = i2c_dev->hw->setup_hold_time_std_mode; + non_hs_mode = i2c_dev->hw->clk_divisor_std_mode; + break; + } + + /* Make sure clock divisor programmed correctly */ + clk_divisor = FIELD_PREP(I2C_CLK_DIVISOR_HSMODE, + i2c_dev->hw->clk_divisor_hs_mode) | + FIELD_PREP(I2C_CLK_DIVISOR_STD_FAST_MODE, non_hs_mode); + i2c_writel(i2c_dev, clk_divisor, I2C_CLK_DIVISOR); + + if (i2c_dev->hw->has_interface_timing_reg) { + val = FIELD_PREP(I2C_INTERFACE_TIMING_THIGH, thigh) | + FIELD_PREP(I2C_INTERFACE_TIMING_TLOW, tlow); + i2c_writel(i2c_dev, val, I2C_INTERFACE_TIMING_0); + } + + /* + * configure setup and hold times only when tsu_thd is non-zero. + * otherwise, preserve the chip default values + */ + if (i2c_dev->hw->has_interface_timing_reg && tsu_thd) + i2c_writel(i2c_dev, tsu_thd, I2C_INTERFACE_TIMING_1); + + clk_multiplier = tlow + thigh + 2; + clk_multiplier *= non_hs_mode + 1; + + err = clk_set_rate(i2c_dev->div_clk, + i2c_dev->bus_clk_rate * clk_multiplier); + if (err) { + dev_err(i2c_dev->dev, "failed to set div-clk rate: %d\n", err); + return err; + } + + if (!i2c_dev->is_dvc && !i2c_dev->is_vi) { + u32 sl_cfg = i2c_readl(i2c_dev, I2C_SL_CNFG); + + sl_cfg |= I2C_SL_CNFG_NACK | I2C_SL_CNFG_NEWSL; + i2c_writel(i2c_dev, sl_cfg, I2C_SL_CNFG); + i2c_writel(i2c_dev, 0xfc, I2C_SL_ADDR1); + i2c_writel(i2c_dev, 0x00, I2C_SL_ADDR2); + } + + err = tegra_i2c_flush_fifos(i2c_dev); + if (err) + return err; + + if (i2c_dev->is_multimaster_mode && i2c_dev->hw->has_slcg_override_reg) + i2c_writel(i2c_dev, I2C_MST_CORE_CLKEN_OVR, I2C_CLKEN_OVERRIDE); + + err = tegra_i2c_wait_for_config_load(i2c_dev); + if (err) + return err; + + return 0; +} + +static int tegra_i2c_disable_packet_mode(struct tegra_i2c_dev *i2c_dev) +{ + u32 cnfg; + + /* + * NACK interrupt is generated before the I2C controller generates + * the STOP condition on the bus. 
So wait for 2 clock periods + * before disabling the controller so that the STOP condition has + * been delivered properly. + */ + udelay(DIV_ROUND_UP(2 * 1000000, i2c_dev->bus_clk_rate)); + + cnfg = i2c_readl(i2c_dev, I2C_CNFG); + if (cnfg & I2C_CNFG_PACKET_MODE_EN) + i2c_writel(i2c_dev, cnfg & ~I2C_CNFG_PACKET_MODE_EN, I2C_CNFG); + + return tegra_i2c_wait_for_config_load(i2c_dev); +} + static int tegra_i2c_empty_rx_fifo(struct tegra_i2c_dev *i2c_dev) { u32 val; @@ -632,256 +838,6 @@ static int tegra_i2c_fill_tx_fifo(struct tegra_i2c_dev *i2c_dev) return 0; } -/* - * One of the Tegra I2C blocks is inside the DVC (Digital Voltage Controller) - * block. This block is identical to the rest of the I2C blocks, except that - * it only supports master mode, it has registers moved around, and it needs - * some extra init to get it into I2C mode. The register moves are handled - * by i2c_readl and i2c_writel - */ -static void tegra_dvc_init(struct tegra_i2c_dev *i2c_dev) -{ - u32 val; - - val = dvc_readl(i2c_dev, DVC_CTRL_REG3); - val |= DVC_CTRL_REG3_SW_PROG; - val |= DVC_CTRL_REG3_I2C_DONE_INTR_EN; - dvc_writel(i2c_dev, val, DVC_CTRL_REG3); - - val = dvc_readl(i2c_dev, DVC_CTRL_REG1); - val |= DVC_CTRL_REG1_INTR_EN; - dvc_writel(i2c_dev, val, DVC_CTRL_REG1); -} - -static int __maybe_unused tegra_i2c_runtime_resume(struct device *dev) -{ - struct tegra_i2c_dev *i2c_dev = dev_get_drvdata(dev); - int ret; - - ret = pinctrl_pm_select_default_state(i2c_dev->dev); - if (ret) - return ret; - - ret = clk_bulk_enable(i2c_dev->nclocks, i2c_dev->clocks); - if (ret) - return ret; - - /* - * VI I2C device is attached to VE power domain which goes through - * power ON/OFF during PM runtime resume/suspend. So, controller - * should go through reset and need to re-initialize after power - * domain ON. 
- */ - if (i2c_dev->is_vi) { - ret = tegra_i2c_init(i2c_dev); - if (ret) - goto disable_clocks; - } - - return 0; - -disable_clocks: - clk_bulk_disable(i2c_dev->nclocks, i2c_dev->clocks); - - return ret; -} - -static int __maybe_unused tegra_i2c_runtime_suspend(struct device *dev) -{ - struct tegra_i2c_dev *i2c_dev = dev_get_drvdata(dev); - - clk_bulk_disable(i2c_dev->nclocks, i2c_dev->clocks); - - return pinctrl_pm_select_idle_state(i2c_dev->dev); -} - -static int tegra_i2c_wait_for_config_load(struct tegra_i2c_dev *i2c_dev) -{ - unsigned long reg_offset; - void __iomem *addr; - u32 val; - int err; - - if (i2c_dev->hw->has_config_load_reg) { - reg_offset = tegra_i2c_reg_addr(i2c_dev, I2C_CONFIG_LOAD); - addr = i2c_dev->base + reg_offset; - i2c_writel(i2c_dev, I2C_MSTR_CONFIG_LOAD, I2C_CONFIG_LOAD); - - if (i2c_dev->is_curr_atomic_xfer) - err = readl_relaxed_poll_timeout_atomic( - addr, val, val == 0, 1000, - I2C_CONFIG_LOAD_TIMEOUT); - else - err = readl_relaxed_poll_timeout( - addr, val, val == 0, 1000, - I2C_CONFIG_LOAD_TIMEOUT); - - if (err) { - dev_warn(i2c_dev->dev, - "timeout waiting for config load\n"); - return err; - } - } - - return 0; -} - -static void tegra_i2c_vi_init(struct tegra_i2c_dev *i2c_dev) -{ - u32 value; - - value = FIELD_PREP(I2C_INTERFACE_TIMING_THIGH, 2) | - FIELD_PREP(I2C_INTERFACE_TIMING_TLOW, 4); - i2c_writel(i2c_dev, value, I2C_INTERFACE_TIMING_0); - - value = FIELD_PREP(I2C_INTERFACE_TIMING_TBUF, 4) | - FIELD_PREP(I2C_INTERFACE_TIMING_TSU_STO, 7) | - FIELD_PREP(I2C_INTERFACE_TIMING_THD_STA, 4) | - FIELD_PREP(I2C_INTERFACE_TIMING_TSU_STA, 4); - i2c_writel(i2c_dev, value, I2C_INTERFACE_TIMING_1); - - value = FIELD_PREP(I2C_HS_INTERFACE_TIMING_THIGH, 3) | - FIELD_PREP(I2C_HS_INTERFACE_TIMING_TLOW, 8); - i2c_writel(i2c_dev, value, I2C_HS_INTERFACE_TIMING_0); - - value = FIELD_PREP(I2C_HS_INTERFACE_TIMING_TSU_STO, 11) | - FIELD_PREP(I2C_HS_INTERFACE_TIMING_THD_STA, 11) | - FIELD_PREP(I2C_HS_INTERFACE_TIMING_TSU_STA, 11); - 
i2c_writel(i2c_dev, value, I2C_HS_INTERFACE_TIMING_1); - - value = FIELD_PREP(I2C_BC_SCLK_THRESHOLD, 9) | I2C_BC_STOP_COND; - i2c_writel(i2c_dev, value, I2C_BUS_CLEAR_CNFG); - - i2c_writel(i2c_dev, 0x0, I2C_TLOW_SEXT); -} - -static int tegra_i2c_init(struct tegra_i2c_dev *i2c_dev) -{ - u32 val; - int err; - u32 clk_divisor, clk_multiplier; - u32 non_hs_mode; - u32 tsu_thd; - u8 tlow, thigh; - - /* - * The reset shouldn't ever fail in practice. The failure will be a - * sign of a severe problem that needs to be resolved. Still we don't - * want to fail the initialization completely because this may break - * kernel boot up since voltage regulators use I2C. Hence, we will - * emit a noisy warning on error, which won't stay unnoticed and - * won't hose machine entirely. - */ - err = reset_control_reset(i2c_dev->rst); - WARN_ON_ONCE(err); - - if (i2c_dev->is_dvc) - tegra_dvc_init(i2c_dev); - - val = I2C_CNFG_NEW_MASTER_FSM | I2C_CNFG_PACKET_MODE_EN | - FIELD_PREP(I2C_CNFG_DEBOUNCE_CNT, 2); - - if (i2c_dev->hw->has_multi_master_mode) - val |= I2C_CNFG_MULTI_MASTER_MODE; - - i2c_writel(i2c_dev, val, I2C_CNFG); - i2c_writel(i2c_dev, 0, I2C_INT_MASK); - - if (i2c_dev->is_vi) - tegra_i2c_vi_init(i2c_dev); - - switch (i2c_dev->bus_clk_rate) { - case I2C_MAX_STANDARD_MODE_FREQ + 1 ... I2C_MAX_FAST_MODE_PLUS_FREQ: - default: - tlow = i2c_dev->hw->tlow_fast_fastplus_mode; - thigh = i2c_dev->hw->thigh_fast_fastplus_mode; - tsu_thd = i2c_dev->hw->setup_hold_time_fast_fast_plus_mode; - - if (i2c_dev->bus_clk_rate > I2C_MAX_FAST_MODE_FREQ) - non_hs_mode = i2c_dev->hw->clk_divisor_fast_plus_mode; - else - non_hs_mode = i2c_dev->hw->clk_divisor_fast_mode; - break; - - case 0 ... 
I2C_MAX_STANDARD_MODE_FREQ: - tlow = i2c_dev->hw->tlow_std_mode; - thigh = i2c_dev->hw->thigh_std_mode; - tsu_thd = i2c_dev->hw->setup_hold_time_std_mode; - non_hs_mode = i2c_dev->hw->clk_divisor_std_mode; - break; - } - - /* Make sure clock divisor programmed correctly */ - clk_divisor = FIELD_PREP(I2C_CLK_DIVISOR_HSMODE, - i2c_dev->hw->clk_divisor_hs_mode) | - FIELD_PREP(I2C_CLK_DIVISOR_STD_FAST_MODE, non_hs_mode); - i2c_writel(i2c_dev, clk_divisor, I2C_CLK_DIVISOR); - - if (i2c_dev->hw->has_interface_timing_reg) { - val = FIELD_PREP(I2C_INTERFACE_TIMING_THIGH, thigh) | - FIELD_PREP(I2C_INTERFACE_TIMING_TLOW, tlow); - i2c_writel(i2c_dev, val, I2C_INTERFACE_TIMING_0); - } - - /* - * configure setup and hold times only when tsu_thd is non-zero. - * otherwise, preserve the chip default values - */ - if (i2c_dev->hw->has_interface_timing_reg && tsu_thd) - i2c_writel(i2c_dev, tsu_thd, I2C_INTERFACE_TIMING_1); - - clk_multiplier = tlow + thigh + 2; - clk_multiplier *= non_hs_mode + 1; - - err = clk_set_rate(i2c_dev->div_clk, - i2c_dev->bus_clk_rate * clk_multiplier); - if (err) { - dev_err(i2c_dev->dev, "failed to set div-clk rate: %d\n", err); - return err; - } - - if (!i2c_dev->is_dvc && !i2c_dev->is_vi) { - u32 sl_cfg = i2c_readl(i2c_dev, I2C_SL_CNFG); - - sl_cfg |= I2C_SL_CNFG_NACK | I2C_SL_CNFG_NEWSL; - i2c_writel(i2c_dev, sl_cfg, I2C_SL_CNFG); - i2c_writel(i2c_dev, 0xfc, I2C_SL_ADDR1); - i2c_writel(i2c_dev, 0x00, I2C_SL_ADDR2); - } - - err = tegra_i2c_flush_fifos(i2c_dev); - if (err) - return err; - - if (i2c_dev->is_multimaster_mode && i2c_dev->hw->has_slcg_override_reg) - i2c_writel(i2c_dev, I2C_MST_CORE_CLKEN_OVR, I2C_CLKEN_OVERRIDE); - - err = tegra_i2c_wait_for_config_load(i2c_dev); - if (err) - return err; - - return 0; -} - -static int tegra_i2c_disable_packet_mode(struct tegra_i2c_dev *i2c_dev) -{ - u32 cnfg; - - /* - * NACK interrupt is generated before the I2C controller generates - * the STOP condition on the bus. 
So wait for 2 clock periods - * before disabling the controller so that the STOP condition has - * been delivered properly. - */ - udelay(DIV_ROUND_UP(2 * 1000000, i2c_dev->bus_clk_rate)); - - cnfg = i2c_readl(i2c_dev, I2C_CNFG); - if (cnfg & I2C_CNFG_PACKET_MODE_EN) - i2c_writel(i2c_dev, cnfg & ~I2C_CNFG_PACKET_MODE_EN, I2C_CNFG); - - return tegra_i2c_wait_for_config_load(i2c_dev); -} - static irqreturn_t tegra_i2c_isr(int irq, void *dev_id) { u32 status; @@ -1418,27 +1374,6 @@ static u32 tegra_i2c_func(struct i2c_adapter *adap) return ret; } -static void tegra_i2c_parse_dt(struct tegra_i2c_dev *i2c_dev) -{ - struct device_node *np = i2c_dev->dev->of_node; - int ret; - bool multi_mode; - - ret = of_property_read_u32(np, "clock-frequency", - &i2c_dev->bus_clk_rate); - if (ret) - i2c_dev->bus_clk_rate = I2C_MAX_STANDARD_MODE_FREQ; /* default clock rate */ - - multi_mode = of_property_read_bool(np, "multi-master"); - i2c_dev->is_multimaster_mode = multi_mode; - - if (of_device_is_compatible(np, "nvidia,tegra20-i2c-dvc")) - i2c_dev->is_dvc = true; - - if (of_device_is_compatible(np, "nvidia,tegra210-i2c-vi")) - i2c_dev->is_vi = true; -} - static const struct i2c_algorithm tegra_i2c_algo = { .master_xfer = tegra_i2c_xfer, .master_xfer_atomic = tegra_i2c_xfer_atomic, @@ -1644,6 +1579,27 @@ static const struct of_device_id tegra_i2c_of_match[] = { }; MODULE_DEVICE_TABLE(of, tegra_i2c_of_match); +static void tegra_i2c_parse_dt(struct tegra_i2c_dev *i2c_dev) +{ + struct device_node *np = i2c_dev->dev->of_node; + int ret; + bool multi_mode; + + ret = of_property_read_u32(np, "clock-frequency", + &i2c_dev->bus_clk_rate); + if (ret) + i2c_dev->bus_clk_rate = I2C_MAX_STANDARD_MODE_FREQ; /* default clock rate */ + + multi_mode = of_property_read_bool(np, "multi-master"); + i2c_dev->is_multimaster_mode = multi_mode; + + if (of_device_is_compatible(np, "nvidia,tegra20-i2c-dvc")) + i2c_dev->is_dvc = true; + + if (of_device_is_compatible(np, "nvidia,tegra210-i2c-vi")) + 
i2c_dev->is_vi = true; +} + static int tegra_i2c_init_clocks(struct tegra_i2c_dev *i2c_dev) { int err; @@ -1819,6 +1775,48 @@ static int tegra_i2c_remove(struct platform_device *pdev) return 0; } +static int __maybe_unused tegra_i2c_runtime_resume(struct device *dev) +{ + struct tegra_i2c_dev *i2c_dev = dev_get_drvdata(dev); + int ret; + + ret = pinctrl_pm_select_default_state(i2c_dev->dev); + if (ret) + return ret; + + ret = clk_bulk_enable(i2c_dev->nclocks, i2c_dev->clocks); + if (ret) + return ret; + + /* + * VI I2C device is attached to VE power domain which goes through + * power ON/OFF during PM runtime resume/suspend. So, controller + * should go through reset and need to re-initialize after power + * domain ON. + */ + if (i2c_dev->is_vi) { + ret = tegra_i2c_init(i2c_dev); + if (ret) + goto disable_clocks; + } + + return 0; + +disable_clocks: + clk_bulk_disable(i2c_dev->nclocks, i2c_dev->clocks); + + return ret; +} + +static int __maybe_unused tegra_i2c_runtime_suspend(struct device *dev) +{ + struct tegra_i2c_dev *i2c_dev = dev_get_drvdata(dev); + + clk_bulk_disable(i2c_dev->nclocks, i2c_dev->clocks); + + return pinctrl_pm_select_idle_state(i2c_dev->dev); +} + static int __maybe_unused tegra_i2c_suspend(struct device *dev) { struct tegra_i2c_dev *i2c_dev = dev_get_drvdata(dev); From f1c2ff98065dced0806fc162ba99d6491d7d400a Mon Sep 17 00:00:00 2001 From: Dmitry Osipenko Date: Wed, 30 Sep 2020 01:18:59 +0300 Subject: [PATCH 138/243] i2c: tegra: Clean up variable types Don't use signed types for unsigned values and use consistent types for sibling variables. 
Reviewed-by: Andy Shevchenko Acked-by: Thierry Reding Tested-by: Thierry Reding Signed-off-by: Dmitry Osipenko Signed-off-by: Wolfram Sang --- drivers/i2c/busses/i2c-tegra.c | 38 +++++++++++++++++----------------- 1 file changed, 19 insertions(+), 19 deletions(-) diff --git a/drivers/i2c/busses/i2c-tegra.c b/drivers/i2c/busses/i2c-tegra.c index 990eeb832492..a56c0873f4a3 100644 --- a/drivers/i2c/busses/i2c-tegra.c +++ b/drivers/i2c/busses/i2c-tegra.c @@ -206,20 +206,20 @@ struct tegra_i2c_hw_feature { bool has_continue_xfer_support; bool has_per_pkt_xfer_complete_irq; bool has_config_load_reg; - int clk_divisor_hs_mode; - int clk_divisor_std_mode; - int clk_divisor_fast_mode; - u16 clk_divisor_fast_plus_mode; + u32 clk_divisor_hs_mode; + u32 clk_divisor_std_mode; + u32 clk_divisor_fast_mode; + u32 clk_divisor_fast_plus_mode; bool has_multi_master_mode; bool has_slcg_override_reg; bool has_mst_fifo; const struct i2c_adapter_quirks *quirks; bool supports_bus_clear; bool has_apb_dma; - u8 tlow_std_mode; - u8 thigh_std_mode; - u8 tlow_fast_fastplus_mode; - u8 thigh_fast_fastplus_mode; + u32 tlow_std_mode; + u32 thigh_std_mode; + u32 tlow_fast_fastplus_mode; + u32 thigh_fast_fastplus_mode; u32 setup_hold_time_std_mode; u32 setup_hold_time_fast_fast_plus_mode; u32 setup_hold_time_hs_mode; @@ -267,15 +267,15 @@ struct tegra_i2c_dev { struct reset_control *rst; void __iomem *base; phys_addr_t base_phys; - int cont_id; - int irq; - int is_dvc; + unsigned int cont_id; + unsigned int irq; + bool is_dvc; bool is_vi; struct completion msg_complete; int msg_err; u8 *msg_buf; size_t msg_buf_remaining; - int msg_read; + bool msg_read; u32 bus_clk_rate; bool is_multimaster_mode; struct dma_chan *tx_dma_chan; @@ -329,13 +329,13 @@ static u32 i2c_readl(struct tegra_i2c_dev *i2c_dev, unsigned long reg) } static void i2c_writesl(struct tegra_i2c_dev *i2c_dev, void *data, - unsigned long reg, int len) + unsigned long reg, unsigned int len) { writesl(i2c_dev->base + 
tegra_i2c_reg_addr(i2c_dev, reg), data, len); } static void i2c_readsl(struct tegra_i2c_dev *i2c_dev, void *data, - unsigned long reg, int len) + unsigned long reg, unsigned int len) { readsl(i2c_dev->base + tegra_i2c_reg_addr(i2c_dev, reg), data, len); } @@ -712,10 +712,10 @@ static int tegra_i2c_disable_packet_mode(struct tegra_i2c_dev *i2c_dev) static int tegra_i2c_empty_rx_fifo(struct tegra_i2c_dev *i2c_dev) { u32 val; - int rx_fifo_avail; + unsigned int rx_fifo_avail; u8 *buf = i2c_dev->msg_buf; size_t buf_remaining = i2c_dev->msg_buf_remaining; - int words_to_transfer; + unsigned int words_to_transfer; /* * Catch overflow due to message fully sent @@ -773,10 +773,10 @@ static int tegra_i2c_empty_rx_fifo(struct tegra_i2c_dev *i2c_dev) static int tegra_i2c_fill_tx_fifo(struct tegra_i2c_dev *i2c_dev) { u32 val; - int tx_fifo_avail; + unsigned int tx_fifo_avail; u8 *buf = i2c_dev->msg_buf; size_t buf_remaining = i2c_dev->msg_buf_remaining; - int words_to_transfer; + unsigned int words_to_transfer; if (i2c_dev->hw->has_mst_fifo) { val = i2c_readl(i2c_dev, I2C_MST_FIFO_STATUS); @@ -1134,7 +1134,7 @@ static int tegra_i2c_xfer_msg(struct tegra_i2c_dev *i2c_dev, i2c_dev->msg_buf = msg->buf; i2c_dev->msg_buf_remaining = msg->len; i2c_dev->msg_err = I2C_ERR_NONE; - i2c_dev->msg_read = (msg->flags & I2C_M_RD); + i2c_dev->msg_read = !!(msg->flags & I2C_M_RD); reinit_completion(&i2c_dev->msg_complete); if (i2c_dev->msg_read) From 56f1cd34a4e7a2d10f41e8d8e27d16a157fdd74d Mon Sep 17 00:00:00 2001 From: Dmitry Osipenko Date: Wed, 30 Sep 2020 01:19:00 +0300 Subject: [PATCH 139/243] i2c: tegra: Remove outdated barrier() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The barrier() was intended to reduce possibility of racing with the interrupt handler, but driver's code evolved significantly and today's driver enables interrupt only when it waits for completion notification. Hence barrier() has no good use anymore, let's remove it. 
Reviewed-by: Michał Mirosław Reviewed-by: Andy Shevchenko Acked-by: Thierry Reding Tested-by: Thierry Reding Signed-off-by: Dmitry Osipenko Signed-off-by: Wolfram Sang --- drivers/i2c/busses/i2c-tegra.c | 13 +++++-------- 1 file changed, 5 insertions(+), 8 deletions(-) diff --git a/drivers/i2c/busses/i2c-tegra.c b/drivers/i2c/busses/i2c-tegra.c index a56c0873f4a3..347651401cd6 100644 --- a/drivers/i2c/busses/i2c-tegra.c +++ b/drivers/i2c/busses/i2c-tegra.c @@ -795,18 +795,17 @@ static int tegra_i2c_fill_tx_fifo(struct tegra_i2c_dev *i2c_dev) words_to_transfer = tx_fifo_avail; /* - * Update state before writing to FIFO. If this casues us - * to finish writing all bytes (AKA buf_remaining goes to 0) we - * have a potential for an interrupt (PACKET_XFER_COMPLETE is - * not maskable). We need to make sure that the isr sees - * buf_remaining as 0 and doesn't call us back re-entrantly. + * Update state before writing to FIFO. Note that this may + * cause us to finish writing all bytes (AKA buf_remaining + * goes to 0), hence we have a potential for an interrupt + * (PACKET_XFER_COMPLETE is not maskable), but GIC interrupt + * is disabled at this point. */ buf_remaining -= words_to_transfer * BYTES_PER_FIFO_WORD; tx_fifo_avail -= words_to_transfer; i2c_dev->msg_buf_remaining = buf_remaining; i2c_dev->msg_buf = buf + words_to_transfer * BYTES_PER_FIFO_WORD; - barrier(); i2c_writesl(i2c_dev, buf, I2C_TX_FIFO, words_to_transfer); @@ -827,10 +826,8 @@ static int tegra_i2c_fill_tx_fifo(struct tegra_i2c_dev *i2c_dev) memcpy(&val, buf, buf_remaining); val = le32_to_cpu(val); - /* Again update before writing to FIFO to make sure isr sees.
*/ i2c_dev->msg_buf_remaining = 0; i2c_dev->msg_buf = NULL; - barrier(); i2c_writel(i2c_dev, val, I2C_TX_FIFO); } From 055ba33154a755c0fe79ec174d3b6f9353e0be11 Mon Sep 17 00:00:00 2001 From: Dmitry Osipenko Date: Wed, 30 Sep 2020 01:19:01 +0300 Subject: [PATCH 140/243] i2c: tegra: Remove likely/unlikely from the code MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The likely/unlikely annotations should be used only in a hot paths of performance-critical code. The I2C driver doesn't have such paths, and thus, there is no justification for usage of likely/unlikely annotations in the code. Hence remove them. Reviewed-by: Michał Mirosław Reviewed-by: Andy Shevchenko Acked-by: Thierry Reding Tested-by: Thierry Reding Signed-off-by: Dmitry Osipenko Signed-off-by: Wolfram Sang --- drivers/i2c/busses/i2c-tegra.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/i2c/busses/i2c-tegra.c b/drivers/i2c/busses/i2c-tegra.c index 347651401cd6..dc25578adb6b 100644 --- a/drivers/i2c/busses/i2c-tegra.c +++ b/drivers/i2c/busses/i2c-tegra.c @@ -852,7 +852,7 @@ static irqreturn_t tegra_i2c_isr(int irq, void *dev_id) goto err; } - if (unlikely(status & status_err)) { + if (status & status_err) { tegra_i2c_disable_packet_mode(i2c_dev); if (status & I2C_INT_NO_ACK) i2c_dev->msg_err |= I2C_ERR_NO_ACK; @@ -1294,7 +1294,7 @@ static int tegra_i2c_xfer_msg(struct tegra_i2c_dev *i2c_dev, i2c_dev->msg_err); i2c_dev->is_curr_dma_xfer = false; - if (likely(i2c_dev->msg_err == I2C_ERR_NONE)) + if (i2c_dev->msg_err == I2C_ERR_NONE) return 0; tegra_i2c_init(i2c_dev); From d6a7969b798096533928c2a57eb6b27ffdd344aa Mon Sep 17 00:00:00 2001 From: Dmitry Osipenko Date: Wed, 30 Sep 2020 01:19:02 +0300 Subject: [PATCH 141/243] i2c: tegra: Remove redundant check in tegra_i2c_issue_bus_clear() The tegra_i2c_wait_for_config_load() checks for 'has_config_load_reg' by itself, hence there is no need to duplicate the check.
Reviewed-by: Andy Shevchenko Acked-by: Thierry Reding Tested-by: Thierry Reding Signed-off-by: Dmitry Osipenko Signed-off-by: Wolfram Sang --- drivers/i2c/busses/i2c-tegra.c | 9 ++++----- 1 file changed, 4 insertions(+), 5 deletions(-) diff --git a/drivers/i2c/busses/i2c-tegra.c b/drivers/i2c/busses/i2c-tegra.c index dc25578adb6b..79fa98423cae 100644 --- a/drivers/i2c/busses/i2c-tegra.c +++ b/drivers/i2c/busses/i2c-tegra.c @@ -1082,11 +1082,10 @@ static int tegra_i2c_issue_bus_clear(struct i2c_adapter *adap) reg = FIELD_PREP(I2C_BC_SCLK_THRESHOLD, 9) | I2C_BC_STOP_COND | I2C_BC_TERMINATE; i2c_writel(i2c_dev, reg, I2C_BUS_CLEAR_CNFG); - if (i2c_dev->hw->has_config_load_reg) { - err = tegra_i2c_wait_for_config_load(i2c_dev); - if (err) - return err; - } + + err = tegra_i2c_wait_for_config_load(i2c_dev); + if (err) + return err; reg |= I2C_BC_ENABLE; i2c_writel(i2c_dev, reg, I2C_BUS_CLEAR_CNFG); From 8d57c2f3654e7e23f5552d28fad5715cb567a148 Mon Sep 17 00:00:00 2001 From: Dmitry Osipenko Date: Wed, 30 Sep 2020 01:19:03 +0300 Subject: [PATCH 142/243] i2c: tegra: Remove "dma" variable from tegra_i2c_xfer_msg() The "dma" variable of tegra_i2c_xfer_msg() function doesn't bring much in regards to readability and generation of the code. Besides readability, it's also not very nice that the is_curr_dma_xfer is initialized in tegra_i2c_xfer_msg() and then could be overridden by tegra_i2c_config_fifo_trig(). In a result, the "dma" variable creates slight confusion since it's not instantly obvious why it's set after tegra_i2c_config_fifo_trig(). Hence should be better to have the variable removed. This makes code more consistent. 
Reviewed-by: Andy Shevchenko Acked-by: Thierry Reding Tested-by: Thierry Reding Signed-off-by: Dmitry Osipenko Signed-off-by: Wolfram Sang --- drivers/i2c/busses/i2c-tegra.c | 17 ++++++++--------- 1 file changed, 8 insertions(+), 9 deletions(-) diff --git a/drivers/i2c/busses/i2c-tegra.c b/drivers/i2c/busses/i2c-tegra.c index 79fa98423cae..eb62284e2293 100644 --- a/drivers/i2c/busses/i2c-tegra.c +++ b/drivers/i2c/busses/i2c-tegra.c @@ -1120,7 +1120,6 @@ static int tegra_i2c_xfer_msg(struct tegra_i2c_dev *i2c_dev, size_t xfer_size; u32 *buffer = NULL; int err = 0; - bool dma; u16 xfer_time = 100; err = tegra_i2c_flush_fifos(i2c_dev); @@ -1143,7 +1142,7 @@ static int tegra_i2c_xfer_msg(struct tegra_i2c_dev *i2c_dev, i2c_dev->dma_buf && !i2c_dev->is_curr_atomic_xfer; tegra_i2c_config_fifo_trig(i2c_dev, xfer_size); - dma = i2c_dev->is_curr_dma_xfer; + /* * Transfer time in mSec = Total bits / transfer rate * Total bits = 9 bits per byte (including ACK bit) + Start & stop bits @@ -1153,7 +1152,7 @@ static int tegra_i2c_xfer_msg(struct tegra_i2c_dev *i2c_dev, int_mask = I2C_INT_NO_ACK | I2C_INT_ARBITRATION_LOST; tegra_i2c_unmask_irq(i2c_dev, int_mask); - if (dma) { + if (i2c_dev->is_curr_dma_xfer) { if (i2c_dev->msg_read) { dma_sync_single_for_device(i2c_dev->dev, i2c_dev->dma_phys, @@ -1181,13 +1180,13 @@ static int tegra_i2c_xfer_msg(struct tegra_i2c_dev *i2c_dev, PACKET_HEADER0_PROTOCOL_I2C) | FIELD_PREP(PACKET_HEADER0_CONT_ID, i2c_dev->cont_id) | FIELD_PREP(PACKET_HEADER0_PACKET_ID, 1); - if (dma && !i2c_dev->msg_read) + if (i2c_dev->is_curr_dma_xfer && !i2c_dev->msg_read) *buffer++ = packet_header; else i2c_writel(i2c_dev, packet_header, I2C_TX_FIFO); packet_header = msg->len - 1; - if (dma && !i2c_dev->msg_read) + if (i2c_dev->is_curr_dma_xfer && !i2c_dev->msg_read) *buffer++ = packet_header; else i2c_writel(i2c_dev, packet_header, I2C_TX_FIFO); @@ -1207,13 +1206,13 @@ static int tegra_i2c_xfer_msg(struct tegra_i2c_dev *i2c_dev, packet_header |= 
I2C_HEADER_CONT_ON_NAK; if (msg->flags & I2C_M_RD) packet_header |= I2C_HEADER_READ; - if (dma && !i2c_dev->msg_read) + if (i2c_dev->is_curr_dma_xfer && !i2c_dev->msg_read) *buffer++ = packet_header; else i2c_writel(i2c_dev, packet_header, I2C_TX_FIFO); if (!i2c_dev->msg_read) { - if (dma) { + if (i2c_dev->is_curr_dma_xfer) { memcpy(buffer, msg->buf, msg->len); dma_sync_single_for_device(i2c_dev->dev, i2c_dev->dma_phys, @@ -1233,7 +1232,7 @@ static int tegra_i2c_xfer_msg(struct tegra_i2c_dev *i2c_dev, if (i2c_dev->hw->has_per_pkt_xfer_complete_irq) int_mask |= I2C_INT_PACKET_XFER_COMPLETE; - if (!dma) { + if (!i2c_dev->is_curr_dma_xfer) { if (msg->flags & I2C_M_RD) int_mask |= I2C_INT_RX_FIFO_DATA_REQ; else if (i2c_dev->msg_buf_remaining) @@ -1244,7 +1243,7 @@ static int tegra_i2c_xfer_msg(struct tegra_i2c_dev *i2c_dev, dev_dbg(i2c_dev->dev, "unmasked irq: %02x\n", i2c_readl(i2c_dev, I2C_INT_MASK)); - if (dma) { + if (i2c_dev->is_curr_dma_xfer) { time_left = tegra_i2c_wait_completion_timeout( i2c_dev, &i2c_dev->dma_complete, xfer_time); From e57ac5aba01a1f6d23fea5588252736e2e3d995c Mon Sep 17 00:00:00 2001 From: Dmitry Osipenko Date: Wed, 30 Sep 2020 01:19:04 +0300 Subject: [PATCH 143/243] i2c: tegra: Rename wait/poll functions MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Drop '_timeout' postfix from the wait/poll completion function names in order to make the names shorter, making code cleaner a tad. 
Reviewed-by: Michał Mirosław Reviewed-by: Andy Shevchenko Acked-by: Thierry Reding Tested-by: Thierry Reding Signed-off-by: Dmitry Osipenko Signed-off-by: Wolfram Sang --- drivers/i2c/busses/i2c-tegra.c | 32 ++++++++++++++------------------ 1 file changed, 14 insertions(+), 18 deletions(-) diff --git a/drivers/i2c/busses/i2c-tegra.c b/drivers/i2c/busses/i2c-tegra.c index eb62284e2293..f8bee67370aa 100644 --- a/drivers/i2c/busses/i2c-tegra.c +++ b/drivers/i2c/busses/i2c-tegra.c @@ -1010,10 +1010,9 @@ static void tegra_i2c_config_fifo_trig(struct tegra_i2c_dev *i2c_dev, i2c_writel(i2c_dev, val, reg); } -static unsigned long -tegra_i2c_poll_completion_timeout(struct tegra_i2c_dev *i2c_dev, - struct completion *complete, - unsigned int timeout_ms) +static unsigned long tegra_i2c_poll_completion(struct tegra_i2c_dev *i2c_dev, + struct completion *complete, + unsigned int timeout_ms) { ktime_t ktime = ktime_get(); ktime_t ktimeout = ktime_add_ms(ktime, timeout_ms); @@ -1037,16 +1036,14 @@ tegra_i2c_poll_completion_timeout(struct tegra_i2c_dev *i2c_dev, return 0; } -static unsigned long -tegra_i2c_wait_completion_timeout(struct tegra_i2c_dev *i2c_dev, - struct completion *complete, - unsigned int timeout_ms) +static unsigned long tegra_i2c_wait_completion(struct tegra_i2c_dev *i2c_dev, + struct completion *complete, + unsigned int timeout_ms) { unsigned long ret; if (i2c_dev->is_curr_atomic_xfer) { - ret = tegra_i2c_poll_completion_timeout(i2c_dev, complete, - timeout_ms); + ret = tegra_i2c_poll_completion(i2c_dev, complete, timeout_ms); } else { enable_irq(i2c_dev->irq); ret = wait_for_completion_timeout(complete, @@ -1064,8 +1061,7 @@ tegra_i2c_wait_completion_timeout(struct tegra_i2c_dev *i2c_dev, * needs to be checked after timeout.
*/ if (ret == 0) - ret = tegra_i2c_poll_completion_timeout(i2c_dev, - complete, 0); + ret = tegra_i2c_poll_completion(i2c_dev, complete, 0); } return ret; @@ -1091,8 +1087,7 @@ static int tegra_i2c_issue_bus_clear(struct i2c_adapter *adap) i2c_writel(i2c_dev, reg, I2C_BUS_CLEAR_CNFG); tegra_i2c_unmask_irq(i2c_dev, I2C_INT_BUS_CLR_DONE); - time_left = tegra_i2c_wait_completion_timeout( - i2c_dev, &i2c_dev->msg_complete, 50); + time_left = tegra_i2c_wait_completion(i2c_dev, &i2c_dev->msg_complete, 50); tegra_i2c_mask_irq(i2c_dev, I2C_INT_BUS_CLR_DONE); if (time_left == 0) { @@ -1244,8 +1239,9 @@ static int tegra_i2c_xfer_msg(struct tegra_i2c_dev *i2c_dev, i2c_readl(i2c_dev, I2C_INT_MASK)); if (i2c_dev->is_curr_dma_xfer) { - time_left = tegra_i2c_wait_completion_timeout( - i2c_dev, &i2c_dev->dma_complete, xfer_time); + time_left = tegra_i2c_wait_completion(i2c_dev, + &i2c_dev->dma_complete, + xfer_time); /* * Synchronize DMA first, since dmaengine_terminate_sync() @@ -1276,8 +1272,8 @@ static int tegra_i2c_xfer_msg(struct tegra_i2c_dev *i2c_dev, } } - time_left = tegra_i2c_wait_completion_timeout( - i2c_dev, &i2c_dev->msg_complete, xfer_time); + time_left = tegra_i2c_wait_completion(i2c_dev, &i2c_dev->msg_complete, + xfer_time); tegra_i2c_mask_irq(i2c_dev, int_mask); From 4a8e0f87977ec3b83db13d3ac22c6fc5a9703fd9 Mon Sep 17 00:00:00 2001 From: Dmitry Osipenko Date: Wed, 30 Sep 2020 01:19:05 +0300 Subject: [PATCH 144/243] i2c: tegra: Factor out error recovery from tegra_i2c_xfer_msg() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Factor out error recovery code from tegra_i2c_xfer_msg() in order to make this function easier to read and follow. 
Reviewed-by: Michał Mirosław Reviewed-by: Andy Shevchenko Acked-by: Thierry Reding Tested-by: Thierry Reding Signed-off-by: Dmitry Osipenko Signed-off-by: Wolfram Sang --- drivers/i2c/busses/i2c-tegra.c | 46 ++++++++++++++++++++++------------ 1 file changed, 30 insertions(+), 16 deletions(-) diff --git a/drivers/i2c/busses/i2c-tegra.c b/drivers/i2c/busses/i2c-tegra.c index f8bee67370aa..95d257cbd800 100644 --- a/drivers/i2c/busses/i2c-tegra.c +++ b/drivers/i2c/busses/i2c-tegra.c @@ -1105,6 +1105,32 @@ static int tegra_i2c_issue_bus_clear(struct i2c_adapter *adap) return -EAGAIN; } +static int tegra_i2c_error_recover(struct tegra_i2c_dev *i2c_dev, + struct i2c_msg *msg) +{ + if (i2c_dev->msg_err == I2C_ERR_NONE) + return 0; + + tegra_i2c_init(i2c_dev); + + /* start recovery upon arbitration loss in single master mode */ + if (i2c_dev->msg_err == I2C_ERR_ARBITRATION_LOST) { + if (!i2c_dev->is_multimaster_mode) + return i2c_recover_bus(&i2c_dev->adapter); + + return -EAGAIN; + } + + if (i2c_dev->msg_err == I2C_ERR_NO_ACK) { + if (msg->flags & I2C_M_IGNORE_NAK) + return 0; + + return -EREMOTEIO; + } + + return -EIO; +} + static int tegra_i2c_xfer_msg(struct tegra_i2c_dev *i2c_dev, struct i2c_msg *msg, enum msg_end_type end_state) @@ -1288,24 +1314,12 @@ static int tegra_i2c_xfer_msg(struct tegra_i2c_dev *i2c_dev, i2c_dev->msg_err); i2c_dev->is_curr_dma_xfer = false; - if (i2c_dev->msg_err == I2C_ERR_NONE) - return 0; - tegra_i2c_init(i2c_dev); - /* start recovery upon arbitration loss in single master mode */ - if (i2c_dev->msg_err == I2C_ERR_ARBITRATION_LOST) { - if (!i2c_dev->is_multimaster_mode) - return i2c_recover_bus(&i2c_dev->adapter); - return -EAGAIN; - } + err = tegra_i2c_error_recover(i2c_dev, msg); + if (err) + return err; - if (i2c_dev->msg_err == I2C_ERR_NO_ACK) { - if (msg->flags & I2C_M_IGNORE_NAK) - return 0; - return -EREMOTEIO; - } - - return -EIO; + return 0; } static int tegra_i2c_xfer(struct i2c_adapter *adap, struct i2c_msg msgs[], From
4be62340f3ce62b90a01ce11a6bc2426606614c5 Mon Sep 17 00:00:00 2001 From: Dmitry Osipenko Date: Wed, 30 Sep 2020 01:19:06 +0300 Subject: [PATCH 145/243] i2c: tegra: Factor out packet header setup from tegra_i2c_xfer_msg() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The code related to packet header setting up is a bit messy and makes tegra_i2c_xfer_msg() more difficult to read than it could be. Let's factor the packet header setup from tegra_i2c_xfer_msg() into separate function in order to make code easier to read and follow. Reviewed-by: Michał Mirosław Reviewed-by: Andy Shevchenko Acked-by: Thierry Reding Tested-by: Thierry Reding Signed-off-by: Dmitry Osipenko Signed-off-by: Wolfram Sang --- drivers/i2c/busses/i2c-tegra.c | 95 ++++++++++++++++++++-------------- 1 file changed, 55 insertions(+), 40 deletions(-) diff --git a/drivers/i2c/busses/i2c-tegra.c b/drivers/i2c/busses/i2c-tegra.c index 95d257cbd800..fbeae872ece1 100644 --- a/drivers/i2c/busses/i2c-tegra.c +++ b/drivers/i2c/busses/i2c-tegra.c @@ -281,7 +281,7 @@ struct tegra_i2c_dev { struct dma_chan *tx_dma_chan; struct dma_chan *rx_dma_chan; dma_addr_t dma_phys; - u32 *dma_buf; + void *dma_buf; unsigned int dma_buf_size; bool is_curr_dma_xfer; struct completion dma_complete; @@ -1105,6 +1105,57 @@ static int tegra_i2c_issue_bus_clear(struct i2c_adapter *adap) return -EAGAIN; } +static void tegra_i2c_push_packet_header(struct tegra_i2c_dev *i2c_dev, + struct i2c_msg *msg, + enum msg_end_type end_state) +{ + u32 *dma_buf = i2c_dev->dma_buf; + u32 packet_header; + + packet_header = FIELD_PREP(PACKET_HEADER0_HEADER_SIZE, 0) | + FIELD_PREP(PACKET_HEADER0_PROTOCOL, + PACKET_HEADER0_PROTOCOL_I2C) | + FIELD_PREP(PACKET_HEADER0_CONT_ID, i2c_dev->cont_id) | + FIELD_PREP(PACKET_HEADER0_PACKET_ID, 1); + + if (i2c_dev->is_curr_dma_xfer && !i2c_dev->msg_read) + *dma_buf++ = packet_header; + else + i2c_writel(i2c_dev, packet_header, I2C_TX_FIFO); + + packet_header =
msg->len - 1; + + if (i2c_dev->is_curr_dma_xfer && !i2c_dev->msg_read) + *dma_buf++ = packet_header; + else + i2c_writel(i2c_dev, packet_header, I2C_TX_FIFO); + + packet_header = I2C_HEADER_IE_ENABLE; + + if (end_state == MSG_END_CONTINUE) + packet_header |= I2C_HEADER_CONTINUE_XFER; + else if (end_state == MSG_END_REPEAT_START) + packet_header |= I2C_HEADER_REPEAT_START; + + if (msg->flags & I2C_M_TEN) { + packet_header |= msg->addr; + packet_header |= I2C_HEADER_10BIT_ADDR; + } else { + packet_header |= msg->addr << I2C_HEADER_SLAVE_ADDR_SHIFT; + } + + if (msg->flags & I2C_M_IGNORE_NAK) + packet_header |= I2C_HEADER_CONT_ON_NAK; + + if (msg->flags & I2C_M_RD) + packet_header |= I2C_HEADER_READ; + + if (i2c_dev->is_curr_dma_xfer && !i2c_dev->msg_read) + *dma_buf++ = packet_header; + else + i2c_writel(i2c_dev, packet_header, I2C_TX_FIFO); +} + static int tegra_i2c_error_recover(struct tegra_i2c_dev *i2c_dev, struct i2c_msg *msg) { @@ -1135,11 +1186,9 @@ static int tegra_i2c_xfer_msg(struct tegra_i2c_dev *i2c_dev, struct i2c_msg *msg, enum msg_end_type end_state) { - u32 packet_header; u32 int_mask; unsigned long time_left; size_t xfer_size; - u32 *buffer = NULL; int err = 0; u16 xfer_time = 100; @@ -1192,49 +1241,15 @@ static int tegra_i2c_xfer_msg(struct tegra_i2c_dev *i2c_dev, i2c_dev->dma_phys, xfer_size, DMA_TO_DEVICE); - buffer = i2c_dev->dma_buf; } } - packet_header = FIELD_PREP(PACKET_HEADER0_HEADER_SIZE, 0) | - FIELD_PREP(PACKET_HEADER0_PROTOCOL, - PACKET_HEADER0_PROTOCOL_I2C) | - FIELD_PREP(PACKET_HEADER0_CONT_ID, i2c_dev->cont_id) | - FIELD_PREP(PACKET_HEADER0_PACKET_ID, 1); - if (i2c_dev->is_curr_dma_xfer && !i2c_dev->msg_read) - *buffer++ = packet_header; - else - i2c_writel(i2c_dev, packet_header, I2C_TX_FIFO); - - packet_header = msg->len - 1; - if (i2c_dev->is_curr_dma_xfer && !i2c_dev->msg_read) - *buffer++ = packet_header; - else - i2c_writel(i2c_dev, packet_header, I2C_TX_FIFO); - - packet_header = I2C_HEADER_IE_ENABLE; - if (end_state == 
MSG_END_CONTINUE) - packet_header |= I2C_HEADER_CONTINUE_XFER; - else if (end_state == MSG_END_REPEAT_START) - packet_header |= I2C_HEADER_REPEAT_START; - if (msg->flags & I2C_M_TEN) { - packet_header |= msg->addr; - packet_header |= I2C_HEADER_10BIT_ADDR; - } else { - packet_header |= msg->addr << I2C_HEADER_SLAVE_ADDR_SHIFT; - } - if (msg->flags & I2C_M_IGNORE_NAK) - packet_header |= I2C_HEADER_CONT_ON_NAK; - if (msg->flags & I2C_M_RD) - packet_header |= I2C_HEADER_READ; - if (i2c_dev->is_curr_dma_xfer && !i2c_dev->msg_read) - *buffer++ = packet_header; - else - i2c_writel(i2c_dev, packet_header, I2C_TX_FIFO); + tegra_i2c_push_packet_header(i2c_dev, msg, end_state); if (!i2c_dev->msg_read) { if (i2c_dev->is_curr_dma_xfer) { - memcpy(buffer, msg->buf, msg->len); + memcpy(i2c_dev->dma_buf + I2C_PACKET_HEADER_SIZE, + msg->buf, msg->len); dma_sync_single_for_device(i2c_dev->dev, i2c_dev->dma_phys, xfer_size, From 507ae6ab724b780d72c010ffb106e580799ba641 Mon Sep 17 00:00:00 2001 From: Dmitry Osipenko Date: Wed, 30 Sep 2020 01:19:07 +0300 Subject: [PATCH 146/243] i2c: tegra: Factor out register polling into separate function MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Factor out register polling into a separate function in order to remove boilerplate code and make code cleaner. 
Reviewed-by: Michał Mirosław Reviewed-by: Andy Shevchenko Reviewed-by: Thierry Reding Tested-by: Thierry Reding Signed-off-by: Dmitry Osipenko Signed-off-by: Wolfram Sang --- drivers/i2c/busses/i2c-tegra.c | 57 +++++++++++++++------------------- 1 file changed, 25 insertions(+), 32 deletions(-) diff --git a/drivers/i2c/busses/i2c-tegra.c b/drivers/i2c/busses/i2c-tegra.c index fbeae872ece1..2d18a35dc18c 100644 --- a/drivers/i2c/busses/i2c-tegra.c +++ b/drivers/i2c/busses/i2c-tegra.c @@ -514,10 +514,24 @@ static void tegra_i2c_vi_init(struct tegra_i2c_dev *i2c_dev) i2c_writel(i2c_dev, 0x0, I2C_TLOW_SEXT); } +static int tegra_i2c_poll_register(struct tegra_i2c_dev *i2c_dev, + u32 reg, u32 mask, u32 delay_us, + u32 timeout_us) +{ + void __iomem *addr = i2c_dev->base + tegra_i2c_reg_addr(i2c_dev, reg); + u32 val; + + if (!i2c_dev->is_curr_atomic_xfer) + return readl_relaxed_poll_timeout(addr, val, !(val & mask), + delay_us, timeout_us); + + return readl_relaxed_poll_timeout_atomic(addr, val, !(val & mask), + delay_us, timeout_us); +} + static int tegra_i2c_flush_fifos(struct tegra_i2c_dev *i2c_dev) { - u32 mask, val, offset, reg_offset; - void __iomem *addr; + u32 mask, val, offset; int err; if (i2c_dev->hw->has_mst_fifo) { @@ -534,16 +548,7 @@ static int tegra_i2c_flush_fifos(struct tegra_i2c_dev *i2c_dev) val |= mask; i2c_writel(i2c_dev, val, offset); - reg_offset = tegra_i2c_reg_addr(i2c_dev, offset); - addr = i2c_dev->base + reg_offset; - - if (i2c_dev->is_curr_atomic_xfer) - err = readl_relaxed_poll_timeout_atomic(addr, val, !(val & mask), - 1000, 1000000); - else - err = readl_relaxed_poll_timeout(addr, val, !(val & mask), - 1000, 1000000); - + err = tegra_i2c_poll_register(i2c_dev, offset, mask, 1000, 1000000); if (err) { dev_err(i2c_dev->dev, "failed to flush FIFO\n"); return err; @@ -553,30 +558,18 @@ static int tegra_i2c_flush_fifos(struct tegra_i2c_dev *i2c_dev) static int tegra_i2c_wait_for_config_load(struct tegra_i2c_dev *i2c_dev) { - unsigned long 
reg_offset; - void __iomem *addr; - u32 val; int err; - if (i2c_dev->hw->has_config_load_reg) { - reg_offset = tegra_i2c_reg_addr(i2c_dev, I2C_CONFIG_LOAD); - addr = i2c_dev->base + reg_offset; - i2c_writel(i2c_dev, I2C_MSTR_CONFIG_LOAD, I2C_CONFIG_LOAD); + if (!i2c_dev->hw->has_config_load_reg) + return 0; - if (i2c_dev->is_curr_atomic_xfer) - err = readl_relaxed_poll_timeout_atomic( - addr, val, val == 0, 1000, - I2C_CONFIG_LOAD_TIMEOUT); - else - err = readl_relaxed_poll_timeout( - addr, val, val == 0, 1000, - I2C_CONFIG_LOAD_TIMEOUT); + i2c_writel(i2c_dev, I2C_MSTR_CONFIG_LOAD, I2C_CONFIG_LOAD); - if (err) { - dev_warn(i2c_dev->dev, - "timeout waiting for config load\n"); - return err; - } + err = tegra_i2c_poll_register(i2c_dev, I2C_CONFIG_LOAD, 0xffffffff, + 1000, I2C_CONFIG_LOAD_TIMEOUT); + if (err) { + dev_warn(i2c_dev->dev, "timeout waiting for config load\n"); + return err; } return 0; From d380d48ff376d6e98ce0c07a70487beb5d05cb4b Mon Sep 17 00:00:00 2001 From: Dmitry Osipenko Date: Wed, 30 Sep 2020 01:19:08 +0300 Subject: [PATCH 147/243] i2c: tegra: Factor out hardware initialization into separate function Factor out hardware initialization into a separate function from the probe function. The only place where runtime PM needs to be resumed during probe is the place of hardware initialization, hence it makes sense to factor out it in order to have a bit cleaner error handling in tegra_i2c_probe(). 
Reviewed-by: Andy Shevchenko Acked-by: Thierry Reding Tested-by: Thierry Reding Signed-off-by: Dmitry Osipenko Signed-off-by: Wolfram Sang --- drivers/i2c/busses/i2c-tegra.c | 32 +++++++++++++++++++------------- 1 file changed, 19 insertions(+), 13 deletions(-) diff --git a/drivers/i2c/busses/i2c-tegra.c b/drivers/i2c/busses/i2c-tegra.c index 2d18a35dc18c..d8fc5cdcc310 100644 --- a/drivers/i2c/busses/i2c-tegra.c +++ b/drivers/i2c/busses/i2c-tegra.c @@ -1661,9 +1661,23 @@ static void tegra_i2c_release_clocks(struct tegra_i2c_dev *i2c_dev) clk_bulk_unprepare(i2c_dev->nclocks, i2c_dev->clocks); } +static int tegra_i2c_init_hardware(struct tegra_i2c_dev *i2c_dev) +{ + int ret; + + ret = pm_runtime_get_sync(i2c_dev->dev); + if (ret < 0) + dev_err(i2c_dev->dev, "runtime resume failed: %d\n", ret); + else + ret = tegra_i2c_init(i2c_dev); + + pm_runtime_put(i2c_dev->dev); + + return ret; +} + static int tegra_i2c_probe(struct platform_device *pdev) { - struct device *dev = &pdev->dev; struct tegra_i2c_dev *i2c_dev; struct resource *res; int ret; @@ -1729,15 +1743,10 @@ static int tegra_i2c_probe(struct platform_device *pdev) if (!i2c_dev->is_vi) pm_runtime_irq_safe(&pdev->dev); pm_runtime_enable(&pdev->dev); - ret = pm_runtime_get_sync(i2c_dev->dev); - if (ret < 0) { - dev_err(dev, "runtime resume failed\n"); - goto put_rpm; - } - ret = tegra_i2c_init(i2c_dev); + ret = tegra_i2c_init_hardware(i2c_dev); if (ret) - goto put_rpm; + goto release_rpm; i2c_set_adapdata(&i2c_dev->adapter, i2c_dev); i2c_dev->adapter.dev.of_node = pdev->dev.of_node; @@ -1758,14 +1767,11 @@ static int tegra_i2c_probe(struct platform_device *pdev) ret = i2c_add_numbered_adapter(&i2c_dev->adapter); if (ret) - goto put_rpm; - - pm_runtime_put(&pdev->dev); + goto release_rpm; return 0; -put_rpm: - pm_runtime_put_sync(&pdev->dev); +release_rpm: pm_runtime_disable(&pdev->dev); tegra_i2c_release_dma(i2c_dev); From 3b3b8e59d0144beb3e4c5dc29abfa1f89e6d4423 Mon Sep 17 00:00:00 2001 From: Dmitry Osipenko Date: 
Wed, 30 Sep 2020 01:19:09 +0300 Subject: [PATCH 148/243] i2c: tegra: Check errors for both positive and negative values MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The driver's code is inconsistent in regards to the error values checking. The correct way should be to check both positive and negative values. This patch cleans up the error-checks in the code. Note that the pm_runtime_get_sync() could return positive value on success, hence only relevant parts of the code are changed by this patch. Reviewed-by: Michał Mirosław Reviewed-by: Andy Shevchenko Tested-by: Thierry Reding Signed-off-by: Dmitry Osipenko Signed-off-by: Wolfram Sang --- drivers/i2c/busses/i2c-tegra.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/drivers/i2c/busses/i2c-tegra.c b/drivers/i2c/busses/i2c-tegra.c index d8fc5cdcc310..41b6341be7b5 100644 --- a/drivers/i2c/busses/i2c-tegra.c +++ b/drivers/i2c/busses/i2c-tegra.c @@ -982,7 +982,7 @@ static void tegra_i2c_config_fifo_trig(struct tegra_i2c_dev *i2c_dev, slv_config.device_fc = true; ret = dmaengine_slave_config(chan, &slv_config); - if (ret < 0) { + if (ret) { dev_err(i2c_dev->dev, "DMA slave config failed: %d\n", ret); dev_err(i2c_dev->dev, "falling back to PIO\n"); @@ -1222,7 +1222,7 @@ static int tegra_i2c_xfer_msg(struct tegra_i2c_dev *i2c_dev, xfer_size, DMA_FROM_DEVICE); err = tegra_i2c_dma_submit(i2c_dev, xfer_size); - if (err < 0) { + if (err) { dev_err(i2c_dev->dev, "starting RX DMA failed, err %d\n", err); @@ -1248,7 +1248,7 @@ static int tegra_i2c_xfer_msg(struct tegra_i2c_dev *i2c_dev, xfer_size, DMA_TO_DEVICE); err = tegra_i2c_dma_submit(i2c_dev, xfer_size); - if (err < 0) { + if (err) { dev_err(i2c_dev->dev, "starting TX DMA failed, err %d\n", err); From 89e3748acd0bf657af0e6fc47c4ed0943afb1a75 Mon Sep 17 00:00:00 2001 From: Dmitry Osipenko Date: Wed, 30 Sep 2020 01:19:10 +0300 Subject: [PATCH 149/243] i2c: tegra: Improve formatting of variables Reorder 
definition of variables in the code to have them sorted by length and grouped logically, also replace "unsigned long" with "u32". Do this in order to make code easier to read. Reviewed-by: Andy Shevchenko Tested-by: Thierry Reding Signed-off-by: Dmitry Osipenko Signed-off-by: Wolfram Sang --- drivers/i2c/busses/i2c-tegra.c | 96 ++++++++++++++++------------------ 1 file changed, 45 insertions(+), 51 deletions(-) diff --git a/drivers/i2c/busses/i2c-tegra.c b/drivers/i2c/busses/i2c-tegra.c index 41b6341be7b5..823d5baadd68 100644 --- a/drivers/i2c/busses/i2c-tegra.c +++ b/drivers/i2c/busses/i2c-tegra.c @@ -259,42 +259,49 @@ struct tegra_i2c_hw_feature { */ struct tegra_i2c_dev { struct device *dev; - const struct tegra_i2c_hw_feature *hw; struct i2c_adapter adapter; - struct clk *div_clk; - struct clk_bulk_data clocks[2]; - unsigned int nclocks; + + const struct tegra_i2c_hw_feature *hw; struct reset_control *rst; - void __iomem *base; - phys_addr_t base_phys; unsigned int cont_id; unsigned int irq; - bool is_dvc; - bool is_vi; + + phys_addr_t base_phys; + void __iomem *base; + + struct clk_bulk_data clocks[2]; + unsigned int nclocks; + + struct clk *div_clk; + u32 bus_clk_rate; + struct completion msg_complete; + size_t msg_buf_remaining; int msg_err; u8 *msg_buf; - size_t msg_buf_remaining; - bool msg_read; - u32 bus_clk_rate; - bool is_multimaster_mode; + + struct completion dma_complete; struct dma_chan *tx_dma_chan; struct dma_chan *rx_dma_chan; + unsigned int dma_buf_size; dma_addr_t dma_phys; void *dma_buf; - unsigned int dma_buf_size; - bool is_curr_dma_xfer; - struct completion dma_complete; + + bool is_multimaster_mode; bool is_curr_atomic_xfer; + bool is_curr_dma_xfer; + bool msg_read; + bool is_dvc; + bool is_vi; }; static void dvc_writel(struct tegra_i2c_dev *i2c_dev, u32 val, - unsigned long reg) + unsigned int reg) { writel_relaxed(val, i2c_dev->base + reg); } -static u32 dvc_readl(struct tegra_i2c_dev *i2c_dev, unsigned long reg) +static u32 
dvc_readl(struct tegra_i2c_dev *i2c_dev, unsigned int reg) { return readl_relaxed(i2c_dev->base + reg); } @@ -303,8 +310,7 @@ static u32 dvc_readl(struct tegra_i2c_dev *i2c_dev, unsigned long reg) * i2c_writel and i2c_readl will offset the register if necessary to talk * to the I2C block inside the DVC block */ -static unsigned long tegra_i2c_reg_addr(struct tegra_i2c_dev *i2c_dev, - unsigned long reg) +static u32 tegra_i2c_reg_addr(struct tegra_i2c_dev *i2c_dev, unsigned int reg) { if (i2c_dev->is_dvc) reg += (reg >= I2C_TX_FIFO) ? 0x10 : 0x40; @@ -313,8 +319,7 @@ static unsigned long tegra_i2c_reg_addr(struct tegra_i2c_dev *i2c_dev, return reg; } -static void i2c_writel(struct tegra_i2c_dev *i2c_dev, u32 val, - unsigned long reg) +static void i2c_writel(struct tegra_i2c_dev *i2c_dev, u32 val, unsigned int reg) { writel_relaxed(val, i2c_dev->base + tegra_i2c_reg_addr(i2c_dev, reg)); @@ -323,19 +328,19 @@ static void i2c_writel(struct tegra_i2c_dev *i2c_dev, u32 val, readl_relaxed(i2c_dev->base + tegra_i2c_reg_addr(i2c_dev, reg)); } -static u32 i2c_readl(struct tegra_i2c_dev *i2c_dev, unsigned long reg) +static u32 i2c_readl(struct tegra_i2c_dev *i2c_dev, unsigned int reg) { return readl_relaxed(i2c_dev->base + tegra_i2c_reg_addr(i2c_dev, reg)); } static void i2c_writesl(struct tegra_i2c_dev *i2c_dev, void *data, - unsigned long reg, unsigned int len) + unsigned int reg, unsigned int len) { writesl(i2c_dev->base + tegra_i2c_reg_addr(i2c_dev, reg), data, len); } static void i2c_readsl(struct tegra_i2c_dev *i2c_dev, void *data, - unsigned long reg, unsigned int len) + unsigned int reg, unsigned int len) { readsl(i2c_dev->base + tegra_i2c_reg_addr(i2c_dev, reg), data, len); } @@ -410,8 +415,8 @@ static void tegra_i2c_release_dma(struct tegra_i2c_dev *i2c_dev) static int tegra_i2c_init_dma(struct tegra_i2c_dev *i2c_dev) { struct dma_chan *chan; - u32 *dma_buf; dma_addr_t dma_phys; + u32 *dma_buf; int err; if (!i2c_dev->hw->has_apb_dma || i2c_dev->is_vi) @@ -577,12 
+582,8 @@ static int tegra_i2c_wait_for_config_load(struct tegra_i2c_dev *i2c_dev) static int tegra_i2c_init(struct tegra_i2c_dev *i2c_dev) { - u32 val; + u32 val, clk_divisor, clk_multiplier, tsu_thd, tlow, thigh, non_hs_mode; int err; - u32 clk_divisor, clk_multiplier; - u32 non_hs_mode; - u32 tsu_thd; - u8 tlow, thigh; /* * The reset shouldn't ever fail in practice. The failure will be a @@ -704,11 +705,10 @@ static int tegra_i2c_disable_packet_mode(struct tegra_i2c_dev *i2c_dev) static int tegra_i2c_empty_rx_fifo(struct tegra_i2c_dev *i2c_dev) { - u32 val; - unsigned int rx_fifo_avail; - u8 *buf = i2c_dev->msg_buf; size_t buf_remaining = i2c_dev->msg_buf_remaining; - unsigned int words_to_transfer; + unsigned int words_to_transfer, rx_fifo_avail; + u8 *buf = i2c_dev->msg_buf; + u32 val; /* * Catch overflow due to message fully sent @@ -765,11 +765,10 @@ static int tegra_i2c_empty_rx_fifo(struct tegra_i2c_dev *i2c_dev) static int tegra_i2c_fill_tx_fifo(struct tegra_i2c_dev *i2c_dev) { - u32 val; - unsigned int tx_fifo_avail; - u8 *buf = i2c_dev->msg_buf; size_t buf_remaining = i2c_dev->msg_buf_remaining; - unsigned int words_to_transfer; + unsigned int words_to_transfer, tx_fifo_avail; + u8 *buf = i2c_dev->msg_buf; + u32 val; if (i2c_dev->hw->has_mst_fifo) { val = i2c_readl(i2c_dev, I2C_MST_FIFO_STATUS); @@ -830,9 +829,9 @@ static int tegra_i2c_fill_tx_fifo(struct tegra_i2c_dev *i2c_dev) static irqreturn_t tegra_i2c_isr(int irq, void *dev_id) { - u32 status; const u32 status_err = I2C_INT_NO_ACK | I2C_INT_ARBITRATION_LOST; struct tegra_i2c_dev *i2c_dev = dev_id; + u32 status; status = i2c_readl(i2c_dev, I2C_INT_STATUS); @@ -936,12 +935,10 @@ static irqreturn_t tegra_i2c_isr(int irq, void *dev_id) static void tegra_i2c_config_fifo_trig(struct tegra_i2c_dev *i2c_dev, size_t len) { - u32 val, reg; - u8 dma_burst; struct dma_slave_config slv_config = {0}; + u32 val, reg, dma_burst, reg_offset; struct dma_chan *chan; int ret; - unsigned long reg_offset; if 
(i2c_dev->hw->has_mst_fifo) reg = I2C_MST_FIFO_CONTROL; @@ -1063,9 +1060,8 @@ static unsigned long tegra_i2c_wait_completion(struct tegra_i2c_dev *i2c_dev, static int tegra_i2c_issue_bus_clear(struct i2c_adapter *adap) { struct tegra_i2c_dev *i2c_dev = i2c_get_adapdata(adap); + u32 reg, time_left; int err; - unsigned long time_left; - u32 reg; reinit_completion(&i2c_dev->msg_complete); reg = FIELD_PREP(I2C_BC_SCLK_THRESHOLD, 9) | I2C_BC_STOP_COND | @@ -1179,11 +1175,10 @@ static int tegra_i2c_xfer_msg(struct tegra_i2c_dev *i2c_dev, struct i2c_msg *msg, enum msg_end_type end_state) { - u32 int_mask; - unsigned long time_left; + unsigned long time_left, xfer_time = 100; size_t xfer_size; - int err = 0; - u16 xfer_time = 100; + u32 int_mask; + int err; err = tegra_i2c_flush_fifos(i2c_dev); if (err) @@ -1334,8 +1329,7 @@ static int tegra_i2c_xfer(struct i2c_adapter *adap, struct i2c_msg msgs[], int num) { struct tegra_i2c_dev *i2c_dev = i2c_get_adapdata(adap); - int i; - int ret; + int i, ret; ret = pm_runtime_get_sync(i2c_dev->dev); if (ret < 0) { @@ -1595,8 +1589,8 @@ MODULE_DEVICE_TABLE(of, tegra_i2c_of_match); static void tegra_i2c_parse_dt(struct tegra_i2c_dev *i2c_dev) { struct device_node *np = i2c_dev->dev->of_node; - int ret; bool multi_mode; + int ret; ret = of_property_read_u32(np, "clock-frequency", &i2c_dev->bus_clk_rate); From a99042e7d9fb5c9143b368210c08f8447a48c2c8 Mon Sep 17 00:00:00 2001 From: Dmitry Osipenko Date: Wed, 30 Sep 2020 01:19:11 +0300 Subject: [PATCH 150/243] i2c: tegra: Clean up variable names Rename "ret" variables to "err" in order to make code a bit more expressive, emphasizing that the returned value is an error code. Same vice versa, where appropriate. Rename variable "reg" to "val" in order to better reflect the actual usage of the variable in the code and to make naming consistent with the rest of the code. Use briefer names for a few members of the tegra_i2c_dev structure in order to improve readability of the code. 
All dev/&pdev->dev are replaced with i2c_dev->dev in order to have uniform code style across the driver. Reviewed-by: Andy Shevchenko Acked-by: Thierry Reding Tested-by: Thierry Reding Signed-off-by: Dmitry Osipenko Signed-off-by: Wolfram Sang --- drivers/i2c/busses/i2c-tegra.c | 177 +++++++++++++++++---------------- 1 file changed, 90 insertions(+), 87 deletions(-) diff --git a/drivers/i2c/busses/i2c-tegra.c b/drivers/i2c/busses/i2c-tegra.c index 823d5baadd68..1a1388339660 100644 --- a/drivers/i2c/busses/i2c-tegra.c +++ b/drivers/i2c/busses/i2c-tegra.c @@ -247,15 +247,15 @@ struct tegra_i2c_hw_feature { * @msg_buf_remaining: size of unsent data in the message buffer * @msg_read: identifies read transfers * @bus_clk_rate: current I2C bus clock rate - * @is_multimaster_mode: track if I2C controller is in multi-master mode + * @multimaster_mode: indicates that I2C controller is in multi-master mode * @tx_dma_chan: DMA transmit channel * @rx_dma_chan: DMA receive channel * @dma_phys: handle to DMA resources * @dma_buf: pointer to allocated DMA buffer * @dma_buf_size: DMA buffer size - * @is_curr_dma_xfer: indicates active DMA transfer + * @dma_mode: indicates active DMA transfer * @dma_complete: DMA completion notifier - * @is_curr_atomic_xfer: indicates active atomic transfer + * @atomic_mode: indicates active atomic transfer */ struct tegra_i2c_dev { struct device *dev; @@ -287,9 +287,9 @@ struct tegra_i2c_dev { dma_addr_t dma_phys; void *dma_buf; - bool is_multimaster_mode; - bool is_curr_atomic_xfer; - bool is_curr_dma_xfer; + bool multimaster_mode; + bool atomic_mode; + bool dma_mode; bool msg_read; bool is_dvc; bool is_vi; @@ -526,7 +526,7 @@ static int tegra_i2c_poll_register(struct tegra_i2c_dev *i2c_dev, void __iomem *addr = i2c_dev->base + tegra_i2c_reg_addr(i2c_dev, reg); u32 val; - if (!i2c_dev->is_curr_atomic_xfer) + if (!i2c_dev->atomic_mode) return readl_relaxed_poll_timeout(addr, val, !(val & mask), delay_us, timeout_us); @@ -674,7 +674,7 @@ static int 
tegra_i2c_init(struct tegra_i2c_dev *i2c_dev) if (err) return err; - if (i2c_dev->is_multimaster_mode && i2c_dev->hw->has_slcg_override_reg) + if (i2c_dev->multimaster_mode && i2c_dev->hw->has_slcg_override_reg) i2c_writel(i2c_dev, I2C_MST_CORE_CLKEN_OVR, I2C_CLKEN_OVERRIDE); err = tegra_i2c_wait_for_config_load(i2c_dev); @@ -860,7 +860,7 @@ static irqreturn_t tegra_i2c_isr(int irq, void *dev_id) if (i2c_dev->hw->supports_bus_clear && (status & I2C_INT_BUS_CLR_DONE)) goto err; - if (!i2c_dev->is_curr_dma_xfer) { + if (!i2c_dev->dma_mode) { if (i2c_dev->msg_read && (status & I2C_INT_RX_FIFO_DATA_REQ)) { if (tegra_i2c_empty_rx_fifo(i2c_dev)) { /* @@ -894,7 +894,7 @@ static irqreturn_t tegra_i2c_isr(int irq, void *dev_id) * so forcing msg_buf_remaining to 0 in DMA mode. */ if (status & I2C_INT_PACKET_XFER_COMPLETE) { - if (i2c_dev->is_curr_dma_xfer) + if (i2c_dev->dma_mode) i2c_dev->msg_buf_remaining = 0; /* * Underflow error condition: XFER_COMPLETE before message @@ -918,7 +918,7 @@ static irqreturn_t tegra_i2c_isr(int irq, void *dev_id) if (i2c_dev->is_dvc) dvc_writel(i2c_dev, DVC_STATUS_I2C_DONE_INTR, DVC_STATUS); - if (i2c_dev->is_curr_dma_xfer) { + if (i2c_dev->dma_mode) { if (i2c_dev->msg_read) dmaengine_terminate_async(i2c_dev->rx_dma_chan); else @@ -938,14 +938,14 @@ static void tegra_i2c_config_fifo_trig(struct tegra_i2c_dev *i2c_dev, struct dma_slave_config slv_config = {0}; u32 val, reg, dma_burst, reg_offset; struct dma_chan *chan; - int ret; + int err; if (i2c_dev->hw->has_mst_fifo) reg = I2C_MST_FIFO_CONTROL; else reg = I2C_FIFO_CONTROL; - if (i2c_dev->is_curr_dma_xfer) { + if (i2c_dev->dma_mode) { if (len & 0xF) dma_burst = 1; else if (len & 0x10) @@ -978,13 +978,13 @@ static void tegra_i2c_config_fifo_trig(struct tegra_i2c_dev *i2c_dev, } slv_config.device_fc = true; - ret = dmaengine_slave_config(chan, &slv_config); - if (ret) { + err = dmaengine_slave_config(chan, &slv_config); + if (err) { dev_err(i2c_dev->dev, "DMA slave config failed: %d\n", - 
ret); + err); dev_err(i2c_dev->dev, "falling back to PIO\n"); tegra_i2c_release_dma(i2c_dev); - i2c_dev->is_curr_dma_xfer = false; + i2c_dev->dma_mode = false; } else { goto out; } @@ -1032,7 +1032,7 @@ static unsigned long tegra_i2c_wait_completion(struct tegra_i2c_dev *i2c_dev, { unsigned long ret; - if (i2c_dev->is_curr_atomic_xfer) { + if (i2c_dev->atomic_mode) { ret = tegra_i2c_poll_completion(i2c_dev, complete, timeout_ms); } else { enable_irq(i2c_dev->irq); @@ -1060,20 +1060,20 @@ static unsigned long tegra_i2c_wait_completion(struct tegra_i2c_dev *i2c_dev, static int tegra_i2c_issue_bus_clear(struct i2c_adapter *adap) { struct tegra_i2c_dev *i2c_dev = i2c_get_adapdata(adap); - u32 reg, time_left; + u32 val, time_left; int err; reinit_completion(&i2c_dev->msg_complete); - reg = FIELD_PREP(I2C_BC_SCLK_THRESHOLD, 9) | I2C_BC_STOP_COND | + val = FIELD_PREP(I2C_BC_SCLK_THRESHOLD, 9) | I2C_BC_STOP_COND | I2C_BC_TERMINATE; - i2c_writel(i2c_dev, reg, I2C_BUS_CLEAR_CNFG); + i2c_writel(i2c_dev, val, I2C_BUS_CLEAR_CNFG); err = tegra_i2c_wait_for_config_load(i2c_dev); if (err) return err; - reg |= I2C_BC_ENABLE; - i2c_writel(i2c_dev, reg, I2C_BUS_CLEAR_CNFG); + val |= I2C_BC_ENABLE; + i2c_writel(i2c_dev, val, I2C_BUS_CLEAR_CNFG); tegra_i2c_unmask_irq(i2c_dev, I2C_INT_BUS_CLR_DONE); time_left = tegra_i2c_wait_completion(i2c_dev, &i2c_dev->msg_complete, 50); @@ -1084,8 +1084,8 @@ static int tegra_i2c_issue_bus_clear(struct i2c_adapter *adap) return -ETIMEDOUT; } - reg = i2c_readl(i2c_dev, I2C_BUS_CLEAR_STATUS); - if (!(reg & I2C_BC_STATUS)) { + val = i2c_readl(i2c_dev, I2C_BUS_CLEAR_STATUS); + if (!(val & I2C_BC_STATUS)) { dev_err(i2c_dev->dev, "un-recovered arbitration lost\n"); return -EIO; @@ -1107,14 +1107,14 @@ static void tegra_i2c_push_packet_header(struct tegra_i2c_dev *i2c_dev, FIELD_PREP(PACKET_HEADER0_CONT_ID, i2c_dev->cont_id) | FIELD_PREP(PACKET_HEADER0_PACKET_ID, 1); - if (i2c_dev->is_curr_dma_xfer && !i2c_dev->msg_read) + if (i2c_dev->dma_mode && 
!i2c_dev->msg_read) *dma_buf++ = packet_header; else i2c_writel(i2c_dev, packet_header, I2C_TX_FIFO); packet_header = msg->len - 1; - if (i2c_dev->is_curr_dma_xfer && !i2c_dev->msg_read) + if (i2c_dev->dma_mode && !i2c_dev->msg_read) *dma_buf++ = packet_header; else i2c_writel(i2c_dev, packet_header, I2C_TX_FIFO); @@ -1139,7 +1139,7 @@ static void tegra_i2c_push_packet_header(struct tegra_i2c_dev *i2c_dev, if (msg->flags & I2C_M_RD) packet_header |= I2C_HEADER_READ; - if (i2c_dev->is_curr_dma_xfer && !i2c_dev->msg_read) + if (i2c_dev->dma_mode && !i2c_dev->msg_read) *dma_buf++ = packet_header; else i2c_writel(i2c_dev, packet_header, I2C_TX_FIFO); @@ -1155,7 +1155,7 @@ static int tegra_i2c_error_recover(struct tegra_i2c_dev *i2c_dev, /* start recovery upon arbitration loss in single master mode */ if (i2c_dev->msg_err == I2C_ERR_ARBITRATION_LOST) { - if (!i2c_dev->is_multimaster_mode) + if (!i2c_dev->multimaster_mode) return i2c_recover_bus(&i2c_dev->adapter); return -EAGAIN; @@ -1196,9 +1196,9 @@ static int tegra_i2c_xfer_msg(struct tegra_i2c_dev *i2c_dev, xfer_size = msg->len + I2C_PACKET_HEADER_SIZE; xfer_size = ALIGN(xfer_size, BYTES_PER_FIFO_WORD); - i2c_dev->is_curr_dma_xfer = (xfer_size > I2C_PIO_MODE_PREFERRED_LEN) && - i2c_dev->dma_buf && - !i2c_dev->is_curr_atomic_xfer; + i2c_dev->dma_mode = (xfer_size > I2C_PIO_MODE_PREFERRED_LEN) && + i2c_dev->dma_buf && !i2c_dev->atomic_mode; + tegra_i2c_config_fifo_trig(i2c_dev, xfer_size); /* @@ -1210,7 +1210,7 @@ static int tegra_i2c_xfer_msg(struct tegra_i2c_dev *i2c_dev, int_mask = I2C_INT_NO_ACK | I2C_INT_ARBITRATION_LOST; tegra_i2c_unmask_irq(i2c_dev, int_mask); - if (i2c_dev->is_curr_dma_xfer) { + if (i2c_dev->dma_mode) { if (i2c_dev->msg_read) { dma_sync_single_for_device(i2c_dev->dev, i2c_dev->dma_phys, @@ -1235,7 +1235,7 @@ static int tegra_i2c_xfer_msg(struct tegra_i2c_dev *i2c_dev, tegra_i2c_push_packet_header(i2c_dev, msg, end_state); if (!i2c_dev->msg_read) { - if (i2c_dev->is_curr_dma_xfer) { + if 
(i2c_dev->dma_mode) { memcpy(i2c_dev->dma_buf + I2C_PACKET_HEADER_SIZE, msg->buf, msg->len); dma_sync_single_for_device(i2c_dev->dev, @@ -1256,7 +1256,7 @@ static int tegra_i2c_xfer_msg(struct tegra_i2c_dev *i2c_dev, if (i2c_dev->hw->has_per_pkt_xfer_complete_irq) int_mask |= I2C_INT_PACKET_XFER_COMPLETE; - if (!i2c_dev->is_curr_dma_xfer) { + if (!i2c_dev->dma_mode) { if (msg->flags & I2C_M_RD) int_mask |= I2C_INT_RX_FIFO_DATA_REQ; else if (i2c_dev->msg_buf_remaining) @@ -1267,7 +1267,7 @@ static int tegra_i2c_xfer_msg(struct tegra_i2c_dev *i2c_dev, dev_dbg(i2c_dev->dev, "unmasked irq: %02x\n", i2c_readl(i2c_dev, I2C_INT_MASK)); - if (i2c_dev->is_curr_dma_xfer) { + if (i2c_dev->dma_mode) { time_left = tegra_i2c_wait_completion(i2c_dev, &i2c_dev->dma_complete, xfer_time); @@ -1316,7 +1316,7 @@ static int tegra_i2c_xfer_msg(struct tegra_i2c_dev *i2c_dev, time_left, completion_done(&i2c_dev->msg_complete), i2c_dev->msg_err); - i2c_dev->is_curr_dma_xfer = false; + i2c_dev->dma_mode = false; err = tegra_i2c_error_recover(i2c_dev, msg); if (err) @@ -1363,9 +1363,9 @@ static int tegra_i2c_xfer_atomic(struct i2c_adapter *adap, struct tegra_i2c_dev *i2c_dev = i2c_get_adapdata(adap); int ret; - i2c_dev->is_curr_atomic_xfer = true; + i2c_dev->atomic_mode = true; ret = tegra_i2c_xfer(adap, msgs, num); - i2c_dev->is_curr_atomic_xfer = false; + i2c_dev->atomic_mode = false; return ret; } @@ -1590,15 +1590,15 @@ static void tegra_i2c_parse_dt(struct tegra_i2c_dev *i2c_dev) { struct device_node *np = i2c_dev->dev->of_node; bool multi_mode; - int ret; + int err; - ret = of_property_read_u32(np, "clock-frequency", + err = of_property_read_u32(np, "clock-frequency", &i2c_dev->bus_clk_rate); - if (ret) + if (err) i2c_dev->bus_clk_rate = I2C_MAX_STANDARD_MODE_FREQ; /* default clock rate */ multi_mode = of_property_read_bool(np, "multi-master"); - i2c_dev->is_multimaster_mode = multi_mode; + i2c_dev->multimaster_mode = multi_mode; if (of_device_is_compatible(np, 
"nvidia,tegra20-i2c-dvc")) i2c_dev->is_dvc = true; @@ -1630,7 +1630,7 @@ static int tegra_i2c_init_clocks(struct tegra_i2c_dev *i2c_dev) i2c_dev->div_clk = i2c_dev->clocks[0].clk; - if (!i2c_dev->is_multimaster_mode) + if (!i2c_dev->multimaster_mode) return 0; err = clk_enable(i2c_dev->div_clk); @@ -1649,7 +1649,7 @@ static int tegra_i2c_init_clocks(struct tegra_i2c_dev *i2c_dev) static void tegra_i2c_release_clocks(struct tegra_i2c_dev *i2c_dev) { - if (i2c_dev->is_multimaster_mode) + if (i2c_dev->multimaster_mode) clk_disable(i2c_dev->div_clk); clk_bulk_unprepare(i2c_dev->nclocks, i2c_dev->clocks); @@ -1674,7 +1674,7 @@ static int tegra_i2c_probe(struct platform_device *pdev) { struct tegra_i2c_dev *i2c_dev; struct resource *res; - int ret; + int err; i2c_dev = devm_kzalloc(&pdev->dev, sizeof(*i2c_dev), GFP_KERNEL); if (!i2c_dev) @@ -1695,36 +1695,36 @@ static int tegra_i2c_probe(struct platform_device *pdev) i2c_dev->base_phys = res->start; - ret = platform_get_irq(pdev, 0); - if (ret < 0) - return ret; + err = platform_get_irq(pdev, 0); + if (err < 0) + return err; - i2c_dev->irq = ret; + i2c_dev->irq = err; /* interrupt will be enabled during of transfer time */ irq_set_status_flags(i2c_dev->irq, IRQ_NOAUTOEN); - ret = devm_request_irq(&pdev->dev, i2c_dev->irq, tegra_i2c_isr, - IRQF_NO_SUSPEND, dev_name(&pdev->dev), + err = devm_request_irq(i2c_dev->dev, i2c_dev->irq, tegra_i2c_isr, + IRQF_NO_SUSPEND, dev_name(i2c_dev->dev), i2c_dev); - if (ret) - return ret; + if (err) + return err; - i2c_dev->rst = devm_reset_control_get_exclusive(&pdev->dev, "i2c"); + i2c_dev->rst = devm_reset_control_get_exclusive(i2c_dev->dev, "i2c"); if (IS_ERR(i2c_dev->rst)) { - dev_err_probe(&pdev->dev, PTR_ERR(i2c_dev->rst), + dev_err_probe(i2c_dev->dev, PTR_ERR(i2c_dev->rst), "failed to get reset control\n"); return PTR_ERR(i2c_dev->rst); } tegra_i2c_parse_dt(i2c_dev); - ret = tegra_i2c_init_clocks(i2c_dev); - if (ret) - return ret; + err = tegra_i2c_init_clocks(i2c_dev); + if (err) 
+ return err; - ret = tegra_i2c_init_dma(i2c_dev); - if (ret) + err = tegra_i2c_init_dma(i2c_dev); + if (err) goto release_clocks; /* @@ -1735,16 +1735,16 @@ static int tegra_i2c_probe(struct platform_device *pdev) * not be used for atomic transfers. */ if (!i2c_dev->is_vi) - pm_runtime_irq_safe(&pdev->dev); - pm_runtime_enable(&pdev->dev); + pm_runtime_irq_safe(i2c_dev->dev); + pm_runtime_enable(i2c_dev->dev); - ret = tegra_i2c_init_hardware(i2c_dev); - if (ret) + err = tegra_i2c_init_hardware(i2c_dev); + if (err) goto release_rpm; i2c_set_adapdata(&i2c_dev->adapter, i2c_dev); - i2c_dev->adapter.dev.of_node = pdev->dev.of_node; - i2c_dev->adapter.dev.parent = &pdev->dev; + i2c_dev->adapter.dev.of_node = i2c_dev->dev->of_node; + i2c_dev->adapter.dev.parent = i2c_dev->dev; i2c_dev->adapter.retries = 1; i2c_dev->adapter.timeout = 6 * HZ; i2c_dev->adapter.quirks = i2c_dev->hw->quirks; @@ -1756,23 +1756,23 @@ static int tegra_i2c_probe(struct platform_device *pdev) if (i2c_dev->hw->supports_bus_clear) i2c_dev->adapter.bus_recovery_info = &tegra_i2c_recovery_info; - strlcpy(i2c_dev->adapter.name, dev_name(&pdev->dev), + strlcpy(i2c_dev->adapter.name, dev_name(i2c_dev->dev), sizeof(i2c_dev->adapter.name)); - ret = i2c_add_numbered_adapter(&i2c_dev->adapter); - if (ret) + err = i2c_add_numbered_adapter(&i2c_dev->adapter); + if (err) goto release_rpm; return 0; release_rpm: - pm_runtime_disable(&pdev->dev); + pm_runtime_disable(i2c_dev->dev); tegra_i2c_release_dma(i2c_dev); release_clocks: tegra_i2c_release_clocks(i2c_dev); - return ret; + return err; } static int tegra_i2c_remove(struct platform_device *pdev) @@ -1781,7 +1781,7 @@ static int tegra_i2c_remove(struct platform_device *pdev) i2c_del_adapter(&i2c_dev->adapter); - pm_runtime_disable(&pdev->dev); + pm_runtime_disable(i2c_dev->dev); tegra_i2c_release_dma(i2c_dev); tegra_i2c_release_clocks(i2c_dev); @@ -1791,15 +1791,15 @@ static int tegra_i2c_remove(struct platform_device *pdev) static int __maybe_unused 
tegra_i2c_runtime_resume(struct device *dev) { struct tegra_i2c_dev *i2c_dev = dev_get_drvdata(dev); - int ret; + int err; - ret = pinctrl_pm_select_default_state(i2c_dev->dev); - if (ret) - return ret; + err = pinctrl_pm_select_default_state(dev); + if (err) + return err; - ret = clk_bulk_enable(i2c_dev->nclocks, i2c_dev->clocks); - if (ret) - return ret; + err = clk_bulk_enable(i2c_dev->nclocks, i2c_dev->clocks); + if (err) + return err; /* * VI I2C device is attached to VE power domain which goes through @@ -1808,8 +1808,8 @@ static int __maybe_unused tegra_i2c_runtime_resume(struct device *dev) * domain ON. */ if (i2c_dev->is_vi) { - ret = tegra_i2c_init(i2c_dev); - if (ret) + err = tegra_i2c_init(i2c_dev); + if (err) goto disable_clocks; } @@ -1818,7 +1818,7 @@ static int __maybe_unused tegra_i2c_runtime_resume(struct device *dev) disable_clocks: clk_bulk_disable(i2c_dev->nclocks, i2c_dev->clocks); - return ret; + return err; } static int __maybe_unused tegra_i2c_runtime_suspend(struct device *dev) @@ -1827,20 +1827,23 @@ static int __maybe_unused tegra_i2c_runtime_suspend(struct device *dev) clk_bulk_disable(i2c_dev->nclocks, i2c_dev->clocks); - return pinctrl_pm_select_idle_state(i2c_dev->dev); + return pinctrl_pm_select_idle_state(dev); } static int __maybe_unused tegra_i2c_suspend(struct device *dev) { struct tegra_i2c_dev *i2c_dev = dev_get_drvdata(dev); - int err = 0; + int err; i2c_mark_adapter_suspended(&i2c_dev->adapter); - if (!pm_runtime_status_suspended(dev)) + if (!pm_runtime_status_suspended(dev)) { err = tegra_i2c_runtime_suspend(dev); + if (err) + return err; + } - return err; + return 0; } static int __maybe_unused tegra_i2c_resume(struct device *dev) From 76d06443cc5b3727e7eb8de1f2313d0d67388865 Mon Sep 17 00:00:00 2001 From: Dmitry Osipenko Date: Wed, 30 Sep 2020 01:19:12 +0300 Subject: [PATCH 151/243] i2c: tegra: Clean up printk messages MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This patch 
unifies style of all messages in the driver by starting them with a lowercase letter and using consistent capitalization and wording for all messages. Reviewed-by: Michał Mirosław Reviewed-by: Andy Shevchenko Acked-by: Thierry Reding Tested-by: Thierry Reding Signed-off-by: Dmitry Osipenko Signed-off-by: Wolfram Sang --- drivers/i2c/busses/i2c-tegra.c | 37 +++++++++++++--------------------- 1 file changed, 14 insertions(+), 23 deletions(-) diff --git a/drivers/i2c/busses/i2c-tegra.c b/drivers/i2c/busses/i2c-tegra.c index 1a1388339660..dbf83424fb2f 100644 --- a/drivers/i2c/busses/i2c-tegra.c +++ b/drivers/i2c/busses/i2c-tegra.c @@ -382,7 +382,8 @@ static int tegra_i2c_dma_submit(struct tegra_i2c_dev *i2c_dev, size_t len) len, dir, DMA_PREP_INTERRUPT | DMA_CTRL_ACK); if (!dma_desc) { - dev_err(i2c_dev->dev, "failed to get DMA descriptor\n"); + dev_err(i2c_dev->dev, "failed to get %s DMA descriptor\n", + i2c_dev->msg_read ? "RX" : "TX"); return -EINVAL; } @@ -423,7 +424,7 @@ static int tegra_i2c_init_dma(struct tegra_i2c_dev *i2c_dev) return 0; if (!IS_ENABLED(CONFIG_TEGRA20_APB_DMA)) { - dev_dbg(i2c_dev->dev, "Support for APB DMA not enabled!\n"); + dev_dbg(i2c_dev->dev, "DMA support not enabled\n"); return 0; } @@ -449,7 +450,7 @@ static int tegra_i2c_init_dma(struct tegra_i2c_dev *i2c_dev) dma_buf = dma_alloc_coherent(i2c_dev->dev, i2c_dev->dma_buf_size, &dma_phys, GFP_KERNEL | __GFP_NOWARN); if (!dma_buf) { - dev_err(i2c_dev->dev, "failed to allocate the DMA buffer\n"); + dev_err(i2c_dev->dev, "failed to allocate DMA buffer\n"); err = -ENOMEM; goto err_out; } @@ -573,7 +574,7 @@ static int tegra_i2c_wait_for_config_load(struct tegra_i2c_dev *i2c_dev) err = tegra_i2c_poll_register(i2c_dev, I2C_CONFIG_LOAD, 0xffffffff, 1000, I2C_CONFIG_LOAD_TIMEOUT); if (err) { - dev_warn(i2c_dev->dev, "timeout waiting for config load\n"); + dev_err(i2c_dev->dev, "failed to load config\n"); return err; } @@ -836,7 +837,7 @@ static irqreturn_t tegra_i2c_isr(int irq, void *dev_id) 
status = i2c_readl(i2c_dev, I2C_INT_STATUS); if (status == 0) { - dev_warn(i2c_dev->dev, "irq status 0 %08x %08x %08x\n", + dev_warn(i2c_dev->dev, "IRQ status 0 %08x %08x %08x\n", i2c_readl(i2c_dev, I2C_PACKET_TRANSFER_STATUS), i2c_readl(i2c_dev, I2C_STATUS), i2c_readl(i2c_dev, I2C_CNFG)); @@ -980,8 +981,7 @@ static void tegra_i2c_config_fifo_trig(struct tegra_i2c_dev *i2c_dev, slv_config.device_fc = true; err = dmaengine_slave_config(chan, &slv_config); if (err) { - dev_err(i2c_dev->dev, "DMA slave config failed: %d\n", - err); + dev_err(i2c_dev->dev, "DMA config failed: %d\n", err); dev_err(i2c_dev->dev, "falling back to PIO\n"); tegra_i2c_release_dma(i2c_dev); i2c_dev->dma_mode = false; @@ -1080,14 +1080,13 @@ static int tegra_i2c_issue_bus_clear(struct i2c_adapter *adap) tegra_i2c_mask_irq(i2c_dev, I2C_INT_BUS_CLR_DONE); if (time_left == 0) { - dev_err(i2c_dev->dev, "timed out for bus clear\n"); + dev_err(i2c_dev->dev, "failed to clear bus\n"); return -ETIMEDOUT; } val = i2c_readl(i2c_dev, I2C_BUS_CLEAR_STATUS); if (!(val & I2C_BC_STATUS)) { - dev_err(i2c_dev->dev, - "un-recovered arbitration lost\n"); + dev_err(i2c_dev->dev, "un-recovered arbitration lost\n"); return -EIO; } @@ -1217,12 +1216,8 @@ static int tegra_i2c_xfer_msg(struct tegra_i2c_dev *i2c_dev, xfer_size, DMA_FROM_DEVICE); err = tegra_i2c_dma_submit(i2c_dev, xfer_size); - if (err) { - dev_err(i2c_dev->dev, - "starting RX DMA failed, err %d\n", - err); + if (err) return err; - } } else { dma_sync_single_for_cpu(i2c_dev->dev, @@ -1243,12 +1238,8 @@ static int tegra_i2c_xfer_msg(struct tegra_i2c_dev *i2c_dev, xfer_size, DMA_TO_DEVICE); err = tegra_i2c_dma_submit(i2c_dev, xfer_size); - if (err) { - dev_err(i2c_dev->dev, - "starting TX DMA failed, err %d\n", - err); + if (err) return err; - } } else { tegra_i2c_fill_tx_fifo(i2c_dev); } @@ -1264,7 +1255,7 @@ static int tegra_i2c_xfer_msg(struct tegra_i2c_dev *i2c_dev, } tegra_i2c_unmask_irq(i2c_dev, int_mask); - dev_dbg(i2c_dev->dev, "unmasked irq: 
%02x\n", + dev_dbg(i2c_dev->dev, "unmasked IRQ: %02x\n", i2c_readl(i2c_dev, I2C_INT_MASK)); if (i2c_dev->dma_mode) { @@ -1286,7 +1277,7 @@ static int tegra_i2c_xfer_msg(struct tegra_i2c_dev *i2c_dev, i2c_dev->tx_dma_chan); if (!time_left && !completion_done(&i2c_dev->dma_complete)) { - dev_err(i2c_dev->dev, "DMA transfer timeout\n"); + dev_err(i2c_dev->dev, "DMA transfer timed out\n"); tegra_i2c_init(i2c_dev); return -ETIMEDOUT; } @@ -1307,7 +1298,7 @@ static int tegra_i2c_xfer_msg(struct tegra_i2c_dev *i2c_dev, tegra_i2c_mask_irq(i2c_dev, int_mask); if (time_left == 0) { - dev_err(i2c_dev->dev, "i2c transfer timed out\n"); + dev_err(i2c_dev->dev, "I2C transfer timed out\n"); tegra_i2c_init(i2c_dev); return -ETIMEDOUT; } From 94a5573f0719cf82143d401658d0ad2940220b8d Mon Sep 17 00:00:00 2001 From: Dmitry Osipenko Date: Wed, 30 Sep 2020 01:19:13 +0300 Subject: [PATCH 152/243] i2c: tegra: Clean up and improve comments Make all comments to be consistent in regards to capitalization and punctuation, correct spelling and grammar errors, improve wording. Reviewed-by: Thierry Reding Reviewed-by: Andy Shevchenko Tested-by: Thierry Reding Signed-off-by: Dmitry Osipenko Signed-off-by: Wolfram Sang --- drivers/i2c/busses/i2c-tegra.c | 88 ++++++++++++++++++---------------- 1 file changed, 47 insertions(+), 41 deletions(-) diff --git a/drivers/i2c/busses/i2c-tegra.c b/drivers/i2c/busses/i2c-tegra.c index dbf83424fb2f..194c9ec84713 100644 --- a/drivers/i2c/busses/i2c-tegra.c +++ b/drivers/i2c/busses/i2c-tegra.c @@ -136,7 +136,7 @@ /* configuration load timeout in microseconds */ #define I2C_CONFIG_LOAD_TIMEOUT 1000000 -/* Packet header size in bytes */ +/* packet header size in bytes */ #define I2C_PACKET_HEADER_SIZE 12 /* @@ -148,11 +148,10 @@ #define I2C_PIO_MODE_PREFERRED_LEN 32 /* - * msg_end_type: The bus control which need to be send at end of transfer. - * @MSG_END_STOP: Send stop pulse at end of transfer. - * @MSG_END_REPEAT_START: Send repeat start at end of transfer. 
- * @MSG_END_CONTINUE: The following on message is coming and so do not send - * stop or repeat start. + * msg_end_type: The bus control which needs to be sent at end of transfer. + * @MSG_END_STOP: Send stop pulse. + * @MSG_END_REPEAT_START: Send repeat-start. + * @MSG_END_CONTINUE: Don't send stop or repeat-start. */ enum msg_end_type { MSG_END_STOP, @@ -161,10 +160,10 @@ enum msg_end_type { }; /** - * struct tegra_i2c_hw_feature : Different HW support on Tegra - * @has_continue_xfer_support: Continue transfer supports. + * struct tegra_i2c_hw_feature : per hardware generation features + * @has_continue_xfer_support: continue-transfer supported * @has_per_pkt_xfer_complete_irq: Has enable/disable capability for transfer - * complete interrupt per packet basis. + * completion interrupt on per packet basis. * @has_config_load_reg: Has the config load register to load the new * configuration. * @clk_divisor_hs_mode: Clock divisor in HS mode. @@ -184,7 +183,7 @@ enum msg_end_type { * @has_mst_fifo: The I2C controller contains the new MST FIFO interface that * provides additional features and allows for longer messages to * be transferred in one go. - * @quirks: i2c adapter quirks for limiting write/read transfer size and not + * @quirks: I2C adapter quirks for limiting write/read transfer size and not * allowing 0 length transfers. * @supports_bus_clear: Bus Clear support to recover from bus hang during * SDA stuck low from device for some unknown reasons. 
@@ -245,7 +244,7 @@ struct tegra_i2c_hw_feature { * @msg_err: error code for completed message * @msg_buf: pointer to current message data * @msg_buf_remaining: size of unsent data in the message buffer - * @msg_read: identifies read transfers + * @msg_read: indicates that the transfer is a read access * @bus_clk_rate: current I2C bus clock rate * @multimaster_mode: indicates that I2C controller is in multi-master mode * @tx_dma_chan: DMA transmit channel @@ -307,8 +306,8 @@ static u32 dvc_readl(struct tegra_i2c_dev *i2c_dev, unsigned int reg) } /* - * i2c_writel and i2c_readl will offset the register if necessary to talk - * to the I2C block inside the DVC block + * If necessary, i2c_writel() and i2c_readl() will offset the register + * in order to talk to the I2C block inside the DVC block. */ static u32 tegra_i2c_reg_addr(struct tegra_i2c_dev *i2c_dev, unsigned int reg) { @@ -323,7 +322,7 @@ static void i2c_writel(struct tegra_i2c_dev *i2c_dev, u32 val, unsigned int reg) { writel_relaxed(val, i2c_dev->base + tegra_i2c_reg_addr(i2c_dev, reg)); - /* Read back register to make sure that register writes completed */ + /* read back register to make sure that register writes completed */ if (reg != I2C_TX_FIFO) readl_relaxed(i2c_dev->base + tegra_i2c_reg_addr(i2c_dev, reg)); } @@ -475,7 +474,7 @@ static int tegra_i2c_init_dma(struct tegra_i2c_dev *i2c_dev) * block. This block is identical to the rest of the I2C blocks, except that * it only supports master mode, it has registers moved around, and it needs * some extra init to get it into I2C mode. The register moves are handled - * by i2c_readl and i2c_writel + * by i2c_readl() and i2c_writel(). 
*/ static void tegra_dvc_init(struct tegra_i2c_dev *i2c_dev) { @@ -633,7 +632,7 @@ static int tegra_i2c_init(struct tegra_i2c_dev *i2c_dev) break; } - /* Make sure clock divisor programmed correctly */ + /* make sure clock divisor programmed correctly */ clk_divisor = FIELD_PREP(I2C_CLK_DIVISOR_HSMODE, i2c_dev->hw->clk_divisor_hs_mode) | FIELD_PREP(I2C_CLK_DIVISOR_STD_FAST_MODE, non_hs_mode); @@ -646,8 +645,8 @@ static int tegra_i2c_init(struct tegra_i2c_dev *i2c_dev) } /* - * configure setup and hold times only when tsu_thd is non-zero. - * otherwise, preserve the chip default values + * Configure setup and hold times only when tsu_thd is non-zero. + * Otherwise, preserve the chip default values. */ if (i2c_dev->hw->has_interface_timing_reg && tsu_thd) i2c_writel(i2c_dev, tsu_thd, I2C_INTERFACE_TIMING_1); @@ -691,7 +690,7 @@ static int tegra_i2c_disable_packet_mode(struct tegra_i2c_dev *i2c_dev) /* * NACK interrupt is generated before the I2C controller generates - * the STOP condition on the bus. So wait for 2 clock periods + * the STOP condition on the bus. So, wait for 2 clock periods * before disabling the controller so that the STOP condition has * been delivered properly. */ @@ -712,8 +711,8 @@ static int tegra_i2c_empty_rx_fifo(struct tegra_i2c_dev *i2c_dev) u32 val; /* - * Catch overflow due to message fully sent - * before the check for RX FIFO availability. + * Catch overflow due to message fully sent before the check for + * RX FIFO availability. 
*/ if (WARN_ON_ONCE(!(i2c_dev->msg_buf_remaining))) return -EINVAL; @@ -726,7 +725,7 @@ static int tegra_i2c_empty_rx_fifo(struct tegra_i2c_dev *i2c_dev) rx_fifo_avail = FIELD_GET(I2C_FIFO_STATUS_RX, val); } - /* Rounds down to not include partial word at the end of buf */ + /* round down to exclude partial word at the end of buffer */ words_to_transfer = buf_remaining / BYTES_PER_FIFO_WORD; if (words_to_transfer > rx_fifo_avail) words_to_transfer = rx_fifo_avail; @@ -738,8 +737,8 @@ static int tegra_i2c_empty_rx_fifo(struct tegra_i2c_dev *i2c_dev) rx_fifo_avail -= words_to_transfer; /* - * If there is a partial word at the end of buf, handle it manually to - * prevent overwriting past the end of buf + * If there is a partial word at the end of buffer, handle it + * manually to prevent overwriting past the end of buffer. */ if (rx_fifo_avail > 0 && buf_remaining > 0) { /* @@ -779,10 +778,15 @@ static int tegra_i2c_fill_tx_fifo(struct tegra_i2c_dev *i2c_dev) tx_fifo_avail = FIELD_GET(I2C_FIFO_STATUS_TX, val); } - /* Rounds down to not include partial word at the end of buf */ + /* round down to exclude partial word at the end of buffer */ words_to_transfer = buf_remaining / BYTES_PER_FIFO_WORD; - /* It's very common to have < 4 bytes, so optimize that case. */ + /* + * This hunk pushes 4 bytes at a time into the TX FIFO. + * + * It's very common to have < 4 bytes, hence there is no word + * to push if we have less than 4 bytes to transfer. + */ if (words_to_transfer) { if (words_to_transfer > tx_fifo_avail) words_to_transfer = tx_fifo_avail; @@ -806,8 +810,8 @@ static int tegra_i2c_fill_tx_fifo(struct tegra_i2c_dev *i2c_dev) } /* - * If there is a partial word at the end of buf, handle it manually to - * prevent reading past the end of buf, which could cross a page + * If there is a partial word at the end of buffer, handle it manually + * to prevent reading past the end of buffer, which could cross a page * boundary and fault. 
*/ if (tx_fifo_avail > 0 && buf_remaining > 0) { @@ -855,7 +859,7 @@ static irqreturn_t tegra_i2c_isr(int irq, void *dev_id) } /* - * I2C transfer is terminated during the bus clear so skip + * I2C transfer is terminated during the bus clear, so skip * processing the other interrupts. */ if (i2c_dev->hw->supports_bus_clear && (status & I2C_INT_BUS_CLR_DONE)) @@ -891,7 +895,8 @@ static irqreturn_t tegra_i2c_isr(int irq, void *dev_id) * During message read XFER_COMPLETE interrupt is triggered prior to * DMA completion and during message write XFER_COMPLETE interrupt is * triggered after DMA completion. - * PACKETS_XFER_COMPLETE indicates completion of all bytes of transfer. + * + * PACKETS_XFER_COMPLETE indicates completion of all bytes of transfer, * so forcing msg_buf_remaining to 0 in DMA mode. */ if (status & I2C_INT_PACKET_XFER_COMPLETE) { @@ -909,7 +914,7 @@ static irqreturn_t tegra_i2c_isr(int irq, void *dev_id) } goto done; err: - /* An error occurred, mask all interrupts */ + /* mask all interrupts on error */ tegra_i2c_mask_irq(i2c_dev, I2C_INT_NO_ACK | I2C_INT_ARBITRATION_LOST | I2C_INT_PACKET_XFER_COMPLETE | I2C_INT_TX_FIFO_DATA_REQ | I2C_INT_RX_FIFO_DATA_REQ); @@ -1333,6 +1338,7 @@ static int tegra_i2c_xfer(struct i2c_adapter *adap, struct i2c_msg msgs[], enum msg_end_type end_type = MSG_END_STOP; if (i < (num - 1)) { + /* check whether follow up message is coming */ if (msgs[i + 1].flags & I2C_M_NOSTART) end_type = MSG_END_CONTINUE; else @@ -1562,7 +1568,6 @@ static const struct tegra_i2c_hw_feature tegra194_i2c_hw = { .has_interface_timing_reg = true, }; -/* Match table for of_platform binding */ static const struct of_device_id tegra_i2c_of_match[] = { { .compatible = "nvidia,tegra194-i2c", .data = &tegra194_i2c_hw, }, { .compatible = "nvidia,tegra186-i2c", .data = &tegra186_i2c_hw, }, @@ -1586,7 +1591,7 @@ static void tegra_i2c_parse_dt(struct tegra_i2c_dev *i2c_dev) err = of_property_read_u32(np, "clock-frequency", &i2c_dev->bus_clk_rate); if (err) 
- i2c_dev->bus_clk_rate = I2C_MAX_STANDARD_MODE_FREQ; /* default clock rate */ + i2c_dev->bus_clk_rate = I2C_MAX_STANDARD_MODE_FREQ; multi_mode = of_property_read_bool(np, "multi-master"); i2c_dev->multimaster_mode = multi_mode; @@ -1719,11 +1724,13 @@ static int tegra_i2c_probe(struct platform_device *pdev) goto release_clocks; /* - * VI I2C is in VE power domain which is not always on and not - * an IRQ safe. So, IRQ safe device can't be attached to a non-IRQ - * safe domain as it prevents powering off the PM domain. - * Also, VI I2C device don't need to use runtime IRQ safe as it will - * not be used for atomic transfers. + * VI I2C is in VE power domain which is not always ON and not + * IRQ-safe. Thus, IRQ-safe device shouldn't be attached to a + * non IRQ-safe domain because this prevents powering off the power + * domain. + * + * VI I2C device shouldn't be marked as IRQ-safe because VI I2C won't + * be used for atomic transfers. */ if (!i2c_dev->is_vi) pm_runtime_irq_safe(i2c_dev->dev); @@ -1794,9 +1801,8 @@ static int __maybe_unused tegra_i2c_runtime_resume(struct device *dev) /* * VI I2C device is attached to VE power domain which goes through - * power ON/OFF during PM runtime resume/suspend. So, controller - * should go through reset and need to re-initialize after power - * domain ON. + * power ON/OFF during runtime PM resume/suspend, meaning that + * controller needs to be re-initialized after power ON. */ if (i2c_dev->is_vi) { err = tegra_i2c_init(i2c_dev); From c886a4a03a0155d5630a613b1f1c3f4fdde80b0a Mon Sep 17 00:00:00 2001 From: Dmitry Osipenko Date: Wed, 30 Sep 2020 01:19:14 +0300 Subject: [PATCH 153/243] i2c: tegra: Clean up whitespaces, newlines and indentation MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Some places in the code are missing newlines or have unnecessary whitespaces and newlines. This creates inconsistency of the code and hurts readability. 
This patch removes the unnecessary and adds necessary whitespaces / newlines, clears indentation of the code. Reviewed-by: Michał Mirosław Reviewed-by: Andy Shevchenko Acked-by: Thierry Reding Tested-by: Thierry Reding Signed-off-by: Dmitry Osipenko Signed-off-by: Wolfram Sang --- drivers/i2c/busses/i2c-tegra.c | 73 +++++++++++++++++++++------------- 1 file changed, 46 insertions(+), 27 deletions(-) diff --git a/drivers/i2c/busses/i2c-tegra.c b/drivers/i2c/busses/i2c-tegra.c index 194c9ec84713..b88b38a45fb5 100644 --- a/drivers/i2c/busses/i2c-tegra.c +++ b/drivers/i2c/busses/i2c-tegra.c @@ -315,6 +315,7 @@ static u32 tegra_i2c_reg_addr(struct tegra_i2c_dev *i2c_dev, unsigned int reg) reg += (reg >= I2C_TX_FIFO) ? 0x10 : 0x40; else if (i2c_dev->is_vi) reg = 0xc00 + (reg << 2); + return reg; } @@ -374,9 +375,12 @@ static int tegra_i2c_dma_submit(struct tegra_i2c_dev *i2c_dev, size_t len) struct dma_chan *chan; dev_dbg(i2c_dev->dev, "starting DMA for length: %zu\n", len); + reinit_completion(&i2c_dev->dma_complete); + dir = i2c_dev->msg_read ? DMA_DEV_TO_MEM : DMA_MEM_TO_DEV; chan = i2c_dev->msg_read ? 
i2c_dev->rx_dma_chan : i2c_dev->tx_dma_chan; + dma_desc = dmaengine_prep_slave_single(chan, i2c_dev->dma_phys, len, dir, DMA_PREP_INTERRUPT | DMA_CTRL_ACK); @@ -388,8 +392,10 @@ static int tegra_i2c_dma_submit(struct tegra_i2c_dev *i2c_dev, size_t len) dma_desc->callback = tegra_i2c_dma_complete; dma_desc->callback_param = i2c_dev; + dmaengine_submit(dma_desc); dma_async_issue_pending(chan); + return 0; } @@ -456,6 +462,7 @@ static int tegra_i2c_init_dma(struct tegra_i2c_dev *i2c_dev) i2c_dev->dma_buf = dma_buf; i2c_dev->dma_phys = dma_phys; + return 0; err_out: @@ -558,6 +565,7 @@ static int tegra_i2c_flush_fifos(struct tegra_i2c_dev *i2c_dev) dev_err(i2c_dev->dev, "failed to flush FIFO\n"); return err; } + return 0; } @@ -651,8 +659,7 @@ static int tegra_i2c_init(struct tegra_i2c_dev *i2c_dev) if (i2c_dev->hw->has_interface_timing_reg && tsu_thd) i2c_writel(i2c_dev, tsu_thd, I2C_INTERFACE_TIMING_1); - clk_multiplier = tlow + thigh + 2; - clk_multiplier *= non_hs_mode + 1; + clk_multiplier = (tlow + thigh + 2) * (non_hs_mode + 1); err = clk_set_rate(i2c_dev->div_clk, i2c_dev->bus_clk_rate * clk_multiplier); @@ -800,9 +807,9 @@ static int tegra_i2c_fill_tx_fifo(struct tegra_i2c_dev *i2c_dev) */ buf_remaining -= words_to_transfer * BYTES_PER_FIFO_WORD; tx_fifo_avail -= words_to_transfer; + i2c_dev->msg_buf_remaining = buf_remaining; - i2c_dev->msg_buf = buf + - words_to_transfer * BYTES_PER_FIFO_WORD; + i2c_dev->msg_buf = buf + words_to_transfer * BYTES_PER_FIFO_WORD; i2c_writesl(i2c_dev, buf, I2C_TX_FIFO, words_to_transfer); @@ -915,12 +922,18 @@ static irqreturn_t tegra_i2c_isr(int irq, void *dev_id) goto done; err: /* mask all interrupts on error */ - tegra_i2c_mask_irq(i2c_dev, I2C_INT_NO_ACK | I2C_INT_ARBITRATION_LOST | - I2C_INT_PACKET_XFER_COMPLETE | I2C_INT_TX_FIFO_DATA_REQ | - I2C_INT_RX_FIFO_DATA_REQ); + tegra_i2c_mask_irq(i2c_dev, + I2C_INT_NO_ACK | + I2C_INT_ARBITRATION_LOST | + I2C_INT_PACKET_XFER_COMPLETE | + I2C_INT_TX_FIFO_DATA_REQ | + 
I2C_INT_RX_FIFO_DATA_REQ); + if (i2c_dev->hw->supports_bus_clear) tegra_i2c_mask_irq(i2c_dev, I2C_INT_BUS_CLR_DONE); + i2c_writel(i2c_dev, status, I2C_INT_STATUS); + if (i2c_dev->is_dvc) dvc_writel(i2c_dev, DVC_STATUS_I2C_DONE_INTR, DVC_STATUS); @@ -962,6 +975,7 @@ static void tegra_i2c_config_fifo_trig(struct tegra_i2c_dev *i2c_dev, if (i2c_dev->msg_read) { chan = i2c_dev->rx_dma_chan; reg_offset = tegra_i2c_reg_addr(i2c_dev, I2C_RX_FIFO); + slv_config.src_addr = i2c_dev->base_phys + reg_offset; slv_config.src_addr_width = DMA_SLAVE_BUSWIDTH_4_BYTES; slv_config.src_maxburst = dma_burst; @@ -973,6 +987,7 @@ static void tegra_i2c_config_fifo_trig(struct tegra_i2c_dev *i2c_dev, } else { chan = i2c_dev->tx_dma_chan; reg_offset = tegra_i2c_reg_addr(i2c_dev, I2C_TX_FIFO); + slv_config.dst_addr = i2c_dev->base_phys + reg_offset; slv_config.dst_addr_width = DMA_SLAVE_BUSWIDTH_4_BYTES; slv_config.dst_maxburst = dma_burst; @@ -988,6 +1003,7 @@ static void tegra_i2c_config_fifo_trig(struct tegra_i2c_dev *i2c_dev, if (err) { dev_err(i2c_dev->dev, "DMA config failed: %d\n", err); dev_err(i2c_dev->dev, "falling back to PIO\n"); + tegra_i2c_release_dma(i2c_dev); i2c_dev->dma_mode = false; } else { @@ -1069,6 +1085,7 @@ static int tegra_i2c_issue_bus_clear(struct i2c_adapter *adap) int err; reinit_completion(&i2c_dev->msg_complete); + val = FIELD_PREP(I2C_BC_SCLK_THRESHOLD, 9) | I2C_BC_STOP_COND | I2C_BC_TERMINATE; i2c_writel(i2c_dev, val, I2C_BUS_CLEAR_CNFG); @@ -1200,7 +1217,8 @@ static int tegra_i2c_xfer_msg(struct tegra_i2c_dev *i2c_dev, xfer_size = msg->len + I2C_PACKET_HEADER_SIZE; xfer_size = ALIGN(xfer_size, BYTES_PER_FIFO_WORD); - i2c_dev->dma_mode = (xfer_size > I2C_PIO_MODE_PREFERRED_LEN) && + + i2c_dev->dma_mode = xfer_size > I2C_PIO_MODE_PREFERRED_LEN && i2c_dev->dma_buf && !i2c_dev->atomic_mode; tegra_i2c_config_fifo_trig(i2c_dev, xfer_size); @@ -1210,25 +1228,24 @@ static int tegra_i2c_xfer_msg(struct tegra_i2c_dev *i2c_dev, * Total bits = 9 bits per byte 
(including ACK bit) + Start & stop bits */ xfer_time += DIV_ROUND_CLOSEST(((xfer_size * 9) + 2) * MSEC_PER_SEC, - i2c_dev->bus_clk_rate); + i2c_dev->bus_clk_rate); int_mask = I2C_INT_NO_ACK | I2C_INT_ARBITRATION_LOST; tegra_i2c_unmask_irq(i2c_dev, int_mask); + if (i2c_dev->dma_mode) { if (i2c_dev->msg_read) { dma_sync_single_for_device(i2c_dev->dev, i2c_dev->dma_phys, - xfer_size, - DMA_FROM_DEVICE); + xfer_size, DMA_FROM_DEVICE); + err = tegra_i2c_dma_submit(i2c_dev, xfer_size); if (err) return err; - } else { dma_sync_single_for_cpu(i2c_dev->dev, i2c_dev->dma_phys, - xfer_size, - DMA_TO_DEVICE); + xfer_size, DMA_TO_DEVICE); } } @@ -1238,10 +1255,11 @@ static int tegra_i2c_xfer_msg(struct tegra_i2c_dev *i2c_dev, if (i2c_dev->dma_mode) { memcpy(i2c_dev->dma_buf + I2C_PACKET_HEADER_SIZE, msg->buf, msg->len); + dma_sync_single_for_device(i2c_dev->dev, i2c_dev->dma_phys, - xfer_size, - DMA_TO_DEVICE); + xfer_size, DMA_TO_DEVICE); + err = tegra_i2c_dma_submit(i2c_dev, xfer_size); if (err) return err; @@ -1252,6 +1270,7 @@ static int tegra_i2c_xfer_msg(struct tegra_i2c_dev *i2c_dev, if (i2c_dev->hw->has_per_pkt_xfer_complete_irq) int_mask |= I2C_INT_PACKET_XFER_COMPLETE; + if (!i2c_dev->dma_mode) { if (msg->flags & I2C_M_RD) int_mask |= I2C_INT_RX_FIFO_DATA_REQ; @@ -1290,10 +1309,9 @@ static int tegra_i2c_xfer_msg(struct tegra_i2c_dev *i2c_dev, if (i2c_dev->msg_read && i2c_dev->msg_err == I2C_ERR_NONE) { dma_sync_single_for_cpu(i2c_dev->dev, i2c_dev->dma_phys, - xfer_size, - DMA_FROM_DEVICE); - memcpy(i2c_dev->msg_buf, i2c_dev->dma_buf, - msg->len); + xfer_size, DMA_FROM_DEVICE); + + memcpy(i2c_dev->msg_buf, i2c_dev->dma_buf, msg->len); } } @@ -1375,6 +1393,7 @@ static u32 tegra_i2c_func(struct i2c_adapter *adap) if (i2c_dev->hw->has_continue_xfer_support) ret |= I2C_FUNC_NOSTART; + return ret; } @@ -1734,6 +1753,7 @@ static int tegra_i2c_probe(struct platform_device *pdev) */ if (!i2c_dev->is_vi) pm_runtime_irq_safe(i2c_dev->dev); + pm_runtime_enable(i2c_dev->dev); err 
= tegra_i2c_init_hardware(i2c_dev); @@ -1778,11 +1798,11 @@ static int tegra_i2c_remove(struct platform_device *pdev) struct tegra_i2c_dev *i2c_dev = platform_get_drvdata(pdev); i2c_del_adapter(&i2c_dev->adapter); - pm_runtime_disable(i2c_dev->dev); tegra_i2c_release_dma(i2c_dev); tegra_i2c_release_clocks(i2c_dev); + return 0; } @@ -1883,15 +1903,14 @@ static const struct dev_pm_ops tegra_i2c_pm = { }; static struct platform_driver tegra_i2c_driver = { - .probe = tegra_i2c_probe, - .remove = tegra_i2c_remove, - .driver = { - .name = "tegra-i2c", + .probe = tegra_i2c_probe, + .remove = tegra_i2c_remove, + .driver = { + .name = "tegra-i2c", .of_match_table = tegra_i2c_of_match, - .pm = &tegra_i2c_pm, + .pm = &tegra_i2c_pm, }, }; - module_platform_driver(tegra_i2c_driver); MODULE_DESCRIPTION("nVidia Tegra2 I2C Bus Controller driver"); From 53fd42ff6ccce7b8f8c138206b02e25c14ef0b16 Mon Sep 17 00:00:00 2001 From: Dmitry Osipenko Date: Wed, 30 Sep 2020 01:19:15 +0300 Subject: [PATCH 154/243] i2c: tegra: Improve driver module description MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Use proper spelling of "NVIDIA" and don't designate driver as Tegra2-only since newer SoC generations are supported as well. 
Reviewed-by: Michał Mirosław Reviewed-by: Andy Shevchenko Acked-by: Thierry Reding Tested-by: Thierry Reding Signed-off-by: Dmitry Osipenko Signed-off-by: Wolfram Sang --- drivers/i2c/busses/i2c-tegra.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/i2c/busses/i2c-tegra.c b/drivers/i2c/busses/i2c-tegra.c index b88b38a45fb5..6f08c0c3238d 100644 --- a/drivers/i2c/busses/i2c-tegra.c +++ b/drivers/i2c/busses/i2c-tegra.c @@ -1913,6 +1913,6 @@ static struct platform_driver tegra_i2c_driver = { }; module_platform_driver(tegra_i2c_driver); -MODULE_DESCRIPTION("nVidia Tegra2 I2C Bus Controller driver"); +MODULE_DESCRIPTION("NVIDIA Tegra I2C Bus Controller driver"); MODULE_AUTHOR("Colin Cross"); MODULE_LICENSE("GPL v2"); From 40daf09a30a0c86a038bcce606604333f32e03f8 Mon Sep 17 00:00:00 2001 From: Wolfram Sang Date: Fri, 2 Oct 2020 14:44:58 +0200 Subject: [PATCH 155/243] Documentation: i2c: add testunit docs to index Fixes: a8335c64c5f0 ("i2c: add slave testunit driver") Reported-by: Mauro Carvalho Chehab Signed-off-by: Wolfram Sang Signed-off-by: Wolfram Sang --- Documentation/i2c/index.rst | 1 + 1 file changed, 1 insertion(+) diff --git a/Documentation/i2c/index.rst b/Documentation/i2c/index.rst index 8a2ad3845191..8b76217e370a 100644 --- a/Documentation/i2c/index.rst +++ b/Documentation/i2c/index.rst @@ -47,6 +47,7 @@ Slave I2C slave-interface slave-eeprom-backend + slave-testunit-backend Advanced topics =============== From 247db73560bc3e5aef6db50c443c3c0db115bc93 Mon Sep 17 00:00:00 2001 From: Ashish Sangwan Date: Mon, 5 Oct 2020 02:22:43 -0700 Subject: [PATCH 156/243] NFS: fix nfs_path in case of a rename retry We are generating incorrect path in case of rename retry because we are restarting from wrong dentry. We should restart from the dentry which was received in the call to nfs_path. 
CC: stable@vger.kernel.org Signed-off-by: Ashish Sangwan Signed-off-by: Anna Schumaker --- fs/nfs/namespace.c | 12 ++++++++---- 1 file changed, 8 insertions(+), 4 deletions(-) diff --git a/fs/nfs/namespace.c b/fs/nfs/namespace.c index 6b063227e34e..2bcbe38afe2e 100644 --- a/fs/nfs/namespace.c +++ b/fs/nfs/namespace.c @@ -32,9 +32,9 @@ int nfs_mountpoint_expiry_timeout = 500 * HZ; /* * nfs_path - reconstruct the path given an arbitrary dentry * @base - used to return pointer to the end of devname part of path - * @dentry - pointer to dentry + * @dentry_in - pointer to dentry * @buffer - result buffer - * @buflen - length of buffer + * @buflen_in - length of buffer * @flags - options (see below) * * Helper function for constructing the server pathname @@ -49,15 +49,19 @@ int nfs_mountpoint_expiry_timeout = 500 * HZ; * the original device (export) name * (if unset, the original name is returned verbatim) */ -char *nfs_path(char **p, struct dentry *dentry, char *buffer, ssize_t buflen, - unsigned flags) +char *nfs_path(char **p, struct dentry *dentry_in, char *buffer, + ssize_t buflen_in, unsigned flags) { char *end; int namelen; unsigned seq; const char *base; + struct dentry *dentry; + ssize_t buflen; rename_retry: + buflen = buflen_in; + dentry = dentry_in; end = buffer+buflen; *--end = '\0'; buflen--; From dd841a749d1ded8e2e5facc4242ee0b6779fc0cb Mon Sep 17 00:00:00 2001 From: "Matthew Wilcox (Oracle)" Date: Sun, 14 Jun 2020 06:07:10 -0400 Subject: [PATCH 157/243] radix tree test suite: Fix compilation Introducing local_lock broke compilation; fix it all up. 
Signed-off-by: Matthew Wilcox (Oracle) --- include/linux/radix-tree.h | 1 + lib/radix-tree.c | 1 - tools/testing/radix-tree/linux/kernel.h | 1 + tools/testing/radix-tree/linux/local_lock.h | 8 ++++++++ tools/testing/radix-tree/test.h | 4 ---- 5 files changed, 10 insertions(+), 5 deletions(-) create mode 100644 tools/testing/radix-tree/linux/local_lock.h diff --git a/include/linux/radix-tree.h b/include/linux/radix-tree.h index c2a9f7c90727..5c85059a92ba 100644 --- a/include/linux/radix-tree.h +++ b/include/linux/radix-tree.h @@ -11,6 +11,7 @@ #include #include #include +#include #include #include #include diff --git a/lib/radix-tree.c b/lib/radix-tree.c index 8e4a3a4397f2..0f10485d46b6 100644 --- a/lib/radix-tree.c +++ b/lib/radix-tree.c @@ -20,7 +20,6 @@ #include #include #include -#include #include /* in_interrupt() */ #include #include diff --git a/tools/testing/radix-tree/linux/kernel.h b/tools/testing/radix-tree/linux/kernel.h index 4568248222ae..39867fd80c8f 100644 --- a/tools/testing/radix-tree/linux/kernel.h +++ b/tools/testing/radix-tree/linux/kernel.h @@ -22,4 +22,5 @@ #define __releases(x) #define __must_hold(x) +#define EXPORT_PER_CPU_SYMBOL_GPL(x) #endif /* _KERNEL_H */ diff --git a/tools/testing/radix-tree/linux/local_lock.h b/tools/testing/radix-tree/linux/local_lock.h new file mode 100644 index 000000000000..b3cf8b233ca4 --- /dev/null +++ b/tools/testing/radix-tree/linux/local_lock.h @@ -0,0 +1,8 @@ +#ifndef _LINUX_LOCAL_LOCK +#define _LINUX_LOCAL_LOCK +typedef struct { } local_lock_t; + +static inline void local_lock(local_lock_t *lock) { } +static inline void local_unlock(local_lock_t *lock) { } +#define INIT_LOCAL_LOCK(x) { } +#endif diff --git a/tools/testing/radix-tree/test.h b/tools/testing/radix-tree/test.h index 34dab4d18744..7ef7067e942c 100644 --- a/tools/testing/radix-tree/test.h +++ b/tools/testing/radix-tree/test.h @@ -56,8 +56,4 @@ int root_tag_get(struct radix_tree_root *root, unsigned int tag); unsigned long node_maxindex(struct 
radix_tree_node *); unsigned long shift_maxindex(unsigned int shift); int radix_tree_cpu_dead(unsigned int cpu); -struct radix_tree_preload { - unsigned nr; - struct radix_tree_node *nodes; -}; extern struct radix_tree_preload radix_tree_preloads; From a219b856a2b993da234108307be772448f22b0ce Mon Sep 17 00:00:00 2001 From: "Matthew Wilcox (Oracle)" Date: Thu, 2 Apr 2020 14:26:13 -0400 Subject: [PATCH 158/243] ida: Free allocated bitmap in error path If a bitmap needs to be allocated, and then by the time the thread is scheduled to be run again all the indices which would satisfy the allocation have been allocated then we would leak the allocation. Almost impossible to hit in practice, but a trivial fix. Found by Coverity. Fixes: f32f004cddf8 ("ida: Convert to XArray") Reported-by: coverity-bot Reviewed-by: Kees Cook Signed-off-by: Matthew Wilcox (Oracle) --- lib/idr.c | 1 + tools/testing/radix-tree/idr-test.c | 29 +++++++++++++++++++++++++++++ 2 files changed, 30 insertions(+) diff --git a/lib/idr.c b/lib/idr.c index c2cf2c52bbde..4d2eef0259d2 100644 --- a/lib/idr.c +++ b/lib/idr.c @@ -470,6 +470,7 @@ int ida_alloc_range(struct ida *ida, unsigned int min, unsigned int max, goto retry; nospc: xas_unlock_irqrestore(&xas, flags); + kfree(alloc); return -ENOSPC; } EXPORT_SYMBOL(ida_alloc_range); diff --git a/tools/testing/radix-tree/idr-test.c b/tools/testing/radix-tree/idr-test.c index 8995092d541e..3b796dd5e577 100644 --- a/tools/testing/radix-tree/idr-test.c +++ b/tools/testing/radix-tree/idr-test.c @@ -523,8 +523,27 @@ static void *ida_random_fn(void *arg) return NULL; } +static void *ida_leak_fn(void *arg) +{ + struct ida *ida = arg; + time_t s = time(NULL); + int i, ret; + + rcu_register_thread(); + + do for (i = 0; i < 1000; i++) { + ret = ida_alloc_range(ida, 128, 128, GFP_KERNEL); + if (ret >= 0) + ida_free(ida, 128); + } while (time(NULL) < s + 2); + + rcu_unregister_thread(); + return NULL; +} + void ida_thread_tests(void) { + DEFINE_IDA(ida); pthread_t 
threads[20]; int i; @@ -536,6 +555,16 @@ void ida_thread_tests(void) while (i--) pthread_join(threads[i], NULL); + + for (i = 0; i < ARRAY_SIZE(threads); i++) + if (pthread_create(&threads[i], NULL, ida_leak_fn, &ida)) { + perror("creating ida thread"); + exit(1); + } + + while (i--) + pthread_join(threads[i], NULL); + assert(ida_is_empty(&ida)); } void ida_tests(void) From 062b735912b9f3aa3e14cd02b5ede08cf8bc093f Mon Sep 17 00:00:00 2001 From: "Matthew Wilcox (Oracle)" Date: Tue, 31 Mar 2020 14:23:59 -0400 Subject: [PATCH 159/243] XArray: Test two more things about xa_cmpxchg 1. If we xa_cmpxchg() an entry in, it marks the index as not free. 2. If we xa_cmpxchg() NULL in, it marks the index as free. Signed-off-by: Matthew Wilcox (Oracle) --- lib/test_xarray.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/lib/test_xarray.c b/lib/test_xarray.c index d4f97925dbd8..9fc3da430aba 100644 --- a/lib/test_xarray.c +++ b/lib/test_xarray.c @@ -393,6 +393,9 @@ static noinline void check_cmpxchg(struct xarray *xa) XA_BUG_ON(xa, xa_cmpxchg(xa, 12345678, FIVE, LOTS, GFP_KERNEL) != FIVE); XA_BUG_ON(xa, xa_cmpxchg(xa, 5, FIVE, NULL, GFP_KERNEL) != NULL); XA_BUG_ON(xa, xa_cmpxchg(xa, 5, NULL, FIVE, GFP_KERNEL) != NULL); + XA_BUG_ON(xa, xa_insert(xa, 5, FIVE, GFP_KERNEL) != -EBUSY); + XA_BUG_ON(xa, xa_cmpxchg(xa, 5, FIVE, NULL, GFP_KERNEL) != FIVE); + XA_BUG_ON(xa, xa_insert(xa, 5, FIVE, GFP_KERNEL) == -EBUSY); xa_erase_index(xa, 12345678); xa_erase_index(xa, 5); XA_BUG_ON(xa, !xa_empty(xa)); From f7d61ee414cadaeb05af3bf7a64fb99760b9c6e7 Mon Sep 17 00:00:00 2001 From: Anna Schumaker Date: Mon, 26 Jan 2015 17:26:19 -0500 Subject: [PATCH 160/243] SUNRPC: Split out a function for setting current page I'm going to need this bit of code in a few places for READ_PLUS decoding, so let's make it a helper function. 
Signed-off-by: Anna Schumaker --- net/sunrpc/xdr.c | 13 +++++++++---- 1 file changed, 9 insertions(+), 4 deletions(-) diff --git a/net/sunrpc/xdr.c b/net/sunrpc/xdr.c index be11d672b5b9..fa7517c1d125 100644 --- a/net/sunrpc/xdr.c +++ b/net/sunrpc/xdr.c @@ -825,6 +825,13 @@ static int xdr_set_page_base(struct xdr_stream *xdr, return 0; } +static void xdr_set_page(struct xdr_stream *xdr, unsigned int base, + unsigned int len) +{ + if (xdr_set_page_base(xdr, base, len) < 0) + xdr_set_iov(xdr, xdr->buf->tail, xdr->nwords << 2); +} + static void xdr_set_next_page(struct xdr_stream *xdr) { unsigned int newbase; @@ -832,8 +839,7 @@ static void xdr_set_next_page(struct xdr_stream *xdr) newbase = (1 + xdr->page_ptr - xdr->buf->pages) << PAGE_SHIFT; newbase -= xdr->buf->page_base; - if (xdr_set_page_base(xdr, newbase, PAGE_SIZE) < 0) - xdr_set_iov(xdr, xdr->buf->tail, xdr->nwords << 2); + xdr_set_page(xdr, newbase, PAGE_SIZE); } static bool xdr_set_next_buffer(struct xdr_stream *xdr) @@ -841,8 +847,7 @@ static bool xdr_set_next_buffer(struct xdr_stream *xdr) if (xdr->page_ptr != NULL) xdr_set_next_page(xdr); else if (xdr->iov == xdr->buf->head) { - if (xdr_set_page_base(xdr, 0, PAGE_SIZE) < 0) - xdr_set_iov(xdr, xdr->buf->tail, xdr->nwords << 2); + xdr_set_page(xdr, 0, PAGE_SIZE); } return xdr->p != xdr->end; } From cf1f08cac375630af6b6307907a3fc20fcf847c7 Mon Sep 17 00:00:00 2001 From: Anna Schumaker Date: Fri, 17 Apr 2020 11:00:24 -0400 Subject: [PATCH 161/243] SUNRPC: Implement a xdr_page_pos() function I'll need this for READ_PLUS to help figure out the offset where page data is stored at, but it might also be useful for other things. 
Signed-off-by: Anna Schumaker --- include/linux/sunrpc/xdr.h | 1 + net/sunrpc/xdr.c | 13 +++++++++++++ 2 files changed, 14 insertions(+) diff --git a/include/linux/sunrpc/xdr.h b/include/linux/sunrpc/xdr.h index 5a6a81b7cd9f..25a68dd87ecf 100644 --- a/include/linux/sunrpc/xdr.h +++ b/include/linux/sunrpc/xdr.h @@ -240,6 +240,7 @@ extern int xdr_restrict_buflen(struct xdr_stream *xdr, int newbuflen); extern void xdr_write_pages(struct xdr_stream *xdr, struct page **pages, unsigned int base, unsigned int len); extern unsigned int xdr_stream_pos(const struct xdr_stream *xdr); +extern unsigned int xdr_page_pos(const struct xdr_stream *xdr); extern void xdr_init_decode(struct xdr_stream *xdr, struct xdr_buf *buf, __be32 *p, struct rpc_rqst *rqst); extern void xdr_init_decode_pages(struct xdr_stream *xdr, struct xdr_buf *buf, diff --git a/net/sunrpc/xdr.c b/net/sunrpc/xdr.c index fa7517c1d125..909920fab93b 100644 --- a/net/sunrpc/xdr.c +++ b/net/sunrpc/xdr.c @@ -505,6 +505,19 @@ unsigned int xdr_stream_pos(const struct xdr_stream *xdr) } EXPORT_SYMBOL_GPL(xdr_stream_pos); +/** + * xdr_page_pos - Return the current offset from the start of the xdr pages + * @xdr: pointer to struct xdr_stream + */ +unsigned int xdr_page_pos(const struct xdr_stream *xdr) +{ + unsigned int pos = xdr_stream_pos(xdr); + + WARN_ON(pos < xdr->buf->head[0].iov_len); + return pos - xdr->buf->head[0].iov_len; +} +EXPORT_SYMBOL_GPL(xdr_page_pos); + /** * xdr_init_encode - Initialize a struct xdr_stream for sending data. 
* @xdr: pointer to xdr_stream struct From a14a63594cc2e5bdcbb1543d29df945da71e380f Mon Sep 17 00:00:00 2001 From: Anna Schumaker Date: Fri, 17 Apr 2020 11:01:50 -0400 Subject: [PATCH 162/243] NFS: Use xdr_page_pos() in NFSv4 decode_getacl() Signed-off-by: Anna Schumaker --- fs/nfs/nfs4xdr.c | 6 +----- 1 file changed, 1 insertion(+), 5 deletions(-) diff --git a/fs/nfs/nfs4xdr.c b/fs/nfs/nfs4xdr.c index 0b3510f62623..3336ea3407a0 100644 --- a/fs/nfs/nfs4xdr.c +++ b/fs/nfs/nfs4xdr.c @@ -5308,7 +5308,6 @@ static int decode_getacl(struct xdr_stream *xdr, struct rpc_rqst *req, uint32_t attrlen, bitmap[3] = {0}; int status; - unsigned int pg_offset; res->acl_len = 0; if ((status = decode_op_hdr(xdr, OP_GETATTR)) != 0) @@ -5316,9 +5315,6 @@ static int decode_getacl(struct xdr_stream *xdr, struct rpc_rqst *req, xdr_enter_page(xdr, xdr->buf->page_len); - /* Calculate the offset of the page data */ - pg_offset = xdr->buf->head[0].iov_len; - if ((status = decode_attr_bitmap(xdr, bitmap)) != 0) goto out; if ((status = decode_attr_length(xdr, &attrlen, &savep)) != 0) @@ -5331,7 +5327,7 @@ static int decode_getacl(struct xdr_stream *xdr, struct rpc_rqst *req, /* The bitmap (xdr len + bitmaps) and the attr xdr len words * are stored with the acl data to handle the problem of * variable length bitmaps.*/ - res->acl_data_offset = xdr_stream_pos(xdr) - pg_offset; + res->acl_data_offset = xdr_page_pos(xdr); res->acl_len = attrlen; /* Check for receive buffer overflow */ From c567552612ece787b178e3b147b5854ad422a836 Mon Sep 17 00:00:00 2001 From: Anna Schumaker Date: Wed, 28 May 2014 13:41:22 -0400 Subject: [PATCH 163/243] NFS: Add READ_PLUS data segment support This patch adds client support for decoding a single NFS4_CONTENT_DATA segment returned by the server. This is the simplest implementation possible, since it does not account for any hole segments in the reply. 
Signed-off-by: Anna Schumaker --- fs/nfs/nfs42xdr.c | 141 ++++++++++++++++++++++++++++++++++++++ fs/nfs/nfs4client.c | 2 + fs/nfs/nfs4proc.c | 43 +++++++++++- fs/nfs/nfs4xdr.c | 1 + include/linux/nfs4.h | 2 +- include/linux/nfs_fs_sb.h | 1 + include/linux/nfs_xdr.h | 2 +- 7 files changed, 187 insertions(+), 5 deletions(-) diff --git a/fs/nfs/nfs42xdr.c b/fs/nfs/nfs42xdr.c index cc50085e151c..930b4ca212c1 100644 --- a/fs/nfs/nfs42xdr.c +++ b/fs/nfs/nfs42xdr.c @@ -45,6 +45,15 @@ #define encode_deallocate_maxsz (op_encode_hdr_maxsz + \ encode_fallocate_maxsz) #define decode_deallocate_maxsz (op_decode_hdr_maxsz) +#define encode_read_plus_maxsz (op_encode_hdr_maxsz + \ + encode_stateid_maxsz + 3) +#define NFS42_READ_PLUS_SEGMENT_SIZE (1 /* data_content4 */ + \ + 2 /* data_info4.di_offset */ + \ + 2 /* data_info4.di_length */) +#define decode_read_plus_maxsz (op_decode_hdr_maxsz + \ + 1 /* rpr_eof */ + \ + 1 /* rpr_contents count */ + \ + NFS42_READ_PLUS_SEGMENT_SIZE) #define encode_seek_maxsz (op_encode_hdr_maxsz + \ encode_stateid_maxsz + \ 2 /* offset */ + \ @@ -128,6 +137,14 @@ decode_putfh_maxsz + \ decode_deallocate_maxsz + \ decode_getattr_maxsz) +#define NFS4_enc_read_plus_sz (compound_encode_hdr_maxsz + \ + encode_sequence_maxsz + \ + encode_putfh_maxsz + \ + encode_read_plus_maxsz) +#define NFS4_dec_read_plus_sz (compound_decode_hdr_maxsz + \ + decode_sequence_maxsz + \ + decode_putfh_maxsz + \ + decode_read_plus_maxsz) #define NFS4_enc_seek_sz (compound_encode_hdr_maxsz + \ encode_sequence_maxsz + \ encode_putfh_maxsz + \ @@ -324,6 +341,16 @@ static void encode_deallocate(struct xdr_stream *xdr, encode_fallocate(xdr, args); } +static void encode_read_plus(struct xdr_stream *xdr, + const struct nfs_pgio_args *args, + struct compound_hdr *hdr) +{ + encode_op_hdr(xdr, OP_READ_PLUS, decode_read_plus_maxsz, hdr); + encode_nfs4_stateid(xdr, &args->stateid); + encode_uint64(xdr, args->offset); + encode_uint32(xdr, args->count); +} + static void encode_seek(struct 
xdr_stream *xdr, const struct nfs42_seek_args *args, struct compound_hdr *hdr) @@ -722,6 +749,28 @@ static void nfs4_xdr_enc_deallocate(struct rpc_rqst *req, encode_nops(&hdr); } +/* + * Encode READ_PLUS request + */ +static void nfs4_xdr_enc_read_plus(struct rpc_rqst *req, + struct xdr_stream *xdr, + const void *data) +{ + const struct nfs_pgio_args *args = data; + struct compound_hdr hdr = { + .minorversion = nfs4_xdr_minorversion(&args->seq_args), + }; + + encode_compound_hdr(xdr, req, &hdr); + encode_sequence(xdr, &args->seq_args, &hdr); + encode_putfh(xdr, args->fh, &hdr); + encode_read_plus(xdr, args, &hdr); + + rpc_prepare_reply_pages(req, args->pages, args->pgbase, + args->count, hdr.replen); + encode_nops(&hdr); +} + /* * Encode SEEK request */ @@ -970,6 +1019,71 @@ static int decode_deallocate(struct xdr_stream *xdr, struct nfs42_falloc_res *re return decode_op_hdr(xdr, OP_DEALLOCATE); } +static int decode_read_plus_data(struct xdr_stream *xdr, struct nfs_pgio_res *res, + uint32_t *eof) +{ + uint32_t count, recvd; + uint64_t offset; + __be32 *p; + + p = xdr_inline_decode(xdr, 8 + 4); + if (unlikely(!p)) + return -EIO; + + p = xdr_decode_hyper(p, &offset); + count = be32_to_cpup(p); + recvd = xdr_read_pages(xdr, count); + res->count += recvd; + + if (count > recvd) { + dprintk("NFS: server cheating in read reply: " + "count %u > recvd %u\n", count, recvd); + *eof = 0; + return 1; + } + + return 0; +} + +static int decode_read_plus(struct xdr_stream *xdr, struct nfs_pgio_res *res) +{ + uint32_t eof, segments, type; + int status; + __be32 *p; + + status = decode_op_hdr(xdr, OP_READ_PLUS); + if (status) + return status; + + p = xdr_inline_decode(xdr, 4 + 4); + if (unlikely(!p)) + return -EIO; + + eof = be32_to_cpup(p++); + segments = be32_to_cpup(p++); + if (segments == 0) + goto out; + + p = xdr_inline_decode(xdr, 4); + if (unlikely(!p)) + return -EIO; + + type = be32_to_cpup(p++); + if (type == NFS4_CONTENT_DATA) + status = decode_read_plus_data(xdr, res, 
&eof); + else + return -EINVAL; + + if (status) + return status; + if (segments > 1) + eof = 0; + +out: + res->eof = eof; + return 0; +} + static int decode_seek(struct xdr_stream *xdr, struct nfs42_seek_res *res) { int status; @@ -1146,6 +1260,33 @@ static int nfs4_xdr_dec_deallocate(struct rpc_rqst *rqstp, return status; } +/* + * Decode READ_PLUS request + */ +static int nfs4_xdr_dec_read_plus(struct rpc_rqst *rqstp, + struct xdr_stream *xdr, + void *data) +{ + struct nfs_pgio_res *res = data; + struct compound_hdr hdr; + int status; + + status = decode_compound_hdr(xdr, &hdr); + if (status) + goto out; + status = decode_sequence(xdr, &res->seq_res, rqstp); + if (status) + goto out; + status = decode_putfh(xdr); + if (status) + goto out; + status = decode_read_plus(xdr, res); + if (!status) + status = res->count; +out: + return status; +} + /* * Decode SEEK request */ diff --git a/fs/nfs/nfs4client.c b/fs/nfs/nfs4client.c index daacc78a3d48..be7915c861ce 100644 --- a/fs/nfs/nfs4client.c +++ b/fs/nfs/nfs4client.c @@ -1045,6 +1045,8 @@ static int nfs4_server_common_setup(struct nfs_server *server, server->caps |= server->nfs_client->cl_mvops->init_caps; if (server->flags & NFS_MOUNT_NORDIRPLUS) server->caps &= ~NFS_CAP_READDIRPLUS; + if (server->nfs_client->cl_proto == XPRT_TRANSPORT_RDMA) + server->caps &= ~NFS_CAP_READ_PLUS; /* * Don't use NFS uid/gid mapping if we're using AUTH_SYS or lower * authentication. 
diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c index f7ef2ca699a5..d09fd3236820 100644 --- a/fs/nfs/nfs4proc.c +++ b/fs/nfs/nfs4proc.c @@ -70,6 +70,10 @@ #include "nfs4trace.h" +#ifdef CONFIG_NFS_V4_2 +#include "nfs42.h" +#endif /* CONFIG_NFS_V4_2 */ + #define NFSDBG_FACILITY NFSDBG_PROC #define NFS4_BITMASK_SZ 3 @@ -5272,28 +5276,60 @@ static bool nfs4_read_stateid_changed(struct rpc_task *task, return true; } +static bool nfs4_read_plus_not_supported(struct rpc_task *task, + struct nfs_pgio_header *hdr) +{ + struct nfs_server *server = NFS_SERVER(hdr->inode); + struct rpc_message *msg = &task->tk_msg; + + if (msg->rpc_proc == &nfs4_procedures[NFSPROC4_CLNT_READ_PLUS] && + server->caps & NFS_CAP_READ_PLUS && task->tk_status == -ENOTSUPP) { + server->caps &= ~NFS_CAP_READ_PLUS; + msg->rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_READ]; + rpc_restart_call_prepare(task); + return true; + } + return false; +} + static int nfs4_read_done(struct rpc_task *task, struct nfs_pgio_header *hdr) { - dprintk("--> %s\n", __func__); if (!nfs4_sequence_done(task, &hdr->res.seq_res)) return -EAGAIN; if (nfs4_read_stateid_changed(task, &hdr->args)) return -EAGAIN; + if (nfs4_read_plus_not_supported(task, hdr)) + return -EAGAIN; if (task->tk_status > 0) nfs_invalidate_atime(hdr->inode); return hdr->pgio_done_cb ? 
hdr->pgio_done_cb(task, hdr) : nfs4_read_done_cb(task, hdr); } +#ifdef CONFIG_NFS_V4_2 +static void nfs42_read_plus_support(struct nfs_server *server, struct rpc_message *msg) +{ + if (server->caps & NFS_CAP_READ_PLUS) + msg->rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_READ_PLUS]; + else + msg->rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_READ]; +} +#else +static void nfs42_read_plus_support(struct nfs_server *server, struct rpc_message *msg) +{ + msg->rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_READ]; +} +#endif /* CONFIG_NFS_V4_2 */ + static void nfs4_proc_read_setup(struct nfs_pgio_header *hdr, struct rpc_message *msg) { hdr->timestamp = jiffies; if (!hdr->pgio_done_cb) hdr->pgio_done_cb = nfs4_read_done_cb; - msg->rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_READ]; + nfs42_read_plus_support(NFS_SERVER(hdr->inode), msg); nfs4_init_sequence(&hdr->args.seq_args, &hdr->res.seq_res, 0, 0); } @@ -10215,7 +10251,8 @@ static const struct nfs4_minor_version_ops nfs_v4_2_minor_ops = { | NFS_CAP_SEEK | NFS_CAP_LAYOUTSTATS | NFS_CAP_CLONE - | NFS_CAP_LAYOUTERROR, + | NFS_CAP_LAYOUTERROR + | NFS_CAP_READ_PLUS, .init_client = nfs41_init_client, .shutdown_client = nfs41_shutdown_client, .match_stateid = nfs41_match_stateid, diff --git a/fs/nfs/nfs4xdr.c b/fs/nfs/nfs4xdr.c index 3336ea3407a0..c6dbfcae7517 100644 --- a/fs/nfs/nfs4xdr.c +++ b/fs/nfs/nfs4xdr.c @@ -7615,6 +7615,7 @@ const struct rpc_procinfo nfs4_procedures[] = { PROC42(SETXATTR, enc_setxattr, dec_setxattr), PROC42(LISTXATTRS, enc_listxattrs, dec_listxattrs), PROC42(REMOVEXATTR, enc_removexattr, dec_removexattr), + PROC42(READ_PLUS, enc_read_plus, dec_read_plus), }; static unsigned int nfs_version4_counts[ARRAY_SIZE(nfs4_procedures)]; diff --git a/include/linux/nfs4.h b/include/linux/nfs4.h index b8360be141da..9dc7eeac924f 100644 --- a/include/linux/nfs4.h +++ b/include/linux/nfs4.h @@ -551,13 +551,13 @@ enum { NFSPROC4_CLNT_LOOKUPP, NFSPROC4_CLNT_LAYOUTERROR, - NFSPROC4_CLNT_COPY_NOTIFY, NFSPROC4_CLNT_GETXATTR, 
NFSPROC4_CLNT_SETXATTR, NFSPROC4_CLNT_LISTXATTRS, NFSPROC4_CLNT_REMOVEXATTR, + NFSPROC4_CLNT_READ_PLUS, }; /* nfs41 types */ diff --git a/include/linux/nfs_fs_sb.h b/include/linux/nfs_fs_sb.h index 7eae72a8762e..38e60ec742df 100644 --- a/include/linux/nfs_fs_sb.h +++ b/include/linux/nfs_fs_sb.h @@ -287,5 +287,6 @@ struct nfs_server { #define NFS_CAP_LAYOUTERROR (1U << 26) #define NFS_CAP_COPY_NOTIFY (1U << 27) #define NFS_CAP_XATTR (1U << 28) +#define NFS_CAP_READ_PLUS (1U << 29) #endif diff --git a/include/linux/nfs_xdr.h b/include/linux/nfs_xdr.h index 0599efd57eb9..d63cb862d58e 100644 --- a/include/linux/nfs_xdr.h +++ b/include/linux/nfs_xdr.h @@ -657,7 +657,7 @@ struct nfs_pgio_args { struct nfs_pgio_res { struct nfs4_sequence_res seq_res; struct nfs_fattr * fattr; - __u32 count; + __u64 count; __u32 op_status; union { struct { From 06216ecbd93688f7acb617e186b9556a565a13bd Mon Sep 17 00:00:00 2001 From: Anna Schumaker Date: Mon, 20 Apr 2020 17:38:17 -0400 Subject: [PATCH 164/243] SUNRPC: Split out xdr_realign_pages() from xdr_align_pages() I don't need the entire align pages code for READ_PLUS, so split out the part I do need so I don't need to reimplement anything. 
Signed-off-by: Anna Schumaker --- net/sunrpc/xdr.c | 27 +++++++++++++++++---------- 1 file changed, 17 insertions(+), 10 deletions(-) diff --git a/net/sunrpc/xdr.c b/net/sunrpc/xdr.c index 909920fab93b..d93bcad5ba9f 100644 --- a/net/sunrpc/xdr.c +++ b/net/sunrpc/xdr.c @@ -997,10 +997,25 @@ __be32 * xdr_inline_decode(struct xdr_stream *xdr, size_t nbytes) } EXPORT_SYMBOL_GPL(xdr_inline_decode); +static void xdr_realign_pages(struct xdr_stream *xdr) +{ + struct xdr_buf *buf = xdr->buf; + struct kvec *iov = buf->head; + unsigned int cur = xdr_stream_pos(xdr); + unsigned int copied, offset; + + /* Realign pages to current pointer position */ + if (iov->iov_len > cur) { + offset = iov->iov_len - cur; + copied = xdr_shrink_bufhead(buf, offset); + trace_rpc_xdr_alignment(xdr, offset, copied); + xdr->nwords = XDR_QUADLEN(buf->len - cur); + } +} + static unsigned int xdr_align_pages(struct xdr_stream *xdr, unsigned int len) { struct xdr_buf *buf = xdr->buf; - struct kvec *iov; unsigned int nwords = XDR_QUADLEN(len); unsigned int cur = xdr_stream_pos(xdr); unsigned int copied, offset; @@ -1008,15 +1023,7 @@ static unsigned int xdr_align_pages(struct xdr_stream *xdr, unsigned int len) if (xdr->nwords == 0) return 0; - /* Realign pages to current pointer position */ - iov = buf->head; - if (iov->iov_len > cur) { - offset = iov->iov_len - cur; - copied = xdr_shrink_bufhead(buf, offset); - trace_rpc_xdr_alignment(xdr, offset, copied); - xdr->nwords = XDR_QUADLEN(buf->len - cur); - } - + xdr_realign_pages(xdr); if (nwords > xdr->nwords) { nwords = xdr->nwords; len = nwords << 2; From 43f0f0816cdbe7361dd17db3b4c1033446033ba6 Mon Sep 17 00:00:00 2001 From: Anna Schumaker Date: Wed, 6 May 2020 13:21:30 -0400 Subject: [PATCH 165/243] SUNRPC: Split out _shift_data_right_tail() xdr_shrink_pagelen() is very similar to what we need for hole expansion, so split out the common code into its own function that can be used by both functions. 
Signed-off-by: Anna Schumaker --- net/sunrpc/xdr.c | 68 +++++++++++++++++++++++++++++------------------- 1 file changed, 41 insertions(+), 27 deletions(-) diff --git a/net/sunrpc/xdr.c b/net/sunrpc/xdr.c index d93bcad5ba9f..10a88a67206a 100644 --- a/net/sunrpc/xdr.c +++ b/net/sunrpc/xdr.c @@ -266,6 +266,46 @@ _shift_data_right_pages(struct page **pages, size_t pgto_base, } while ((len -= copy) != 0); } +static unsigned int +_shift_data_right_tail(struct xdr_buf *buf, unsigned int pgfrom, size_t len) +{ + struct kvec *tail = buf->tail; + unsigned int tailbuf_len; + unsigned int result = 0; + size_t copy; + + tailbuf_len = buf->buflen - buf->head->iov_len - buf->page_len; + + /* Shift the tail first */ + if (tailbuf_len != 0) { + unsigned int free_space = tailbuf_len - tail->iov_len; + + if (len < free_space) + free_space = len; + if (len > free_space) + len = free_space; + + tail->iov_len += free_space; + copy = len; + + if (tail->iov_len > len) { + char *p = (char *)tail->iov_base + len; + memmove(p, tail->iov_base, tail->iov_len - free_space); + result += tail->iov_len - free_space; + } else + copy = tail->iov_len; + + /* Copy from the inlined pages into the tail */ + _copy_from_pages((char *)tail->iov_base, + buf->pages, + buf->page_base + pgfrom, + copy); + result += copy; + } + + return result; +} + /** * _copy_to_pages * @pages: array of pages @@ -446,39 +486,13 @@ xdr_shrink_bufhead(struct xdr_buf *buf, size_t len) static unsigned int xdr_shrink_pagelen(struct xdr_buf *buf, size_t len) { - struct kvec *tail; - size_t copy; unsigned int pglen = buf->page_len; - unsigned int tailbuf_len; unsigned int result; - result = 0; - tail = buf->tail; if (len > buf->page_len) len = buf-> page_len; - tailbuf_len = buf->buflen - buf->head->iov_len - buf->page_len; - /* Shift the tail first */ - if (tailbuf_len != 0) { - unsigned int free_space = tailbuf_len - tail->iov_len; - - if (len < free_space) - free_space = len; - tail->iov_len += free_space; - - copy = len; - if 
(tail->iov_len > len) { - char *p = (char *)tail->iov_base + len; - memmove(p, tail->iov_base, tail->iov_len - len); - result += tail->iov_len - len; - } else - copy = tail->iov_len; - /* Copy from the inlined pages into the tail */ - _copy_from_pages((char *)tail->iov_base, - buf->pages, buf->page_base + pglen - len, - copy); - result += copy; - } + result = _shift_data_right_tail(buf, pglen - len, len); buf->page_len -= len; buf->buflen -= len; /* Have we truncated the message? */ From 84ce182ab85b8ad5002fb1125ba572df99dd0d1c Mon Sep 17 00:00:00 2001 From: Anna Schumaker Date: Wed, 28 May 2014 13:38:53 -0400 Subject: [PATCH 166/243] SUNRPC: Add the ability to expand holes in data pages This patch adds the ability to "read a hole" into a set of XDR data pages by taking the following steps: 1) Shift all data after the current xdr->p to the right, possibly into the tail, 2) Zero the specified range, and 3) Update xdr->p to point beyond the hole. Signed-off-by: Anna Schumaker --- include/linux/sunrpc/xdr.h | 1 + net/sunrpc/xdr.c | 69 ++++++++++++++++++++++++++++++++++++++ 2 files changed, 70 insertions(+) diff --git a/include/linux/sunrpc/xdr.h b/include/linux/sunrpc/xdr.h index 25a68dd87ecf..f9636d2a6d54 100644 --- a/include/linux/sunrpc/xdr.h +++ b/include/linux/sunrpc/xdr.h @@ -250,6 +250,7 @@ extern __be32 *xdr_inline_decode(struct xdr_stream *xdr, size_t nbytes); extern unsigned int xdr_read_pages(struct xdr_stream *xdr, unsigned int len); extern void xdr_enter_page(struct xdr_stream *xdr, unsigned int len); extern int xdr_process_buf(struct xdr_buf *buf, unsigned int offset, unsigned int len, int (*actor)(struct scatterlist *, void *), void *data); +extern uint64_t xdr_expand_hole(struct xdr_stream *, uint64_t, uint64_t); /** * xdr_stream_remaining - Return the number of bytes remaining in the stream diff --git a/net/sunrpc/xdr.c b/net/sunrpc/xdr.c index 10a88a67206a..1052ccdb4e99 100644 --- a/net/sunrpc/xdr.c +++ b/net/sunrpc/xdr.c @@ -390,6 +390,38 @@ 
_copy_from_pages(char *p, struct page **pages, size_t pgbase, size_t len) } EXPORT_SYMBOL_GPL(_copy_from_pages); +/** + * _zero_pages + * @pages: array of pages + * @pgbase: beginning page vector address + * @len: length + */ +static void +_zero_pages(struct page **pages, size_t pgbase, size_t len) +{ + struct page **page; + char *vpage; + size_t zero; + + page = pages + (pgbase >> PAGE_SHIFT); + pgbase &= ~PAGE_MASK; + + do { + zero = PAGE_SIZE - pgbase; + if (zero > len) + zero = len; + + vpage = kmap_atomic(*page); + memset(vpage + pgbase, 0, zero); + kunmap_atomic(vpage); + + flush_dcache_page(*page); + pgbase = 0; + page++; + + } while ((len -= zero) != 0); +} + /** * xdr_shrink_bufhead * @buf: xdr_buf @@ -1096,6 +1128,43 @@ unsigned int xdr_read_pages(struct xdr_stream *xdr, unsigned int len) } EXPORT_SYMBOL_GPL(xdr_read_pages); +uint64_t xdr_expand_hole(struct xdr_stream *xdr, uint64_t offset, uint64_t length) +{ + struct xdr_buf *buf = xdr->buf; + unsigned int bytes; + unsigned int from; + unsigned int truncated = 0; + + if ((offset + length) < offset || + (offset + length) > buf->page_len) + length = buf->page_len - offset; + + xdr_realign_pages(xdr); + from = xdr_page_pos(xdr); + bytes = xdr->nwords << 2; + + if (offset + length + bytes > buf->page_len) { + unsigned int shift = (offset + length + bytes) - buf->page_len; + unsigned int res = _shift_data_right_tail(buf, from + bytes - shift, shift); + truncated = shift - res; + xdr->nwords -= XDR_QUADLEN(truncated); + bytes -= shift; + } + + /* Now move the page data over and zero pages */ + if (bytes > 0) + _shift_data_right_pages(buf->pages, + buf->page_base + offset + length, + buf->page_base + from, + bytes); + _zero_pages(buf->pages, buf->page_base + offset, length); + + buf->len += length - (from - offset) - truncated; + xdr_set_page(xdr, offset + length, PAGE_SIZE); + return length; +} +EXPORT_SYMBOL_GPL(xdr_expand_hole); + /** * xdr_enter_page - decode data from the XDR page * @xdr: pointer to 
xdr_stream struct From c05eafad6b034772921e56de5c01df2326d9e3b3 Mon Sep 17 00:00:00 2001 From: Anna Schumaker Date: Thu, 28 Mar 2019 16:43:44 -0400 Subject: [PATCH 167/243] NFS: Add READ_PLUS hole segment decoding We keep things simple for now by only decoding a single hole or data segment returned by the server, even if they returned more to us. Signed-off-by: Anna Schumaker --- fs/nfs/nfs42xdr.c | 26 +++++++++++++++++++++++++- 1 file changed, 25 insertions(+), 1 deletion(-) diff --git a/fs/nfs/nfs42xdr.c b/fs/nfs/nfs42xdr.c index 930b4ca212c1..9720fedd2e57 100644 --- a/fs/nfs/nfs42xdr.c +++ b/fs/nfs/nfs42xdr.c @@ -53,7 +53,7 @@ #define decode_read_plus_maxsz (op_decode_hdr_maxsz + \ 1 /* rpr_eof */ + \ 1 /* rpr_contents count */ + \ - NFS42_READ_PLUS_SEGMENT_SIZE) + 2 * NFS42_READ_PLUS_SEGMENT_SIZE) #define encode_seek_maxsz (op_encode_hdr_maxsz + \ encode_stateid_maxsz + \ 2 /* offset */ + \ @@ -1045,6 +1045,28 @@ static int decode_read_plus_data(struct xdr_stream *xdr, struct nfs_pgio_res *re return 0; } +static int decode_read_plus_hole(struct xdr_stream *xdr, struct nfs_pgio_res *res, + uint32_t *eof) +{ + uint64_t offset, length, recvd; + __be32 *p; + + p = xdr_inline_decode(xdr, 8 + 8); + if (unlikely(!p)) + return -EIO; + + p = xdr_decode_hyper(p, &offset); + p = xdr_decode_hyper(p, &length); + recvd = xdr_expand_hole(xdr, 0, length); + res->count += recvd; + + if (recvd < length) { + *eof = 0; + return 1; + } + return 0; +} + static int decode_read_plus(struct xdr_stream *xdr, struct nfs_pgio_res *res) { uint32_t eof, segments, type; @@ -1071,6 +1093,8 @@ static int decode_read_plus(struct xdr_stream *xdr, struct nfs_pgio_res *res) type = be32_to_cpup(p++); if (type == NFS4_CONTENT_DATA) status = decode_read_plus_data(xdr, res, &eof); + else if (type == NFS4_CONTENT_HOLE) + status = decode_read_plus_hole(xdr, res, &eof); else return -EINVAL; From e6ac0accb27c6892b7ebc7799e7ce56b3390a678 Mon Sep 17 00:00:00 2001 From: Anna Schumaker Date: Tue, 21 Apr 2020 
11:27:00 -0400 Subject: [PATCH 168/243] SUNRPC: Add an xdr_align_data() function For now, this function simply aligns the data at the beginning of the pages. This can eventually be expanded to shift data to the correct offsets when we're ready. Signed-off-by: Anna Schumaker --- include/linux/sunrpc/xdr.h | 1 + net/sunrpc/xdr.c | 121 +++++++++++++++++++++++++++++++++++++ 2 files changed, 122 insertions(+) diff --git a/include/linux/sunrpc/xdr.h b/include/linux/sunrpc/xdr.h index f9636d2a6d54..fe7ff7f5b584 100644 --- a/include/linux/sunrpc/xdr.h +++ b/include/linux/sunrpc/xdr.h @@ -250,6 +250,7 @@ extern __be32 *xdr_inline_decode(struct xdr_stream *xdr, size_t nbytes); extern unsigned int xdr_read_pages(struct xdr_stream *xdr, unsigned int len); extern void xdr_enter_page(struct xdr_stream *xdr, unsigned int len); extern int xdr_process_buf(struct xdr_buf *buf, unsigned int offset, unsigned int len, int (*actor)(struct scatterlist *, void *), void *data); +extern uint64_t xdr_align_data(struct xdr_stream *, uint64_t, uint32_t); extern uint64_t xdr_expand_hole(struct xdr_stream *, uint64_t, uint64_t); /** diff --git a/net/sunrpc/xdr.c b/net/sunrpc/xdr.c index 1052ccdb4e99..3feff529a764 100644 --- a/net/sunrpc/xdr.c +++ b/net/sunrpc/xdr.c @@ -19,6 +19,9 @@ #include #include +static void _copy_to_pages(struct page **, size_t, const char *, size_t); + + /* * XDR functions for basic NFS types */ @@ -201,6 +204,88 @@ EXPORT_SYMBOL_GPL(xdr_inline_pages); * Helper routines for doing 'memmove' like operations on a struct xdr_buf */ +/** + * _shift_data_left_pages + * @pages: vector of pages containing both the source and dest memory area. 
+ * @pgto_base: page vector address of destination + * @pgfrom_base: page vector address of source + * @len: number of bytes to copy + * + * Note: the addresses pgto_base and pgfrom_base are both calculated in + * the same way: + * if a memory area starts at byte 'base' in page 'pages[i]', + * then its address is given as (i << PAGE_SHIFT) + base + * Also note: pgto_base must be < pgfrom_base, but the memory areas + * they point to may overlap. + */ +static void +_shift_data_left_pages(struct page **pages, size_t pgto_base, + size_t pgfrom_base, size_t len) +{ + struct page **pgfrom, **pgto; + char *vfrom, *vto; + size_t copy; + + BUG_ON(pgfrom_base <= pgto_base); + + pgto = pages + (pgto_base >> PAGE_SHIFT); + pgfrom = pages + (pgfrom_base >> PAGE_SHIFT); + + pgto_base &= ~PAGE_MASK; + pgfrom_base &= ~PAGE_MASK; + + do { + if (pgto_base >= PAGE_SIZE) { + pgto_base = 0; + pgto++; + } + if (pgfrom_base >= PAGE_SIZE){ + pgfrom_base = 0; + pgfrom++; + } + + copy = len; + if (copy > (PAGE_SIZE - pgto_base)) + copy = PAGE_SIZE - pgto_base; + if (copy > (PAGE_SIZE - pgfrom_base)) + copy = PAGE_SIZE - pgfrom_base; + + vto = kmap_atomic(*pgto); + if (*pgto != *pgfrom) { + vfrom = kmap_atomic(*pgfrom); + memcpy(vto + pgto_base, vfrom + pgfrom_base, copy); + kunmap_atomic(vfrom); + } else + memmove(vto + pgto_base, vto + pgfrom_base, copy); + flush_dcache_page(*pgto); + kunmap_atomic(vto); + + pgto_base += copy; + pgfrom_base += copy; + + } while ((len -= copy) != 0); +} + +static void +_shift_data_left_tail(struct xdr_buf *buf, unsigned int pgto, size_t len) +{ + struct kvec *tail = buf->tail; + + if (len > tail->iov_len) + len = tail->iov_len; + + _copy_to_pages(buf->pages, + buf->page_base + pgto, + (char *)tail->iov_base, + len); + tail->iov_len -= len; + + if (tail->iov_len > 0) + memmove((char *)tail->iov_base, + tail->iov_base + len, + tail->iov_len); +} + /** * _shift_data_right_pages * @pages: vector of pages containing both the source and dest memory area.
@@ -1128,6 +1213,42 @@ unsigned int xdr_read_pages(struct xdr_stream *xdr, unsigned int len) } EXPORT_SYMBOL_GPL(xdr_read_pages); +uint64_t xdr_align_data(struct xdr_stream *xdr, uint64_t offset, uint32_t length) +{ + struct xdr_buf *buf = xdr->buf; + unsigned int from, bytes; + unsigned int shift = 0; + + if ((offset + length) < offset || + (offset + length) > buf->page_len) + length = buf->page_len - offset; + + xdr_realign_pages(xdr); + from = xdr_page_pos(xdr); + bytes = xdr->nwords << 2; + if (length < bytes) + bytes = length; + + /* Move page data to the left */ + if (from > offset) { + shift = min_t(unsigned int, bytes, buf->page_len - from); + _shift_data_left_pages(buf->pages, + buf->page_base + offset, + buf->page_base + from, + shift); + bytes -= shift; + + /* Move tail data into the pages, if necessary */ + if (bytes > 0) + _shift_data_left_tail(buf, offset + shift, bytes); + } + + xdr->nwords -= XDR_QUADLEN(length); + xdr_set_page(xdr, from + length, PAGE_SIZE); + return length; +} +EXPORT_SYMBOL_GPL(xdr_align_data); + uint64_t xdr_expand_hole(struct xdr_stream *xdr, uint64_t offset, uint64_t length) { struct xdr_buf *buf = xdr->buf; From bff049a3b5001eb462f27eda98f32f3ff10f4ec2 Mon Sep 17 00:00:00 2001 From: Anna Schumaker Date: Wed, 1 Apr 2020 16:28:51 -0400 Subject: [PATCH 169/243] NFS: Decode a full READ_PLUS reply Decode multiple hole and data segments sent by the server, placing everything directly where they need to go in the xdr pages. 
Signed-off-by: Anna Schumaker --- fs/nfs/nfs42xdr.c | 36 +++++++++++++++++++----------------- 1 file changed, 19 insertions(+), 17 deletions(-) diff --git a/fs/nfs/nfs42xdr.c b/fs/nfs/nfs42xdr.c index 9720fedd2e57..0dc31ad2362e 100644 --- a/fs/nfs/nfs42xdr.c +++ b/fs/nfs/nfs42xdr.c @@ -1032,7 +1032,7 @@ static int decode_read_plus_data(struct xdr_stream *xdr, struct nfs_pgio_res *re p = xdr_decode_hyper(p, &offset); count = be32_to_cpup(p); - recvd = xdr_read_pages(xdr, count); + recvd = xdr_align_data(xdr, res->count, count); res->count += recvd; if (count > recvd) { @@ -1057,7 +1057,7 @@ static int decode_read_plus_hole(struct xdr_stream *xdr, struct nfs_pgio_res *re p = xdr_decode_hyper(p, &offset); p = xdr_decode_hyper(p, &length); - recvd = xdr_expand_hole(xdr, 0, length); + recvd = xdr_expand_hole(xdr, res->count, length); res->count += recvd; if (recvd < length) { @@ -1070,7 +1070,7 @@ static int decode_read_plus_hole(struct xdr_stream *xdr, struct nfs_pgio_res *re static int decode_read_plus(struct xdr_stream *xdr, struct nfs_pgio_res *res) { uint32_t eof, segments, type; - int status; + int status, i; __be32 *p; status = decode_op_hdr(xdr, OP_READ_PLUS); @@ -1086,22 +1086,24 @@ static int decode_read_plus(struct xdr_stream *xdr, struct nfs_pgio_res *res) if (segments == 0) goto out; - p = xdr_inline_decode(xdr, 4); - if (unlikely(!p)) - return -EIO; + for (i = 0; i < segments; i++) { + p = xdr_inline_decode(xdr, 4); + if (unlikely(!p)) + return -EIO; - type = be32_to_cpup(p++); - if (type == NFS4_CONTENT_DATA) - status = decode_read_plus_data(xdr, res, &eof); - else if (type == NFS4_CONTENT_HOLE) - status = decode_read_plus_hole(xdr, res, &eof); - else - return -EINVAL; + type = be32_to_cpup(p++); + if (type == NFS4_CONTENT_DATA) + status = decode_read_plus_data(xdr, res, &eof); + else if (type == NFS4_CONTENT_HOLE) + status = decode_read_plus_hole(xdr, res, &eof); + else + return -EINVAL; - if (status) - return status; - if (segments > 1) - eof = 0; + if 
(status < 0) + return status; + if (status > 0) + break; + } out: res->eof = eof; From 04e9e9bb8470bea74eafad1cafd552f3f06c32d9 Mon Sep 17 00:00:00 2001 From: "Matthew Wilcox (Oracle)" Date: Sun, 14 Jun 2020 21:52:04 -0400 Subject: [PATCH 170/243] XArray: Test marked multiorder iterations Demonstrate that starting a marked iteration partway through a marked multi-order entry works. Signed-off-by: Matthew Wilcox (Oracle) --- lib/test_xarray.c | 22 ++++++++++++++++++++++ 1 file changed, 22 insertions(+) diff --git a/lib/test_xarray.c b/lib/test_xarray.c index 9fc3da430aba..1122c4453c87 100644 --- a/lib/test_xarray.c +++ b/lib/test_xarray.c @@ -289,6 +289,27 @@ static noinline void check_xa_mark_2(struct xarray *xa) xa_destroy(xa); } +static noinline void check_xa_mark_3(struct xarray *xa) +{ +#ifdef CONFIG_XARRAY_MULTI + XA_STATE(xas, xa, 0x41); + void *entry; + int count = 0; + + xa_store_order(xa, 0x40, 2, xa_mk_index(0x40), GFP_KERNEL); + xa_set_mark(xa, 0x41, XA_MARK_0); + + rcu_read_lock(); + xas_for_each_marked(&xas, entry, ULONG_MAX, XA_MARK_0) { + count++; + XA_BUG_ON(xa, entry != xa_mk_index(0x40)); + } + XA_BUG_ON(xa, count != 1); + rcu_read_unlock(); + xa_destroy(xa); +#endif +} + static noinline void check_xa_mark(struct xarray *xa) { unsigned long index; @@ -297,6 +318,7 @@ static noinline void check_xa_mark(struct xarray *xa) check_xa_mark_1(xa, index); check_xa_mark_2(xa); + check_xa_mark_3(xa); } static noinline void check_xa_shrink(struct xarray *xa) From 8446466c9dd645da4c1848f35ffd0fc1df3524ee Mon Sep 17 00:00:00 2001 From: "Matthew Wilcox (Oracle)" Date: Thu, 6 Aug 2020 10:07:24 -0400 Subject: [PATCH 171/243] XArray: Fix xas_for_each_conflict documentation At one point, xas_for_each_conflict() was going to work this way, and I forgot to update the documentation when I changed my mind. 
Signed-off-by: Matthew Wilcox (Oracle) --- include/linux/xarray.h | 13 ++++++------- 1 file changed, 6 insertions(+), 7 deletions(-) diff --git a/include/linux/xarray.h b/include/linux/xarray.h index b4d70e7568b2..6b336098fca7 100644 --- a/include/linux/xarray.h +++ b/include/linux/xarray.h @@ -1714,13 +1714,12 @@ enum { * @xas: XArray operation state. * @entry: Entry retrieved from the array. * - * The loop body will be executed for each entry in the XArray that lies - * within the range specified by @xas. If the loop completes successfully, - * any entries that lie in this range will be replaced by @entry. The caller - * may break out of the loop; if they do so, the contents of the XArray will - * be unchanged. The operation may fail due to an out of memory condition. - * The caller may also call xa_set_err() to exit the loop while setting an - * error to record the reason. + * The loop body will be executed for each entry in the XArray that + * lies within the range specified by @xas. If the loop terminates + * normally, @entry will be %NULL. The user may break out of the loop, + * which will leave @entry set to the conflicting entry. The caller + * may also call xa_set_err() to exit the loop while setting an error + * to record the reason. */ #define xas_for_each_conflict(xas, entry) \ while ((entry = xas_find_conflict(xas))) From 1aee551334cda1fed8b8112dbe38257397a55c78 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Wed, 7 Oct 2020 18:24:17 -0400 Subject: [PATCH 172/243] NFSv4: Clean up initialisation of uniquified client id strings When the user sets a uniquifier, then ensure we copy the string so that calls to strlen() etc are atomic with calls to snprintf(). 
Signed-off-by: Trond Myklebust Signed-off-by: Anna Schumaker --- fs/nfs/nfs4proc.c | 75 +++++++++++++++++++++-------------------------- 1 file changed, 34 insertions(+), 41 deletions(-) diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c index d09fd3236820..50a96ca2c385 100644 --- a/fs/nfs/nfs4proc.c +++ b/fs/nfs/nfs4proc.c @@ -6093,9 +6093,22 @@ static void nfs4_init_boot_verifier(const struct nfs_client *clp, memcpy(bootverf->data, verf, sizeof(bootverf->data)); } +static size_t +nfs4_get_uniquifier(char *buf, size_t buflen) +{ + buf[0] = '\0'; + + if (nfs4_client_id_uniquifier[0] != '\0') + strscpy(buf, nfs4_client_id_uniquifier, buflen); + + return strlen(buf); +} + static int nfs4_init_nonuniform_client_string(struct nfs_client *clp) { + char buf[NFS4_CLIENT_ID_UNIQ_LEN]; + size_t buflen; size_t len; char *str; @@ -6109,8 +6122,11 @@ nfs4_init_nonuniform_client_string(struct nfs_client *clp) strlen(rpc_peeraddr2str(clp->cl_rpcclient, RPC_DISPLAY_ADDR)) + 1; rcu_read_unlock(); - if (nfs4_client_id_uniquifier[0] != '\0') - len += strlen(nfs4_client_id_uniquifier) + 1; + + buflen = nfs4_get_uniquifier(buf, sizeof(buf)); + if (buflen) + len += buflen + 1; + if (len > NFS4_OPAQUE_LIMIT + 1) return -EINVAL; @@ -6124,10 +6140,9 @@ nfs4_init_nonuniform_client_string(struct nfs_client *clp) return -ENOMEM; rcu_read_lock(); - if (nfs4_client_id_uniquifier[0] != '\0') + if (buflen) scnprintf(str, len, "Linux NFSv4.0 %s/%s/%s", - clp->cl_rpcclient->cl_nodename, - nfs4_client_id_uniquifier, + clp->cl_rpcclient->cl_nodename, buf, rpc_peeraddr2str(clp->cl_rpcclient, RPC_DISPLAY_ADDR)); else @@ -6141,51 +6156,24 @@ nfs4_init_nonuniform_client_string(struct nfs_client *clp) return 0; } -static int -nfs4_init_uniquifier_client_string(struct nfs_client *clp) -{ - size_t len; - char *str; - - len = 10 + 10 + 1 + 10 + 1 + - strlen(nfs4_client_id_uniquifier) + 1 + - strlen(clp->cl_rpcclient->cl_nodename) + 1; - - if (len > NFS4_OPAQUE_LIMIT + 1) - return -EINVAL; - - /* - * Since 
this string is allocated at mount time, and held until the - * nfs_client is destroyed, we can use GFP_KERNEL here w/o worrying - * about a memory-reclaim deadlock. - */ - str = kmalloc(len, GFP_KERNEL); - if (!str) - return -ENOMEM; - - scnprintf(str, len, "Linux NFSv%u.%u %s/%s", - clp->rpc_ops->version, clp->cl_minorversion, - nfs4_client_id_uniquifier, - clp->cl_rpcclient->cl_nodename); - clp->cl_owner_id = str; - return 0; -} - static int nfs4_init_uniform_client_string(struct nfs_client *clp) { + char buf[NFS4_CLIENT_ID_UNIQ_LEN]; + size_t buflen; size_t len; char *str; if (clp->cl_owner_id != NULL) return 0; - if (nfs4_client_id_uniquifier[0] != '\0') - return nfs4_init_uniquifier_client_string(clp); - len = 10 + 10 + 1 + 10 + 1 + strlen(clp->cl_rpcclient->cl_nodename) + 1; + buflen = nfs4_get_uniquifier(buf, sizeof(buf)); + if (buflen) + len += buflen + 1; + if (len > NFS4_OPAQUE_LIMIT + 1) return -EINVAL; @@ -6198,9 +6186,14 @@ nfs4_init_uniform_client_string(struct nfs_client *clp) if (!str) return -ENOMEM; - scnprintf(str, len, "Linux NFSv%u.%u %s", - clp->rpc_ops->version, clp->cl_minorversion, - clp->cl_rpcclient->cl_nodename); + if (buflen) + scnprintf(str, len, "Linux NFSv%u.%u %s/%s", + clp->rpc_ops->version, clp->cl_minorversion, + buf, clp->cl_rpcclient->cl_nodename); + else + scnprintf(str, len, "Linux NFSv%u.%u %s", + clp->rpc_ops->version, clp->cl_minorversion, + clp->cl_rpcclient->cl_nodename); clp->cl_owner_id = str; return 0; } From 39d43d164127da7fbc62d0ef73146e04e31a828d Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Wed, 7 Oct 2020 18:24:18 -0400 Subject: [PATCH 173/243] NFSv4: Use the net namespace uniquifier if it is set If a container sets a net namespace specific uniquifier, then use that in the setclientid/exchangeid process. 
Signed-off-by: Trond Myklebust Signed-off-by: Anna Schumaker --- fs/nfs/nfs4proc.c | 21 +++++++++++++++++---- 1 file changed, 17 insertions(+), 4 deletions(-) diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c index 50a96ca2c385..2e33995691f5 100644 --- a/fs/nfs/nfs4proc.c +++ b/fs/nfs/nfs4proc.c @@ -63,6 +63,7 @@ #include "callback.h" #include "pnfs.h" #include "netns.h" +#include "sysfs.h" #include "nfs4idmap.h" #include "nfs4session.h" #include "fscache.h" @@ -6094,11 +6095,23 @@ static void nfs4_init_boot_verifier(const struct nfs_client *clp, } static size_t -nfs4_get_uniquifier(char *buf, size_t buflen) +nfs4_get_uniquifier(struct nfs_client *clp, char *buf, size_t buflen) { + struct nfs_net *nn = net_generic(clp->cl_net, nfs_net_id); + struct nfs_netns_client *nn_clp = nn->nfs_client; + const char *id; + buf[0] = '\0'; - if (nfs4_client_id_uniquifier[0] != '\0') + if (nn_clp) { + rcu_read_lock(); + id = rcu_dereference(nn_clp->identifier); + if (id) + strscpy(buf, id, buflen); + rcu_read_unlock(); + } + + if (nfs4_client_id_uniquifier[0] != '\0' && buf[0] == '\0') strscpy(buf, nfs4_client_id_uniquifier, buflen); return strlen(buf); @@ -6123,7 +6136,7 @@ nfs4_init_nonuniform_client_string(struct nfs_client *clp) 1; rcu_read_unlock(); - buflen = nfs4_get_uniquifier(buf, sizeof(buf)); + buflen = nfs4_get_uniquifier(clp, buf, sizeof(buf)); if (buflen) len += buflen + 1; @@ -6170,7 +6183,7 @@ nfs4_init_uniform_client_string(struct nfs_client *clp) len = 10 + 10 + 1 + 10 + 1 + strlen(clp->cl_rpcclient->cl_nodename) + 1; - buflen = nfs4_get_uniquifier(buf, sizeof(buf)); + buflen = nfs4_get_uniquifier(clp, buf, sizeof(buf)); if (buflen) len += buflen + 1; From a33f6432b3a63a4909dbbb0967f7c9df8ff2de91 Mon Sep 17 00:00:00 2001 From: "Yan, Zheng" Date: Tue, 11 Aug 2020 15:23:03 +0800 Subject: [PATCH 174/243] ceph: encode inodes' parent/d_name in cap reconnect message Since nautilus, MDS tracks dirfrags whose child inodes have caps in open file table. 
When MDS recovers, it prefetches all of these dirfrags. This avoids using backtrace to load inodes. But dirfrags prefetch may load lots of useless inodes into cache, and make MDS run out of memory. Recent MDS adds an option that disables dirfrags prefetch. When dirfrags prefetch is disabled, the recovering MDS only prefetches corresponding dir inodes. Including inodes' parent/d_name in cap reconnect message can help MDS to load inodes into its cache. Signed-off-by: "Yan, Zheng" Reviewed-by: Jeff Layton Signed-off-by: Ilya Dryomov --- fs/ceph/mds_client.c | 89 ++++++++++++++++++++++++++++++-------------- 1 file changed, 61 insertions(+), 28 deletions(-) diff --git a/fs/ceph/mds_client.c b/fs/ceph/mds_client.c index 4a26862d7667..76d8d9495d1d 100644 --- a/fs/ceph/mds_client.c +++ b/fs/ceph/mds_client.c @@ -3612,6 +3612,39 @@ static int send_reconnect_partial(struct ceph_reconnect_state *recon_state) return err; } +static struct dentry* d_find_primary(struct inode *inode) +{ + struct dentry *alias, *dn = NULL; + + if (hlist_empty(&inode->i_dentry)) + return NULL; + + spin_lock(&inode->i_lock); + if (hlist_empty(&inode->i_dentry)) + goto out_unlock; + + if (S_ISDIR(inode->i_mode)) { + alias = hlist_entry(inode->i_dentry.first, struct dentry, d_u.d_alias); + if (!IS_ROOT(alias)) + dn = dget(alias); + goto out_unlock; + } + + hlist_for_each_entry(alias, &inode->i_dentry, d_u.d_alias) { + spin_lock(&alias->d_lock); + if (!d_unhashed(alias) && + (ceph_dentry(alias)->flags & CEPH_DENTRY_PRIMARY_LINK)) { + dn = dget_dlock(alias); + } + spin_unlock(&alias->d_lock); + if (dn) + break; + } +out_unlock: + spin_unlock(&inode->i_lock); + return dn; +} + /* * Encode information about a cap for a reconnect with the MDS.
*/ @@ -3625,13 +3658,32 @@ static int reconnect_caps_cb(struct inode *inode, struct ceph_cap *cap, struct ceph_inode_info *ci = cap->ci; struct ceph_reconnect_state *recon_state = arg; struct ceph_pagelist *pagelist = recon_state->pagelist; - int err; + struct dentry *dentry; + char *path; + int pathlen, err; + u64 pathbase; u64 snap_follows; dout(" adding %p ino %llx.%llx cap %p %lld %s\n", inode, ceph_vinop(inode), cap, cap->cap_id, ceph_cap_string(cap->issued)); + dentry = d_find_primary(inode); + if (dentry) { + /* set pathbase to parent dir when msg_version >= 2 */ + path = ceph_mdsc_build_path(dentry, &pathlen, &pathbase, + recon_state->msg_version >= 2); + dput(dentry); + if (IS_ERR(path)) { + err = PTR_ERR(path); + goto out_err; + } + } else { + path = NULL; + pathlen = 0; + pathbase = 0; + } + spin_lock(&ci->i_ceph_lock); cap->seq = 0; /* reset cap seq */ cap->issue_seq = 0; /* and issue_seq */ @@ -3652,7 +3704,7 @@ static int reconnect_caps_cb(struct inode *inode, struct ceph_cap *cap, rec.v2.wanted = cpu_to_le32(__ceph_caps_wanted(ci)); rec.v2.issued = cpu_to_le32(cap->issued); rec.v2.snaprealm = cpu_to_le64(ci->i_snap_realm->ino); - rec.v2.pathbase = 0; + rec.v2.pathbase = cpu_to_le64(pathbase); rec.v2.flock_len = (__force __le32) ((ci->i_ceph_flags & CEPH_I_ERROR_FILELOCK) ? 
0 : 1); } else { @@ -3663,7 +3715,7 @@ static int reconnect_caps_cb(struct inode *inode, struct ceph_cap *cap, ceph_encode_timespec64(&rec.v1.mtime, &inode->i_mtime); ceph_encode_timespec64(&rec.v1.atime, &inode->i_atime); rec.v1.snaprealm = cpu_to_le64(ci->i_snap_realm->ino); - rec.v1.pathbase = 0; + rec.v1.pathbase = cpu_to_le64(pathbase); } if (list_empty(&ci->i_cap_snaps)) { @@ -3725,7 +3777,7 @@ static int reconnect_caps_cb(struct inode *inode, struct ceph_cap *cap, sizeof(struct ceph_filelock); rec.v2.flock_len = cpu_to_le32(struct_len); - struct_len += sizeof(u32) + sizeof(rec.v2); + struct_len += sizeof(u32) + pathlen + sizeof(rec.v2); if (struct_v >= 2) struct_len += sizeof(u64); /* snap_follows */ @@ -3749,7 +3801,7 @@ static int reconnect_caps_cb(struct inode *inode, struct ceph_cap *cap, ceph_pagelist_encode_8(pagelist, 1); ceph_pagelist_encode_32(pagelist, struct_len); } - ceph_pagelist_encode_string(pagelist, NULL, 0); + ceph_pagelist_encode_string(pagelist, path, pathlen); ceph_pagelist_append(pagelist, &rec, sizeof(rec.v2)); ceph_locks_to_pagelist(flocks, pagelist, num_fcntl_locks, num_flock_locks); @@ -3758,39 +3810,20 @@ static int reconnect_caps_cb(struct inode *inode, struct ceph_cap *cap, out_freeflocks: kfree(flocks); } else { - u64 pathbase = 0; - int pathlen = 0; - char *path = NULL; - struct dentry *dentry; - - dentry = d_find_alias(inode); - if (dentry) { - path = ceph_mdsc_build_path(dentry, - &pathlen, &pathbase, 0); - dput(dentry); - if (IS_ERR(path)) { - err = PTR_ERR(path); - goto out_err; - } - rec.v1.pathbase = cpu_to_le64(pathbase); - } - err = ceph_pagelist_reserve(pagelist, sizeof(u64) + sizeof(u32) + pathlen + sizeof(rec.v1)); - if (err) { - goto out_freepath; - } + if (err) + goto out_err; ceph_pagelist_encode_64(pagelist, ceph_ino(inode)); ceph_pagelist_encode_string(pagelist, path, pathlen); ceph_pagelist_append(pagelist, &rec, sizeof(rec.v1)); -out_freepath: - ceph_mdsc_free_path(path, pathlen); } out_err: - if (err >= 0) + 
ceph_mdsc_free_path(path, pathlen); + if (!err) recon_state->nr_caps++; return err; } From 1c30c90733879ea197dd29af54450a0f6cdcacb1 Mon Sep 17 00:00:00 2001 From: Luis Henriques Date: Fri, 14 Aug 2020 10:38:22 +0100 Subject: [PATCH 175/243] ceph: remove unnecessary return in switch statement Since there's a return immediately after the 'break', there's no need for this extra 'return' in the S_IFDIR case. Signed-off-by: Luis Henriques Reviewed-by: Jeff Layton Signed-off-by: Ilya Dryomov --- fs/ceph/file.c | 2 -- 1 file changed, 2 deletions(-) diff --git a/fs/ceph/file.c b/fs/ceph/file.c index 3f4c993dfc6f..fb3ea715a19d 100644 --- a/fs/ceph/file.c +++ b/fs/ceph/file.c @@ -256,8 +256,6 @@ static int ceph_init_file(struct inode *inode, struct file *file, int fmode) case S_IFDIR: ret = ceph_init_file_info(inode, file, fmode, S_ISDIR(inode->i_mode)); - if (ret) - return ret; break; case S_IFLNK: From 3986f9a42e993075af01c17dc8968cfb96a4fe53 Mon Sep 17 00:00:00 2001 From: Ilya Dryomov Date: Mon, 17 Aug 2020 13:45:04 +0200 Subject: [PATCH 176/243] libceph: multiple workspaces for CRUSH computations Replace a global map->crush_workspace (protected by a global mutex) with a list of workspaces, up to the number of CPUs + 1. This is based on a patch from Robin Geuze . Robin and his team have observed a 10-20% increase in IOPS on all queue depths and lower CPU usage as well on a high-end all-NVMe 100GbE cluster. 
Signed-off-by: Ilya Dryomov --- include/linux/ceph/osdmap.h | 14 ++- include/linux/crush/crush.h | 3 + net/ceph/osdmap.c | 166 ++++++++++++++++++++++++++++++++---- 3 files changed, 166 insertions(+), 17 deletions(-) diff --git a/include/linux/ceph/osdmap.h b/include/linux/ceph/osdmap.h index 3f4498fef6ad..cad9acfbc320 100644 --- a/include/linux/ceph/osdmap.h +++ b/include/linux/ceph/osdmap.h @@ -137,6 +137,17 @@ int ceph_oid_aprintf(struct ceph_object_id *oid, gfp_t gfp, const char *fmt, ...); void ceph_oid_destroy(struct ceph_object_id *oid); +struct workspace_manager { + struct list_head idle_ws; + spinlock_t ws_lock; + /* Number of free workspaces */ + int free_ws; + /* Total number of allocated workspaces */ + atomic_t total_ws; + /* Waiters for a free workspace */ + wait_queue_head_t ws_wait; +}; + struct ceph_pg_mapping { struct rb_node node; struct ceph_pg pgid; @@ -184,8 +195,7 @@ struct ceph_osdmap { * the list of osds that store+replicate them. */ struct crush_map *crush; - struct mutex crush_workspace_mutex; - void *crush_workspace; + struct workspace_manager crush_wsm; }; static inline bool ceph_osd_exists(struct ceph_osdmap *map, int osd) diff --git a/include/linux/crush/crush.h b/include/linux/crush/crush.h index 2f811baf78d2..30dba392b730 100644 --- a/include/linux/crush/crush.h +++ b/include/linux/crush/crush.h @@ -346,6 +346,9 @@ struct crush_work_bucket { struct crush_work { struct crush_work_bucket **work; /* Per-bucket working store */ +#ifdef __KERNEL__ + struct list_head item; +#endif }; #ifdef __KERNEL__ diff --git a/net/ceph/osdmap.c b/net/ceph/osdmap.c index 96c25f5e064a..fa08c15be0c0 100644 --- a/net/ceph/osdmap.c +++ b/net/ceph/osdmap.c @@ -964,6 +964,143 @@ static int decode_pool_names(void **p, void *end, struct ceph_osdmap *map) return -EINVAL; } +/* + * CRUSH workspaces + * + * workspace_manager framework borrowed from fs/btrfs/compression.c. 
+ * Two simplifications: there is only one type of workspace and there + * is always at least one workspace. + */ +static struct crush_work *alloc_workspace(const struct crush_map *c) +{ + struct crush_work *work; + size_t work_size; + + WARN_ON(!c->working_size); + work_size = crush_work_size(c, CEPH_PG_MAX_SIZE); + dout("%s work_size %zu bytes\n", __func__, work_size); + + work = ceph_kvmalloc(work_size, GFP_NOIO); + if (!work) + return NULL; + + INIT_LIST_HEAD(&work->item); + crush_init_workspace(c, work); + return work; +} + +static void free_workspace(struct crush_work *work) +{ + WARN_ON(!list_empty(&work->item)); + kvfree(work); +} + +static void init_workspace_manager(struct workspace_manager *wsm) +{ + INIT_LIST_HEAD(&wsm->idle_ws); + spin_lock_init(&wsm->ws_lock); + atomic_set(&wsm->total_ws, 0); + wsm->free_ws = 0; + init_waitqueue_head(&wsm->ws_wait); +} + +static void add_initial_workspace(struct workspace_manager *wsm, + struct crush_work *work) +{ + WARN_ON(!list_empty(&wsm->idle_ws)); + + list_add(&work->item, &wsm->idle_ws); + atomic_set(&wsm->total_ws, 1); + wsm->free_ws = 1; +} + +static void cleanup_workspace_manager(struct workspace_manager *wsm) +{ + struct crush_work *work; + + while (!list_empty(&wsm->idle_ws)) { + work = list_first_entry(&wsm->idle_ws, struct crush_work, + item); + list_del_init(&work->item); + free_workspace(work); + } + atomic_set(&wsm->total_ws, 0); + wsm->free_ws = 0; +} + +/* + * Finds an available workspace or allocates a new one. If it's not + * possible to allocate a new one, waits until there is one. 
+ */ +static struct crush_work *get_workspace(struct workspace_manager *wsm, + const struct crush_map *c) +{ + struct crush_work *work; + int cpus = num_online_cpus(); + +again: + spin_lock(&wsm->ws_lock); + if (!list_empty(&wsm->idle_ws)) { + work = list_first_entry(&wsm->idle_ws, struct crush_work, + item); + list_del_init(&work->item); + wsm->free_ws--; + spin_unlock(&wsm->ws_lock); + return work; + + } + if (atomic_read(&wsm->total_ws) > cpus) { + DEFINE_WAIT(wait); + + spin_unlock(&wsm->ws_lock); + prepare_to_wait(&wsm->ws_wait, &wait, TASK_UNINTERRUPTIBLE); + if (atomic_read(&wsm->total_ws) > cpus && !wsm->free_ws) + schedule(); + finish_wait(&wsm->ws_wait, &wait); + goto again; + } + atomic_inc(&wsm->total_ws); + spin_unlock(&wsm->ws_lock); + + work = alloc_workspace(c); + if (!work) { + atomic_dec(&wsm->total_ws); + wake_up(&wsm->ws_wait); + + /* + * Do not return the error but go back to waiting. We + * have the initial workspace and the CRUSH computation + * time is bounded so we will get it eventually. + */ + WARN_ON(atomic_read(&wsm->total_ws) < 1); + goto again; + } + return work; +} + +/* + * Puts a workspace back on the list or frees it if we have enough + * idle ones sitting around.
+ */ +static void put_workspace(struct workspace_manager *wsm, + struct crush_work *work) +{ + spin_lock(&wsm->ws_lock); + if (wsm->free_ws <= num_online_cpus()) { + list_add(&work->item, &wsm->idle_ws); + wsm->free_ws++; + spin_unlock(&wsm->ws_lock); + goto wake; + } + spin_unlock(&wsm->ws_lock); + + free_workspace(work); + atomic_dec(&wsm->total_ws); +wake: + if (wq_has_sleeper(&wsm->ws_wait)) + wake_up(&wsm->ws_wait); +} + /* * osd map */ @@ -981,7 +1118,8 @@ struct ceph_osdmap *ceph_osdmap_alloc(void) map->primary_temp = RB_ROOT; map->pg_upmap = RB_ROOT; map->pg_upmap_items = RB_ROOT; - mutex_init(&map->crush_workspace_mutex); + + init_workspace_manager(&map->crush_wsm); return map; } @@ -989,8 +1127,11 @@ struct ceph_osdmap *ceph_osdmap_alloc(void) void ceph_osdmap_destroy(struct ceph_osdmap *map) { dout("osdmap_destroy %p\n", map); + if (map->crush) crush_destroy(map->crush); + cleanup_workspace_manager(&map->crush_wsm); + while (!RB_EMPTY_ROOT(&map->pg_temp)) { struct ceph_pg_mapping *pg = rb_entry(rb_first(&map->pg_temp), @@ -1029,7 +1170,6 @@ void ceph_osdmap_destroy(struct ceph_osdmap *map) kvfree(map->osd_weight); kvfree(map->osd_addr); kvfree(map->osd_primary_affinity); - kvfree(map->crush_workspace); kfree(map); } @@ -1104,26 +1244,22 @@ static int osdmap_set_max_osd(struct ceph_osdmap *map, u32 max) static int osdmap_set_crush(struct ceph_osdmap *map, struct crush_map *crush) { - void *workspace; - size_t work_size; + struct crush_work *work; if (IS_ERR(crush)) return PTR_ERR(crush); - work_size = crush_work_size(crush, CEPH_PG_MAX_SIZE); - dout("%s work_size %zu bytes\n", __func__, work_size); - workspace = ceph_kvmalloc(work_size, GFP_NOIO); - if (!workspace) { + work = alloc_workspace(crush); + if (!work) { crush_destroy(crush); return -ENOMEM; } - crush_init_workspace(crush, workspace); if (map->crush) crush_destroy(map->crush); - kvfree(map->crush_workspace); + cleanup_workspace_manager(&map->crush_wsm); map->crush = crush; - map->crush_workspace 
= workspace; + add_initial_workspace(&map->crush_wsm, work); return 0; } @@ -2322,6 +2458,7 @@ static int do_crush(struct ceph_osdmap *map, int ruleno, int x, s64 choose_args_index) { struct crush_choose_arg_map *arg_map; + struct crush_work *work; int r; BUG_ON(result_max > CEPH_PG_MAX_SIZE); @@ -2332,12 +2469,11 @@ static int do_crush(struct ceph_osdmap *map, int ruleno, int x, arg_map = lookup_choose_arg_map(&map->crush->choose_args, CEPH_DEFAULT_CHOOSE_ARGS); - mutex_lock(&map->crush_workspace_mutex); + work = get_workspace(&map->crush_wsm, map->crush); r = crush_do_rule(map->crush, ruleno, x, result, result_max, - weight, weight_max, map->crush_workspace, + weight, weight_max, work, arg_map ? arg_map->args : NULL); - mutex_unlock(&map->crush_workspace_mutex); - + put_workspace(&map->crush_wsm, work); return r; } From 3a8ebe0b8b616c5f6d72f9a95aa29ccd0b35408f Mon Sep 17 00:00:00 2001 From: Yanhu Cao Date: Mon, 24 Aug 2020 11:00:58 +0800 Subject: [PATCH 177/243] ceph: add column 'mds' to show caps in more user friendly In multi-mds, the 'caps' debugfs file will have duplicate ino, add the 'mds' column to indicate which mds session the cap belongs to. 
Signed-off-by: Yanhu Cao Reviewed-by: Jeff Layton Signed-off-by: Ilya Dryomov --- fs/ceph/debugfs.c | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/fs/ceph/debugfs.c b/fs/ceph/debugfs.c index 3e3fcda9b276..75b13175c530 100644 --- a/fs/ceph/debugfs.c +++ b/fs/ceph/debugfs.c @@ -202,7 +202,8 @@ static int caps_show_cb(struct inode *inode, struct ceph_cap *cap, void *p) { struct seq_file *s = p; - seq_printf(s, "0x%-17llx%-17s%-17s\n", ceph_ino(inode), + seq_printf(s, "0x%-17llx%-3d%-17s%-17s\n", ceph_ino(inode), + cap->session->s_mds, ceph_cap_string(cap->issued), ceph_cap_string(cap->implemented)); return 0; @@ -222,8 +223,8 @@ static int caps_show(struct seq_file *s, void *p) "reserved\t%d\n" "min\t\t%d\n\n", total, avail, used, reserved, min); - seq_printf(s, "ino issued implemented\n"); - seq_printf(s, "-----------------------------------------------\n"); + seq_printf(s, "ino mds issued implemented\n"); + seq_printf(s, "--------------------------------------------------\n"); mutex_lock(&mdsc->mutex); for (i = 0; i < mdsc->max_sessions; i++) { From c5f575ed08c38d077a581a1ec0c48c23ee6b7c21 Mon Sep 17 00:00:00 2001 From: Jeff Layton Date: Fri, 21 Aug 2020 21:20:59 -0700 Subject: [PATCH 178/243] ceph: drop special-casing for ITER_PIPE in ceph_sync_read This special casing was added in 7ce469a53e71 (ceph: fix splice read for no Fc capability case). The confirm callback for ITER_PIPE expects that the page is Uptodate and returns an error otherwise. A simpler workaround is just to use the Uptodate bit, which has no meaning for anonymous pages. Rip out the special casing for ITER_PIPE and just SetPageUptodate before we copy to the iter. 
Cc: John Hubbard Suggested-by: Al Viro Signed-off-by: Jeff Layton Signed-off-by: Ilya Dryomov --- fs/ceph/file.c | 71 +++++++++++++++++--------------------------------- 1 file changed, 24 insertions(+), 47 deletions(-) diff --git a/fs/ceph/file.c b/fs/ceph/file.c index fb3ea715a19d..ed8fbfe3bddc 100644 --- a/fs/ceph/file.c +++ b/fs/ceph/file.c @@ -863,6 +863,8 @@ static ssize_t ceph_sync_read(struct kiocb *iocb, struct iov_iter *to, size_t page_off; u64 i_size; bool more; + int idx; + size_t left; req = ceph_osdc_new_request(osdc, &ci->i_layout, ci->i_vino, off, &len, 0, 1, @@ -876,29 +878,13 @@ static ssize_t ceph_sync_read(struct kiocb *iocb, struct iov_iter *to, more = len < iov_iter_count(to); - if (unlikely(iov_iter_is_pipe(to))) { - ret = iov_iter_get_pages_alloc(to, &pages, len, - &page_off); - if (ret <= 0) { - ceph_osdc_put_request(req); - ret = -ENOMEM; - break; - } - num_pages = DIV_ROUND_UP(ret + page_off, PAGE_SIZE); - if (ret < len) { - len = ret; - osd_req_op_extent_update(req, 0, len); - more = false; - } - } else { - num_pages = calc_pages_for(off, len); - page_off = off & ~PAGE_MASK; - pages = ceph_alloc_page_vector(num_pages, GFP_KERNEL); - if (IS_ERR(pages)) { - ceph_osdc_put_request(req); - ret = PTR_ERR(pages); - break; - } + num_pages = calc_pages_for(off, len); + page_off = off & ~PAGE_MASK; + pages = ceph_alloc_page_vector(num_pages, GFP_KERNEL); + if (IS_ERR(pages)) { + ceph_osdc_put_request(req); + ret = PTR_ERR(pages); + break; } osd_req_op_extent_osd_data_pages(req, 0, pages, len, page_off, @@ -929,32 +915,23 @@ static ssize_t ceph_sync_read(struct kiocb *iocb, struct iov_iter *to, ret += zlen; } - if (unlikely(iov_iter_is_pipe(to))) { - if (ret > 0) { - iov_iter_advance(to, ret); - off += ret; - } else { - iov_iter_advance(to, 0); + idx = 0; + left = ret > 0 ? 
ret : 0; + while (left > 0) { + size_t len, copied; + page_off = off & ~PAGE_MASK; + len = min_t(size_t, left, PAGE_SIZE - page_off); + SetPageUptodate(pages[idx]); + copied = copy_page_to_iter(pages[idx++], + page_off, len, to); + off += copied; + left -= copied; + if (copied < len) { + ret = -EFAULT; + break; } - ceph_put_page_vector(pages, num_pages, false); - } else { - int idx = 0; - size_t left = ret > 0 ? ret : 0; - while (left > 0) { - size_t len, copied; - page_off = off & ~PAGE_MASK; - len = min_t(size_t, left, PAGE_SIZE - page_off); - copied = copy_page_to_iter(pages[idx++], - page_off, len, to); - off += copied; - left -= copied; - if (copied < len) { - ret = -EFAULT; - break; - } - } - ceph_release_page_vector(pages, num_pages); } + ceph_release_page_vector(pages, num_pages); if (ret < 0) { if (ret == -EBLACKLISTED) From 2678da88f4b449300d56e0e7a9e77d1a79c83463 Mon Sep 17 00:00:00 2001 From: Xiubo Li Date: Thu, 3 Sep 2020 09:01:39 -0400 Subject: [PATCH 179/243] ceph: add ceph_sb_to_mdsc helper support to parse the mdsc This will help simplify the code. 
[ jlayton: fix minor merge conflict in quota.c ] Signed-off-by: Xiubo Li Signed-off-by: Jeff Layton Signed-off-by: Ilya Dryomov --- fs/ceph/caps.c | 3 +-- fs/ceph/dir.c | 20 +++++++------------- fs/ceph/file.c | 8 +++----- fs/ceph/inode.c | 5 ++--- fs/ceph/locks.c | 2 +- fs/ceph/quota.c | 10 +++++----- fs/ceph/snap.c | 2 +- fs/ceph/super.h | 6 ++++++ 8 files changed, 26 insertions(+), 30 deletions(-) diff --git a/fs/ceph/caps.c b/fs/ceph/caps.c index 034b3f4fdd3a..39753e0e0e5a 100644 --- a/fs/ceph/caps.c +++ b/fs/ceph/caps.c @@ -1906,9 +1906,8 @@ bool __ceph_should_report_size(struct ceph_inode_info *ci) void ceph_check_caps(struct ceph_inode_info *ci, int flags, struct ceph_mds_session *session) { - struct ceph_fs_client *fsc = ceph_inode_to_client(&ci->vfs_inode); - struct ceph_mds_client *mdsc = fsc->mdsc; struct inode *inode = &ci->vfs_inode; + struct ceph_mds_client *mdsc = ceph_sb_to_mdsc(inode->i_sb); struct ceph_cap *cap; u64 flush_tid, oldest_flush_tid; int file_wanted, used, cap_used; diff --git a/fs/ceph/dir.c b/fs/ceph/dir.c index d72e4a12bb69..a4d48370b2b3 100644 --- a/fs/ceph/dir.c +++ b/fs/ceph/dir.c @@ -38,8 +38,7 @@ static int __dir_lease_try_check(const struct dentry *dentry); static int ceph_d_init(struct dentry *dentry) { struct ceph_dentry_info *di; - struct ceph_fs_client *fsc = ceph_sb_to_client(dentry->d_sb); - struct ceph_mds_client *mdsc = fsc->mdsc; + struct ceph_mds_client *mdsc = ceph_sb_to_mdsc(dentry->d_sb); di = kmem_cache_zalloc(ceph_dentry_cachep, GFP_KERNEL); if (!di) @@ -738,7 +737,7 @@ static struct dentry *ceph_lookup(struct inode *dir, struct dentry *dentry, unsigned int flags) { struct ceph_fs_client *fsc = ceph_sb_to_client(dir->i_sb); - struct ceph_mds_client *mdsc = fsc->mdsc; + struct ceph_mds_client *mdsc = ceph_sb_to_mdsc(dir->i_sb); struct ceph_mds_request *req; int op; int mask; @@ -827,8 +826,7 @@ int ceph_handle_notrace_create(struct inode *dir, struct dentry *dentry) static int ceph_mknod(struct inode *dir, struct 
dentry *dentry, umode_t mode, dev_t rdev) { - struct ceph_fs_client *fsc = ceph_sb_to_client(dir->i_sb); - struct ceph_mds_client *mdsc = fsc->mdsc; + struct ceph_mds_client *mdsc = ceph_sb_to_mdsc(dir->i_sb); struct ceph_mds_request *req; struct ceph_acl_sec_ctx as_ctx = {}; int err; @@ -889,8 +887,7 @@ static int ceph_create(struct inode *dir, struct dentry *dentry, umode_t mode, static int ceph_symlink(struct inode *dir, struct dentry *dentry, const char *dest) { - struct ceph_fs_client *fsc = ceph_sb_to_client(dir->i_sb); - struct ceph_mds_client *mdsc = fsc->mdsc; + struct ceph_mds_client *mdsc = ceph_sb_to_mdsc(dir->i_sb); struct ceph_mds_request *req; struct ceph_acl_sec_ctx as_ctx = {}; int err; @@ -942,8 +939,7 @@ static int ceph_symlink(struct inode *dir, struct dentry *dentry, static int ceph_mkdir(struct inode *dir, struct dentry *dentry, umode_t mode) { - struct ceph_fs_client *fsc = ceph_sb_to_client(dir->i_sb); - struct ceph_mds_client *mdsc = fsc->mdsc; + struct ceph_mds_client *mdsc = ceph_sb_to_mdsc(dir->i_sb); struct ceph_mds_request *req; struct ceph_acl_sec_ctx as_ctx = {}; int err = -EROFS; @@ -1010,8 +1006,7 @@ static int ceph_mkdir(struct inode *dir, struct dentry *dentry, umode_t mode) static int ceph_link(struct dentry *old_dentry, struct inode *dir, struct dentry *dentry) { - struct ceph_fs_client *fsc = ceph_sb_to_client(dir->i_sb); - struct ceph_mds_client *mdsc = fsc->mdsc; + struct ceph_mds_client *mdsc = ceph_sb_to_mdsc(dir->i_sb); struct ceph_mds_request *req; int err; @@ -1192,8 +1187,7 @@ static int ceph_rename(struct inode *old_dir, struct dentry *old_dentry, struct inode *new_dir, struct dentry *new_dentry, unsigned int flags) { - struct ceph_fs_client *fsc = ceph_sb_to_client(old_dir->i_sb); - struct ceph_mds_client *mdsc = fsc->mdsc; + struct ceph_mds_client *mdsc = ceph_sb_to_mdsc(old_dir->i_sb); struct ceph_mds_request *req; int op = CEPH_MDS_OP_RENAME; int err; diff --git a/fs/ceph/file.c b/fs/ceph/file.c index 
ed8fbfe3bddc..762a280b7037 100644 --- a/fs/ceph/file.c +++ b/fs/ceph/file.c @@ -182,8 +182,7 @@ static void put_bvecs(struct bio_vec *bvecs, int num_bvecs, bool should_dirty) static struct ceph_mds_request * prepare_open_request(struct super_block *sb, int flags, int create_mode) { - struct ceph_fs_client *fsc = ceph_sb_to_client(sb); - struct ceph_mds_client *mdsc = fsc->mdsc; + struct ceph_mds_client *mdsc = ceph_sb_to_mdsc(sb); struct ceph_mds_request *req; int want_auth = USE_ANY_MDS; int op = (flags & O_CREAT) ? CEPH_MDS_OP_CREATE : CEPH_MDS_OP_OPEN; @@ -283,7 +282,7 @@ static int ceph_init_file(struct inode *inode, struct file *file, int fmode) */ int ceph_renew_caps(struct inode *inode, int fmode) { - struct ceph_mds_client *mdsc = ceph_inode_to_client(inode)->mdsc; + struct ceph_mds_client *mdsc = ceph_sb_to_mdsc(inode->i_sb); struct ceph_inode_info *ci = ceph_inode(inode); struct ceph_mds_request *req; int err, flags, wanted; @@ -1027,8 +1026,7 @@ static void ceph_aio_complete_req(struct ceph_osd_request *req) struct inode *inode = req->r_inode; struct ceph_aio_request *aio_req = req->r_priv; struct ceph_osd_data *osd_data = osd_req_op_extent_osd_data(req, 0); - struct ceph_fs_client *fsc = ceph_inode_to_client(inode); - struct ceph_client_metric *metric = &fsc->mdsc->metric; + struct ceph_client_metric *metric = &ceph_sb_to_mdsc(inode->i_sb)->metric; BUG_ON(osd_data->type != CEPH_OSD_DATA_TYPE_BVECS); BUG_ON(!osd_data->num_bvecs); diff --git a/fs/ceph/inode.c b/fs/ceph/inode.c index d163fa96cb40..1fed0e827eb7 100644 --- a/fs/ceph/inode.c +++ b/fs/ceph/inode.c @@ -558,8 +558,7 @@ void ceph_evict_inode(struct inode *inode) * caps in i_snap_caps. 
*/ if (ci->i_snap_realm) { - struct ceph_mds_client *mdsc = - ceph_inode_to_client(inode)->mdsc; + struct ceph_mds_client *mdsc = ceph_sb_to_mdsc(inode->i_sb); if (ceph_snap(inode) == CEPH_NOSNAP) { struct ceph_snap_realm *realm = ci->i_snap_realm; dout(" dropping residual ref to snap realm %p\n", @@ -739,7 +738,7 @@ int ceph_fill_inode(struct inode *inode, struct page *locked_page, struct ceph_mds_session *session, int cap_fmode, struct ceph_cap_reservation *caps_reservation) { - struct ceph_mds_client *mdsc = ceph_inode_to_client(inode)->mdsc; + struct ceph_mds_client *mdsc = ceph_sb_to_mdsc(inode->i_sb); struct ceph_mds_reply_inode *info = iinfo->in; struct ceph_inode_info *ci = ceph_inode(inode); int issued, new_issued, info_caps; diff --git a/fs/ceph/locks.c b/fs/ceph/locks.c index d6b9166e71e4..048a435a29be 100644 --- a/fs/ceph/locks.c +++ b/fs/ceph/locks.c @@ -63,7 +63,7 @@ static const struct file_lock_operations ceph_fl_lock_ops = { static int ceph_lock_message(u8 lock_type, u16 operation, struct inode *inode, int cmd, u8 wait, struct file_lock *fl) { - struct ceph_mds_client *mdsc = ceph_sb_to_client(inode->i_sb)->mdsc; + struct ceph_mds_client *mdsc = ceph_sb_to_mdsc(inode->i_sb); struct ceph_mds_request *req; int err; u64 length = 0; diff --git a/fs/ceph/quota.c b/fs/ceph/quota.c index cc2c4d40b022..83cb4f26b689 100644 --- a/fs/ceph/quota.c +++ b/fs/ceph/quota.c @@ -12,7 +12,7 @@ void ceph_adjust_quota_realms_count(struct inode *inode, bool inc) { - struct ceph_mds_client *mdsc = ceph_inode_to_client(inode)->mdsc; + struct ceph_mds_client *mdsc = ceph_sb_to_mdsc(inode->i_sb); if (inc) atomic64_inc(&mdsc->quotarealms_count); else @@ -21,8 +21,8 @@ void ceph_adjust_quota_realms_count(struct inode *inode, bool inc) static inline bool ceph_has_realms_with_quotas(struct inode *inode) { - struct ceph_mds_client *mdsc = ceph_inode_to_client(inode)->mdsc; - struct super_block *sb = mdsc->fsc->sb; + struct super_block *sb = inode->i_sb; + struct ceph_mds_client 
*mdsc = ceph_sb_to_mdsc(sb); struct inode *root = d_inode(sb->s_root); if (atomic64_read(&mdsc->quotarealms_count) > 0) @@ -266,7 +266,7 @@ static struct ceph_snap_realm *get_quota_realm(struct ceph_mds_client *mdsc, static bool ceph_quota_is_same_realm(struct inode *old, struct inode *new) { - struct ceph_mds_client *mdsc = ceph_inode_to_client(old)->mdsc; + struct ceph_mds_client *mdsc = ceph_sb_to_mdsc(old->i_sb); struct ceph_snap_realm *old_realm, *new_realm; bool is_same; @@ -313,7 +313,7 @@ enum quota_check_op { static bool check_quota_exceeded(struct inode *inode, enum quota_check_op op, loff_t delta) { - struct ceph_mds_client *mdsc = ceph_inode_to_client(inode)->mdsc; + struct ceph_mds_client *mdsc = ceph_sb_to_mdsc(inode->i_sb); struct ceph_inode_info *ci; struct ceph_snap_realm *realm, *next; struct inode *in; diff --git a/fs/ceph/snap.c b/fs/ceph/snap.c index 923be9399b21..0da39c16dab4 100644 --- a/fs/ceph/snap.c +++ b/fs/ceph/snap.c @@ -602,7 +602,7 @@ int __ceph_finish_cap_snap(struct ceph_inode_info *ci, struct ceph_cap_snap *capsnap) { struct inode *inode = &ci->vfs_inode; - struct ceph_mds_client *mdsc = ceph_sb_to_client(inode->i_sb)->mdsc; + struct ceph_mds_client *mdsc = ceph_sb_to_mdsc(inode->i_sb); BUG_ON(capsnap->writing); capsnap->size = inode->i_size; diff --git a/fs/ceph/super.h b/fs/ceph/super.h index a3995ebe0623..483a52d281cd 100644 --- a/fs/ceph/super.h +++ b/fs/ceph/super.h @@ -451,6 +451,12 @@ ceph_sb_to_client(const struct super_block *sb) return (struct ceph_fs_client *)sb->s_fs_info; } +static inline struct ceph_mds_client * +ceph_sb_to_mdsc(const struct super_block *sb) +{ + return (struct ceph_mds_client *)ceph_sb_to_client(sb)->mdsc; +} + static inline struct ceph_vino ceph_vino(const struct inode *inode) { From 1dd8d470813699baab9112e95fce00979b21c4f7 Mon Sep 17 00:00:00 2001 From: Xiubo Li Date: Thu, 3 Sep 2020 09:01:40 -0400 Subject: [PATCH 180/243] ceph: metrics for opened files, pinned caps and opened inodes In client for 
each inode, it may have many opened files and may have been pinned by more than one MDS server. Some inodes are idle and have no opened files at all. This patch shows these metrics in debugfs, like this: item total ----------------------------------------- opened files / total inodes 14 / 5 pinned i_caps / total inodes 7 / 5 opened inodes / total inodes 3 / 5 These metrics will later be sent to ceph, where they will be used by `fs top`. [ jlayton: drop unrelated hunk, count hashed inodes instead of allocated ones ] URL: https://tracker.ceph.com/issues/47005 Signed-off-by: Xiubo Li Signed-off-by: Jeff Layton Signed-off-by: Ilya Dryomov --- fs/ceph/caps.c | 38 ++++++++++++++++++++++++++++++++++++-- fs/ceph/debugfs.c | 11 +++++++++++ fs/ceph/inode.c | 7 ++++++- fs/ceph/metric.c | 14 ++++++++++++++ fs/ceph/metric.h | 7 +++++++ 5 files changed, 74 insertions(+), 3 deletions(-) diff --git a/fs/ceph/caps.c b/fs/ceph/caps.c index 39753e0e0e5a..c7e69547628e 100644 --- a/fs/ceph/caps.c +++ b/fs/ceph/caps.c @@ -4283,13 +4283,30 @@ void __ceph_touch_fmode(struct ceph_inode_info *ci, void ceph_get_fmode(struct ceph_inode_info *ci, int fmode, int count) { - int i; + struct ceph_mds_client *mdsc = ceph_sb_to_mdsc(ci->vfs_inode.i_sb); int bits = (fmode << 1) | 1; + bool is_opened = false; + int i; + + if (count == 1) + atomic64_inc(&mdsc->metric.opened_files); + spin_lock(&ci->i_ceph_lock); for (i = 0; i < CEPH_FILE_MODE_BITS; i++) { if (bits & (1 << i)) ci->i_nr_by_mode[i] += count; + + /* + * If any of the mode ref is larger than 1, + * that means it has been already opened by + * others. Just skip checking the PIN ref. 
+ */ + if (i && ci->i_nr_by_mode[i] > 1) + is_opened = true; } + + if (!is_opened) + percpu_counter_inc(&mdsc->metric.opened_inodes); spin_unlock(&ci->i_ceph_lock); } @@ -4300,15 +4317,32 @@ void ceph_get_fmode(struct ceph_inode_info *ci, int fmode, int count) */ void ceph_put_fmode(struct ceph_inode_info *ci, int fmode, int count) { - int i; + struct ceph_mds_client *mdsc = ceph_sb_to_mdsc(ci->vfs_inode.i_sb); int bits = (fmode << 1) | 1; + bool is_closed = true; + int i; + + if (count == 1) + atomic64_dec(&mdsc->metric.opened_files); + spin_lock(&ci->i_ceph_lock); for (i = 0; i < CEPH_FILE_MODE_BITS; i++) { if (bits & (1 << i)) { BUG_ON(ci->i_nr_by_mode[i] < count); ci->i_nr_by_mode[i] -= count; } + + /* + * If any of the mode ref is not 0 after + * decreased, that means it is still opened + * by others. Just skip checking the PIN ref. + */ + if (i && ci->i_nr_by_mode[i]) + is_closed = false; } + + if (is_closed) + percpu_counter_dec(&mdsc->metric.opened_inodes); spin_unlock(&ci->i_ceph_lock); } diff --git a/fs/ceph/debugfs.c b/fs/ceph/debugfs.c index 75b13175c530..7a8fbe3e4751 100644 --- a/fs/ceph/debugfs.c +++ b/fs/ceph/debugfs.c @@ -148,6 +148,17 @@ static int metric_show(struct seq_file *s, void *p) int nr_caps = 0; s64 total, sum, avg, min, max, sq; + sum = percpu_counter_sum(&m->total_inodes); + seq_printf(s, "item total\n"); + seq_printf(s, "------------------------------------------\n"); + seq_printf(s, "%-35s%lld / %lld\n", "opened files / total inodes", + atomic64_read(&m->opened_files), sum); + seq_printf(s, "%-35s%lld / %lld\n", "pinned i_caps / total inodes", + atomic64_read(&m->total_caps), sum); + seq_printf(s, "%-35s%lld / %lld\n", "opened inodes / total inodes", + percpu_counter_sum(&m->opened_inodes), sum); + + seq_printf(s, "\n"); seq_printf(s, "item total avg_lat(us) min_lat(us) max_lat(us) stdev(us)\n"); seq_printf(s, "-----------------------------------------------------------------------------------\n"); diff --git a/fs/ceph/inode.c 
b/fs/ceph/inode.c index 1fed0e827eb7..526faf4778ce 100644 --- a/fs/ceph/inode.c +++ b/fs/ceph/inode.c @@ -42,10 +42,13 @@ static void ceph_inode_work(struct work_struct *work); static int ceph_set_ino_cb(struct inode *inode, void *data) { struct ceph_inode_info *ci = ceph_inode(inode); + struct ceph_mds_client *mdsc = ceph_sb_to_mdsc(inode->i_sb); ci->i_vino = *(struct ceph_vino *)data; inode->i_ino = ceph_vino_to_ino_t(ci->i_vino); inode_set_iversion_raw(inode, 0); + percpu_counter_inc(&mdsc->metric.total_inodes); + return 0; } @@ -538,11 +541,14 @@ void ceph_free_inode(struct inode *inode) void ceph_evict_inode(struct inode *inode) { struct ceph_inode_info *ci = ceph_inode(inode); + struct ceph_mds_client *mdsc = ceph_sb_to_mdsc(inode->i_sb); struct ceph_inode_frag *frag; struct rb_node *n; dout("evict_inode %p ino %llx.%llx\n", inode, ceph_vinop(inode)); + percpu_counter_dec(&mdsc->metric.total_inodes); + truncate_inode_pages_final(&inode->i_data); clear_inode(inode); @@ -558,7 +564,6 @@ void ceph_evict_inode(struct inode *inode) * caps in i_snap_caps. 
*/ if (ci->i_snap_realm) { - struct ceph_mds_client *mdsc = ceph_sb_to_mdsc(inode->i_sb); if (ceph_snap(inode) == CEPH_NOSNAP) { struct ceph_snap_realm *realm = ci->i_snap_realm; dout(" dropping residual ref to snap realm %p\n", diff --git a/fs/ceph/metric.c b/fs/ceph/metric.c index 2466b261fba2..fee4c4778313 100644 --- a/fs/ceph/metric.c +++ b/fs/ceph/metric.c @@ -192,11 +192,23 @@ int ceph_metric_init(struct ceph_client_metric *m) m->total_metadatas = 0; m->metadata_latency_sum = 0; + atomic64_set(&m->opened_files, 0); + ret = percpu_counter_init(&m->opened_inodes, 0, GFP_KERNEL); + if (ret) + goto err_opened_inodes; + ret = percpu_counter_init(&m->total_inodes, 0, GFP_KERNEL); + if (ret) + goto err_total_inodes; + m->session = NULL; INIT_DELAYED_WORK(&m->delayed_work, metric_delayed_work); return 0; +err_total_inodes: + percpu_counter_destroy(&m->opened_inodes); +err_opened_inodes: + percpu_counter_destroy(&m->i_caps_mis); err_i_caps_mis: percpu_counter_destroy(&m->i_caps_hit); err_i_caps_hit: @@ -212,6 +224,8 @@ void ceph_metric_destroy(struct ceph_client_metric *m) if (!m) return; + percpu_counter_destroy(&m->total_inodes); + percpu_counter_destroy(&m->opened_inodes); percpu_counter_destroy(&m->i_caps_mis); percpu_counter_destroy(&m->i_caps_hit); percpu_counter_destroy(&m->d_lease_mis); diff --git a/fs/ceph/metric.h b/fs/ceph/metric.h index 1d0959d669d7..710f3f1dceab 100644 --- a/fs/ceph/metric.h +++ b/fs/ceph/metric.h @@ -115,6 +115,13 @@ struct ceph_client_metric { ktime_t metadata_latency_min; ktime_t metadata_latency_max; + /* The total number of directories and files that are opened */ + atomic64_t opened_files; + + /* The total number of inodes that have opened files or directories */ + struct percpu_counter opened_inodes; + struct percpu_counter total_inodes; + struct ceph_mds_session *session; struct delayed_work delayed_work; /* delayed work */ }; From 470a5c77eac0e07bfe60413fb3d314b734392bc3 Mon Sep 17 00:00:00 2001 From: Jeff Layton Date: Fri, 11 
Sep 2020 15:19:00 -0400 Subject: [PATCH 181/243] ceph: use kill_anon_super helper ceph open-codes this around some other activity and the rationale for it isn't clear. There is no need to delay free_anon_bdev until the end of kill_sb. Signed-off-by: Jeff Layton Signed-off-by: Ilya Dryomov --- fs/ceph/super.c | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/fs/ceph/super.c b/fs/ceph/super.c index 7ec0e6d03d10..b3fc9bb61afc 100644 --- a/fs/ceph/super.c +++ b/fs/ceph/super.c @@ -1205,14 +1205,13 @@ static int ceph_init_fs_context(struct fs_context *fc) static void ceph_kill_sb(struct super_block *s) { struct ceph_fs_client *fsc = ceph_sb_to_client(s); - dev_t dev = s->s_dev; dout("kill_sb %p\n", s); ceph_mdsc_pre_umount(fsc->mdsc); flush_fs_workqueues(fsc); - generic_shutdown_super(s); + kill_anon_super(s); fsc->client->extra_mon_dispatch = NULL; ceph_fs_debugfs_cleanup(fsc); @@ -1220,7 +1219,6 @@ static void ceph_kill_sb(struct super_block *s) ceph_fscache_unregister_fs(fsc); destroy_fs_client(fsc); - free_anon_bdev(dev); } static struct file_system_type ceph_fs_type = { From 2e169296603470d209d9feecbfb67b9c4cb5ca0f Mon Sep 17 00:00:00 2001 From: Jeff Layton Date: Mon, 14 Sep 2020 13:30:36 -0400 Subject: [PATCH 182/243] ceph: have ceph_writepages_start call pagevec_lookup_range_tag Currently it calls pagevec_lookup_range_nr_tag(), but that may be inefficient, as we might end up having to search several times as we get down to looking for fewer pages to fill the array. Thus spake Willy: "I think ceph is misusing pagevec_lookup_range_nr_tag(). Let's suppose you get a range which is AAAAbbbbAAAAbbbbAAAAbbbbbbbb(...)bbbbAAAA and you try to fetch max_pages=13. First loop will get AAAAbbbbAAAAb and have 8 locked_pages. The next call will get bbbAA and now locked_pages=10. Next call gets AAb ... and now you're iterating your way through all the 'b' one page at a time until you find that first A." 
'A' here refers to pages that are eligible for writeback and 'b' represents ones that aren't (for whatever reason). Not capping the number of return pages may mean that we sometimes find more pages than are needed, but the extra references will just get put at the end. Ceph is also the only caller of pagevec_lookup_range_nr_tag(), so this change should allow us to eliminate that call as well. That will be done in a follow-on patch. Reported-by: Matthew Wilcox Signed-off-by: Jeff Layton Reviewed-by: Matthew Wilcox Signed-off-by: Ilya Dryomov --- fs/ceph/addr.c | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/fs/ceph/addr.c b/fs/ceph/addr.c index 6ea761c84494..b03dbaa9d345 100644 --- a/fs/ceph/addr.c +++ b/fs/ceph/addr.c @@ -962,9 +962,8 @@ static int ceph_writepages_start(struct address_space *mapping, max_pages = wsize >> PAGE_SHIFT; get_more_pages: - pvec_pages = pagevec_lookup_range_nr_tag(&pvec, mapping, &index, - end, PAGECACHE_TAG_DIRTY, - max_pages - locked_pages); + pvec_pages = pagevec_lookup_range_tag(&pvec, mapping, &index, + end, PAGECACHE_TAG_DIRTY); dout("pagevec_lookup_range_tag got %d\n", pvec_pages); if (!pvec_pages && !locked_pages) break; From 0b98acd6188309333c3a8a6e16feadadd31e4523 Mon Sep 17 00:00:00 2001 From: Ilya Dryomov Date: Mon, 14 Sep 2020 13:39:19 +0200 Subject: [PATCH 183/243] libceph, rbd, ceph: "blacklist" -> "blocklist" Signed-off-by: Ilya Dryomov --- Documentation/filesystems/ceph.rst | 6 +++--- drivers/block/rbd.c | 8 ++++---- fs/ceph/addr.c | 24 ++++++++++++------------ fs/ceph/file.c | 4 ++-- fs/ceph/mds_client.c | 16 ++++++++-------- fs/ceph/super.c | 4 ++-- fs/ceph/super.h | 4 ++-- include/linux/ceph/mon_client.h | 2 +- include/linux/ceph/rados.h | 2 +- net/ceph/mon_client.c | 8 ++++---- 10 files changed, 39 insertions(+), 39 deletions(-) diff --git a/Documentation/filesystems/ceph.rst b/Documentation/filesystems/ceph.rst index 0aa70750df0f..7d2ef4e27273 100644 --- a/Documentation/filesystems/ceph.rst +++ 
b/Documentation/filesystems/ceph.rst @@ -163,14 +163,14 @@ Mount Options to the default VFS implementation if this option is used. recover_session= - Set auto reconnect mode in the case where the client is blacklisted. The + Set auto reconnect mode in the case where the client is blocklisted. The available modes are "no" and "clean". The default is "no". * no: never attempt to reconnect when client detects that it has been - blacklisted. Operations will generally fail after being blacklisted. + blocklisted. Operations will generally fail after being blocklisted. * clean: client reconnects to the ceph cluster automatically when it - detects that it has been blacklisted. During reconnect, client drops + detects that it has been blocklisted. During reconnect, client drops dirty data/metadata, invalidates page caches and writable file handles. After reconnect, file locks become stale because the MDS loses track of them. If an inode contains any stale file locks, read/write on the diff --git a/drivers/block/rbd.c b/drivers/block/rbd.c index e77eaab5cf23..bdd33bcf11b1 100644 --- a/drivers/block/rbd.c +++ b/drivers/block/rbd.c @@ -4010,10 +4010,10 @@ static int rbd_try_lock(struct rbd_device *rbd_dev) rbd_warn(rbd_dev, "breaking header lock owned by %s%llu", ENTITY_NAME(lockers[0].id.name)); - ret = ceph_monc_blacklist_add(&client->monc, + ret = ceph_monc_blocklist_add(&client->monc, &lockers[0].info.addr); if (ret) { - rbd_warn(rbd_dev, "blacklist of %s%llu failed: %d", + rbd_warn(rbd_dev, "blocklist of %s%llu failed: %d", ENTITY_NAME(lockers[0].id.name), ret); goto out; } @@ -4077,7 +4077,7 @@ static int rbd_try_acquire_lock(struct rbd_device *rbd_dev) ret = rbd_try_lock(rbd_dev); if (ret < 0) { rbd_warn(rbd_dev, "failed to lock header: %d", ret); - if (ret == -EBLACKLISTED) + if (ret == -EBLOCKLISTED) goto out; ret = 1; /* request lock anyway */ @@ -4613,7 +4613,7 @@ static void rbd_reregister_watch(struct work_struct *work) ret = __rbd_register_watch(rbd_dev); if 
(ret) { rbd_warn(rbd_dev, "failed to reregister watch: %d", ret); - if (ret != -EBLACKLISTED && ret != -ENOENT) { + if (ret != -EBLOCKLISTED && ret != -ENOENT) { queue_delayed_work(rbd_dev->task_wq, &rbd_dev->watch_dwork, RBD_RETRY_DELAY); diff --git a/fs/ceph/addr.c b/fs/ceph/addr.c index b03dbaa9d345..7b1f3dad576f 100644 --- a/fs/ceph/addr.c +++ b/fs/ceph/addr.c @@ -271,8 +271,8 @@ static int ceph_do_readpage(struct file *filp, struct page *page) if (err < 0) { SetPageError(page); ceph_fscache_readpage_cancel(inode, page); - if (err == -EBLACKLISTED) - fsc->blacklisted = true; + if (err == -EBLOCKLISTED) + fsc->blocklisted = true; goto out; } if (err < PAGE_SIZE) @@ -312,8 +312,8 @@ static void finish_read(struct ceph_osd_request *req) int i; dout("finish_read %p req %p rc %d bytes %d\n", inode, req, rc, bytes); - if (rc == -EBLACKLISTED) - ceph_inode_to_client(inode)->blacklisted = true; + if (rc == -EBLOCKLISTED) + ceph_inode_to_client(inode)->blocklisted = true; /* unlock all pages, zeroing any data we didn't read */ osd_data = osd_req_op_extent_osd_data(req, 0); @@ -737,8 +737,8 @@ static int writepage_nounlock(struct page *page, struct writeback_control *wbc) end_page_writeback(page); return err; } - if (err == -EBLACKLISTED) - fsc->blacklisted = true; + if (err == -EBLOCKLISTED) + fsc->blocklisted = true; dout("writepage setting page/mapping error %d %p\n", err, page); mapping_set_error(&inode->i_data, err); @@ -801,8 +801,8 @@ static void writepages_finish(struct ceph_osd_request *req) if (rc < 0) { mapping_set_error(mapping, rc); ceph_set_error_write(ci); - if (rc == -EBLACKLISTED) - fsc->blacklisted = true; + if (rc == -EBLOCKLISTED) + fsc->blocklisted = true; } else { ceph_clear_error_write(ci); } @@ -2038,16 +2038,16 @@ static int __ceph_pool_perm_get(struct ceph_inode_info *ci, if (err >= 0 || err == -ENOENT) have |= POOL_READ; else if (err != -EPERM) { - if (err == -EBLACKLISTED) - fsc->blacklisted = true; + if (err == -EBLOCKLISTED) + 
fsc->blocklisted = true; goto out_unlock; } if (err2 == 0 || err2 == -EEXIST) have |= POOL_WRITE; else if (err2 != -EPERM) { - if (err2 == -EBLACKLISTED) - fsc->blacklisted = true; + if (err2 == -EBLOCKLISTED) + fsc->blocklisted = true; err = err2; goto out_unlock; } diff --git a/fs/ceph/file.c b/fs/ceph/file.c index 762a280b7037..209535d5b8d3 100644 --- a/fs/ceph/file.c +++ b/fs/ceph/file.c @@ -933,8 +933,8 @@ static ssize_t ceph_sync_read(struct kiocb *iocb, struct iov_iter *to, ceph_release_page_vector(pages, num_pages); if (ret < 0) { - if (ret == -EBLACKLISTED) - fsc->blacklisted = true; + if (ret == -EBLOCKLISTED) + fsc->blocklisted = true; break; } diff --git a/fs/ceph/mds_client.c b/fs/ceph/mds_client.c index 76d8d9495d1d..bb2d938a17ac 100644 --- a/fs/ceph/mds_client.c +++ b/fs/ceph/mds_client.c @@ -3303,7 +3303,7 @@ static void handle_forward(struct ceph_mds_client *mdsc, } static int __decode_session_metadata(void **p, void *end, - bool *blacklisted) + bool *blocklisted) { /* map */ u32 n; @@ -3318,7 +3318,7 @@ static int __decode_session_metadata(void **p, void *end, ceph_decode_32_safe(p, end, len, bad); ceph_decode_need(p, end, len, bad); if (err_str && strnstr(*p, "blacklisted", len)) - *blacklisted = true; + *blocklisted = true; *p += len; } return 0; @@ -3341,7 +3341,7 @@ static void handle_session(struct ceph_mds_session *session, u32 op; u64 seq, features = 0; int wake = 0; - bool blacklisted = false; + bool blocklisted = false; /* decode */ ceph_decode_need(&p, end, sizeof(*h), bad); @@ -3354,7 +3354,7 @@ static void handle_session(struct ceph_mds_session *session, if (msg_version >= 3) { u32 len; /* version >= 2, metadata */ - if (__decode_session_metadata(&p, end, &blacklisted) < 0) + if (__decode_session_metadata(&p, end, &blocklisted) < 0) goto bad; /* version >= 3, feature bits */ ceph_decode_32_safe(&p, end, len, bad); @@ -3445,8 +3445,8 @@ static void handle_session(struct ceph_mds_session *session, session->s_state = 
CEPH_MDS_SESSION_REJECTED; cleanup_session_requests(mdsc, session); remove_session_caps(session); - if (blacklisted) - mdsc->fsc->blacklisted = true; + if (blocklisted) + mdsc->fsc->blocklisted = true; wake = 2; /* for good measure */ break; @@ -4367,14 +4367,14 @@ static void maybe_recover_session(struct ceph_mds_client *mdsc) if (READ_ONCE(fsc->mount_state) != CEPH_MOUNT_MOUNTED) return; - if (!READ_ONCE(fsc->blacklisted)) + if (!READ_ONCE(fsc->blocklisted)) return; if (fsc->last_auto_reconnect && time_before(jiffies, fsc->last_auto_reconnect + HZ * 60 * 30)) return; - pr_info("auto reconnect after blacklisted\n"); + pr_info("auto reconnect after blocklisted\n"); fsc->last_auto_reconnect = jiffies; ceph_force_reconnect(fsc->sb); } diff --git a/fs/ceph/super.c b/fs/ceph/super.c index b3fc9bb61afc..2516304379d3 100644 --- a/fs/ceph/super.c +++ b/fs/ceph/super.c @@ -1241,13 +1241,13 @@ int ceph_force_reconnect(struct super_block *sb) * see remove_session_caps_cb() */ flush_workqueue(fsc->inode_wq); - /* In case that we were blacklisted. This also reset + /* In case that we were blocklisted. 
This also reset * all mon/osd connections */ ceph_reset_client_addr(fsc->client); ceph_osdc_clear_abort_err(&fsc->client->osdc); - fsc->blacklisted = false; + fsc->blocklisted = false; fsc->mount_state = CEPH_MOUNT_MOUNTED; if (sb->s_root) { diff --git a/fs/ceph/super.h b/fs/ceph/super.h index 483a52d281cd..582694899130 100644 --- a/fs/ceph/super.h +++ b/fs/ceph/super.h @@ -32,7 +32,7 @@ #define CEPH_BLOCK_SHIFT 22 /* 4 MB */ #define CEPH_BLOCK (1 << CEPH_BLOCK_SHIFT) -#define CEPH_MOUNT_OPT_CLEANRECOVER (1<<1) /* auto reonnect (clean mode) after blacklisted */ +#define CEPH_MOUNT_OPT_CLEANRECOVER (1<<1) /* auto reonnect (clean mode) after blocklisted */ #define CEPH_MOUNT_OPT_DIRSTAT (1<<4) /* `cat dirname` for stats */ #define CEPH_MOUNT_OPT_RBYTES (1<<5) /* dir st_bytes = rbytes */ #define CEPH_MOUNT_OPT_NOASYNCREADDIR (1<<7) /* no dcache readdir */ @@ -109,7 +109,7 @@ struct ceph_fs_client { unsigned long mount_state; unsigned long last_auto_reconnect; - bool blacklisted; + bool blocklisted; bool have_copy_from2; diff --git a/include/linux/ceph/mon_client.h b/include/linux/ceph/mon_client.h index ce4ffeb384d7..b658961156a0 100644 --- a/include/linux/ceph/mon_client.h +++ b/include/linux/ceph/mon_client.h @@ -142,7 +142,7 @@ int ceph_monc_get_version(struct ceph_mon_client *monc, const char *what, int ceph_monc_get_version_async(struct ceph_mon_client *monc, const char *what, ceph_monc_callback_t cb, u64 private_data); -int ceph_monc_blacklist_add(struct ceph_mon_client *monc, +int ceph_monc_blocklist_add(struct ceph_mon_client *monc, struct ceph_entity_addr *client_addr); extern int ceph_monc_open_session(struct ceph_mon_client *monc); diff --git a/include/linux/ceph/rados.h b/include/linux/ceph/rados.h index 3a518fd0eaad..43a7a1573b51 100644 --- a/include/linux/ceph/rados.h +++ b/include/linux/ceph/rados.h @@ -424,7 +424,7 @@ enum { }; #define EOLDSNAPC ERESTART /* ORDERSNAP flag set; writer has old snapc*/ -#define EBLACKLISTED ESHUTDOWN /* blacklisted */ 
+#define EBLOCKLISTED ESHUTDOWN /* blocklisted */ /* xattr comparison */ enum { diff --git a/net/ceph/mon_client.c b/net/ceph/mon_client.c index d633a0aeaa55..efcdde471278 100644 --- a/net/ceph/mon_client.c +++ b/net/ceph/mon_client.c @@ -896,7 +896,7 @@ static void handle_command_ack(struct ceph_mon_client *monc, ceph_msg_dump(msg); } -int ceph_monc_blacklist_add(struct ceph_mon_client *monc, +int ceph_monc_blocklist_add(struct ceph_mon_client *monc, struct ceph_entity_addr *client_addr) { struct ceph_mon_generic_request *req; @@ -936,9 +936,9 @@ int ceph_monc_blacklist_add(struct ceph_mon_client *monc, ret = wait_generic_request(req); if (!ret) /* - * Make sure we have the osdmap that includes the blacklist + * Make sure we have the osdmap that includes the blocklist * entry. This is needed to ensure that the OSDs pick up the - * new blacklist before processing any future requests from + * new blocklist before processing any future requests from * this client. */ ret = ceph_wait_for_latest_osdmap(monc->client, 0); @@ -947,7 +947,7 @@ int ceph_monc_blacklist_add(struct ceph_mon_client *monc, put_generic_request(req); return ret; } -EXPORT_SYMBOL(ceph_monc_blacklist_add); +EXPORT_SYMBOL(ceph_monc_blocklist_add); /* * Resend pending generic requests. 
From 1b05fae7f29db4c41864aed903865086e070fa89 Mon Sep 17 00:00:00 2001 From: Ilya Dryomov Date: Tue, 15 Sep 2020 20:38:34 +0200 Subject: [PATCH 184/243] libceph: switch to the new "osd blocklist add" command Signed-off-by: Ilya Dryomov --- net/ceph/mon_client.c | 67 +++++++++++++++++++++++++++++++++---------- 1 file changed, 52 insertions(+), 15 deletions(-) diff --git a/net/ceph/mon_client.c b/net/ceph/mon_client.c index efcdde471278..c4cf2529d08b 100644 --- a/net/ceph/mon_client.c +++ b/net/ceph/mon_client.c @@ -896,8 +896,9 @@ static void handle_command_ack(struct ceph_mon_client *monc, ceph_msg_dump(msg); } -int ceph_monc_blocklist_add(struct ceph_mon_client *monc, - struct ceph_entity_addr *client_addr) +static __printf(2, 0) +int do_mon_command_vargs(struct ceph_mon_client *monc, const char *fmt, + va_list ap) { struct ceph_mon_generic_request *req; struct ceph_mon_command *h; @@ -925,28 +926,64 @@ int ceph_monc_blocklist_add(struct ceph_mon_client *monc, h->monhdr.session_mon_tid = 0; h->fsid = monc->monmap->fsid; h->num_strs = cpu_to_le32(1); - len = sprintf(h->str, "{ \"prefix\": \"osd blacklist\", \ - \"blacklistop\": \"add\", \ - \"addr\": \"%pISpc/%u\" }", - &client_addr->in_addr, le32_to_cpu(client_addr->nonce)); + len = vsprintf(h->str, fmt, ap); h->str_len = cpu_to_le32(len); send_generic_request(monc, req); mutex_unlock(&monc->mutex); ret = wait_generic_request(req); - if (!ret) - /* - * Make sure we have the osdmap that includes the blocklist - * entry. This is needed to ensure that the OSDs pick up the - * new blocklist before processing any future requests from - * this client. - */ - ret = ceph_wait_for_latest_osdmap(monc->client, 0); - out: put_generic_request(req); return ret; } + +static __printf(2, 3) +int do_mon_command(struct ceph_mon_client *monc, const char *fmt, ...) 
+{ + va_list ap; + int ret; + + va_start(ap, fmt); + ret = do_mon_command_vargs(monc, fmt, ap); + va_end(ap); + return ret; +} + +int ceph_monc_blocklist_add(struct ceph_mon_client *monc, + struct ceph_entity_addr *client_addr) +{ + int ret; + + ret = do_mon_command(monc, + "{ \"prefix\": \"osd blocklist\", \ + \"blocklistop\": \"add\", \ + \"addr\": \"%pISpc/%u\" }", + &client_addr->in_addr, + le32_to_cpu(client_addr->nonce)); + if (ret == -EINVAL) { + /* + * The monitor returns EINVAL on an unrecognized command. + * Try the legacy command -- it is exactly the same except + * for the name. + */ + ret = do_mon_command(monc, + "{ \"prefix\": \"osd blacklist\", \ + \"blacklistop\": \"add\", \ + \"addr\": \"%pISpc/%u\" }", + &client_addr->in_addr, + le32_to_cpu(client_addr->nonce)); + } + if (ret) + return ret; + + /* + * Make sure we have the osdmap that includes the blocklist + * entry. This is needed to ensure that the OSDs pick up the + * new blocklist before processing any future requests from + * this client. + */ + return ceph_wait_for_latest_osdmap(monc->client, 0); +} EXPORT_SYMBOL(ceph_monc_blocklist_add); /* From 4bb926e83f1e2d80409cc8b45336f5f303d49315 Mon Sep 17 00:00:00 2001 From: Ilya Dryomov Date: Tue, 15 Sep 2020 21:11:30 +0200 Subject: [PATCH 185/243] ceph: add a note explaining session reject error string error_string key in the metadata map of MClientSession message is intended for humans, but unfortunately became part of the on-wire format with the introduction of recover_session=clean mode in commit 131d7eb4faa1 ("ceph: auto reconnect after blacklisted"). 
Signed-off-by: Ilya Dryomov --- fs/ceph/mds_client.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/fs/ceph/mds_client.c b/fs/ceph/mds_client.c index bb2d938a17ac..08f1c0c31dc2 100644 --- a/fs/ceph/mds_client.c +++ b/fs/ceph/mds_client.c @@ -3317,6 +3317,10 @@ static int __decode_session_metadata(void **p, void *end, *p += len; ceph_decode_32_safe(p, end, len, bad); ceph_decode_need(p, end, len, bad); + /* + * Match "blocklisted (blacklisted)" from newer MDSes, + * or "blacklisted" from older MDSes. + */ if (err_str && strnstr(*p, "blacklisted", len)) *blocklisted = true; *p += len; From 18d620f063b0780db2a86343dcf3a18e363626b9 Mon Sep 17 00:00:00 2001 From: Jeff Layton Date: Thu, 28 May 2020 13:56:54 -0400 Subject: [PATCH 186/243] ceph: break out writeback of incompatible snap context to separate function When dirtying a page, we have to flush incompatible contexts. Move the search for an incompatible context into a separate function, and fix up the caller to wait and retry if there is one. Signed-off-by: Jeff Layton Signed-off-by: Ilya Dryomov --- fs/ceph/addr.c | 114 +++++++++++++++++++++++++++++-------------------- 1 file changed, 68 insertions(+), 46 deletions(-) diff --git a/fs/ceph/addr.c b/fs/ceph/addr.c index 7b1f3dad576f..f8b478237ea8 100644 --- a/fs/ceph/addr.c +++ b/fs/ceph/addr.c @@ -1298,6 +1298,62 @@ static int context_is_writeable_or_written(struct inode *inode, return ret; } +/** + * ceph_find_incompatible - find an incompatible context and return it + * @inode: inode associated with page + * @page: page being dirtied + * + * We are only allowed to write into/dirty a page if the page is + * clean, or already dirty within the same snap context. Returns a + * conflicting context if there is one, NULL if there isn't, or a + * negative error code on other errors. + * + * Must be called with page lock held. 
+ */ +static struct ceph_snap_context * +ceph_find_incompatible(struct inode *inode, struct page *page) +{ + struct ceph_fs_client *fsc = ceph_inode_to_client(inode); + struct ceph_inode_info *ci = ceph_inode(inode); + + if (READ_ONCE(fsc->mount_state) == CEPH_MOUNT_SHUTDOWN) { + dout(" page %p forced umount\n", page); + return ERR_PTR(-EIO); + } + + for (;;) { + struct ceph_snap_context *snapc, *oldest; + + wait_on_page_writeback(page); + + snapc = page_snap_context(page); + if (!snapc || snapc == ci->i_head_snapc) + break; + + /* + * this page is already dirty in another (older) snap + * context! is it writeable now? + */ + oldest = get_oldest_context(inode, NULL, NULL); + if (snapc->seq > oldest->seq) { + /* not writeable -- return it for the caller to deal with */ + ceph_put_snap_context(oldest); + dout(" page %p snapc %p not current or oldest\n", page, snapc); + return ceph_get_snap_context(snapc); + } + ceph_put_snap_context(oldest); + + /* yay, writeable, do it now (without dropping page lock) */ + dout(" page %p snapc %p not current, but oldest\n", page, snapc); + if (clear_page_dirty_for_io(page)) { + int r = writepage_nounlock(page, NULL); + if (r < 0) + return ERR_PTR(r); + } + } + return NULL; +} + /* * We are only allowed to write into/dirty the page if the page is * clean, or already dirty within the same snap context. 
@@ -1311,61 +1367,27 @@ static int ceph_update_writeable_page(struct file *file, struct page *page) { struct inode *inode = file_inode(file); - struct ceph_fs_client *fsc = ceph_inode_to_client(inode); struct ceph_inode_info *ci = ceph_inode(inode); + struct ceph_snap_context *snapc; loff_t page_off = pos & PAGE_MASK; int pos_in_page = pos & ~PAGE_MASK; int end_in_page = pos_in_page + len; loff_t i_size; int r; - struct ceph_snap_context *snapc, *oldest; - - if (READ_ONCE(fsc->mount_state) == CEPH_MOUNT_SHUTDOWN) { - dout(" page %p forced umount\n", page); - unlock_page(page); - return -EIO; - } retry_locked: - /* writepages currently holds page lock, but if we change that later, */ - wait_on_page_writeback(page); - - snapc = page_snap_context(page); - if (snapc && snapc != ci->i_head_snapc) { - /* - * this page is already dirty in another (older) snap - * context! is it writeable now? - */ - oldest = get_oldest_context(inode, NULL, NULL); - if (snapc->seq > oldest->seq) { - ceph_put_snap_context(oldest); - dout(" page %p snapc %p not current or oldest\n", - page, snapc); - /* - * queue for writeback, and wait for snapc to - * be writeable or written - */ - snapc = ceph_get_snap_context(snapc); - unlock_page(page); - ceph_queue_writeback(inode); - r = wait_event_killable(ci->i_cap_wq, - context_is_writeable_or_written(inode, snapc)); - ceph_put_snap_context(snapc); - if (r == -ERESTARTSYS) - return r; - return -EAGAIN; - } - ceph_put_snap_context(oldest); - - /* yay, writeable, do it now (without dropping page lock) */ - dout(" page %p snapc %p not current, but oldest\n", - page, snapc); - if (!clear_page_dirty_for_io(page)) - goto retry_locked; - r = writepage_nounlock(page, NULL); - if (r < 0) + snapc = ceph_find_incompatible(inode, page); + if (snapc) { + if (IS_ERR(snapc)) { + r = PTR_ERR(snapc); goto fail_unlock; - goto retry_locked; + } + unlock_page(page); + ceph_queue_writeback(inode); + r = wait_event_killable(ci->i_cap_wq, + 
context_is_writeable_or_written(inode, snapc)); + ceph_put_snap_context(snapc); + return -EAGAIN; } if (PageUptodate(page)) { From d45156bf46c082d8164d93ca0dec9e7ac808dbdc Mon Sep 17 00:00:00 2001 From: Jeff Layton Date: Thu, 28 May 2020 14:59:49 -0400 Subject: [PATCH 187/243] ceph: don't call ceph_update_writeable_page from page_mkwrite page_mkwrite should only be called with Uptodate pages, so we should only need to flush incompatible snap contexts. Signed-off-by: Jeff Layton Signed-off-by: Ilya Dryomov --- fs/ceph/addr.c | 27 +++++++++++++++++++++------ 1 file changed, 21 insertions(+), 6 deletions(-) diff --git a/fs/ceph/addr.c b/fs/ceph/addr.c index f8b478237ea8..c2c23b468d13 100644 --- a/fs/ceph/addr.c +++ b/fs/ceph/addr.c @@ -1300,7 +1300,6 @@ static int context_is_writeable_or_written(struct inode *inode, /** * ceph_find_incompatible - find an incompatible context and return it - * @inode: inode associated with page * @page: page being dirtied * * We are only allowed to write into/dirty a page if the page is @@ -1311,8 +1310,9 @@ static int context_is_writeable_or_written(struct inode *inode, * Must be called with page lock held. 
*/ static struct ceph_snap_context * -ceph_find_incompatible(struct inode *inode, struct page *page) +ceph_find_incompatible(struct page *page) { + struct inode *inode = page->mapping->host; struct ceph_fs_client *fsc = ceph_inode_to_client(inode); struct ceph_inode_info *ci = ceph_inode(inode); @@ -1376,7 +1376,7 @@ static int ceph_update_writeable_page(struct file *file, int r; retry_locked: - snapc = ceph_find_incompatible(inode, page); + snapc = ceph_find_incompatible(page); if (snapc) { if (IS_ERR(snapc)) { r = PTR_ERR(snapc); @@ -1689,6 +1689,8 @@ static vm_fault_t ceph_page_mkwrite(struct vm_fault *vmf) inode_inc_iversion_raw(inode); do { + struct ceph_snap_context *snapc; + lock_page(page); if (page_mkwrite_check_truncate(page, inode) < 0) { @@ -1697,13 +1699,26 @@ static vm_fault_t ceph_page_mkwrite(struct vm_fault *vmf) break; } - err = ceph_update_writeable_page(vma->vm_file, off, len, page); - if (err >= 0) { + snapc = ceph_find_incompatible(page); + if (!snapc) { /* success. we'll keep the page locked. 
*/ set_page_dirty(page); ret = VM_FAULT_LOCKED; + break; } - } while (err == -EAGAIN); + + unlock_page(page); + + if (IS_ERR(snapc)) { + ret = VM_FAULT_SIGBUS; + break; + } + + ceph_queue_writeback(inode); + err = wait_event_killable(ci->i_cap_wq, + context_is_writeable_or_written(inode, snapc)); + ceph_put_snap_context(snapc); + } while (err == 0); if (ret == VM_FAULT_LOCKED || ci->i_inline_version != CEPH_INLINE_NONE) { From 9b4862ecaec5487ba9b192cc70d6fa0c7aef373a Mon Sep 17 00:00:00 2001 From: Jeff Layton Date: Thu, 14 May 2020 12:05:45 -0400 Subject: [PATCH 188/243] ceph: fold ceph_sync_readpages into ceph_readpage Signed-off-by: Jeff Layton Signed-off-by: Ilya Dryomov --- fs/ceph/addr.c | 78 ++++++++++++++++---------------------------------- 1 file changed, 25 insertions(+), 53 deletions(-) diff --git a/fs/ceph/addr.c b/fs/ceph/addr.c index c2c23b468d13..5493a5205a5f 100644 --- a/fs/ceph/addr.c +++ b/fs/ceph/addr.c @@ -182,58 +182,15 @@ static int ceph_releasepage(struct page *page, gfp_t g) return !PagePrivate(page); } -/* - * Read some contiguous pages. If we cross a stripe boundary, shorten - * *plen. Return number of bytes read, or error. 
- */ -static int ceph_sync_readpages(struct ceph_fs_client *fsc, - struct ceph_vino vino, - struct ceph_file_layout *layout, - u64 off, u64 *plen, - u32 truncate_seq, u64 truncate_size, - struct page **pages, int num_pages, - int page_align) -{ - struct ceph_osd_client *osdc = &fsc->client->osdc; - struct ceph_osd_request *req; - int rc = 0; - - dout("readpages on ino %llx.%llx on %llu~%llu\n", vino.ino, - vino.snap, off, *plen); - req = ceph_osdc_new_request(osdc, layout, vino, off, plen, 0, 1, - CEPH_OSD_OP_READ, CEPH_OSD_FLAG_READ, - NULL, truncate_seq, truncate_size, - false); - if (IS_ERR(req)) - return PTR_ERR(req); - - /* it may be a short read due to an object boundary */ - osd_req_op_extent_osd_data_pages(req, 0, - pages, *plen, page_align, false, false); - - dout("readpages final extent is %llu~%llu (%llu bytes align %d)\n", - off, *plen, *plen, page_align); - - rc = ceph_osdc_start_request(osdc, req, false); - if (!rc) - rc = ceph_osdc_wait_request(osdc, req); - - ceph_update_read_latency(&fsc->mdsc->metric, req->r_start_latency, - req->r_end_latency, rc); - - ceph_osdc_put_request(req); - dout("readpages result %d\n", rc); - return rc; -} - -/* - * read a single page, without unlocking it. - */ +/* read a single page, without unlocking it. 
*/ static int ceph_do_readpage(struct file *filp, struct page *page) { struct inode *inode = file_inode(filp); struct ceph_inode_info *ci = ceph_inode(inode); struct ceph_fs_client *fsc = ceph_inode_to_client(inode); + struct ceph_osd_client *osdc = &fsc->client->osdc; + struct ceph_osd_request *req; + struct ceph_vino vino = ceph_vino(inode); int err = 0; u64 off = page_offset(page); u64 len = PAGE_SIZE; @@ -260,12 +217,27 @@ static int ceph_do_readpage(struct file *filp, struct page *page) if (err == 0) return -EINPROGRESS; - dout("readpage inode %p file %p page %p index %lu\n", - inode, filp, page, page->index); - err = ceph_sync_readpages(fsc, ceph_vino(inode), - &ci->i_layout, off, &len, - ci->i_truncate_seq, ci->i_truncate_size, - &page, 1, 0); + dout("readpage ino %llx.%llx file %p off %llu len %llu page %p index %lu\n", + vino.ino, vino.snap, filp, off, len, page, page->index); + req = ceph_osdc_new_request(osdc, &ci->i_layout, vino, off, &len, 0, 1, + CEPH_OSD_OP_READ, CEPH_OSD_FLAG_READ, NULL, + ci->i_truncate_seq, ci->i_truncate_size, + false); + if (IS_ERR(req)) + return PTR_ERR(req); + + osd_req_op_extent_osd_data_pages(req, 0, &page, len, 0, false, false); + + err = ceph_osdc_start_request(osdc, req, false); + if (!err) + err = ceph_osdc_wait_request(osdc, req); + + ceph_update_read_latency(&fsc->mdsc->metric, req->r_start_latency, + req->r_end_latency, err); + + ceph_osdc_put_request(req); + dout("readpage result %d\n", err); + if (err == -ENOENT) err = 0; if (err < 0) { From 6390987f2f4c5dcacb3ef7b9cb2ef5b8fdca3e10 Mon Sep 17 00:00:00 2001 From: Jeff Layton Date: Tue, 14 Jul 2020 14:37:15 -0400 Subject: [PATCH 189/243] ceph: fold ceph_sync_writepages into writepage_nounlock Signed-off-by: Jeff Layton Signed-off-by: Ilya Dryomov --- fs/ceph/addr.c | 93 +++++++++++++++++++------------------------------- 1 file changed, 35 insertions(+), 58 deletions(-) diff --git a/fs/ceph/addr.c b/fs/ceph/addr.c index 5493a5205a5f..72cbaac68256 100644 --- 
a/fs/ceph/addr.c +++ b/fs/ceph/addr.c @@ -591,50 +591,6 @@ static u64 get_writepages_data_length(struct inode *inode, return end > start ? end - start : 0; } -/* - * do a synchronous write on N pages - */ -static int ceph_sync_writepages(struct ceph_fs_client *fsc, - struct ceph_vino vino, - struct ceph_file_layout *layout, - struct ceph_snap_context *snapc, - u64 off, u64 len, - u32 truncate_seq, u64 truncate_size, - struct timespec64 *mtime, - struct page **pages, int num_pages) -{ - struct ceph_osd_client *osdc = &fsc->client->osdc; - struct ceph_osd_request *req; - int rc = 0; - int page_align = off & ~PAGE_MASK; - - req = ceph_osdc_new_request(osdc, layout, vino, off, &len, 0, 1, - CEPH_OSD_OP_WRITE, CEPH_OSD_FLAG_WRITE, - snapc, truncate_seq, truncate_size, - true); - if (IS_ERR(req)) - return PTR_ERR(req); - - /* it may be a short write due to an object boundary */ - osd_req_op_extent_osd_data_pages(req, 0, pages, len, page_align, - false, false); - dout("writepages %llu~%llu (%llu bytes)\n", off, len, len); - - req->r_mtime = *mtime; - rc = ceph_osdc_start_request(osdc, req, true); - if (!rc) - rc = ceph_osdc_wait_request(osdc, req); - - ceph_update_write_latency(&fsc->mdsc->metric, req->r_start_latency, - req->r_end_latency, rc); - - ceph_osdc_put_request(req); - if (rc == 0) - rc = len; - dout("writepages result %d\n", rc); - return rc; -} - /* * Write a single page, but leave the page locked. 
* @@ -643,20 +599,19 @@ static int ceph_sync_writepages(struct ceph_fs_client *fsc, */ static int writepage_nounlock(struct page *page, struct writeback_control *wbc) { - struct inode *inode; - struct ceph_inode_info *ci; - struct ceph_fs_client *fsc; + struct inode *inode = page->mapping->host; + struct ceph_inode_info *ci = ceph_inode(inode); + struct ceph_fs_client *fsc = ceph_inode_to_client(inode); struct ceph_snap_context *snapc, *oldest; loff_t page_off = page_offset(page); - int err, len = PAGE_SIZE; + int err; + loff_t len = PAGE_SIZE; struct ceph_writeback_ctl ceph_wbc; + struct ceph_osd_client *osdc = &fsc->client->osdc; + struct ceph_osd_request *req; dout("writepage %p idx %lu\n", page, page->index); - inode = page->mapping->host; - ci = ceph_inode(inode); - fsc = ceph_inode_to_client(inode); - /* verify this is a writeable snap context */ snapc = page_snap_context(page); if (!snapc) { @@ -685,7 +640,7 @@ static int writepage_nounlock(struct page *page, struct writeback_control *wbc) if (ceph_wbc.i_size < page_off + len) len = ceph_wbc.i_size - page_off; - dout("writepage %p page %p index %lu on %llu~%u snapc %p seq %lld\n", + dout("writepage %p page %p index %lu on %llu~%llu snapc %p seq %lld\n", inode, page, page->index, page_off, len, snapc, snapc->seq); if (atomic_long_inc_return(&fsc->writeback_count) > @@ -693,11 +648,33 @@ static int writepage_nounlock(struct page *page, struct writeback_control *wbc) set_bdi_congested(inode_to_bdi(inode), BLK_RW_ASYNC); set_page_writeback(page); - err = ceph_sync_writepages(fsc, ceph_vino(inode), - &ci->i_layout, snapc, page_off, len, - ceph_wbc.truncate_seq, - ceph_wbc.truncate_size, - &inode->i_mtime, &page, 1); + req = ceph_osdc_new_request(osdc, &ci->i_layout, ceph_vino(inode), page_off, &len, 0, 1, + CEPH_OSD_OP_WRITE, CEPH_OSD_FLAG_WRITE, snapc, + ceph_wbc.truncate_seq, ceph_wbc.truncate_size, + true); + if (IS_ERR(req)) { + redirty_page_for_writepage(wbc, page); + end_page_writeback(page); + return 
PTR_ERR(req); + } + + /* it may be a short write due to an object boundary */ + WARN_ON_ONCE(len > PAGE_SIZE); + osd_req_op_extent_osd_data_pages(req, 0, &page, len, 0, false, false); + dout("writepage %llu~%llu (%llu bytes)\n", page_off, len, len); + + req->r_mtime = inode->i_mtime; + err = ceph_osdc_start_request(osdc, req, true); + if (!err) + err = ceph_osdc_wait_request(osdc, req); + + ceph_update_write_latency(&fsc->mdsc->metric, req->r_start_latency, + req->r_end_latency, err); + + ceph_osdc_put_request(req); + if (err == 0) + err = len; + if (err < 0) { struct writeback_control tmp_wbc; if (!wbc) From 1cc1699070bd8f42111b92e5c8018bd7d52f0003 Mon Sep 17 00:00:00 2001 From: Jeff Layton Date: Fri, 5 Jun 2020 09:05:17 -0400 Subject: [PATCH 190/243] ceph: fold ceph_update_writeable_page into ceph_write_begin ...and reorganize the loop for better clarity. Signed-off-by: Jeff Layton Signed-off-by: Ilya Dryomov --- fs/ceph/addr.c | 152 +++++++++++++++++++++---------------------------- 1 file changed, 66 insertions(+), 86 deletions(-) diff --git a/fs/ceph/addr.c b/fs/ceph/addr.c index 72cbaac68256..97827f68a3e7 100644 --- a/fs/ceph/addr.c +++ b/fs/ceph/addr.c @@ -1303,78 +1303,6 @@ ceph_find_incompatible(struct page *page) return NULL; } -/* - * We are only allowed to write into/dirty the page if the page is - * clean, or already dirty within the same snap context. - * - * called with page locked. - * return success with page locked, - * or any failure (incl -EAGAIN) with page unlocked. 
- */ -static int ceph_update_writeable_page(struct file *file, - loff_t pos, unsigned len, - struct page *page) -{ - struct inode *inode = file_inode(file); - struct ceph_inode_info *ci = ceph_inode(inode); - struct ceph_snap_context *snapc; - loff_t page_off = pos & PAGE_MASK; - int pos_in_page = pos & ~PAGE_MASK; - int end_in_page = pos_in_page + len; - loff_t i_size; - int r; - -retry_locked: - snapc = ceph_find_incompatible(page); - if (snapc) { - if (IS_ERR(snapc)) { - r = PTR_ERR(snapc); - goto fail_unlock; - } - unlock_page(page); - ceph_queue_writeback(inode); - r = wait_event_killable(ci->i_cap_wq, - context_is_writeable_or_written(inode, snapc)); - ceph_put_snap_context(snapc); - return -EAGAIN; - } - - if (PageUptodate(page)) { - dout(" page %p already uptodate\n", page); - return 0; - } - - /* full page? */ - if (pos_in_page == 0 && len == PAGE_SIZE) - return 0; - - /* past end of file? */ - i_size = i_size_read(inode); - - if (page_off >= i_size || - (pos_in_page == 0 && (pos+len) >= i_size && - end_in_page - pos_in_page != PAGE_SIZE)) { - dout(" zeroing %p 0 - %d and %d - %d\n", - page, pos_in_page, end_in_page, (int)PAGE_SIZE); - zero_user_segments(page, - 0, pos_in_page, - end_in_page, PAGE_SIZE); - return 0; - } - - /* we need to read it. */ - r = ceph_do_readpage(file, page); - if (r < 0) { - if (r == -EINPROGRESS) - return -EAGAIN; - goto fail_unlock; - } - goto retry_locked; -fail_unlock: - unlock_page(page); - return r; -} - /* * We are only allowed to write into/dirty the page if the page is * clean, or already dirty within the same snap context. 
@@ -1384,26 +1312,78 @@ static int ceph_write_begin(struct file *file, struct address_space *mapping, struct page **pagep, void **fsdata) { struct inode *inode = file_inode(file); - struct page *page; + struct ceph_inode_info *ci = ceph_inode(inode); + struct ceph_snap_context *snapc; + struct page *page = NULL; pgoff_t index = pos >> PAGE_SHIFT; - int r; + int pos_in_page = pos & ~PAGE_MASK; + int r = 0; - do { - /* get a page */ + dout("write_begin file %p inode %p page %p %d~%d\n", file, inode, page, (int)pos, (int)len); + + for (;;) { page = grab_cache_page_write_begin(mapping, index, 0); - if (!page) - return -ENOMEM; + if (!page) { + r = -ENOMEM; + break; + } - dout("write_begin file %p inode %p page %p %d~%d\n", file, - inode, page, (int)pos, (int)len); - - r = ceph_update_writeable_page(file, pos, len, page); - if (r < 0) + snapc = ceph_find_incompatible(page); + if (snapc) { + if (IS_ERR(snapc)) { + r = PTR_ERR(snapc); + break; + } + unlock_page(page); put_page(page); - else - *pagep = page; - } while (r == -EAGAIN); + page = NULL; + ceph_queue_writeback(inode); + r = wait_event_killable(ci->i_cap_wq, + context_is_writeable_or_written(inode, snapc)); + ceph_put_snap_context(snapc); + if (r != 0) + break; + continue; + } + if (PageUptodate(page)) { + dout(" page %p already uptodate\n", page); + break; + } + + /* + * In some cases we don't need to read at all: + * - full page write + * - write that lies completely beyond EOF + * - write that covers the page from start to EOF or beyond it + */ + if ((pos_in_page == 0 && len == PAGE_SIZE) || + (pos >= i_size_read(inode)) || + (pos_in_page == 0 && (pos + len) >= i_size_read(inode))) { + zero_user_segments(page, 0, pos_in_page, + pos_in_page + len, PAGE_SIZE); + break; + } + + /* + * We need to read it. If we get back -EINPROGRESS, then the page was + * handed off to fscache and it will be unlocked when the read completes. + * Refind the page in that case so we can reacquire the page lock.
Otherwise + * we got a hard error or the read was completed synchronously. + */ + r = ceph_do_readpage(file, page); + if (r != -EINPROGRESS) + break; + } + + if (r < 0) { + if (page) { + unlock_page(page); + put_page(page); + } + } else { + *pagep = page; + } return r; } From f6fbdcd997f5d5d0658204ca42abdeced56fd4e5 Mon Sep 17 00:00:00 2001 From: Ilya Dryomov Date: Thu, 17 Sep 2020 18:45:44 +0200 Subject: [PATCH 191/243] ceph: mark ceph_fmt_xattr() as printf-like for better type checking Signed-off-by: Ilya Dryomov --- fs/ceph/xattr.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/fs/ceph/xattr.c b/fs/ceph/xattr.c index 3a733ac33d9b..197cb1234341 100644 --- a/fs/ceph/xattr.c +++ b/fs/ceph/xattr.c @@ -116,7 +116,8 @@ static ssize_t ceph_vxattrcb_layout(struct ceph_inode_info *ci, char *val, * NULL terminates however, so call it on a temporary buffer and then memcpy * the result into place. */ -static int ceph_fmt_xattr(char *val, size_t size, const char *fmt, ...) +static __printf(3, 4) +int ceph_fmt_xattr(char *val, size_t size, const char *fmt, ...) { int ret; va_list args; From 7edf1ec5b249cb7f9b85b7257f638026fd1a5d2b Mon Sep 17 00:00:00 2001 From: Jeff Layton Date: Thu, 1 Oct 2020 13:40:49 -0400 Subject: [PATCH 192/243] ceph: don't SetPageError on readpage errors PageError really only has meaning within a particular subsystem. Nothing looks at this bit in the core kernel code, and ceph itself doesn't care about it. Don't bother setting the PageError bit on error. 
Signed-off-by: Jeff Layton Reviewed-by: Matthew Wilcox (Oracle) Signed-off-by: Ilya Dryomov --- fs/ceph/addr.c | 1 - 1 file changed, 1 deletion(-) diff --git a/fs/ceph/addr.c b/fs/ceph/addr.c index 97827f68a3e7..137c0a5a2a0d 100644 --- a/fs/ceph/addr.c +++ b/fs/ceph/addr.c @@ -241,7 +241,6 @@ static int ceph_do_readpage(struct file *filp, struct page *page) if (err == -ENOENT) err = 0; if (err < 0) { - SetPageError(page); ceph_fscache_readpage_cancel(inode, page); if (err == -EBLOCKLISTED) fsc->blocklisted = true; From c403c3a2fbe24d4ed33e10cabad048583ebd4edf Mon Sep 17 00:00:00 2001 From: "Matthew Wilcox (Oracle)" Date: Sun, 4 Oct 2020 19:04:24 +0100 Subject: [PATCH 193/243] ceph: promote to unsigned long long before shifting On 32-bit systems, this shift will overflow for files larger than 4GB. Cc: stable@vger.kernel.org Fixes: 61f68816211e ("ceph: check caps in filemap_fault and page_mkwrite") Signed-off-by: Matthew Wilcox (Oracle) Reviewed-by: Jeff Layton Signed-off-by: Ilya Dryomov --- fs/ceph/addr.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fs/ceph/addr.c b/fs/ceph/addr.c index 137c0a5a2a0d..35c83f65475b 100644 --- a/fs/ceph/addr.c +++ b/fs/ceph/addr.c @@ -1471,7 +1471,7 @@ static vm_fault_t ceph_filemap_fault(struct vm_fault *vmf) struct ceph_inode_info *ci = ceph_inode(inode); struct ceph_file_info *fi = vma->vm_file->private_data; struct page *pinned_page = NULL; - loff_t off = vmf->pgoff << PAGE_SHIFT; + loff_t off = (loff_t)vmf->pgoff << PAGE_SHIFT; int want, got, err; sigset_t oldset; vm_fault_t ret = VM_FAULT_SIGBUS; From 5231198089afba422dd2c46fef515291fca53bdd Mon Sep 17 00:00:00 2001 From: Jeff Layton Date: Mon, 30 Mar 2020 13:07:23 -0400 Subject: [PATCH 194/243] ceph: drop separate mdsc argument from __send_cap We can get it from the session if we need it. 
Signed-off-by: Jeff Layton Signed-off-by: Ilya Dryomov --- fs/ceph/caps.c | 11 +++++------ 1 file changed, 5 insertions(+), 6 deletions(-) diff --git a/fs/ceph/caps.c b/fs/ceph/caps.c index c7e69547628e..32937bec4a25 100644 --- a/fs/ceph/caps.c +++ b/fs/ceph/caps.c @@ -1454,8 +1454,7 @@ static void __prep_cap(struct cap_msg_args *arg, struct ceph_cap *cap, * * Caller should hold snap_rwsem (read), s_mutex. */ -static void __send_cap(struct ceph_mds_client *mdsc, struct cap_msg_args *arg, - struct ceph_inode_info *ci) +static void __send_cap(struct cap_msg_args *arg, struct ceph_inode_info *ci) { struct inode *inode = &ci->vfs_inode; int ret; @@ -1467,7 +1466,7 @@ static void __send_cap(struct ceph_mds_client *mdsc, struct cap_msg_args *arg, ceph_vinop(inode), ceph_cap_string(arg->dirty), arg->flush_tid); spin_lock(&ci->i_ceph_lock); - __cap_delay_requeue(mdsc, ci); + __cap_delay_requeue(arg->session->s_mdsc, ci); spin_unlock(&ci->i_ceph_lock); } @@ -2147,7 +2146,7 @@ void ceph_check_caps(struct ceph_inode_info *ci, int flags, want, retain, flushing, flush_tid, oldest_flush_tid); spin_unlock(&ci->i_ceph_lock); - __send_cap(mdsc, &arg, ci); + __send_cap(&arg, ci); goto retry; /* retake i_ceph_lock and restart our cap scan. 
*/ } @@ -2221,7 +2220,7 @@ static int try_flush_caps(struct inode *inode, u64 *ptid) flushing, flush_tid, oldest_flush_tid); spin_unlock(&ci->i_ceph_lock); - __send_cap(mdsc, &arg, ci); + __send_cap(&arg, ci); } else { if (!list_empty(&ci->i_cap_flush_list)) { struct ceph_cap_flush *cf = @@ -2435,7 +2434,7 @@ static void __kick_flushing_caps(struct ceph_mds_client *mdsc, (cap->issued | cap->implemented), cf->caps, cf->tid, oldest_flush_tid); spin_unlock(&ci->i_ceph_lock); - __send_cap(mdsc, &arg, ci); + __send_cap(&arg, ci); } else { struct ceph_cap_snap *capsnap = container_of(cf, struct ceph_cap_snap, From 16d68903f56ae277446cc2d24ab18db835363eda Mon Sep 17 00:00:00 2001 From: Jeff Layton Date: Mon, 30 Mar 2020 07:20:27 -0400 Subject: [PATCH 195/243] ceph: break up send_cap_msg Push the allocation of the msg and the send into the caller. Rename the function to encode_cap_msg and make it void return. Signed-off-by: Jeff Layton Signed-off-by: Ilya Dryomov --- fs/ceph/caps.c | 60 +++++++++++++++++++++++--------------------------- 1 file changed, 28 insertions(+), 32 deletions(-) diff --git a/fs/ceph/caps.c b/fs/ceph/caps.c index 32937bec4a25..0874ac445cdc 100644 --- a/fs/ceph/caps.c +++ b/fs/ceph/caps.c @@ -1222,36 +1222,27 @@ struct cap_msg_args { }; /* - * Build and send a cap message to the given MDS. - * - * Caller should be holding s_mutex. 
+ * cap struct size + flock buffer size + inline version + inline data size + + * osd_epoch_barrier + oldest_flush_tid */ -static int send_cap_msg(struct cap_msg_args *arg) +#define CAP_MSG_SIZE (sizeof(struct ceph_mds_caps) + \ + 4 + 8 + 4 + 4 + 8 + 4 + 4 + 4 + 8 + 8 + 4) + +/* Marshal up the cap msg to the MDS */ +static void encode_cap_msg(struct ceph_msg *msg, struct cap_msg_args *arg) { struct ceph_mds_caps *fc; - struct ceph_msg *msg; void *p; - size_t extra_len; struct ceph_osd_client *osdc = &arg->session->s_mdsc->fsc->client->osdc; - dout("send_cap_msg %s %llx %llx caps %s wanted %s dirty %s" - " seq %u/%u tid %llu/%llu mseq %u follows %lld size %llu/%llu" - " xattr_ver %llu xattr_len %d\n", ceph_cap_op_name(arg->op), - arg->cid, arg->ino, ceph_cap_string(arg->caps), - ceph_cap_string(arg->wanted), ceph_cap_string(arg->dirty), - arg->seq, arg->issue_seq, arg->flush_tid, arg->oldest_flush_tid, - arg->mseq, arg->follows, arg->size, arg->max_size, - arg->xattr_version, + dout("%s %s %llx %llx caps %s wanted %s dirty %s seq %u/%u tid %llu/%llu mseq %u follows %lld size %llu/%llu xattr_ver %llu xattr_len %d\n", + __func__, ceph_cap_op_name(arg->op), arg->cid, arg->ino, + ceph_cap_string(arg->caps), ceph_cap_string(arg->wanted), + ceph_cap_string(arg->dirty), arg->seq, arg->issue_seq, + arg->flush_tid, arg->oldest_flush_tid, arg->mseq, arg->follows, + arg->size, arg->max_size, arg->xattr_version, arg->xattr_buf ? 
(int)arg->xattr_buf->vec.iov_len : 0); - /* flock buffer size + inline version + inline data size + - * osd_epoch_barrier + oldest_flush_tid */ - extra_len = 4 + 8 + 4 + 4 + 8 + 4 + 4 + 4 + 8 + 8 + 4; - msg = ceph_msg_new(CEPH_MSG_CLIENT_CAPS, sizeof(*fc) + extra_len, - GFP_NOFS, false); - if (!msg) - return -ENOMEM; - msg->hdr.version = cpu_to_le16(10); msg->hdr.tid = cpu_to_le64(arg->flush_tid); @@ -1323,9 +1314,6 @@ static int send_cap_msg(struct cap_msg_args *arg) /* Advisory flags (version 10) */ ceph_encode_32(&p, arg->flags); - - ceph_con_send(&arg->session->s_con, msg); - return 0; } /* @@ -1456,22 +1444,23 @@ static void __prep_cap(struct cap_msg_args *arg, struct ceph_cap *cap, */ static void __send_cap(struct cap_msg_args *arg, struct ceph_inode_info *ci) { + struct ceph_msg *msg; struct inode *inode = &ci->vfs_inode; - int ret; - ret = send_cap_msg(arg); - if (ret < 0) { - pr_err("error sending cap msg, ino (%llx.%llx) " - "flushing %s tid %llu, requeue\n", + msg = ceph_msg_new(CEPH_MSG_CLIENT_CAPS, CAP_MSG_SIZE, GFP_NOFS, false); + if (!msg) { + pr_err("error allocating cap msg: ino (%llx.%llx) flushing %s tid %llu, requeuing cap.\n", ceph_vinop(inode), ceph_cap_string(arg->dirty), arg->flush_tid); spin_lock(&ci->i_ceph_lock); __cap_delay_requeue(arg->session->s_mdsc, ci); spin_unlock(&ci->i_ceph_lock); + return; } + encode_cap_msg(msg, arg); + ceph_con_send(&arg->session->s_con, msg); ceph_buffer_put(arg->old_xattr_buf); - if (arg->wake) wake_up_all(&ci->i_cap_wq); } @@ -1482,6 +1471,11 @@ static inline int __send_flush_snap(struct inode *inode, u32 mseq, u64 oldest_flush_tid) { struct cap_msg_args arg; + struct ceph_msg *msg; + + msg = ceph_msg_new(CEPH_MSG_CLIENT_CAPS, CAP_MSG_SIZE, GFP_NOFS, false); + if (!msg) + return -ENOMEM; arg.session = session; arg.ino = ceph_vino(inode).ino; @@ -1520,7 +1514,9 @@ static inline int __send_flush_snap(struct inode *inode, arg.flags = 0; arg.wake = false; - return send_cap_msg(&arg); + encode_cap_msg(msg, 
&arg); + ceph_con_send(&arg.session->s_con, msg); + return 0; } /* From c74d79af9002a9cb179f7950a2c266f0de964abe Mon Sep 17 00:00:00 2001 From: Jeff Layton Date: Tue, 6 Oct 2020 12:24:19 -0400 Subject: [PATCH 196/243] ceph: comment cleanups and clarifications Signed-off-by: Jeff Layton Signed-off-by: Ilya Dryomov --- fs/ceph/caps.c | 16 ++++++++++++++++ fs/ceph/mds_client.h | 2 +- fs/ceph/super.h | 3 ++- 3 files changed, 19 insertions(+), 2 deletions(-) diff --git a/fs/ceph/caps.c b/fs/ceph/caps.c index 0874ac445cdc..5027bbdca419 100644 --- a/fs/ceph/caps.c +++ b/fs/ceph/caps.c @@ -1922,12 +1922,24 @@ void ceph_check_caps(struct ceph_inode_info *ci, int flags, retry: spin_lock(&ci->i_ceph_lock); retry_locked: + /* Caps wanted by virtue of active open files. */ file_wanted = __ceph_caps_file_wanted(ci); + + /* Caps which have active references against them */ used = __ceph_caps_used(ci); + + /* + * "issued" represents the current caps that the MDS wants us to have. + * "implemented" is the set that we have been granted, and includes the + * ones that have not yet been returned to the MDS (the "revoking" set, + * usually because they have outstanding references). + */ issued = __ceph_caps_issued(ci, &implemented); revoking = implemented & ~issued; want = file_wanted; + + /* The ones we currently want to retain (may be adjusted below) */ retain = file_wanted | used | CEPH_CAP_PIN; if (!mdsc->stopping && inode->i_nlink > 0) { if (file_wanted) { @@ -2005,6 +2017,10 @@ void ceph_check_caps(struct ceph_inode_info *ci, int flags, /* NOTE: no side-effects allowed, until we take s_mutex */ + /* + * If we have an auth cap, we don't need to consider any + * overlapping caps as used. 
+ */ cap_used = used; if (ci->i_auth_cap && cap != ci->i_auth_cap) cap_used &= ~ci->i_auth_cap->issued; diff --git a/fs/ceph/mds_client.h b/fs/ceph/mds_client.h index 658800605bfb..cbf8af437140 100644 --- a/fs/ceph/mds_client.h +++ b/fs/ceph/mds_client.h @@ -393,7 +393,7 @@ struct ceph_mds_client { struct ceph_mds_session **sessions; /* NULL for mds if no session */ atomic_t num_sessions; - int max_sessions; /* len of s_mds_sessions */ + int max_sessions; /* len of sessions array */ int stopping; /* true if shutting down */ atomic64_t quotarealms_count; /* # realms with quota */ diff --git a/fs/ceph/super.h b/fs/ceph/super.h index 582694899130..482473e4cce1 100644 --- a/fs/ceph/super.h +++ b/fs/ceph/super.h @@ -160,7 +160,8 @@ struct ceph_cap { int issued; /* latest, from the mds */ int implemented; /* implemented superset of issued (for revocation) */ - int mds, mds_wanted; + int mds; /* mds index for this cap */ + int mds_wanted; /* caps wanted from this mds */ }; /* caps to release */ struct { From 5a5036c89f345769c0049f4af04cc6647c0df058 Mon Sep 17 00:00:00 2001 From: Ilya Dryomov Date: Fri, 2 Oct 2020 14:15:59 +0200 Subject: [PATCH 197/243] libceph: move a dout in queue_con_delay() The queued con->work can start executing (and therefore logging) before we get to this "con->work has been queued" message, making the logs confusing. Move it up, with the meaning of "con->work is about to be queued". 
Signed-off-by: Ilya Dryomov --- net/ceph/messenger.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/ceph/messenger.c b/net/ceph/messenger.c index d4d7a0e52491..8eabcdc2af56 100644 --- a/net/ceph/messenger.c +++ b/net/ceph/messenger.c @@ -2811,13 +2811,13 @@ static int queue_con_delay(struct ceph_connection *con, unsigned long delay) return -ENOENT; } + dout("%s %p %lu\n", __func__, con, delay); if (!queue_delayed_work(ceph_msgr_wq, &con->work, delay)) { dout("%s %p - already queued\n", __func__, con); con->ops->put(con); return -EBUSY; } - dout("%s %p %lu\n", __func__, con, delay); return 0; } From b07720d0bd1e7c2251642010efb6075dbee23bb8 Mon Sep 17 00:00:00 2001 From: Ilya Dryomov Date: Fri, 2 Oct 2020 14:38:08 +0200 Subject: [PATCH 198/243] libceph: fix ENTITY_NAME format suggestion Signed-off-by: Ilya Dryomov --- include/linux/ceph/messenger.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/include/linux/ceph/messenger.h b/include/linux/ceph/messenger.h index 76371aaae2d1..60b324efd1c4 100644 --- a/include/linux/ceph/messenger.h +++ b/include/linux/ceph/messenger.h @@ -54,7 +54,7 @@ struct ceph_connection_operations { int (*check_message_signature) (struct ceph_msg *msg); }; -/* use format string %s%d */ +/* use format string %s%lld */ #define ENTITY_NAME(n) ceph_entity_type_name((n).type), le64_to_cpu((n).num) struct ceph_messenger { From a9dfe31e5ce31022b8f063b30d2907872cbab447 Mon Sep 17 00:00:00 2001 From: Ilya Dryomov Date: Sat, 3 Oct 2020 11:52:15 +0200 Subject: [PATCH 199/243] libceph: format ceph_entity_addr nonces as unsigned Match the server side logs. 
Signed-off-by: Ilya Dryomov --- net/ceph/messenger.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/net/ceph/messenger.c b/net/ceph/messenger.c index 8eabcdc2af56..02a195e013b7 100644 --- a/net/ceph/messenger.c +++ b/net/ceph/messenger.c @@ -2016,11 +2016,11 @@ static int process_banner(struct ceph_connection *con) sizeof(con->peer_addr)) != 0 && !(addr_is_blank(&con->actual_peer_addr) && con->actual_peer_addr.nonce == con->peer_addr.nonce)) { - pr_warn("wrong peer, want %s/%d, got %s/%d\n", + pr_warn("wrong peer, want %s/%u, got %s/%u\n", ceph_pr_addr(&con->peer_addr), - (int)le32_to_cpu(con->peer_addr.nonce), + le32_to_cpu(con->peer_addr.nonce), ceph_pr_addr(&con->actual_peer_addr), - (int)le32_to_cpu(con->actual_peer_addr.nonce)); + le32_to_cpu(con->actual_peer_addr.nonce)); con->error_msg = "wrong peer at address"; return -1; } From 28e1581c3b4ea5f98530064a103c6217bedeea73 Mon Sep 17 00:00:00 2001 From: Ilya Dryomov Date: Wed, 7 Oct 2020 20:06:48 +0200 Subject: [PATCH 200/243] libceph: clear con->out_msg on Policy::stateful_server faults con->out_msg must be cleared on Policy::stateful_server (!CEPH_MSG_CONNECT_LOSSY) faults. Not doing so botches the reconnection attempt, because after writing the banner the messenger moves on to writing the data section of that message (either from where it got interrupted by the connection reset or from the beginning) instead of writing struct ceph_msg_connect. This results in a bizarre error message because the server sends CEPH_MSGR_TAG_BADPROTOVER but we think we wrote struct ceph_msg_connect: libceph: mds0 (1)172.21.15.45:6828 socket error on write ceph: mds0 reconnect start libceph: mds0 (1)172.21.15.45:6829 socket closed (con state OPEN) libceph: mds0 (1)172.21.15.45:6829 protocol version mismatch, my 32 != server's 32 libceph: mds0 (1)172.21.15.45:6829 protocol version mismatch AFAICT this bug goes back to the dawn of the kernel client. 
The reason it survived for so long is that only MDS sessions are stateful and only two MDS messages have a data section: CEPH_MSG_CLIENT_RECONNECT (always, but reconnecting is rare) and CEPH_MSG_CLIENT_REQUEST (only when xattrs are involved). The connection has to get reset precisely when such a message is being sent -- in this case it was the former. Cc: stable@vger.kernel.org Link: https://tracker.ceph.com/issues/47723 Signed-off-by: Ilya Dryomov Reviewed-by: Jeff Layton --- net/ceph/messenger.c | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/net/ceph/messenger.c b/net/ceph/messenger.c index 02a195e013b7..af0f1fa24937 100644 --- a/net/ceph/messenger.c +++ b/net/ceph/messenger.c @@ -2998,6 +2998,11 @@ static void con_fault(struct ceph_connection *con) ceph_msg_put(con->in_msg); con->in_msg = NULL; } + if (con->out_msg) { + BUG_ON(con->out_msg->con != con); + ceph_msg_put(con->out_msg); + con->out_msg = NULL; + } /* Requeue anything that hasn't been acked */ list_splice_init(&con->out_sent, &con->out_queue); From a2d24bcb97dc7b0be1cb891e60ae133bdf36c786 Mon Sep 17 00:00:00 2001 From: Scott Mayhew Date: Sat, 10 Oct 2020 10:03:12 -0400 Subject: [PATCH 201/243] nfs: add missing "posix" local_lock constant table definition "mount -o local_lock=posix..." was broken by the mount API conversion due to the missing constant.
Fixes: e38bb238ed8c ("NFS: Convert mount option parsing to use functionality from fs_parser.h") Signed-off-by: Scott Mayhew Signed-off-by: Anna Schumaker --- fs/nfs/fs_context.c | 1 + 1 file changed, 1 insertion(+) diff --git a/fs/nfs/fs_context.c b/fs/nfs/fs_context.c index 524812984e2d..009987e69020 100644 --- a/fs/nfs/fs_context.c +++ b/fs/nfs/fs_context.c @@ -94,6 +94,7 @@ enum { static const struct constant_table nfs_param_enums_local_lock[] = { { "all", Opt_local_lock_all }, { "flock", Opt_local_lock_flock }, + { "posix", Opt_local_lock_posix }, { "none", Opt_local_lock_none }, {} }; From f82cd2f0b5eb715b1a296e20b34da7d296b6e9a4 Mon Sep 17 00:00:00 2001 From: "Matthew Wilcox (Oracle)" Date: Tue, 18 Aug 2020 09:05:56 -0400 Subject: [PATCH 202/243] XArray: Add private interface for workingset node deletion Move the tricky bits of dealing with the XArray from the workingset code to the XArray. Make it clear in the documentation that this is a private interface, and only export it for the benefit of the test suite. Signed-off-by: Matthew Wilcox (Oracle) --- include/linux/xarray.h | 2 ++ lib/test_xarray.c | 7 +------ lib/xarray.c | 23 +++++++++++++++++++++++ mm/workingset.c | 13 ++----------- 4 files changed, 28 insertions(+), 17 deletions(-) diff --git a/include/linux/xarray.h b/include/linux/xarray.h index 6b336098fca7..29db4e16eb89 100644 --- a/include/linux/xarray.h +++ b/include/linux/xarray.h @@ -1286,6 +1286,8 @@ static inline bool xa_is_advanced(const void *entry) */ typedef void (*xa_update_node_t)(struct xa_node *node); +void xa_delete_node(struct xa_node *, xa_update_node_t); + /* * The xa_state is opaque to its users. It contains various different pieces * of state involved in the current operation on the XArray. 
It should be diff --git a/lib/test_xarray.c b/lib/test_xarray.c index 1122c4453c87..64ae07b1bcf4 100644 --- a/lib/test_xarray.c +++ b/lib/test_xarray.c @@ -1600,14 +1600,9 @@ static noinline void shadow_remove(struct xarray *xa) xa_lock(xa); while ((node = list_first_entry_or_null(&shadow_nodes, struct xa_node, private_list))) { - XA_STATE(xas, node->array, 0); XA_BUG_ON(xa, node->array != xa); list_del_init(&node->private_list); - xas.xa_node = xa_parent_locked(node->array, node); - xas.xa_offset = node->offset; - xas.xa_shift = node->shift + XA_CHUNK_SHIFT; - xas_set_update(&xas, test_update_node); - xas_store(&xas, NULL); + xa_delete_node(node, test_update_node); } xa_unlock(xa); } diff --git a/lib/xarray.c b/lib/xarray.c index e9e641d3c0c3..1fa5c5658e63 100644 --- a/lib/xarray.c +++ b/lib/xarray.c @@ -1973,6 +1973,29 @@ unsigned int xa_extract(struct xarray *xa, void **dst, unsigned long start, } EXPORT_SYMBOL(xa_extract); +/** + * xa_delete_node() - Private interface for workingset code. + * @node: Node to be removed from the tree. + * @update: Function to call to update ancestor nodes. + * + * Context: xa_lock must be held on entry and will not be released. + */ +void xa_delete_node(struct xa_node *node, xa_update_node_t update) +{ + struct xa_state xas = { + .xa = node->array, + .xa_index = (unsigned long)node->offset << + (node->shift + XA_CHUNK_SHIFT), + .xa_shift = node->shift + XA_CHUNK_SHIFT, + .xa_offset = node->offset, + .xa_node = xa_parent_locked(node->array, node), + .xa_update = update, + }; + + xas_store(&xas, NULL); +} +EXPORT_SYMBOL_GPL(xa_delete_node); /* For the benefit of the test suite */ + /** * xa_destroy() - Free all internal data structures. * @xa: XArray. 
diff --git a/mm/workingset.c b/mm/workingset.c index 92e66113a577..e185bfb8bd4e 100644 --- a/mm/workingset.c +++ b/mm/workingset.c @@ -519,12 +519,11 @@ static enum lru_status shadow_lru_isolate(struct list_head *item, void *arg) __must_hold(lru_lock) { struct xa_node *node = container_of(item, struct xa_node, private_list); - XA_STATE(xas, node->array, 0); struct address_space *mapping; int ret; /* - * Page cache insertions and deletions synchroneously maintain + * Page cache insertions and deletions synchronously maintain * the shadow node LRU under the i_pages lock and the * lru_lock. Because the page cache tree is emptied before * the inode can be destroyed, holding the lru_lock pins any @@ -559,15 +558,7 @@ static enum lru_status shadow_lru_isolate(struct list_head *item, if (WARN_ON_ONCE(node->count != node->nr_values)) goto out_invalid; mapping->nrexceptional -= node->nr_values; - xas.xa_node = xa_parent_locked(&mapping->i_pages, node); - xas.xa_offset = node->offset; - xas.xa_shift = node->shift + XA_CHUNK_SHIFT; - xas_set_update(&xas, workingset_update_node); - /* - * We could store a shadow entry here which was the minimum of the - * shadow entries we were tracking ... - */ - xas_store(&xas, NULL); + xa_delete_node(node, workingset_update_node); __inc_lruvec_slab_state(node, WORKINGSET_NODERECLAIM); out_invalid: From ca7b639e8611b3260a30b18aaa0d6db9c80a75ef Mon Sep 17 00:00:00 2001 From: "Matthew Wilcox (Oracle)" Date: Sun, 2 Aug 2020 14:17:21 -0400 Subject: [PATCH 203/243] XArray: Fix xas_reload for multi-index entries xas_reload() was only checking that the head entry was still at the head index. If the entry has been split, that's not enough as there may be a different entry at the specified index now. Solve this by checking the slot for the requested index instead of the head index. 
Signed-off-by: Matthew Wilcox (Oracle) --- include/linux/xarray.h | 17 ++++++++++++++--- 1 file changed, 14 insertions(+), 3 deletions(-) diff --git a/include/linux/xarray.h b/include/linux/xarray.h index 29db4e16eb89..4be9c57132fe 100644 --- a/include/linux/xarray.h +++ b/include/linux/xarray.h @@ -1524,10 +1524,21 @@ void xas_create_range(struct xa_state *); static inline void *xas_reload(struct xa_state *xas) { struct xa_node *node = xas->xa_node; + void *entry; + char offset; - if (node) - return xa_entry(xas->xa, node, xas->xa_offset); - return xa_head(xas->xa); + if (!node) + return xa_head(xas->xa); + if (IS_ENABLED(CONFIG_XARRAY_MULTI)) { + offset = (xas->xa_index >> node->shift) & XA_CHUNK_MASK; + entry = xa_entry(xas->xa, node, offset); + if (!xa_is_sibling(entry)) + return entry; + offset = xa_to_sibling(entry); + } else { + offset = xas->xa_offset; + } + return xa_entry(xas->xa, node, offset); } /** From f78b8250a076ac63ddd021c7ea9739bcc2f6f737 Mon Sep 17 00:00:00 2001 From: Hui Su Date: Mon, 28 Sep 2020 01:15:53 +0800 Subject: [PATCH 204/243] radix-tree: fix the comment of radix_tree_next_slot() fix the comment of radix_tree_next_slot(): interator --> iterator. 
Signed-off-by: Hui Su Signed-off-by: Matthew Wilcox (Oracle) --- include/linux/radix-tree.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/include/linux/radix-tree.h b/include/linux/radix-tree.h index 5c85059a92ba..64ad900ac742 100644 --- a/include/linux/radix-tree.h +++ b/include/linux/radix-tree.h @@ -377,7 +377,7 @@ radix_tree_chunk_size(struct radix_tree_iter *iter) * radix_tree_next_slot - find next slot in chunk * * @slot: pointer to current slot - * @iter: pointer to interator state + * @iter: pointer to iterator state * @flags: RADIX_TREE_ITER_*, should be constant * Returns: pointer to next slot, or NULL if there no more left * From 84c34df158cf215b0cd1475ab3b8e6f212f81f23 Mon Sep 17 00:00:00 2001 From: "Matthew Wilcox (Oracle)" Date: Tue, 13 Oct 2020 08:46:29 -0400 Subject: [PATCH 205/243] XArray: Fix xas_create_range for ranges above 4 billion The 'sibs' variable would be shifted as a 32-bit integer, so if 'shift' is more than 32, this is undefined behaviour. In practice, this doesn't happen because the page cache is the only user and nobody uses 16TB pages. Signed-off-by: Matthew Wilcox (Oracle) --- lib/xarray.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/lib/xarray.c b/lib/xarray.c index 1fa5c5658e63..2046d676ab41 100644 --- a/lib/xarray.c +++ b/lib/xarray.c @@ -703,7 +703,7 @@ void xas_create_range(struct xa_state *xas) unsigned char shift = xas->xa_shift; unsigned char sibs = xas->xa_sibs; - xas->xa_index |= ((sibs + 1) << shift) - 1; + xas->xa_index |= ((sibs + 1UL) << shift) - 1; if (xas_is_node(xas) && xas->xa_node->shift == xas->xa_shift) xas->xa_offset |= sibs; xas->xa_shift = 0; From 61ca2c4afd9d108899e0fa48e7b1cfc9afe80596 Mon Sep 17 00:00:00 2001 From: Sargun Dhillon Date: Mon, 12 Oct 2020 14:43:39 -0700 Subject: [PATCH 206/243] NFS: Only reference user namespace from nfs4idmap struct instead of cred The nfs4idmapper only needs access to the user namespace, and not the entire cred struct. 
This replaces the struct cred* member with struct user_namespace*. This is mostly hygiene, so we don't have to hold onto the cred object, which has extraneous references to things like user_struct. This also makes switching away from init_user_ns more straightforward in the future. Signed-off-by: Sargun Dhillon Signed-off-by: Anna Schumaker --- fs/nfs/nfs4idmap.c | 15 ++++++++------- 1 file changed, 8 insertions(+), 7 deletions(-) diff --git a/fs/nfs/nfs4idmap.c b/fs/nfs/nfs4idmap.c index 62e6eea5c516..8d8aba305ecc 100644 --- a/fs/nfs/nfs4idmap.c +++ b/fs/nfs/nfs4idmap.c @@ -46,6 +46,7 @@ #include #include #include +#include #include "internal.h" #include "netns.h" @@ -69,13 +70,13 @@ struct idmap { struct rpc_pipe *idmap_pipe; struct idmap_legacy_upcalldata *idmap_upcall_data; struct mutex idmap_mutex; - const struct cred *cred; + struct user_namespace *user_ns; }; static struct user_namespace *idmap_userns(const struct idmap *idmap) { - if (idmap && idmap->cred) - return idmap->cred->user_ns; + if (idmap && idmap->user_ns) + return idmap->user_ns; return &init_user_ns; } @@ -286,7 +287,7 @@ static struct key *nfs_idmap_request_key(const char *name, size_t namelen, if (ret < 0) return ERR_PTR(ret); - if (!idmap->cred || idmap->cred->user_ns == &init_user_ns) + if (!idmap->user_ns || idmap->user_ns == &init_user_ns) rkey = request_key(&key_type_id_resolver, desc, ""); if (IS_ERR(rkey)) { mutex_lock(&idmap->idmap_mutex); @@ -462,7 +463,7 @@ nfs_idmap_new(struct nfs_client *clp) return -ENOMEM; mutex_init(&idmap->idmap_mutex); - idmap->cred = get_cred(clp->cl_rpcclient->cl_cred); + idmap->user_ns = get_user_ns(clp->cl_rpcclient->cl_cred->user_ns); rpc_init_pipe_dir_object(&idmap->idmap_pdo, &nfs_idmap_pipe_dir_object_ops, @@ -486,7 +487,7 @@ nfs_idmap_new(struct nfs_client *clp) err_destroy_pipe: rpc_destroy_pipe_data(idmap->idmap_pipe); err: - put_cred(idmap->cred); + put_user_ns(idmap->user_ns); kfree(idmap); return error; } @@ -503,7 +504,7 @@
nfs_idmap_delete(struct nfs_client *clp) &clp->cl_rpcclient->cl_pipedir_objects, &idmap->idmap_pdo); rpc_destroy_pipe_data(idmap->idmap_pipe); - put_cred(idmap->cred); + put_user_ns(idmap->user_ns); kfree(idmap); } From 770c03e6dabacd5b9f57bba93c4311d32b618640 Mon Sep 17 00:00:00 2001 From: Fei Shao Date: Thu, 8 Oct 2020 17:34:14 +0800 Subject: [PATCH 207/243] rtc: mt6397: Remove unused member dev Removing the struct member "dev" in mt6397 RTC driver because it's not initialized and the only usage is for one debugging message. Also fixed a typo in the error message. Signed-off-by: Fei Shao Signed-off-by: Alexandre Belloni Link: https://lore.kernel.org/r/20201008093414.1911699-1-fshao@chromium.org --- drivers/rtc/rtc-mt6397.c | 3 ++- include/linux/mfd/mt6397/rtc.h | 1 - 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/rtc/rtc-mt6397.c b/drivers/rtc/rtc-mt6397.c index f8b1353777ba..1894aded4c85 100644 --- a/drivers/rtc/rtc-mt6397.c +++ b/drivers/rtc/rtc-mt6397.c @@ -31,7 +31,8 @@ static int mtk_rtc_write_trigger(struct mt6397_rtc *rtc) MTK_RTC_POLL_DELAY_US, MTK_RTC_POLL_TIMEOUT); if (ret < 0) - dev_err(rtc->dev, "failed to write WRTGE: %d\n", ret); + dev_err(rtc->rtc_dev->dev.parent, + "failed to write WRTGR: %d\n", ret); return ret; } diff --git a/include/linux/mfd/mt6397/rtc.h b/include/linux/mfd/mt6397/rtc.h index 66989a16221a..c3748b53bf7d 100644 --- a/include/linux/mfd/mt6397/rtc.h +++ b/include/linux/mfd/mt6397/rtc.h @@ -72,7 +72,6 @@ struct mtk_rtc_data { }; struct mt6397_rtc { - struct device *dev; struct rtc_device *rtc_dev; /* Protect register access from multiple tasks */ From 00e8e87f10155b06ae2bc9f93c4d006681640e65 Mon Sep 17 00:00:00 2001 From: Alexandre Belloni Date: Fri, 9 Oct 2020 17:30:58 +0200 Subject: [PATCH 208/243] rtc: rv3028: fix clock output support rv3028_clkout_set_rate unconditionally sets RV3028_CLKOUT_CLKOE but clk_set_rate may be called with the clock disabled. 
Ensure the clock is kept disabled if it was not yet enabled. Also, the actual rate was overwritten when enabling the clock, properly write to the register only once. Signed-off-by: Alexandre Belloni Link: https://lore.kernel.org/r/20201009153101.721149-1-alexandre.belloni@bootlin.com --- drivers/rtc/rtc-rv3028.c | 19 +++++++++---------- 1 file changed, 9 insertions(+), 10 deletions(-) diff --git a/drivers/rtc/rtc-rv3028.c b/drivers/rtc/rtc-rv3028.c index ec84db0b3d7a..fcc21b1b07b4 100644 --- a/drivers/rtc/rtc-rv3028.c +++ b/drivers/rtc/rtc-rv3028.c @@ -619,24 +619,23 @@ static int rv3028_clkout_set_rate(struct clk_hw *hw, unsigned long rate, unsigned long parent_rate) { int i, ret; + u32 enabled; struct rv3028_data *rv3028 = clkout_hw_to_rv3028(hw); + ret = regmap_read(rv3028->regmap, RV3028_CLKOUT, &enabled); + if (ret < 0) + return ret; + ret = regmap_write(rv3028->regmap, RV3028_CLKOUT, 0x0); if (ret < 0) return ret; - for (i = 0; i < ARRAY_SIZE(clkout_rates); i++) { - if (clkout_rates[i] == rate) { - ret = regmap_update_bits(rv3028->regmap, - RV3028_CLKOUT, - RV3028_CLKOUT_FD_MASK, i); - if (ret < 0) - return ret; + enabled &= RV3028_CLKOUT_CLKOE; + for (i = 0; i < ARRAY_SIZE(clkout_rates); i++) + if (clkout_rates[i] == rate) return regmap_write(rv3028->regmap, RV3028_CLKOUT, - RV3028_CLKOUT_CLKSY | RV3028_CLKOUT_CLKOE); - } - } + RV3028_CLKOUT_CLKSY | enabled | i); return -EINVAL; } From c1efae1432018063e7e723c4267edd3f13ce0f48 Mon Sep 17 00:00:00 2001 From: Alexandre Belloni Date: Fri, 9 Oct 2020 17:30:59 +0200 Subject: [PATCH 209/243] rtc: rv3028: fix trickle resistor values Version 1.0 of the application manual had the wrong resistor values. 
Fix them according to version 1.1 Signed-off-by: Alexandre Belloni Link: https://lore.kernel.org/r/20201009153101.721149-2-alexandre.belloni@bootlin.com --- drivers/rtc/rtc-rv3028.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/rtc/rtc-rv3028.c b/drivers/rtc/rtc-rv3028.c index fcc21b1b07b4..5cfce6415d9c 100644 --- a/drivers/rtc/rtc-rv3028.c +++ b/drivers/rtc/rtc-rv3028.c @@ -95,7 +95,7 @@ struct rv3028_data { #endif }; -static u16 rv3028_trickle_resistors[] = {1000, 3000, 6000, 11000}; +static u16 rv3028_trickle_resistors[] = {3000, 5000, 9000, 15000}; static ssize_t timestamp0_store(struct device *dev, struct device_attribute *attr, From de0ad60e79e1ce563223b3fddf03fc19f606c6d4 Mon Sep 17 00:00:00 2001 From: Alexandre Belloni Date: Fri, 9 Oct 2020 17:31:00 +0200 Subject: [PATCH 210/243] rtc: rv3028: factorize EERD bit handling Both rv3028_eeprom_write and rv3028_eeprom_read enable EERD before sending commands to the EEPROM and restore it afterwards. Factorize this code. 
Signed-off-by: Alexandre Belloni Link: https://lore.kernel.org/r/20201009153101.721149-3-alexandre.belloni@bootlin.com --- drivers/rtc/rtc-rv3028.c | 118 +++++++++++++++++++-------------------- 1 file changed, 59 insertions(+), 59 deletions(-) diff --git a/drivers/rtc/rtc-rv3028.c b/drivers/rtc/rtc-rv3028.c index 5cfce6415d9c..7b8823f43626 100644 --- a/drivers/rtc/rtc-rv3028.c +++ b/drivers/rtc/rtc-rv3028.c @@ -171,6 +171,44 @@ static const struct attribute_group rv3028_attr_group = { .attrs = rv3028_attrs, }; +static int rv3028_exit_eerd(struct rv3028_data *rv3028, u32 eerd) +{ + if (eerd) + return 0; + + return regmap_update_bits(rv3028->regmap, RV3028_CTRL1, RV3028_CTRL1_EERD, 0); +} + +static int rv3028_enter_eerd(struct rv3028_data *rv3028, u32 *eerd) +{ + u32 ctrl1, status; + int ret; + + ret = regmap_read(rv3028->regmap, RV3028_CTRL1, &ctrl1); + if (ret) + return ret; + + *eerd = ctrl1 & RV3028_CTRL1_EERD; + if (*eerd) + return 0; + + ret = regmap_update_bits(rv3028->regmap, RV3028_CTRL1, + RV3028_CTRL1_EERD, RV3028_CTRL1_EERD); + if (ret) + return ret; + + ret = regmap_read_poll_timeout(rv3028->regmap, RV3028_STATUS, status, + !(status & RV3028_STATUS_EEBUSY), + RV3028_EEBUSY_POLL, RV3028_EEBUSY_TIMEOUT); + if (ret) { + rv3028_exit_eerd(rv3028, *eerd); + + return ret; + } + + return 0; +} + static irqreturn_t rv3028_handle_irq(int irq, void *dev_id) { struct rv3028_data *rv3028 = dev_id; @@ -451,49 +489,36 @@ static int rv3028_nvram_read(void *priv, unsigned int offset, void *val, static int rv3028_eeprom_write(void *priv, unsigned int offset, void *val, size_t bytes) { - u32 status, ctrl1; - int i, ret, err; + struct rv3028_data *rv3028 = priv; + u32 status, eerd; + int i, ret; u8 *buf = val; - ret = regmap_read(priv, RV3028_CTRL1, &ctrl1); + ret = rv3028_enter_eerd(rv3028, &eerd); if (ret) return ret; - if (!(ctrl1 & RV3028_CTRL1_EERD)) { - ret = regmap_update_bits(priv, RV3028_CTRL1, - RV3028_CTRL1_EERD, RV3028_CTRL1_EERD); - if (ret) - return ret; - - 
ret = regmap_read_poll_timeout(priv, RV3028_STATUS, status, - !(status & RV3028_STATUS_EEBUSY), - RV3028_EEBUSY_POLL, - RV3028_EEBUSY_TIMEOUT); - if (ret) - goto restore_eerd; - } - for (i = 0; i < bytes; i++) { - ret = regmap_write(priv, RV3028_EEPROM_ADDR, offset + i); + ret = regmap_write(rv3028->regmap, RV3028_EEPROM_ADDR, offset + i); if (ret) goto restore_eerd; - ret = regmap_write(priv, RV3028_EEPROM_DATA, buf[i]); + ret = regmap_write(rv3028->regmap, RV3028_EEPROM_DATA, buf[i]); if (ret) goto restore_eerd; - ret = regmap_write(priv, RV3028_EEPROM_CMD, 0x0); + ret = regmap_write(rv3028->regmap, RV3028_EEPROM_CMD, 0x0); if (ret) goto restore_eerd; - ret = regmap_write(priv, RV3028_EEPROM_CMD, + ret = regmap_write(rv3028->regmap, RV3028_EEPROM_CMD, RV3028_EEPROM_CMD_WRITE); if (ret) goto restore_eerd; usleep_range(RV3028_EEBUSY_POLL, RV3028_EEBUSY_TIMEOUT); - ret = regmap_read_poll_timeout(priv, RV3028_STATUS, status, + ret = regmap_read_poll_timeout(rv3028->regmap, RV3028_STATUS, status, !(status & RV3028_STATUS_EEBUSY), RV3028_EEBUSY_POLL, RV3028_EEBUSY_TIMEOUT); @@ -502,13 +527,7 @@ static int rv3028_eeprom_write(void *priv, unsigned int offset, void *val, } restore_eerd: - if (!(ctrl1 & RV3028_CTRL1_EERD)) - { - err = regmap_update_bits(priv, RV3028_CTRL1, RV3028_CTRL1_EERD, - 0); - if (err && !ret) - ret = err; - } + rv3028_exit_eerd(rv3028, eerd); return ret; } @@ -516,63 +535,44 @@ static int rv3028_eeprom_write(void *priv, unsigned int offset, void *val, static int rv3028_eeprom_read(void *priv, unsigned int offset, void *val, size_t bytes) { - u32 status, ctrl1, data; - int i, ret, err; + struct rv3028_data *rv3028 = priv; + u32 status, eerd, data; + int i, ret; u8 *buf = val; - ret = regmap_read(priv, RV3028_CTRL1, &ctrl1); + ret = rv3028_enter_eerd(rv3028, &eerd); if (ret) return ret; - if (!(ctrl1 & RV3028_CTRL1_EERD)) { - ret = regmap_update_bits(priv, RV3028_CTRL1, - RV3028_CTRL1_EERD, RV3028_CTRL1_EERD); - if (ret) - return ret; - - ret = 
regmap_read_poll_timeout(priv, RV3028_STATUS, status, - !(status & RV3028_STATUS_EEBUSY), - RV3028_EEBUSY_POLL, - RV3028_EEBUSY_TIMEOUT); - if (ret) - goto restore_eerd; - } - for (i = 0; i < bytes; i++) { - ret = regmap_write(priv, RV3028_EEPROM_ADDR, offset + i); + ret = regmap_write(rv3028->regmap, RV3028_EEPROM_ADDR, offset + i); if (ret) goto restore_eerd; - ret = regmap_write(priv, RV3028_EEPROM_CMD, 0x0); + ret = regmap_write(rv3028->regmap, RV3028_EEPROM_CMD, 0x0); if (ret) goto restore_eerd; - ret = regmap_write(priv, RV3028_EEPROM_CMD, + ret = regmap_write(rv3028->regmap, RV3028_EEPROM_CMD, RV3028_EEPROM_CMD_READ); if (ret) goto restore_eerd; - ret = regmap_read_poll_timeout(priv, RV3028_STATUS, status, + ret = regmap_read_poll_timeout(rv3028->regmap, RV3028_STATUS, status, !(status & RV3028_STATUS_EEBUSY), RV3028_EEBUSY_POLL, RV3028_EEBUSY_TIMEOUT); if (ret) goto restore_eerd; - ret = regmap_read(priv, RV3028_EEPROM_DATA, &data); + ret = regmap_read(rv3028->regmap, RV3028_EEPROM_DATA, &data); if (ret) goto restore_eerd; buf[i] = data; } restore_eerd: - if (!(ctrl1 & RV3028_CTRL1_EERD)) - { - err = regmap_update_bits(priv, RV3028_CTRL1, RV3028_CTRL1_EERD, - 0); - if (err && !ret) - ret = err; - } + rv3028_exit_eerd(rv3028, eerd); return ret; } @@ -834,7 +834,7 @@ static int rv3028_probe(struct i2c_client *client) nvmem_cfg.priv = rv3028->regmap; rtc_nvmem_register(rv3028->rtc, &nvmem_cfg); - eeprom_cfg.priv = rv3028->regmap; + eeprom_cfg.priv = rv3028; rtc_nvmem_register(rv3028->rtc, &eeprom_cfg); rv3028->rtc->max_user_freq = 1; From 024e6f3dce9ef0bc61acc5b93bb1eba491e4f8d0 Mon Sep 17 00:00:00 2001 From: Alexandre Belloni Date: Fri, 9 Oct 2020 17:31:01 +0200 Subject: [PATCH 211/243] rtc: rv3028: ensure ram configuration registers are saved If RV3028_CTRL1_EERD is not set (this is the default), the RTC will refresh the RAM configuration registers from the EEPROM at midnight. It is necessary to save the RAM registers back to EEPROM after modifying them. 
Signed-off-by: Alexandre Belloni Link: https://lore.kernel.org/r/20201009153101.721149-4-alexandre.belloni@bootlin.com --- drivers/rtc/rtc-rv3028.c | 78 ++++++++++++++++++++++++++++++++++------ 1 file changed, 68 insertions(+), 10 deletions(-) diff --git a/drivers/rtc/rtc-rv3028.c b/drivers/rtc/rtc-rv3028.c index 7b8823f43626..fa226f0fe67d 100644 --- a/drivers/rtc/rtc-rv3028.c +++ b/drivers/rtc/rtc-rv3028.c @@ -71,6 +71,7 @@ #define RV3028_EVT_CTRL_TSR BIT(2) +#define RV3028_EEPROM_CMD_UPDATE 0x11 #define RV3028_EEPROM_CMD_WRITE 0x21 #define RV3028_EEPROM_CMD_READ 0x22 @@ -209,6 +210,50 @@ static int rv3028_enter_eerd(struct rv3028_data *rv3028, u32 *eerd) return 0; } +static int rv3028_update_eeprom(struct rv3028_data *rv3028, u32 eerd) +{ + u32 status; + int ret; + + ret = regmap_write(rv3028->regmap, RV3028_EEPROM_CMD, 0x0); + if (ret) + goto exit_eerd; + + ret = regmap_write(rv3028->regmap, RV3028_EEPROM_CMD, RV3028_EEPROM_CMD_UPDATE); + if (ret) + goto exit_eerd; + + usleep_range(63000, RV3028_EEBUSY_TIMEOUT); + + ret = regmap_read_poll_timeout(rv3028->regmap, RV3028_STATUS, status, + !(status & RV3028_STATUS_EEBUSY), + RV3028_EEBUSY_POLL, RV3028_EEBUSY_TIMEOUT); + +exit_eerd: + rv3028_exit_eerd(rv3028, eerd); + + return ret; +} + +static int rv3028_update_cfg(struct rv3028_data *rv3028, unsigned int reg, + unsigned int mask, unsigned int val) +{ + u32 eerd; + int ret; + + ret = rv3028_enter_eerd(rv3028, &eerd); + if (ret) + return ret; + + ret = regmap_update_bits(rv3028->regmap, reg, mask, val); + if (ret) { + rv3028_exit_eerd(rv3028, eerd); + return ret; + } + + return rv3028_update_eeprom(rv3028, eerd); +} + static irqreturn_t rv3028_handle_irq(int irq, void *dev_id) { struct rv3028_data *rv3028 = dev_id; @@ -442,17 +487,32 @@ static int rv3028_read_offset(struct device *dev, long *offset) static int rv3028_set_offset(struct device *dev, long offset) { struct rv3028_data *rv3028 = dev_get_drvdata(dev); + u32 eerd; int ret; offset = clamp(offset, -244141L, 
243187L) * 1000; offset = DIV_ROUND_CLOSEST(offset, OFFSET_STEP_PPT); - ret = regmap_write(rv3028->regmap, RV3028_OFFSET, offset >> 1); - if (ret < 0) + ret = rv3028_enter_eerd(rv3028, &eerd); + if (ret) return ret; - return regmap_update_bits(rv3028->regmap, RV3028_BACKUP, BIT(7), - offset << 7); + ret = regmap_write(rv3028->regmap, RV3028_OFFSET, offset >> 1); + if (ret < 0) + goto exit_eerd; + + ret = regmap_update_bits(rv3028->regmap, RV3028_BACKUP, BIT(7), + offset << 7); + if (ret < 0) + goto exit_eerd; + + return rv3028_update_eeprom(rv3028, eerd); + +exit_eerd: + rv3028_exit_eerd(rv3028, eerd); + + return ret; + } static int rv3028_ioctl(struct device *dev, unsigned int cmd, unsigned long arg) @@ -634,8 +694,8 @@ static int rv3028_clkout_set_rate(struct clk_hw *hw, unsigned long rate, for (i = 0; i < ARRAY_SIZE(clkout_rates); i++) if (clkout_rates[i] == rate) - return regmap_write(rv3028->regmap, RV3028_CLKOUT, - RV3028_CLKOUT_CLKSY | enabled | i); + return rv3028_update_cfg(rv3028, RV3028_CLKOUT, 0xff, + RV3028_CLKOUT_CLKSY | enabled | i); return -EINVAL; } @@ -810,10 +870,8 @@ static int rv3028_probe(struct i2c_client *client) break; if (i < ARRAY_SIZE(rv3028_trickle_resistors)) { - ret = regmap_update_bits(rv3028->regmap, RV3028_BACKUP, - RV3028_BACKUP_TCE | - RV3028_BACKUP_TCR_MASK, - RV3028_BACKUP_TCE | i); + ret = rv3028_update_cfg(rv3028, RV3028_BACKUP, RV3028_BACKUP_TCE | + RV3028_BACKUP_TCR_MASK, RV3028_BACKUP_TCE | i); if (ret) return ret; } else { From 094eca37193c516106ef8ed7f60ed058ed9fc458 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Wed, 14 Oct 2020 15:22:11 -0400 Subject: [PATCH 212/243] NFSv4: Fix up RCU annotations for struct nfs_netns_client The identifier is read as an RCU protected string. Its value may be changed during the lifetime of the network namespace by writing a new string into the sysfs pseudofile (at which point, we free the old string only after a call to synchronize_rcu()). 
Signed-off-by: Trond Myklebust Signed-off-by: Anna Schumaker --- fs/nfs/sysfs.c | 11 ++++++++--- fs/nfs/sysfs.h | 2 +- 2 files changed, 9 insertions(+), 4 deletions(-) diff --git a/fs/nfs/sysfs.c b/fs/nfs/sysfs.c index c489496b5659..8cb70755e3c9 100644 --- a/fs/nfs/sysfs.c +++ b/fs/nfs/sysfs.c @@ -79,7 +79,12 @@ static ssize_t nfs_netns_identifier_show(struct kobject *kobj, struct nfs_netns_client *c = container_of(kobj, struct nfs_netns_client, kobject); - return scnprintf(buf, PAGE_SIZE, "%s\n", c->identifier); + ssize_t ret; + + rcu_read_lock(); + ret = scnprintf(buf, PAGE_SIZE, "%s\n", rcu_dereference(c->identifier)); + rcu_read_unlock(); + return ret; } /* Strip trailing '\n' */ @@ -107,7 +112,7 @@ static ssize_t nfs_netns_identifier_store(struct kobject *kobj, p = kmemdup_nul(buf, len, GFP_KERNEL); if (!p) return -ENOMEM; - old = xchg(&c->identifier, p); + old = rcu_dereference_protected(xchg(&c->identifier, (char __rcu *)p), 1); if (old) { synchronize_rcu(); kfree(old); @@ -121,7 +126,7 @@ static void nfs_netns_client_release(struct kobject *kobj) struct nfs_netns_client, kobject); - kfree(c->identifier); + kfree(rcu_dereference_raw(c->identifier)); kfree(c); } diff --git a/fs/nfs/sysfs.h b/fs/nfs/sysfs.h index ebcbdc40483b..5501ef573c32 100644 --- a/fs/nfs/sysfs.h +++ b/fs/nfs/sysfs.h @@ -11,7 +11,7 @@ struct nfs_netns_client { struct kobject kobject; struct net *net; - const char *identifier; + const char __rcu *identifier; }; extern struct kobject *nfs_client_kobj; From 8c39076c276be0b31982e44654e2c2357473258a Mon Sep 17 00:00:00 2001 From: Olga Kornievskaia Date: Fri, 16 Oct 2020 09:25:45 -0400 Subject: [PATCH 213/243] NFSv4.2: support EXCHGID4_FLAG_SUPP_FENCE_OPS 4.2 EXCHANGE_ID flag RFC 7862 introduced a new flag that either client or server is allowed to set: EXCHGID4_FLAG_SUPP_FENCE_OPS. Client needs to update its bitmask to allow for this flag value. 
v2: changed minor version argument to unsigned int Signed-off-by: Olga Kornievskaia CC: Signed-off-by: Anna Schumaker --- fs/nfs/nfs4proc.c | 9 ++++++--- include/uapi/linux/nfs4.h | 3 +++ 2 files changed, 9 insertions(+), 3 deletions(-) diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c index 2e33995691f5..9e0ca9b2b210 100644 --- a/fs/nfs/nfs4proc.c +++ b/fs/nfs/nfs4proc.c @@ -8133,9 +8133,11 @@ int nfs4_proc_secinfo(struct inode *dir, const struct qstr *name, * both PNFS and NON_PNFS flags set, and not having one of NON_PNFS, PNFS, or * DS flags set. */ -static int nfs4_check_cl_exchange_flags(u32 flags) +static int nfs4_check_cl_exchange_flags(u32 flags, u32 version) { - if (flags & ~EXCHGID4_FLAG_MASK_R) + if (version >= 2 && (flags & ~EXCHGID4_2_FLAG_MASK_R)) + goto out_inval; + else if (version < 2 && (flags & ~EXCHGID4_FLAG_MASK_R)) goto out_inval; if ((flags & EXCHGID4_FLAG_USE_PNFS_MDS) && (flags & EXCHGID4_FLAG_USE_NON_PNFS)) @@ -8548,7 +8550,8 @@ static int _nfs4_proc_exchange_id(struct nfs_client *clp, const struct cred *cre if (status != 0) goto out; - status = nfs4_check_cl_exchange_flags(resp->flags); + status = nfs4_check_cl_exchange_flags(resp->flags, + clp->cl_mvops->minor_version); if (status != 0) goto out; diff --git a/include/uapi/linux/nfs4.h b/include/uapi/linux/nfs4.h index bf197e99b98f..ed5415e0f1c1 100644 --- a/include/uapi/linux/nfs4.h +++ b/include/uapi/linux/nfs4.h @@ -139,6 +139,8 @@ #define EXCHGID4_FLAG_UPD_CONFIRMED_REC_A 0x40000000 #define EXCHGID4_FLAG_CONFIRMED_R 0x80000000 + +#define EXCHGID4_FLAG_SUPP_FENCE_OPS 0x00000004 /* * Since the validity of these bits depends on whether * they're set in the argument or response, have separate @@ -146,6 +148,7 @@ */ #define EXCHGID4_FLAG_MASK_A 0x40070103 #define EXCHGID4_FLAG_MASK_R 0x80070103 +#define EXCHGID4_2_FLAG_MASK_R 0x80070107 #define SEQ4_STATUS_CB_PATH_DOWN 0x00000001 #define SEQ4_STATUS_CB_GSS_CONTEXTS_EXPIRING 0x00000002 From 035fbafc7a54b8c7755b3c508b8f3ab6ff3c8d65 Mon Sep 
17 00:00:00 2001 From: Colin Ian King Date: Mon, 12 Oct 2020 15:03:41 +0100 Subject: [PATCH 214/243] io_uring: Fix sizeof() mismatch An incorrect sizeof() is being used, sizeof(file_data->table) is not correct, it should be sizeof(*file_data->table). Fixes: 5398ae698525 ("io_uring: clean file_data access in files_register") Signed-off-by: Colin Ian King Addresses-Coverity: ("Sizeof not portable (SIZEOF_MISMATCH)") Signed-off-by: Jens Axboe --- fs/io_uring.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fs/io_uring.c b/fs/io_uring.c index 2e1dc354cd08..717dd5d38d75 100644 --- a/fs/io_uring.c +++ b/fs/io_uring.c @@ -7306,7 +7306,7 @@ static int io_sqe_files_register(struct io_ring_ctx *ctx, void __user *arg, spin_lock_init(&file_data->lock); nr_tables = DIV_ROUND_UP(nr_args, IORING_MAX_FILES_TABLE); - file_data->table = kcalloc(nr_tables, sizeof(file_data->table), + file_data->table = kcalloc(nr_tables, sizeof(*file_data->table), GFP_KERNEL); if (!file_data->table) goto out_free; From 368c5481ae7c6a9719c40984faea35480d9f4872 Mon Sep 17 00:00:00 2001 From: Pavel Begunkov Date: Tue, 13 Oct 2020 09:43:56 +0100 Subject: [PATCH 215/243] io_uring: don't set COMP_LOCKED if won't put __io_kill_linked_timeout() sets REQ_F_COMP_LOCKED for a linked timeout even if it can't cancel it, e.g. it's already running. It not only races with io_link_timeout_fn() for ->flags field, but also leaves the flag set and so io_link_timeout_fn() may find it and decide that it holds the lock. Hopefully, the second problem is potential. 
Signed-off-by: Pavel Begunkov Signed-off-by: Jens Axboe --- fs/io_uring.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fs/io_uring.c b/fs/io_uring.c index 717dd5d38d75..f0f4b5b5c2a0 100644 --- a/fs/io_uring.c +++ b/fs/io_uring.c @@ -1769,6 +1769,7 @@ static bool io_link_cancel_timeout(struct io_kiocb *req) ret = hrtimer_try_to_cancel(&io->timer); if (ret != -1) { + req->flags |= REQ_F_COMP_LOCKED; io_cqring_fill_event(req, -ECANCELED); io_commit_cqring(ctx); req->flags &= ~REQ_F_LINK_HEAD; @@ -1791,7 +1792,6 @@ static bool __io_kill_linked_timeout(struct io_kiocb *req) return false; list_del_init(&link->link_list); - link->flags |= REQ_F_COMP_LOCKED; wake_ev = io_link_cancel_timeout(link); req->flags &= ~REQ_F_LINK_TIMEOUT; return wake_ev; From b1b74cfc1967bd0747ff85f650f598e84eeb3d1c Mon Sep 17 00:00:00 2001 From: Pavel Begunkov Date: Tue, 13 Oct 2020 09:43:57 +0100 Subject: [PATCH 216/243] io_uring: don't unnecessarily clear F_LINK_TIMEOUT If a request had REQ_F_LINK_TIMEOUT it would've been cleared in __io_kill_linked_timeout() by the time of __io_fail_links(), so no need to care about it. Signed-off-by: Pavel Begunkov Signed-off-by: Jens Axboe --- fs/io_uring.c | 1 - 1 file changed, 1 deletion(-) diff --git a/fs/io_uring.c b/fs/io_uring.c index f0f4b5b5c2a0..ca9be31b76b3 100644 --- a/fs/io_uring.c +++ b/fs/io_uring.c @@ -1852,7 +1852,6 @@ static void __io_fail_links(struct io_kiocb *req) io_cqring_fill_event(link, -ECANCELED); link->flags |= REQ_F_COMP_LOCKED; __io_double_put_req(link); - req->flags &= ~REQ_F_LINK_TIMEOUT; } io_commit_cqring(ctx); From 6a0af224c21309f24dbb1b79d0744b255d7156a0 Mon Sep 17 00:00:00 2001 From: Pavel Begunkov Date: Tue, 13 Oct 2020 09:43:58 +0100 Subject: [PATCH 217/243] io_uring: don't put a poll req under spinlock Move io_put_req() in io_poll_task_handler() from under spinlock. This eliminates the need to use REQ_F_COMP_LOCKED, at the expense of potentially having to grab the lock again. 
That's still a better trade off than relying on the locked flag. Signed-off-by: Pavel Begunkov Signed-off-by: Jens Axboe --- fs/io_uring.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/fs/io_uring.c b/fs/io_uring.c index ca9be31b76b3..92546f90defd 100644 --- a/fs/io_uring.c +++ b/fs/io_uring.c @@ -4844,10 +4844,9 @@ static void io_poll_task_handler(struct io_kiocb *req, struct io_kiocb **nxt) hash_del(&req->hash_node); io_poll_complete(req, req->result, 0); - req->flags |= REQ_F_COMP_LOCKED; - *nxt = io_put_req_find_next(req); spin_unlock_irq(&ctx->completion_lock); + *nxt = io_put_req_find_next(req); io_cqring_ev_posted(ctx); } From 4edf20f9990230e9b85e79954d5cd28fc93616e9 Mon Sep 17 00:00:00 2001 From: Pavel Begunkov Date: Tue, 13 Oct 2020 09:43:59 +0100 Subject: [PATCH 218/243] io_uring: dig out COMP_LOCK from deep call chain io_req_clean_work() checks REQ_F_COMP_LOCKED to pass this two layers up. Move the check up into __io_free_req(), so at least it doesn't look so ugly and it would facilitate further changes. Signed-off-by: Pavel Begunkov Signed-off-by: Jens Axboe --- fs/io_uring.c | 31 ++++++++----------------------- 1 file changed, 8 insertions(+), 23 deletions(-) diff --git a/fs/io_uring.c b/fs/io_uring.c index 92546f90defd..0680fa385353 100644 --- a/fs/io_uring.c +++ b/fs/io_uring.c @@ -1181,14 +1181,10 @@ static void __io_commit_cqring(struct io_ring_ctx *ctx) } } -/* - * Returns true if we need to defer file table putting. This can only happen - * from the error path with REQ_F_COMP_LOCKED set.
- */ -static bool io_req_clean_work(struct io_kiocb *req) +static void io_req_clean_work(struct io_kiocb *req) { if (!(req->flags & REQ_F_WORK_INITIALIZED)) - return false; + return; req->flags &= ~REQ_F_WORK_INITIALIZED; @@ -1207,9 +1203,6 @@ static bool io_req_clean_work(struct io_kiocb *req) if (req->work.fs) { struct fs_struct *fs = req->work.fs; - if (req->flags & REQ_F_COMP_LOCKED) - return true; - spin_lock(&req->work.fs->lock); if (--fs->users) fs = NULL; @@ -1218,8 +1211,6 @@ static bool io_req_clean_work(struct io_kiocb *req) free_fs_struct(fs); req->work.fs = NULL; } - - return false; } static void io_prep_async_work(struct io_kiocb *req) @@ -1699,7 +1690,7 @@ static inline void io_put_file(struct io_kiocb *req, struct file *file, fput(file); } -static bool io_dismantle_req(struct io_kiocb *req) +static void io_dismantle_req(struct io_kiocb *req) { io_clean_op(req); @@ -1708,7 +1699,7 @@ static bool io_dismantle_req(struct io_kiocb *req) if (req->file) io_put_file(req, req->file, (req->flags & REQ_F_FIXED_FILE)); - return io_req_clean_work(req); + io_req_clean_work(req); } static void __io_free_req_finish(struct io_kiocb *req) @@ -1731,21 +1722,15 @@ static void __io_free_req_finish(struct io_kiocb *req) static void io_req_task_file_table_put(struct callback_head *cb) { struct io_kiocb *req = container_of(cb, struct io_kiocb, task_work); - struct fs_struct *fs = req->work.fs; - spin_lock(&req->work.fs->lock); - if (--fs->users) - fs = NULL; - spin_unlock(&req->work.fs->lock); - if (fs) - free_fs_struct(fs); - req->work.fs = NULL; + io_dismantle_req(req); __io_free_req_finish(req); } static void __io_free_req(struct io_kiocb *req) { - if (!io_dismantle_req(req)) { + if (!(req->flags & REQ_F_COMP_LOCKED)) { + io_dismantle_req(req); __io_free_req_finish(req); } else { int ret; @@ -2057,7 +2042,7 @@ static void io_req_free_batch(struct req_batch *rb, struct io_kiocb *req) } rb->task_refs++; - WARN_ON_ONCE(io_dismantle_req(req)); + io_dismantle_req(req); 
rb->reqs[rb->to_free++] = req; if (unlikely(rb->to_free == ARRAY_SIZE(rb->reqs))) __io_req_free_batch_flush(req->ctx, rb); From 216578e55ac932cf5e348d9e65d8e129fc9e34cc Mon Sep 17 00:00:00 2001 From: Pavel Begunkov Date: Tue, 13 Oct 2020 09:44:00 +0100 Subject: [PATCH 219/243] io_uring: fix REQ_F_COMP_LOCKED by killing it REQ_F_COMP_LOCKED is used and implemented in a buggy way. The problem is that the flag is set before io_put_req() but not cleared after, and if that wasn't the final reference, the request will be freed with the flag set from some other context, which may not hold a spinlock. That means possible races with removing linked timeouts and unsynchronised completion (e.g. access to CQ). Instead of fixing REQ_F_COMP_LOCKED, kill the flag and use task_work_add() to move such requests to a fresh context to free from it, as was done with __io_free_req_finish(). Signed-off-by: Pavel Begunkov Signed-off-by: Jens Axboe --- fs/io_uring.c | 149 +++++++++++++++++++++++--------------------------- 1 file changed, 69 insertions(+), 80 deletions(-) diff --git a/fs/io_uring.c b/fs/io_uring.c index 0680fa385353..641d869d96ee 100644 --- a/fs/io_uring.c +++ b/fs/io_uring.c @@ -574,7 +574,6 @@ enum { REQ_F_NOWAIT_BIT, REQ_F_LINK_TIMEOUT_BIT, REQ_F_ISREG_BIT, - REQ_F_COMP_LOCKED_BIT, REQ_F_NEED_CLEANUP_BIT, REQ_F_POLLED_BIT, REQ_F_BUFFER_SELECTED_BIT, @@ -613,8 +612,6 @@ enum { REQ_F_LINK_TIMEOUT = BIT(REQ_F_LINK_TIMEOUT_BIT), /* regular file */ REQ_F_ISREG = BIT(REQ_F_ISREG_BIT), - /* completion under lock */ - REQ_F_COMP_LOCKED = BIT(REQ_F_COMP_LOCKED_BIT), /* needs cleanup */ REQ_F_NEED_CLEANUP = BIT(REQ_F_NEED_CLEANUP_BIT), /* already went through poll handler */ @@ -963,8 +960,8 @@ static void __io_complete_rw(struct io_kiocb *req, long res, long res2, struct io_comp_state *cs); static void io_cqring_fill_event(struct io_kiocb *req, long res); static void io_put_req(struct io_kiocb *req); +static void io_put_req_deferred(struct io_kiocb *req, int nr); static void 
io_double_put_req(struct io_kiocb *req); -static void __io_double_put_req(struct io_kiocb *req); static struct io_kiocb *io_prep_linked_timeout(struct io_kiocb *req); static void __io_queue_linked_timeout(struct io_kiocb *req); static void io_queue_linked_timeout(struct io_kiocb *req); @@ -1316,9 +1313,8 @@ static void io_kill_timeout(struct io_kiocb *req) atomic_set(&req->ctx->cq_timeouts, atomic_read(&req->ctx->cq_timeouts) + 1); list_del_init(&req->timeout.list); - req->flags |= REQ_F_COMP_LOCKED; io_cqring_fill_event(req, 0); - io_put_req(req); + io_put_req_deferred(req, 1); } } @@ -1369,8 +1365,7 @@ static void __io_queue_deferred(struct io_ring_ctx *ctx) if (link) { __io_queue_linked_timeout(link); /* drop submission reference */ - link->flags |= REQ_F_COMP_LOCKED; - io_put_req(link); + io_put_req_deferred(link, 1); } kfree(de); } while (!list_empty(&ctx->defer_list)); @@ -1597,13 +1592,19 @@ static void io_submit_flush_completions(struct io_comp_state *cs) req = list_first_entry(&cs->list, struct io_kiocb, compl.list); list_del(&req->compl.list); __io_cqring_fill_event(req, req->result, req->compl.cflags); - if (!(req->flags & REQ_F_LINK_HEAD)) { - req->flags |= REQ_F_COMP_LOCKED; - io_put_req(req); - } else { + + /* + * io_free_req() doesn't care about completion_lock unless one + * of these flags is set. 
REQ_F_WORK_INITIALIZED is in the list + * because of a potential deadlock with req->work.fs->lock + */ + if (req->flags & (REQ_F_FAIL_LINK|REQ_F_LINK_TIMEOUT + |REQ_F_WORK_INITIALIZED)) { spin_unlock_irq(&ctx->completion_lock); io_put_req(req); spin_lock_irq(&ctx->completion_lock); + } else { + io_put_req(req); } } io_commit_cqring(ctx); @@ -1702,10 +1703,14 @@ static void io_dismantle_req(struct io_kiocb *req) io_req_clean_work(req); } -static void __io_free_req_finish(struct io_kiocb *req) +static void __io_free_req(struct io_kiocb *req) { - struct io_uring_task *tctx = req->task->io_uring; - struct io_ring_ctx *ctx = req->ctx; + struct io_uring_task *tctx; + struct io_ring_ctx *ctx; + + io_dismantle_req(req); + tctx = req->task->io_uring; + ctx = req->ctx; atomic_long_inc(&tctx->req_complete); if (tctx->in_idle) @@ -1719,33 +1724,6 @@ static void __io_free_req_finish(struct io_kiocb *req) percpu_ref_put(&ctx->refs); } -static void io_req_task_file_table_put(struct callback_head *cb) -{ - struct io_kiocb *req = container_of(cb, struct io_kiocb, task_work); - - io_dismantle_req(req); - __io_free_req_finish(req); -} - -static void __io_free_req(struct io_kiocb *req) -{ - if (!(req->flags & REQ_F_COMP_LOCKED)) { - io_dismantle_req(req); - __io_free_req_finish(req); - } else { - int ret; - - init_task_work(&req->task_work, io_req_task_file_table_put); - ret = task_work_add(req->task, &req->task_work, TWA_RESUME); - if (unlikely(ret)) { - struct task_struct *tsk; - - tsk = io_wq_get_task(req->ctx->io_wq); - task_work_add(tsk, &req->task_work, 0); - } - } -} - static bool io_link_cancel_timeout(struct io_kiocb *req) { struct io_timeout_data *io = req->async_data; @@ -1754,11 +1732,10 @@ static bool io_link_cancel_timeout(struct io_kiocb *req) ret = hrtimer_try_to_cancel(&io->timer); if (ret != -1) { - req->flags |= REQ_F_COMP_LOCKED; io_cqring_fill_event(req, -ECANCELED); io_commit_cqring(ctx); req->flags &= ~REQ_F_LINK_HEAD; - io_put_req(req); + 
io_put_req_deferred(req, 1); return true; } @@ -1785,17 +1762,12 @@ static bool __io_kill_linked_timeout(struct io_kiocb *req) static void io_kill_linked_timeout(struct io_kiocb *req) { struct io_ring_ctx *ctx = req->ctx; + unsigned long flags; bool wake_ev; - if (!(req->flags & REQ_F_COMP_LOCKED)) { - unsigned long flags; - - spin_lock_irqsave(&ctx->completion_lock, flags); - wake_ev = __io_kill_linked_timeout(req); - spin_unlock_irqrestore(&ctx->completion_lock, flags); - } else { - wake_ev = __io_kill_linked_timeout(req); - } + spin_lock_irqsave(&ctx->completion_lock, flags); + wake_ev = __io_kill_linked_timeout(req); + spin_unlock_irqrestore(&ctx->completion_lock, flags); if (wake_ev) io_cqring_ev_posted(ctx); @@ -1835,27 +1807,29 @@ static void __io_fail_links(struct io_kiocb *req) trace_io_uring_fail_link(req, link); io_cqring_fill_event(link, -ECANCELED); - link->flags |= REQ_F_COMP_LOCKED; - __io_double_put_req(link); + + /* + * It's ok to free under spinlock as they're not linked anymore, + * but avoid REQ_F_WORK_INITIALIZED because it may deadlock on + * work.fs->lock. 
+ */ + if (link->flags & REQ_F_WORK_INITIALIZED) + io_put_req_deferred(link, 2); + else + io_double_put_req(link); } io_commit_cqring(ctx); - io_cqring_ev_posted(ctx); } static void io_fail_links(struct io_kiocb *req) { struct io_ring_ctx *ctx = req->ctx; + unsigned long flags; - if (!(req->flags & REQ_F_COMP_LOCKED)) { - unsigned long flags; - - spin_lock_irqsave(&ctx->completion_lock, flags); - __io_fail_links(req); - spin_unlock_irqrestore(&ctx->completion_lock, flags); - } else { - __io_fail_links(req); - } + spin_lock_irqsave(&ctx->completion_lock, flags); + __io_fail_links(req); + spin_unlock_irqrestore(&ctx->completion_lock, flags); io_cqring_ev_posted(ctx); } @@ -2069,6 +2043,34 @@ static void io_put_req(struct io_kiocb *req) io_free_req(req); } +static void io_put_req_deferred_cb(struct callback_head *cb) +{ + struct io_kiocb *req = container_of(cb, struct io_kiocb, task_work); + + io_free_req(req); +} + +static void io_free_req_deferred(struct io_kiocb *req) +{ + int ret; + + init_task_work(&req->task_work, io_put_req_deferred_cb); + ret = io_req_task_work_add(req, true); + if (unlikely(ret)) { + struct task_struct *tsk; + + tsk = io_wq_get_task(req->ctx->io_wq); + task_work_add(tsk, &req->task_work, 0); + wake_up_process(tsk); + } +} + +static inline void io_put_req_deferred(struct io_kiocb *req, int refs) +{ + if (refcount_sub_and_test(refs, &req->refs)) + io_free_req_deferred(req); +} + static struct io_wq_work *io_steal_work(struct io_kiocb *req) { struct io_kiocb *nxt; @@ -2085,17 +2087,6 @@ static struct io_wq_work *io_steal_work(struct io_kiocb *req) return nxt ? &nxt->work : NULL; } -/* - * Must only be used if we don't need to care about links, usually from - * within the completion handling itself. 
- */ -static void __io_double_put_req(struct io_kiocb *req) -{ - /* drop both submit and complete references */ - if (refcount_sub_and_test(2, &req->refs)) - __io_free_req(req); -} - static void io_double_put_req(struct io_kiocb *req) { /* drop both submit and complete references */ @@ -5127,9 +5118,8 @@ static bool io_poll_remove_one(struct io_kiocb *req) if (do_complete) { io_cqring_fill_event(req, -ECANCELED); io_commit_cqring(req->ctx); - req->flags |= REQ_F_COMP_LOCKED; req_set_fail_links(req); - io_put_req(req); + io_put_req_deferred(req, 1); } return do_complete; @@ -5311,9 +5301,8 @@ static int __io_timeout_cancel(struct io_kiocb *req) list_del_init(&req->timeout.list); req_set_fail_links(req); - req->flags |= REQ_F_COMP_LOCKED; io_cqring_fill_event(req, -ECANCELED); - io_put_req(req); + io_put_req_deferred(req, 1); return 0; } From 0918682be432b85ccd49285832221d9b65831ef5 Mon Sep 17 00:00:00 2001 From: Jens Axboe Date: Tue, 13 Oct 2020 15:01:40 -0600 Subject: [PATCH 220/243] Revert "io_uring: mark io_uring_fops/io_op_defs as __read_mostly" This reverts commit 738277adc81929b3e7c9b63fec6693868cc5f931. This change didn't make a lot of sense, and as Linus reports, it actually fails on clang: /tmp/io_uring-dd40c4.s:26476: Warning: ignoring changed section attributes for .data..read_mostly The arrays are already marked const so, by definition, they are not just read-mostly, they are read-only. 
Reported-by: Linus Torvalds Signed-off-by: Jens Axboe --- fs/io_uring.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/fs/io_uring.c b/fs/io_uring.c index 641d869d96ee..94a66a6d1cba 100644 --- a/fs/io_uring.c +++ b/fs/io_uring.c @@ -760,7 +760,7 @@ struct io_op_def { unsigned short async_size; }; -static const struct io_op_def io_op_defs[] __read_mostly = { +static const struct io_op_def io_op_defs[] = { [IORING_OP_NOP] = {}, [IORING_OP_READV] = { .needs_mm = 1, @@ -983,7 +983,7 @@ static int io_setup_async_rw(struct io_kiocb *req, const struct iovec *iovec, static struct kmem_cache *req_cachep; -static const struct file_operations io_uring_fops __read_mostly; +static const struct file_operations io_uring_fops; struct sock *io_uring_get_socket(struct file *file) { From 55cbc2564ab2fd555ec0fc39311a9cfb811d7da5 Mon Sep 17 00:00:00 2001 From: Jens Axboe Date: Wed, 14 Oct 2020 07:35:57 -0600 Subject: [PATCH 221/243] io_uring: fix error path cleanup in io_sqe_files_register() syzbot reports the following crash: general protection fault, probably for non-canonical address 0xdffffc0000000000: 0000 [#1] PREEMPT SMP KASAN KASAN: null-ptr-deref in range [0x0000000000000000-0x0000000000000007] CPU: 1 PID: 8927 Comm: syz-executor.3 Not tainted 5.9.0-syzkaller #0 Hardware name: Google Google Compute Engine/Google Compute Engine, BIOS Google 01/01/2011 RIP: 0010:io_file_from_index fs/io_uring.c:5963 [inline] RIP: 0010:io_sqe_files_register fs/io_uring.c:7369 [inline] RIP: 0010:__io_uring_register fs/io_uring.c:9463 [inline] RIP: 0010:__do_sys_io_uring_register+0x2fd2/0x3ee0 fs/io_uring.c:9553 Code: ec 03 49 c1 ee 03 49 01 ec 49 01 ee e8 57 61 9c ff 41 80 3c 24 00 0f 85 9b 09 00 00 4d 8b af b8 01 00 00 4c 89 e8 48 c1 e8 03 <80> 3c 28 00 0f 85 76 09 00 00 49 8b 55 00 89 d8 c1 f8 09 48 98 4c RSP: 0018:ffffc90009137d68 EFLAGS: 00010246 RAX: 0000000000000000 RBX: 0000000000000000 RCX: ffffc9000ef2a000 RDX: 0000000000040000 RSI: ffffffff81d81dd9 RDI: 
0000000000000005 RBP: dffffc0000000000 R08: 0000000000000001 R09: 0000000000000000 R10: 0000000000000000 R11: 0000000000000000 R12: ffffed1012882a37 R13: 0000000000000000 R14: ffffed1012882a38 R15: ffff888094415000 FS: 00007f4266f3c700(0000) GS:ffff8880ae500000(0000) knlGS:0000000000000000 CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 CR2: 000000000118c000 CR3: 000000008e57d000 CR4: 00000000001506e0 DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000 DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400 Call Trace: do_syscall_64+0x2d/0x70 arch/x86/entry/common.c:46 entry_SYSCALL_64_after_hwframe+0x44/0xa9 RIP: 0033:0x45de59 Code: 0d b4 fb ff c3 66 2e 0f 1f 84 00 00 00 00 00 66 90 48 89 f8 48 89 f7 48 89 d6 48 89 ca 4d 89 c2 4d 89 c8 4c 8b 4c 24 08 0f 05 <48> 3d 01 f0 ff ff 0f 83 db b3 fb ff c3 66 2e 0f 1f 84 00 00 00 00 RSP: 002b:00007f4266f3bc78 EFLAGS: 00000246 ORIG_RAX: 00000000000001ab RAX: ffffffffffffffda RBX: 00000000000083c0 RCX: 000000000045de59 RDX: 0000000020000280 RSI: 0000000000000002 RDI: 0000000000000005 RBP: 000000000118bf68 R08: 0000000000000000 R09: 0000000000000000 R10: 40000000000000a1 R11: 0000000000000246 R12: 000000000118bf2c R13: 00007fff2fa4f12f R14: 00007f4266f3c9c0 R15: 000000000118bf2c Modules linked in: ---[ end trace 2a40a195e2d5e6e6 ]--- RIP: 0010:io_file_from_index fs/io_uring.c:5963 [inline] RIP: 0010:io_sqe_files_register fs/io_uring.c:7369 [inline] RIP: 0010:__io_uring_register fs/io_uring.c:9463 [inline] RIP: 0010:__do_sys_io_uring_register+0x2fd2/0x3ee0 fs/io_uring.c:9553 Code: ec 03 49 c1 ee 03 49 01 ec 49 01 ee e8 57 61 9c ff 41 80 3c 24 00 0f 85 9b 09 00 00 4d 8b af b8 01 00 00 4c 89 e8 48 c1 e8 03 <80> 3c 28 00 0f 85 76 09 00 00 49 8b 55 00 89 d8 c1 f8 09 48 98 4c RSP: 0018:ffffc90009137d68 EFLAGS: 00010246 RAX: 0000000000000000 RBX: 0000000000000000 RCX: ffffc9000ef2a000 RDX: 0000000000040000 RSI: ffffffff81d81dd9 RDI: 0000000000000005 RBP: dffffc0000000000 R08: 0000000000000001 R09: 
0000000000000000 R10: 0000000000000000 R11: 0000000000000000 R12: ffffed1012882a37 R13: 0000000000000000 R14: ffffed1012882a38 R15: ffff888094415000 FS: 00007f4266f3c700(0000) GS:ffff8880ae400000(0000) knlGS:0000000000000000 CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 CR2: 000000000074a918 CR3: 000000008e57d000 CR4: 00000000001506f0 DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000 DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400 which is a copy of fget failure condition jumping to cleanup, but the cleanup requires ctx->file_data to be assigned. Assign it when setup, and ensure that we clear it again for the error path exit. Fixes: 5398ae698525 ("io_uring: clean file_data access in files_register") Reported-by: syzbot+f4ebcc98223dafd8991e@syzkaller.appspotmail.com Signed-off-by: Jens Axboe --- fs/io_uring.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/fs/io_uring.c b/fs/io_uring.c index 94a66a6d1cba..21c6ce15b751 100644 --- a/fs/io_uring.c +++ b/fs/io_uring.c @@ -7289,6 +7289,7 @@ static int io_sqe_files_register(struct io_ring_ctx *ctx, void __user *arg, if (io_sqe_alloc_file_tables(file_data, nr_tables, nr_args)) goto out_ref; + ctx->file_data = file_data; for (i = 0; i < nr_args; i++, ctx->nr_user_files++) { struct fixed_file_table *table; @@ -7323,7 +7324,6 @@ static int io_sqe_files_register(struct io_ring_ctx *ctx, void __user *arg, table->files[index] = file; } - ctx->file_data = file_data; ret = io_sqe_files_scm(ctx); if (ret) { io_sqe_files_unregister(ctx); @@ -7356,6 +7356,7 @@ static int io_sqe_files_register(struct io_ring_ctx *ctx, void __user *arg, out_free: kfree(file_data->table); kfree(file_data); + ctx->file_data = NULL; return ret; } From a8b595b22d31f83b715511f59012f152a269d83b Mon Sep 17 00:00:00 2001 From: Jens Axboe Date: Thu, 15 Oct 2020 10:13:07 -0600 Subject: [PATCH 222/243] io-wq: assign NUMA node locality if appropriate There was an assumption that kthread_create_on_node() 
would properly set NUMA affinities in terms of CPUs allowed, but it doesn't. Make sure we do this when creating an io-wq context on NUMA. Cc: stable@vger.kernel.org Stefan Metzmacher Signed-off-by: Jens Axboe --- fs/io-wq.c | 1 + 1 file changed, 1 insertion(+) diff --git a/fs/io-wq.c b/fs/io-wq.c index 0a182f1333e8..149fd2f0927e 100644 --- a/fs/io-wq.c +++ b/fs/io-wq.c @@ -676,6 +676,7 @@ static bool create_io_worker(struct io_wq *wq, struct io_wqe *wqe, int index) kfree(worker); return false; } + kthread_bind_mask(worker->task, cpumask_of_node(wqe->node)); raw_spin_lock_irq(&wqe->lock); hlist_nulls_add_head_rcu(&worker->nulls_node, &wqe->free_list); From 0f203765880c4416675726be558b65da4a7604e2 Mon Sep 17 00:00:00 2001 From: Jens Axboe Date: Wed, 14 Oct 2020 09:23:55 -0600 Subject: [PATCH 223/243] io_uring: pass required context in as flags We have a number of bits that decide what context to inherit. Set up io-wq flags for these instead. This is in preparation for always having the various members set, but not always needing them for all requests. No intended functional changes in this patch. 
Signed-off-by: Jens Axboe --- fs/io-wq.c | 10 +++-- fs/io-wq.h | 6 +++ fs/io_uring.c | 100 ++++++++++++++++++++------------------------------ 3 files changed, 52 insertions(+), 64 deletions(-) diff --git a/fs/io-wq.c b/fs/io-wq.c index 149fd2f0927e..e636898f8a1f 100644 --- a/fs/io-wq.c +++ b/fs/io-wq.c @@ -448,6 +448,8 @@ static inline void io_wq_switch_blkcg(struct io_worker *worker, struct io_wq_work *work) { #ifdef CONFIG_BLK_CGROUP + if (!(work->flags & IO_WQ_WORK_BLKCG)) + return; if (work->blkcg_css != worker->blkcg_css) { kthread_associate_blkcg(work->blkcg_css); worker->blkcg_css = work->blkcg_css; @@ -470,17 +472,17 @@ static void io_wq_switch_creds(struct io_worker *worker, static void io_impersonate_work(struct io_worker *worker, struct io_wq_work *work) { - if (work->files && current->files != work->files) { + if ((work->flags & IO_WQ_WORK_FILES) && current->files != work->files) { task_lock(current); current->files = work->files; current->nsproxy = work->nsproxy; task_unlock(current); } - if (work->fs && current->fs != work->fs) + if ((work->flags & IO_WQ_WORK_FS) && current->fs != work->fs) current->fs = work->fs; - if (work->mm != worker->mm) + if ((work->flags & IO_WQ_WORK_MM) && work->mm != worker->mm) io_wq_switch_mm(worker, work); - if (worker->cur_creds != work->creds) + if ((work->flags & IO_WQ_WORK_CREDS) && worker->cur_creds != work->creds) io_wq_switch_creds(worker, work); current->signal->rlim[RLIMIT_FSIZE].rlim_cur = work->fsize; io_wq_switch_blkcg(worker, work); diff --git a/fs/io-wq.h b/fs/io-wq.h index 84bcf6a85523..31a29023605a 100644 --- a/fs/io-wq.h +++ b/fs/io-wq.h @@ -10,6 +10,12 @@ enum { IO_WQ_WORK_NO_CANCEL = 8, IO_WQ_WORK_CONCURRENT = 16, + IO_WQ_WORK_FILES = 32, + IO_WQ_WORK_FS = 64, + IO_WQ_WORK_MM = 128, + IO_WQ_WORK_CREDS = 256, + IO_WQ_WORK_BLKCG = 512, + IO_WQ_HASH_SHIFT = 24, /* upper 8 bits are used for hash key */ }; diff --git a/fs/io_uring.c b/fs/io_uring.c index 21c6ce15b751..04a4d8c44718 100644 --- a/fs/io_uring.c 
+++ b/fs/io_uring.c @@ -729,8 +729,6 @@ struct io_submit_state { }; struct io_op_def { - /* needs current->mm setup, does mm access */ - unsigned needs_mm : 1; /* needs req->file assigned */ unsigned needs_file : 1; /* don't fail if file grab fails */ @@ -741,10 +739,6 @@ struct io_op_def { unsigned unbound_nonreg_file : 1; /* opcode is not supported by this kernel */ unsigned not_supported : 1; - /* needs file table */ - unsigned file_table : 1; - /* needs ->fs */ - unsigned needs_fs : 1; /* set if opcode supports polled "wait" */ unsigned pollin : 1; unsigned pollout : 1; @@ -754,45 +748,42 @@ struct io_op_def { unsigned needs_fsize : 1; /* must always have async data allocated */ unsigned needs_async_data : 1; - /* needs blkcg context, issues async io potentially */ - unsigned needs_blkcg : 1; /* size of async data needed, if any */ unsigned short async_size; + unsigned work_flags; }; static const struct io_op_def io_op_defs[] = { [IORING_OP_NOP] = {}, [IORING_OP_READV] = { - .needs_mm = 1, .needs_file = 1, .unbound_nonreg_file = 1, .pollin = 1, .buffer_select = 1, .needs_async_data = 1, - .needs_blkcg = 1, .async_size = sizeof(struct io_async_rw), + .work_flags = IO_WQ_WORK_MM | IO_WQ_WORK_BLKCG, }, [IORING_OP_WRITEV] = { - .needs_mm = 1, .needs_file = 1, .hash_reg_file = 1, .unbound_nonreg_file = 1, .pollout = 1, .needs_fsize = 1, .needs_async_data = 1, - .needs_blkcg = 1, .async_size = sizeof(struct io_async_rw), + .work_flags = IO_WQ_WORK_MM | IO_WQ_WORK_BLKCG, }, [IORING_OP_FSYNC] = { .needs_file = 1, - .needs_blkcg = 1, + .work_flags = IO_WQ_WORK_BLKCG, }, [IORING_OP_READ_FIXED] = { .needs_file = 1, .unbound_nonreg_file = 1, .pollin = 1, - .needs_blkcg = 1, .async_size = sizeof(struct io_async_rw), + .work_flags = IO_WQ_WORK_BLKCG, }, [IORING_OP_WRITE_FIXED] = { .needs_file = 1, @@ -800,8 +791,8 @@ static const struct io_op_def io_op_defs[] = { .unbound_nonreg_file = 1, .pollout = 1, .needs_fsize = 1, - .needs_blkcg = 1, .async_size = sizeof(struct 
io_async_rw), + .work_flags = IO_WQ_WORK_BLKCG, }, [IORING_OP_POLL_ADD] = { .needs_file = 1, @@ -810,137 +801,123 @@ static const struct io_op_def io_op_defs[] = { [IORING_OP_POLL_REMOVE] = {}, [IORING_OP_SYNC_FILE_RANGE] = { .needs_file = 1, - .needs_blkcg = 1, + .work_flags = IO_WQ_WORK_BLKCG, }, [IORING_OP_SENDMSG] = { - .needs_mm = 1, .needs_file = 1, .unbound_nonreg_file = 1, - .needs_fs = 1, .pollout = 1, .needs_async_data = 1, - .needs_blkcg = 1, .async_size = sizeof(struct io_async_msghdr), + .work_flags = IO_WQ_WORK_MM | IO_WQ_WORK_BLKCG | + IO_WQ_WORK_FS, }, [IORING_OP_RECVMSG] = { - .needs_mm = 1, .needs_file = 1, .unbound_nonreg_file = 1, - .needs_fs = 1, .pollin = 1, .buffer_select = 1, .needs_async_data = 1, - .needs_blkcg = 1, .async_size = sizeof(struct io_async_msghdr), + .work_flags = IO_WQ_WORK_MM | IO_WQ_WORK_BLKCG | + IO_WQ_WORK_FS, }, [IORING_OP_TIMEOUT] = { - .needs_mm = 1, .needs_async_data = 1, .async_size = sizeof(struct io_timeout_data), + .work_flags = IO_WQ_WORK_MM, }, [IORING_OP_TIMEOUT_REMOVE] = {}, [IORING_OP_ACCEPT] = { - .needs_mm = 1, .needs_file = 1, .unbound_nonreg_file = 1, - .file_table = 1, .pollin = 1, + .work_flags = IO_WQ_WORK_MM | IO_WQ_WORK_FILES, }, [IORING_OP_ASYNC_CANCEL] = {}, [IORING_OP_LINK_TIMEOUT] = { - .needs_mm = 1, .needs_async_data = 1, .async_size = sizeof(struct io_timeout_data), + .work_flags = IO_WQ_WORK_MM, }, [IORING_OP_CONNECT] = { - .needs_mm = 1, .needs_file = 1, .unbound_nonreg_file = 1, .pollout = 1, .needs_async_data = 1, .async_size = sizeof(struct io_async_connect), + .work_flags = IO_WQ_WORK_MM, }, [IORING_OP_FALLOCATE] = { .needs_file = 1, .needs_fsize = 1, - .needs_blkcg = 1, + .work_flags = IO_WQ_WORK_BLKCG, }, [IORING_OP_OPENAT] = { - .file_table = 1, - .needs_fs = 1, - .needs_blkcg = 1, + .work_flags = IO_WQ_WORK_FILES | IO_WQ_WORK_BLKCG | + IO_WQ_WORK_FS, }, [IORING_OP_CLOSE] = { .needs_file = 1, .needs_file_no_error = 1, - .file_table = 1, - .needs_blkcg = 1, + .work_flags = 
IO_WQ_WORK_FILES | IO_WQ_WORK_BLKCG, }, [IORING_OP_FILES_UPDATE] = { - .needs_mm = 1, - .file_table = 1, + .work_flags = IO_WQ_WORK_FILES | IO_WQ_WORK_MM, }, [IORING_OP_STATX] = { - .needs_mm = 1, - .needs_fs = 1, - .file_table = 1, - .needs_blkcg = 1, + .work_flags = IO_WQ_WORK_FILES | IO_WQ_WORK_MM | + IO_WQ_WORK_FS | IO_WQ_WORK_BLKCG, }, [IORING_OP_READ] = { - .needs_mm = 1, .needs_file = 1, .unbound_nonreg_file = 1, .pollin = 1, .buffer_select = 1, - .needs_blkcg = 1, .async_size = sizeof(struct io_async_rw), + .work_flags = IO_WQ_WORK_MM | IO_WQ_WORK_BLKCG, }, [IORING_OP_WRITE] = { - .needs_mm = 1, .needs_file = 1, .unbound_nonreg_file = 1, .pollout = 1, .needs_fsize = 1, - .needs_blkcg = 1, .async_size = sizeof(struct io_async_rw), + .work_flags = IO_WQ_WORK_MM | IO_WQ_WORK_BLKCG, }, [IORING_OP_FADVISE] = { .needs_file = 1, - .needs_blkcg = 1, + .work_flags = IO_WQ_WORK_BLKCG, }, [IORING_OP_MADVISE] = { - .needs_mm = 1, - .needs_blkcg = 1, + .work_flags = IO_WQ_WORK_MM | IO_WQ_WORK_BLKCG, }, [IORING_OP_SEND] = { - .needs_mm = 1, .needs_file = 1, .unbound_nonreg_file = 1, .pollout = 1, - .needs_blkcg = 1, + .work_flags = IO_WQ_WORK_MM | IO_WQ_WORK_BLKCG, }, [IORING_OP_RECV] = { - .needs_mm = 1, .needs_file = 1, .unbound_nonreg_file = 1, .pollin = 1, .buffer_select = 1, - .needs_blkcg = 1, + .work_flags = IO_WQ_WORK_MM | IO_WQ_WORK_BLKCG, }, [IORING_OP_OPENAT2] = { - .file_table = 1, - .needs_fs = 1, - .needs_blkcg = 1, + .work_flags = IO_WQ_WORK_FILES | IO_WQ_WORK_FS | + IO_WQ_WORK_BLKCG, }, [IORING_OP_EPOLL_CTL] = { .unbound_nonreg_file = 1, - .file_table = 1, + .work_flags = IO_WQ_WORK_FILES, }, [IORING_OP_SPLICE] = { .needs_file = 1, .hash_reg_file = 1, .unbound_nonreg_file = 1, - .needs_blkcg = 1, + .work_flags = IO_WQ_WORK_BLKCG, }, [IORING_OP_PROVIDE_BUFFERS] = {}, [IORING_OP_REMOVE_BUFFERS] = {}, @@ -1031,7 +1008,7 @@ static int __io_sq_thread_acquire_mm(struct io_ring_ctx *ctx) static int io_sq_thread_acquire_mm(struct io_ring_ctx *ctx, struct io_kiocb 
*req) { - if (!io_op_defs[req->opcode].needs_mm) + if (!(io_op_defs[req->opcode].work_flags & IO_WQ_WORK_MM)) return 0; return __io_sq_thread_acquire_mm(ctx); } @@ -1224,7 +1201,8 @@ static void io_prep_async_work(struct io_kiocb *req) if (def->unbound_nonreg_file) req->work.flags |= IO_WQ_WORK_UNBOUND; } - if (!req->work.files && io_op_defs[req->opcode].file_table && + if (!req->work.files && + (io_op_defs[req->opcode].work_flags & IO_WQ_WORK_FILES) && !(req->flags & REQ_F_NO_FILE_TABLE)) { req->work.files = get_files_struct(current); get_nsproxy(current->nsproxy); @@ -1235,12 +1213,12 @@ static void io_prep_async_work(struct io_kiocb *req) list_add(&req->inflight_entry, &ctx->inflight_list); spin_unlock_irq(&ctx->inflight_lock); } - if (!req->work.mm && def->needs_mm) { + if (!req->work.mm && (def->work_flags & IO_WQ_WORK_MM)) { mmgrab(current->mm); req->work.mm = current->mm; } #ifdef CONFIG_BLK_CGROUP - if (!req->work.blkcg_css && def->needs_blkcg) { + if (!req->work.blkcg_css && (def->work_flags & IO_WQ_WORK_BLKCG)) { rcu_read_lock(); req->work.blkcg_css = blkcg_css(); /* @@ -1254,7 +1232,7 @@ static void io_prep_async_work(struct io_kiocb *req) #endif if (!req->work.creds) req->work.creds = get_current_cred(); - if (!req->work.fs && def->needs_fs) { + if (!req->work.fs && (def->work_flags & IO_WQ_WORK_FS)) { spin_lock(&current->fs->lock); if (!current->fs->in_exec) { req->work.fs = current->fs; @@ -1268,6 +1246,8 @@ static void io_prep_async_work(struct io_kiocb *req) req->work.fsize = rlimit(RLIMIT_FSIZE); else req->work.fsize = RLIM_INFINITY; + + req->work.flags |= def->work_flags; } static void io_prep_async_link(struct io_kiocb *req) From dfead8a8e2c494b947480bac90a6f9792f08bc12 Mon Sep 17 00:00:00 2001 From: Jens Axboe Date: Wed, 14 Oct 2020 10:12:37 -0600 Subject: [PATCH 224/243] io_uring: rely solely on work flags to determine personality. We solely rely on work->flags now, so use that for proper checking and clearing/dropping of various identity items.
Signed-off-by: Jens Axboe --- fs/io-wq.c | 4 ---- fs/io_uring.c | 55 ++++++++++++++++++++++++++++++++------------------- 2 files changed, 35 insertions(+), 24 deletions(-) diff --git a/fs/io-wq.c b/fs/io-wq.c index e636898f8a1f..b7d8e544a804 100644 --- a/fs/io-wq.c +++ b/fs/io-wq.c @@ -429,14 +429,10 @@ static void io_wq_switch_mm(struct io_worker *worker, struct io_wq_work *work) mmput(worker->mm); worker->mm = NULL; } - if (!work->mm) - return; if (mmget_not_zero(work->mm)) { kthread_use_mm(work->mm); worker->mm = work->mm; - /* hang on to this mm */ - work->mm = NULL; return; } diff --git a/fs/io_uring.c b/fs/io_uring.c index 04a4d8c44718..e92ed22ef924 100644 --- a/fs/io_uring.c +++ b/fs/io_uring.c @@ -1162,19 +1162,21 @@ static void io_req_clean_work(struct io_kiocb *req) req->flags &= ~REQ_F_WORK_INITIALIZED; - if (req->work.mm) { + if (req->work.flags & IO_WQ_WORK_MM) { mmdrop(req->work.mm); - req->work.mm = NULL; + req->work.flags &= ~IO_WQ_WORK_MM; } #ifdef CONFIG_BLK_CGROUP - if (req->work.blkcg_css) + if (req->work.flags & IO_WQ_WORK_BLKCG) { css_put(req->work.blkcg_css); -#endif - if (req->work.creds) { - put_cred(req->work.creds); - req->work.creds = NULL; + req->work.flags &= ~IO_WQ_WORK_BLKCG; } - if (req->work.fs) { +#endif + if (req->work.flags & IO_WQ_WORK_CREDS) { + put_cred(req->work.creds); + req->work.flags &= ~IO_WQ_WORK_CREDS; + } + if (req->work.flags & IO_WQ_WORK_FS) { struct fs_struct *fs = req->work.fs; spin_lock(&req->work.fs->lock); @@ -1183,7 +1185,7 @@ static void io_req_clean_work(struct io_kiocb *req) spin_unlock(&req->work.fs->lock); if (fs) free_fs_struct(fs); - req->work.fs = NULL; + req->work.flags &= ~IO_WQ_WORK_FS; } } @@ -1201,7 +1203,7 @@ static void io_prep_async_work(struct io_kiocb *req) if (def->unbound_nonreg_file) req->work.flags |= IO_WQ_WORK_UNBOUND; } - if (!req->work.files && + if (!(req->work.flags & IO_WQ_WORK_FILES) && (io_op_defs[req->opcode].work_flags & IO_WQ_WORK_FILES) && !(req->flags & 
REQ_F_NO_FILE_TABLE)) { req->work.files = get_files_struct(current); @@ -1212,13 +1214,17 @@ static void io_prep_async_work(struct io_kiocb *req) spin_lock_irq(&ctx->inflight_lock); list_add(&req->inflight_entry, &ctx->inflight_list); spin_unlock_irq(&ctx->inflight_lock); + req->work.flags |= IO_WQ_WORK_FILES; } - if (!req->work.mm && (def->work_flags & IO_WQ_WORK_MM)) { + if (!(req->work.flags & IO_WQ_WORK_MM) && + (def->work_flags & IO_WQ_WORK_MM)) { mmgrab(current->mm); req->work.mm = current->mm; + req->work.flags |= IO_WQ_WORK_MM; } #ifdef CONFIG_BLK_CGROUP - if (!req->work.blkcg_css && (def->work_flags & IO_WQ_WORK_BLKCG)) { + if (!(req->work.flags & IO_WQ_WORK_BLKCG) && + (def->work_flags & IO_WQ_WORK_BLKCG)) { rcu_read_lock(); req->work.blkcg_css = blkcg_css(); /* @@ -1227,16 +1233,22 @@ static void io_prep_async_work(struct io_kiocb *req) */ if (!css_tryget_online(req->work.blkcg_css)) req->work.blkcg_css = NULL; + else + req->work.flags |= IO_WQ_WORK_BLKCG; rcu_read_unlock(); } #endif - if (!req->work.creds) + if (!(req->work.flags & IO_WQ_WORK_CREDS)) { req->work.creds = get_current_cred(); - if (!req->work.fs && (def->work_flags & IO_WQ_WORK_FS)) { + req->work.flags |= IO_WQ_WORK_CREDS; + } + if (!(req->work.flags & IO_WQ_WORK_FS) && + (def->work_flags & IO_WQ_WORK_FS)) { spin_lock(&current->fs->lock); if (!current->fs->in_exec) { req->work.fs = current->fs; req->work.fs->users++; + req->work.flags |= IO_WQ_WORK_FS; } else { req->work.flags |= IO_WQ_WORK_CANCEL; } @@ -1246,8 +1258,6 @@ static void io_prep_async_work(struct io_kiocb *req) req->work.fsize = rlimit(RLIMIT_FSIZE); else req->work.fsize = RLIM_INFINITY; - - req->work.flags |= def->work_flags; } static void io_prep_async_link(struct io_kiocb *req) @@ -1437,7 +1447,8 @@ static inline bool io_match_files(struct io_kiocb *req, { if (!files) return true; - if (req->flags & REQ_F_WORK_INITIALIZED) + if ((req->flags & REQ_F_WORK_INITIALIZED) && + (req->work.flags & IO_WQ_WORK_FILES)) return
req->work.files == files; return false; } @@ -5694,7 +5705,7 @@ static void io_req_drop_files(struct io_kiocb *req) req->flags &= ~REQ_F_INFLIGHT; put_files_struct(req->work.files); put_nsproxy(req->work.nsproxy); - req->work.files = NULL; + req->work.flags &= ~IO_WQ_WORK_FILES; } static void __io_clean_op(struct io_kiocb *req) @@ -6060,6 +6071,7 @@ static void __io_queue_sqe(struct io_kiocb *req, struct io_comp_state *cs) old_creds = NULL; /* restored original creds */ else old_creds = override_creds(req->work.creds); + req->work.flags |= IO_WQ_WORK_CREDS; } ret = io_issue_sqe(req, true, cs); @@ -6367,6 +6379,7 @@ static int io_init_req(struct io_ring_ctx *ctx, struct io_kiocb *req, if (unlikely(!req->work.creds)) return -EINVAL; get_cred(req->work.creds); + req->work.flags |= IO_WQ_WORK_CREDS; } /* same numerical values with corresponding REQ_F_*, safe to copy */ @@ -8234,7 +8247,8 @@ static bool io_wq_files_match(struct io_wq_work *work, void *data) { struct files_struct *files = data; - return !files || work->files == files; + return !files || ((work->flags & IO_WQ_WORK_FILES) && + work->files == files); } /* @@ -8389,7 +8403,8 @@ static bool io_uring_cancel_files(struct io_ring_ctx *ctx, spin_lock_irq(&ctx->inflight_lock); list_for_each_entry(req, &ctx->inflight_list, inflight_entry) { - if (files && req->work.files != files) + if (files && (req->work.flags & IO_WQ_WORK_FILES) && + req->work.files != files) continue; /* req is being completed, ignore */ if (!refcount_inc_not_zero(&req->refs)) From 98447d65b4a7a59f8ea37dc6e5d743247d9a7b01 Mon Sep 17 00:00:00 2001 From: Jens Axboe Date: Wed, 14 Oct 2020 10:48:51 -0600 Subject: [PATCH 225/243] io_uring: move io identity items into separate struct io-wq contains a pointer to the identity, which we just hold in io_kiocb for now. This is in preparation for putting this outside io_kiocb. The only exception is struct files_struct, which we'll need different rules for to avoid a circular dependency. 
No functional changes in this patch. Signed-off-by: Jens Axboe --- fs/io-wq.c | 34 +++++++++++----------- fs/io-wq.h | 12 ++------ fs/io_uring.c | 62 ++++++++++++++++++++-------------------- include/linux/io_uring.h | 13 ++++++++- 4 files changed, 64 insertions(+), 57 deletions(-) diff --git a/fs/io-wq.c b/fs/io-wq.c index b7d8e544a804..0c852b75384d 100644 --- a/fs/io-wq.c +++ b/fs/io-wq.c @@ -430,9 +430,9 @@ static void io_wq_switch_mm(struct io_worker *worker, struct io_wq_work *work) worker->mm = NULL; } - if (mmget_not_zero(work->mm)) { - kthread_use_mm(work->mm); - worker->mm = work->mm; + if (mmget_not_zero(work->identity->mm)) { + kthread_use_mm(work->identity->mm); + worker->mm = work->identity->mm; return; } @@ -446,9 +446,9 @@ static inline void io_wq_switch_blkcg(struct io_worker *worker, #ifdef CONFIG_BLK_CGROUP if (!(work->flags & IO_WQ_WORK_BLKCG)) return; - if (work->blkcg_css != worker->blkcg_css) { - kthread_associate_blkcg(work->blkcg_css); - worker->blkcg_css = work->blkcg_css; + if (work->identity->blkcg_css != worker->blkcg_css) { + kthread_associate_blkcg(work->identity->blkcg_css); + worker->blkcg_css = work->identity->blkcg_css; } #endif } @@ -456,9 +456,9 @@ static inline void io_wq_switch_blkcg(struct io_worker *worker, static void io_wq_switch_creds(struct io_worker *worker, struct io_wq_work *work) { - const struct cred *old_creds = override_creds(work->creds); + const struct cred *old_creds = override_creds(work->identity->creds); - worker->cur_creds = work->creds; + worker->cur_creds = work->identity->creds; if (worker->saved_creds) put_cred(old_creds); /* creds set by previous switch */ else @@ -468,19 +468,21 @@ static void io_wq_switch_creds(struct io_worker *worker, static void io_impersonate_work(struct io_worker *worker, struct io_wq_work *work) { - if ((work->flags & IO_WQ_WORK_FILES) && current->files != work->files) { + if ((work->flags & IO_WQ_WORK_FILES) && + current->files != work->identity->files) { task_lock(current); - 
current->files = work->files; - current->nsproxy = work->nsproxy; + current->files = work->identity->files; + current->nsproxy = work->identity->nsproxy; task_unlock(current); } - if ((work->flags & IO_WQ_WORK_FS) && current->fs != work->fs) - current->fs = work->fs; - if ((work->flags & IO_WQ_WORK_MM) && work->mm != worker->mm) + if ((work->flags & IO_WQ_WORK_FS) && current->fs != work->identity->fs) + current->fs = work->identity->fs; + if ((work->flags & IO_WQ_WORK_MM) && work->identity->mm != worker->mm) io_wq_switch_mm(worker, work); - if ((work->flags & IO_WQ_WORK_CREDS) && worker->cur_creds != work->creds) + if ((work->flags & IO_WQ_WORK_CREDS) && + worker->cur_creds != work->identity->creds) io_wq_switch_creds(worker, work); - current->signal->rlim[RLIMIT_FSIZE].rlim_cur = work->fsize; + current->signal->rlim[RLIMIT_FSIZE].rlim_cur = work->identity->fsize; io_wq_switch_blkcg(worker, work); } diff --git a/fs/io-wq.h b/fs/io-wq.h index 31a29023605a..be21c500c925 100644 --- a/fs/io-wq.h +++ b/fs/io-wq.h @@ -1,6 +1,8 @@ #ifndef INTERNAL_IO_WQ_H #define INTERNAL_IO_WQ_H +#include + struct io_wq; enum { @@ -91,15 +93,7 @@ static inline void wq_list_del(struct io_wq_work_list *list, struct io_wq_work { struct io_wq_work_node list; - struct files_struct *files; - struct mm_struct *mm; -#ifdef CONFIG_BLK_CGROUP - struct cgroup_subsys_state *blkcg_css; -#endif - const struct cred *creds; - struct nsproxy *nsproxy; - struct fs_struct *fs; - unsigned long fsize; + struct io_identity *identity; unsigned flags; }; diff --git a/fs/io_uring.c b/fs/io_uring.c index e92ed22ef924..bd6fd51302ed 100644 --- a/fs/io_uring.c +++ b/fs/io_uring.c @@ -689,6 +689,7 @@ struct io_kiocb { struct hlist_node hash_node; struct async_poll *apoll; struct io_wq_work work; + struct io_identity identity; }; struct io_defer_entry { @@ -1050,6 +1051,7 @@ static inline void io_req_init_async(struct io_kiocb *req) memset(&req->work, 0, sizeof(req->work)); req->flags |= REQ_F_WORK_INITIALIZED; + 
req->work.identity = &req->identity; } static inline bool io_async_submit(struct io_ring_ctx *ctx) @@ -1163,26 +1165,26 @@ static void io_req_clean_work(struct io_kiocb *req) req->flags &= ~REQ_F_WORK_INITIALIZED; if (req->work.flags & IO_WQ_WORK_MM) { - mmdrop(req->work.mm); + mmdrop(req->work.identity->mm); req->work.flags &= ~IO_WQ_WORK_MM; } #ifdef CONFIG_BLK_CGROUP if (req->work.flags & IO_WQ_WORK_BLKCG) { - css_put(req->work.blkcg_css); + css_put(req->work.identity->blkcg_css); req->work.flags &= ~IO_WQ_WORK_BLKCG; } #endif if (req->work.flags & IO_WQ_WORK_CREDS) { - put_cred(req->work.creds); + put_cred(req->work.identity->creds); req->work.flags &= ~IO_WQ_WORK_CREDS; } if (req->work.flags & IO_WQ_WORK_FS) { - struct fs_struct *fs = req->work.fs; + struct fs_struct *fs = req->work.identity->fs; - spin_lock(&req->work.fs->lock); + spin_lock(&req->work.identity->fs->lock); if (--fs->users) fs = NULL; - spin_unlock(&req->work.fs->lock); + spin_unlock(&req->work.identity->fs->lock); if (fs) free_fs_struct(fs); req->work.flags &= ~IO_WQ_WORK_FS; @@ -1206,9 +1208,9 @@ static void io_prep_async_work(struct io_kiocb *req) if (!(req->work.flags & IO_WQ_WORK_FILES) && (io_op_defs[req->opcode].work_flags & IO_WQ_WORK_FILES) && !(req->flags & REQ_F_NO_FILE_TABLE)) { - req->work.files = get_files_struct(current); + req->work.identity->files = get_files_struct(current); get_nsproxy(current->nsproxy); - req->work.nsproxy = current->nsproxy; + req->work.identity->nsproxy = current->nsproxy; req->flags |= REQ_F_INFLIGHT; spin_lock_irq(&ctx->inflight_lock); @@ -1219,35 +1221,33 @@ static void io_prep_async_work(struct io_kiocb *req) if (!(req->work.flags & IO_WQ_WORK_MM) && (def->work_flags & IO_WQ_WORK_MM)) { mmgrab(current->mm); - req->work.mm = current->mm; + req->work.identity->mm = current->mm; req->work.flags |= IO_WQ_WORK_MM; } #ifdef CONFIG_BLK_CGROUP if (!(req->work.flags & IO_WQ_WORK_BLKCG) && (def->work_flags & IO_WQ_WORK_BLKCG)) { rcu_read_lock(); - 
req->work.blkcg_css = blkcg_css(); + req->work.identity->blkcg_css = blkcg_css(); /* * This should be rare, either the cgroup is dying or the task * is moving cgroups. Just punt to root for the handful of ios. */ - if (!css_tryget_online(req->work.blkcg_css)) - req->work.blkcg_css = NULL; - else + if (css_tryget_online(req->work.identity->blkcg_css)) req->work.flags |= IO_WQ_WORK_BLKCG; rcu_read_unlock(); } #endif if (!(req->work.flags & IO_WQ_WORK_CREDS)) { - req->work.creds = get_current_cred(); + req->work.identity->creds = get_current_cred(); req->work.flags |= IO_WQ_WORK_CREDS; } if (!(req->work.flags & IO_WQ_WORK_FS) && (def->work_flags & IO_WQ_WORK_FS)) { spin_lock(&current->fs->lock); if (!current->fs->in_exec) { - req->work.fs = current->fs; - req->work.fs->users++; + req->work.identity->fs = current->fs; + req->work.identity->fs->users++; req->work.flags |= IO_WQ_WORK_FS; } else { req->work.flags |= IO_WQ_WORK_CANCEL; @@ -1255,9 +1255,9 @@ static void io_prep_async_work(struct io_kiocb *req) spin_unlock(&current->fs->lock); } if (def->needs_fsize) - req->work.fsize = rlimit(RLIMIT_FSIZE); + req->work.identity->fsize = rlimit(RLIMIT_FSIZE); else - req->work.fsize = RLIM_INFINITY; + req->work.identity->fsize = RLIM_INFINITY; } static void io_prep_async_link(struct io_kiocb *req) @@ -1449,7 +1449,7 @@ static inline bool io_match_files(struct io_kiocb *req, return true; if ((req->flags & REQ_F_WORK_INITIALIZED) && (req->work.flags & IO_WQ_WORK_FILES)) - return req->work.files == files; + return req->work.identity->files == files; return false; } @@ -4089,7 +4089,7 @@ static int io_close(struct io_kiocb *req, bool force_nonblock, } /* No ->flush() or already async, safely close from here */ - ret = filp_close(close->put_file, req->work.files); + ret = filp_close(close->put_file, req->work.identity->files); if (ret < 0) req_set_fail_links(req); fput(close->put_file); @@ -5703,8 +5703,8 @@ static void io_req_drop_files(struct io_kiocb *req) wake_up(&ctx->inflight_wait);
spin_unlock_irqrestore(&ctx->inflight_lock, flags); req->flags &= ~REQ_F_INFLIGHT; - put_files_struct(req->work.files); - put_nsproxy(req->work.nsproxy); + put_files_struct(req->work.identity->files); + put_nsproxy(req->work.identity->nsproxy); req->work.flags &= ~IO_WQ_WORK_FILES; } @@ -6063,14 +6063,14 @@ static void __io_queue_sqe(struct io_kiocb *req, struct io_comp_state *cs) again: linked_timeout = io_prep_linked_timeout(req); - if ((req->flags & REQ_F_WORK_INITIALIZED) && req->work.creds && - req->work.creds != current_cred()) { + if ((req->flags & REQ_F_WORK_INITIALIZED) && req->work.identity->creds && + req->work.identity->creds != current_cred()) { if (old_creds) revert_creds(old_creds); - if (old_creds == req->work.creds) + if (old_creds == req->work.identity->creds) old_creds = NULL; /* restored original creds */ else - old_creds = override_creds(req->work.creds); + old_creds = override_creds(req->work.identity->creds); req->work.flags |= IO_WQ_WORK_CREDS; } @@ -6375,10 +6375,10 @@ static int io_init_req(struct io_ring_ctx *ctx, struct io_kiocb *req, id = READ_ONCE(sqe->personality); if (id) { io_req_init_async(req); - req->work.creds = idr_find(&ctx->personality_idr, id); - if (unlikely(!req->work.creds)) + req->work.identity->creds = idr_find(&ctx->personality_idr, id); + if (unlikely(!req->work.identity->creds)) return -EINVAL; - get_cred(req->work.creds); + get_cred(req->work.identity->creds); req->work.flags |= IO_WQ_WORK_CREDS; } @@ -8248,7 +8248,7 @@ static bool io_wq_files_match(struct io_wq_work *work, void *data) struct files_struct *files = data; return !files || ((work->flags & IO_WQ_WORK_FILES) && - work->files == files); + work->identity->files == files); } /* @@ -8404,7 +8404,7 @@ static bool io_uring_cancel_files(struct io_ring_ctx *ctx, spin_lock_irq(&ctx->inflight_lock); list_for_each_entry(req, &ctx->inflight_list, inflight_entry) { if (files && (req->work.flags & IO_WQ_WORK_FILES) && - req->work.files != files) + 
req->work.identity->files != files) continue; /* req is being completed, ignore */ if (!refcount_inc_not_zero(&req->refs)) diff --git a/include/linux/io_uring.h b/include/linux/io_uring.h index 96315cfaf6d1..352aa6bbd36b 100644 --- a/include/linux/io_uring.h +++ b/include/linux/io_uring.h @@ -4,7 +4,18 @@ #include #include -#include + +struct io_identity { + struct files_struct *files; + struct mm_struct *mm; +#ifdef CONFIG_BLK_CGROUP + struct cgroup_subsys_state *blkcg_css; +#endif + const struct cred *creds; + struct nsproxy *nsproxy; + struct fs_struct *fs; + unsigned long fsize; +}; struct io_uring_task { /* submission side */ From 1e6fa5216a0e59ef02e8b6b40d553238a3b81d49 Mon Sep 17 00:00:00 2001 From: Jens Axboe Date: Thu, 15 Oct 2020 08:46:24 -0600 Subject: [PATCH 226/243] io_uring: COW io_identity on mismatch If the io_identity doesn't completely match the task, then create a copy of it and use that. The existing copy remains valid until the last user of it has gone away. This also changes the personality lookup to be indexed by io_identity, instead of creds directly. Signed-off-by: Jens Axboe --- fs/io_uring.c | 258 ++++++++++++++++++++++++++++----------- include/linux/io_uring.h | 1 + 2 files changed, 189 insertions(+), 70 deletions(-) diff --git a/fs/io_uring.c b/fs/io_uring.c index bd6fd51302ed..ab30834c275f 100644 --- a/fs/io_uring.c +++ b/fs/io_uring.c @@ -1040,6 +1040,27 @@ static inline void req_set_fail_links(struct io_kiocb *req) req->flags |= REQ_F_FAIL_LINK; } +/* + * None of these are dereferenced, they are simply used to check if any of + * them have changed. If we're under current and check they are still the + * same, we're fine to grab references to them for actual out-of-line use. 
+ */ +static void io_init_identity(struct io_identity *id) +{ + id->files = current->files; + id->mm = current->mm; +#ifdef CONFIG_BLK_CGROUP + rcu_read_lock(); + id->blkcg_css = blkcg_css(); + rcu_read_unlock(); +#endif + id->creds = current_cred(); + id->nsproxy = current->nsproxy; + id->fs = current->fs; + id->fsize = rlimit(RLIMIT_FSIZE); + refcount_set(&id->count, 1); +} + /* * Note: must call io_req_init_async() for the first time you * touch any members of io_wq_work. @@ -1051,6 +1072,7 @@ static inline void io_req_init_async(struct io_kiocb *req) memset(&req->work, 0, sizeof(req->work)); req->flags |= REQ_F_WORK_INITIALIZED; + io_init_identity(&req->identity); req->work.identity = &req->identity; } @@ -1157,6 +1179,14 @@ static void __io_commit_cqring(struct io_ring_ctx *ctx) } } +static void io_put_identity(struct io_kiocb *req) +{ + if (req->work.identity == &req->identity) + return; + if (refcount_dec_and_test(&req->work.identity->count)) + kfree(req->work.identity); +} + static void io_req_clean_work(struct io_kiocb *req) { if (!(req->flags & REQ_F_WORK_INITIALIZED)) @@ -1189,11 +1219,118 @@ static void io_req_clean_work(struct io_kiocb *req) free_fs_struct(fs); req->work.flags &= ~IO_WQ_WORK_FS; } + + io_put_identity(req); +} + +/* + * Create a private copy of io_identity, since some fields don't match + * the current context. + */ +static bool io_identity_cow(struct io_kiocb *req) +{ + const struct cred *creds = NULL; + struct io_identity *id; + + if (req->work.flags & IO_WQ_WORK_CREDS) + creds = req->work.identity->creds; + + id = kmemdup(req->work.identity, sizeof(*id), GFP_KERNEL); + if (unlikely(!id)) { + req->work.flags |= IO_WQ_WORK_CANCEL; + return false; + } + + /* + * We can safely just re-init the creds we copied Either the field + * matches the current one, or we haven't grabbed it yet. The only + * exception is ->creds, through registered personalities, so handle + * that one separately. 
+ */ + io_init_identity(id); + if (creds) + req->work.identity->creds = creds; + + /* add one for this request */ + refcount_inc(&id->count); + + /* drop old identity, assign new one. one ref for req, one for tctx */ + if (req->work.identity != &req->identity && + refcount_sub_and_test(2, &req->work.identity->count)) + kfree(req->work.identity); + + req->work.identity = id; + return true; +} + +static bool io_grab_identity(struct io_kiocb *req) +{ + const struct io_op_def *def = &io_op_defs[req->opcode]; + struct io_identity *id = &req->identity; + struct io_ring_ctx *ctx = req->ctx; + + if (def->needs_fsize && id->fsize != rlimit(RLIMIT_FSIZE)) + return false; + + if (!(req->work.flags & IO_WQ_WORK_FILES) && + (def->work_flags & IO_WQ_WORK_FILES) && + !(req->flags & REQ_F_NO_FILE_TABLE)) { + if (id->files != current->files || + id->nsproxy != current->nsproxy) + return false; + atomic_inc(&id->files->count); + get_nsproxy(id->nsproxy); + req->flags |= REQ_F_INFLIGHT; + + spin_lock_irq(&ctx->inflight_lock); + list_add(&req->inflight_entry, &ctx->inflight_list); + spin_unlock_irq(&ctx->inflight_lock); + req->work.flags |= IO_WQ_WORK_FILES; + } +#ifdef CONFIG_BLK_CGROUP + if (!(req->work.flags & IO_WQ_WORK_BLKCG) && + (def->work_flags & IO_WQ_WORK_BLKCG)) { + rcu_read_lock(); + if (id->blkcg_css != blkcg_css()) { + rcu_read_unlock(); + return false; + } + /* + * This should be rare, either the cgroup is dying or the task + * is moving cgroups. Just punt to root for the handful of ios. 
+ */ + if (css_tryget_online(id->blkcg_css)) + req->work.flags |= IO_WQ_WORK_BLKCG; + rcu_read_unlock(); + } +#endif + if (!(req->work.flags & IO_WQ_WORK_CREDS)) { + if (id->creds != current_cred()) + return false; + get_cred(id->creds); + req->work.flags |= IO_WQ_WORK_CREDS; + } + if (!(req->work.flags & IO_WQ_WORK_FS) && + (def->work_flags & IO_WQ_WORK_FS)) { + if (current->fs != id->fs) + return false; + spin_lock(&id->fs->lock); + if (!id->fs->in_exec) { + id->fs->users++; + req->work.flags |= IO_WQ_WORK_FS; + } else { + req->work.flags |= IO_WQ_WORK_CANCEL; + } + spin_unlock(&current->fs->lock); + } + + return true; } static void io_prep_async_work(struct io_kiocb *req) { const struct io_op_def *def = &io_op_defs[req->opcode]; + struct io_identity *id = &req->identity; struct io_ring_ctx *ctx = req->ctx; io_req_init_async(req); @@ -1205,59 +1342,24 @@ static void io_prep_async_work(struct io_kiocb *req) if (def->unbound_nonreg_file) req->work.flags |= IO_WQ_WORK_UNBOUND; } - if (!(req->work.flags & IO_WQ_WORK_FILES) && - (io_op_defs[req->opcode].work_flags & IO_WQ_WORK_FILES) && - !(req->flags & REQ_F_NO_FILE_TABLE)) { - req->work.identity->files = get_files_struct(current); - get_nsproxy(current->nsproxy); - req->work.identity->nsproxy = current->nsproxy; - req->flags |= REQ_F_INFLIGHT; - spin_lock_irq(&ctx->inflight_lock); - list_add(&req->inflight_entry, &ctx->inflight_list); - spin_unlock_irq(&ctx->inflight_lock); - req->work.flags |= IO_WQ_WORK_FILES; - } + /* ->mm can never change on us */ if (!(req->work.flags & IO_WQ_WORK_MM) && (def->work_flags & IO_WQ_WORK_MM)) { - mmgrab(current->mm); - req->work.identity->mm = current->mm; + mmgrab(id->mm); req->work.flags |= IO_WQ_WORK_MM; } -#ifdef CONFIG_BLK_CGROUP - if (!(req->work.flags & IO_WQ_WORK_BLKCG) && - (def->work_flags & IO_WQ_WORK_BLKCG)) { - rcu_read_lock(); - req->work.identity->blkcg_css = blkcg_css(); - /* - * This should be rare, either the cgroup is dying or the task - * is moving cgroups.
Just punt to root for the handful of ios. - */ - if (css_tryget_online(req->work.identity->blkcg_css)) - req->work.flags |= IO_WQ_WORK_BLKCG; - rcu_read_unlock(); - } -#endif - if (!(req->work.flags & IO_WQ_WORK_CREDS)) { - req->work.identity->creds = get_current_cred(); - req->work.flags |= IO_WQ_WORK_CREDS; - } - if (!(req->work.flags & IO_WQ_WORK_FS) && - (def->work_flags & IO_WQ_WORK_FS)) { - spin_lock(&current->fs->lock); - if (!current->fs->in_exec) { - req->work.identity->fs = current->fs; - req->work.identity->fs->users++; - req->work.flags |= IO_WQ_WORK_FS; - } else { - req->work.flags |= IO_WQ_WORK_CANCEL; - } - spin_unlock(&current->fs->lock); - } - if (def->needs_fsize) - req->work.identity->fsize = rlimit(RLIMIT_FSIZE); - else - req->work.identity->fsize = RLIM_INFINITY; + + /* if we fail grabbing identity, we must COW, regrab, and retry */ + if (io_grab_identity(req)) + return; + + if (!io_identity_cow(req)) + return; + + /* can't fail at this point */ + if (!io_grab_identity(req)) + WARN_ON(1); } static void io_prep_async_link(struct io_kiocb *req) @@ -1696,12 +1798,10 @@ static void io_dismantle_req(struct io_kiocb *req) static void __io_free_req(struct io_kiocb *req) { - struct io_uring_task *tctx; - struct io_ring_ctx *ctx; + struct io_uring_task *tctx = req->task->io_uring; + struct io_ring_ctx *ctx = req->ctx; io_dismantle_req(req); - tctx = req->task->io_uring; - ctx = req->ctx; atomic_long_inc(&tctx->req_complete); if (tctx->in_idle) @@ -6374,11 +6474,16 @@ static int io_init_req(struct io_ring_ctx *ctx, struct io_kiocb *req, id = READ_ONCE(sqe->personality); if (id) { + struct io_identity *iod; + io_req_init_async(req); - req->work.identity->creds = idr_find(&ctx->personality_idr, id); - if (unlikely(!req->work.identity->creds)) + iod = idr_find(&ctx->personality_idr, id); + if (unlikely(!iod)) return -EINVAL; - get_cred(req->work.identity->creds); + refcount_inc(&iod->count); + io_put_identity(req); + get_cred(iod->creds); + req->work.identity = iod;
req->work.flags |= IO_WQ_WORK_CREDS; } @@ -8171,11 +8276,14 @@ static int io_uring_fasync(int fd, struct file *file, int on) static int io_remove_personalities(int id, void *p, void *data) { struct io_ring_ctx *ctx = data; - const struct cred *cred; + struct io_identity *iod; - cred = idr_remove(&ctx->personality_idr, id); - if (cred) - put_cred(cred); + iod = idr_remove(&ctx->personality_idr, id); + if (iod) { + put_cred(iod->creds); + if (refcount_dec_and_test(&iod->count)) + kfree(iod); + } return 0; } @@ -9245,23 +9353,33 @@ static int io_probe(struct io_ring_ctx *ctx, void __user *arg, unsigned nr_args) static int io_register_personality(struct io_ring_ctx *ctx) { - const struct cred *creds = get_current_cred(); - int id; + struct io_identity *id; + int ret; - id = idr_alloc_cyclic(&ctx->personality_idr, (void *) creds, 1, - USHRT_MAX, GFP_KERNEL); - if (id < 0) - put_cred(creds); - return id; + id = kmalloc(sizeof(*id), GFP_KERNEL); + if (unlikely(!id)) + return -ENOMEM; + + io_init_identity(id); + id->creds = get_current_cred(); + + ret = idr_alloc_cyclic(&ctx->personality_idr, id, 1, USHRT_MAX, GFP_KERNEL); + if (ret < 0) { + put_cred(id->creds); + kfree(id); + } + return ret; } static int io_unregister_personality(struct io_ring_ctx *ctx, unsigned id) { - const struct cred *old_creds; + struct io_identity *iod; - old_creds = idr_remove(&ctx->personality_idr, id); - if (old_creds) { - put_cred(old_creds); + iod = idr_remove(&ctx->personality_idr, id); + if (iod) { + put_cred(iod->creds); + if (refcount_dec_and_test(&iod->count)) + kfree(iod); return 0; } diff --git a/include/linux/io_uring.h b/include/linux/io_uring.h index 352aa6bbd36b..342cc574d5c0 100644 --- a/include/linux/io_uring.h +++ b/include/linux/io_uring.h @@ -15,6 +15,7 @@ struct io_identity { struct nsproxy *nsproxy; struct fs_struct *fs; unsigned long fsize; + refcount_t count; }; struct io_uring_task { From 5c3462cfd123b341c9d3c947c1a2bab373f1697f Mon Sep 17 00:00:00 2001 From: Jens Axboe 
Date: Thu, 15 Oct 2020 09:02:33 -0600 Subject: [PATCH 227/243] io_uring: store io_identity in io_uring_task This is, by definition, a per-task structure. So store it in the task context, instead of carrying it in each io_kiocb. We're being a bit inefficient if members have changed, as that requires an alloc and copy of a new io_identity struct. The next patch will fix that up. Signed-off-by: Jens Axboe --- fs/io_uring.c | 21 +++++++++++---------- include/linux/io_uring.h | 1 + 2 files changed, 12 insertions(+), 10 deletions(-) diff --git a/fs/io_uring.c b/fs/io_uring.c index ab30834c275f..ae91632b8bf9 100644 --- a/fs/io_uring.c +++ b/fs/io_uring.c @@ -689,7 +689,6 @@ struct io_kiocb { struct hlist_node hash_node; struct async_poll *apoll; struct io_wq_work work; - struct io_identity identity; }; struct io_defer_entry { @@ -1072,8 +1071,7 @@ static inline void io_req_init_async(struct io_kiocb *req) memset(&req->work, 0, sizeof(req->work)); req->flags |= REQ_F_WORK_INITIALIZED; - io_init_identity(&req->identity); - req->work.identity = &req->identity; + req->work.identity = &current->io_uring->identity; } static inline bool io_async_submit(struct io_ring_ctx *ctx) @@ -1179,9 +1177,9 @@ static void __io_commit_cqring(struct io_ring_ctx *ctx) } } -static void io_put_identity(struct io_kiocb *req) +static void io_put_identity(struct io_uring_task *tctx, struct io_kiocb *req) { - if (req->work.identity == &req->identity) + if (req->work.identity == &tctx->identity) return; if (refcount_dec_and_test(&req->work.identity->count)) kfree(req->work.identity); @@ -1220,7 +1218,7 @@ static void io_req_clean_work(struct io_kiocb *req) req->work.flags &= ~IO_WQ_WORK_FS; } - io_put_identity(req); + io_put_identity(req->task->io_uring, req); } /* @@ -1229,6 +1227,7 @@ static void io_req_clean_work(struct io_kiocb *req) */ static bool io_identity_cow(struct io_kiocb *req) { + struct io_uring_task *tctx = current->io_uring; const struct cred *creds = NULL; struct io_identity *id; @@
-1255,7 +1254,7 @@ static bool io_identity_cow(struct io_kiocb *req) refcount_inc(&id->count); /* drop old identity, assign new one. one ref for req, one for tctx */ - if (req->work.identity != &req->identity && + if (req->work.identity != &tctx->identity && refcount_sub_and_test(2, &req->work.identity->count)) kfree(req->work.identity); @@ -1266,7 +1265,7 @@ static bool io_identity_cow(struct io_kiocb *req) static bool io_grab_identity(struct io_kiocb *req) { const struct io_op_def *def = &io_op_defs[req->opcode]; - struct io_identity *id = &req->identity; + struct io_identity *id = req->work.identity; struct io_ring_ctx *ctx = req->ctx; if (def->needs_fsize && id->fsize != rlimit(RLIMIT_FSIZE)) @@ -1330,10 +1329,11 @@ static bool io_grab_identity(struct io_kiocb *req) static void io_prep_async_work(struct io_kiocb *req) { const struct io_op_def *def = &io_op_defs[req->opcode]; - struct io_identity *id = &req->identity; struct io_ring_ctx *ctx = req->ctx; + struct io_identity *id; io_req_init_async(req); + id = req->work.identity; if (req->flags & REQ_F_ISREG) { if (def->hash_reg_file || (ctx->flags & IORING_SETUP_IOPOLL)) @@ -6481,7 +6481,7 @@ static int io_init_req(struct io_ring_ctx *ctx, struct io_kiocb *req, if (unlikely(!iod)) return -EINVAL; refcount_inc(&iod->count); - io_put_identity(req); + io_put_identity(current->io_uring, req); get_cred(iod->creds); req->work.identity = iod; req->work.flags |= IO_WQ_WORK_CREDS; @@ -7691,6 +7691,7 @@ static int io_uring_alloc_task_context(struct task_struct *task) tctx->in_idle = 0; atomic_long_set(&tctx->req_issue, 0); atomic_long_set(&tctx->req_complete, 0); + io_init_identity(&tctx->identity); task->io_uring = tctx; return 0; } diff --git a/include/linux/io_uring.h b/include/linux/io_uring.h index 342cc574d5c0..bd3346194bca 100644 --- a/include/linux/io_uring.h +++ b/include/linux/io_uring.h @@ -24,6 +24,7 @@ struct io_uring_task { struct wait_queue_head wait; struct file *last; atomic_long_t req_issue; + struct 
io_identity identity; /* completion side */ bool in_idle ____cacheline_aligned_in_smp; From 500a373d731ac506612db12631ec21295c1ff360 Mon Sep 17 00:00:00 2001 From: Jens Axboe Date: Thu, 15 Oct 2020 17:38:03 -0600 Subject: [PATCH 228/243] io_uring: assign new io_identity for task if members have changed This avoids doing a copy for each new async IO, if some parts of the io_identity have changed. We avoid reference counting for the normal fast path of nothing ever changing. Signed-off-by: Jens Axboe --- fs/io_uring.c | 19 +++++++++++++++---- include/linux/io_uring.h | 3 ++- 2 files changed, 17 insertions(+), 5 deletions(-) diff --git a/fs/io_uring.c b/fs/io_uring.c index ae91632b8bf9..7020c6a72231 100644 --- a/fs/io_uring.c +++ b/fs/io_uring.c @@ -1066,12 +1066,18 @@ static void io_init_identity(struct io_identity *id) */ static inline void io_req_init_async(struct io_kiocb *req) { + struct io_uring_task *tctx = current->io_uring; + if (req->flags & REQ_F_WORK_INITIALIZED) return; memset(&req->work, 0, sizeof(req->work)); req->flags |= REQ_F_WORK_INITIALIZED; - req->work.identity = &current->io_uring->identity; + + /* Grab a ref if this isn't our static identity */ + req->work.identity = tctx->identity; + if (tctx->identity != &tctx->__identity) + refcount_inc(&req->work.identity->count); } static inline bool io_async_submit(struct io_ring_ctx *ctx) @@ -1179,7 +1185,7 @@ static void __io_commit_cqring(struct io_ring_ctx *ctx) static void io_put_identity(struct io_uring_task *tctx, struct io_kiocb *req) { - if (req->work.identity == &tctx->identity) + if (req->work.identity == &tctx->__identity) return; if (refcount_dec_and_test(&req->work.identity->count)) kfree(req->work.identity); @@ -1254,11 +1260,12 @@ static bool io_identity_cow(struct io_kiocb *req) refcount_inc(&id->count); /* drop old identity, assign new one.
one ref for req, one for tctx */ - if (req->work.identity != &tctx->identity && + if (req->work.identity != tctx->identity && refcount_sub_and_test(2, &req->work.identity->count)) kfree(req->work.identity); req->work.identity = id; + tctx->identity = id; return true; } @@ -7691,7 +7698,8 @@ static int io_uring_alloc_task_context(struct task_struct *task) tctx->in_idle = 0; atomic_long_set(&tctx->req_issue, 0); atomic_long_set(&tctx->req_complete, 0); - io_init_identity(&tctx->identity); + io_init_identity(&tctx->__identity); + tctx->identity = &tctx->__identity; task->io_uring = tctx; return 0; } @@ -7701,6 +7709,9 @@ void __io_uring_free(struct task_struct *tsk) struct io_uring_task *tctx = tsk->io_uring; WARN_ON_ONCE(!xa_empty(&tctx->xa)); + WARN_ON_ONCE(refcount_read(&tctx->identity->count) != 1); + if (tctx->identity != &tctx->__identity) + kfree(tctx->identity); kfree(tctx); tsk->io_uring = NULL; } diff --git a/include/linux/io_uring.h b/include/linux/io_uring.h index bd3346194bca..607d14f61132 100644 --- a/include/linux/io_uring.h +++ b/include/linux/io_uring.h @@ -24,7 +24,8 @@ struct io_uring_task { struct wait_queue_head wait; struct file *last; atomic_long_t req_issue; - struct io_identity identity; + struct io_identity __identity; + struct io_identity *identity; /* completion side */ bool in_idle ____cacheline_aligned_in_smp; From d8a6df10aac9f2e4d5f30aff3129d552d2984ce7 Mon Sep 17 00:00:00 2001 From: Jens Axboe Date: Thu, 15 Oct 2020 16:24:45 -0600 Subject: [PATCH 229/243] io_uring: use percpu counters to track inflight requests Even though we place the req_issued and req_complete in separate cachelines, there's considerable overhead in doing the atomics particularly on the completion side. Get rid of having the two counters, and just use a percpu_counter for this. That's what it was made for, after all. This considerably reduces the overhead in __io_free_req(). 
Signed-off-by: Jens Axboe --- fs/io_uring.c | 50 ++++++++++++++++++++++------------------ include/linux/io_uring.h | 7 ++---- 2 files changed, 30 insertions(+), 27 deletions(-) diff --git a/fs/io_uring.c b/fs/io_uring.c index 7020c6a72231..58c445b95085 100644 --- a/fs/io_uring.c +++ b/fs/io_uring.c @@ -1810,7 +1810,7 @@ static void __io_free_req(struct io_kiocb *req) io_dismantle_req(req); - atomic_long_inc(&tctx->req_complete); + percpu_counter_dec(&tctx->inflight); if (tctx->in_idle) wake_up(&tctx->wait); put_task_struct(req->task); @@ -2089,7 +2089,9 @@ static void io_req_free_batch_finish(struct io_ring_ctx *ctx, if (rb->to_free) __io_req_free_batch_flush(ctx, rb); if (rb->task) { - atomic_long_add(rb->task_refs, &rb->task->io_uring->req_complete); + struct io_uring_task *tctx = rb->task->io_uring; + + percpu_counter_sub(&tctx->inflight, rb->task_refs); put_task_struct_many(rb->task, rb->task_refs); rb->task = NULL; } @@ -2106,7 +2108,9 @@ static void io_req_free_batch(struct req_batch *rb, struct io_kiocb *req) if (req->task != rb->task) { if (rb->task) { - atomic_long_add(rb->task_refs, &rb->task->io_uring->req_complete); + struct io_uring_task *tctx = rb->task->io_uring; + + percpu_counter_sub(&tctx->inflight, rb->task_refs); put_task_struct_many(rb->task, rb->task_refs); } rb->task = req->task; @@ -6524,7 +6528,7 @@ static int io_submit_sqes(struct io_ring_ctx *ctx, unsigned int nr) if (!percpu_ref_tryget_many(&ctx->refs, nr)) return -EAGAIN; - atomic_long_add(nr, &current->io_uring->req_issue); + percpu_counter_add(&current->io_uring->inflight, nr); refcount_add(nr, &current->usage); io_submit_state_start(&state, ctx, nr); @@ -6566,10 +6570,12 @@ static int io_submit_sqes(struct io_ring_ctx *ctx, unsigned int nr) if (unlikely(submitted != nr)) { int ref_used = (submitted == -EAGAIN) ?
0 : submitted; + struct io_uring_task *tctx = current->io_uring; + int unused = nr - ref_used; - percpu_ref_put_many(&ctx->refs, nr - ref_used); - atomic_long_sub(nr - ref_used, &current->io_uring->req_issue); - put_task_struct_many(current, nr - ref_used); + percpu_ref_put_many(&ctx->refs, unused); + percpu_counter_sub(&tctx->inflight, unused); + put_task_struct_many(current, unused); } if (link) io_queue_link_head(link, &state.comp); @@ -7687,17 +7693,22 @@ static int io_init_wq_offload(struct io_ring_ctx *ctx, static int io_uring_alloc_task_context(struct task_struct *task) { struct io_uring_task *tctx; + int ret; tctx = kmalloc(sizeof(*tctx), GFP_KERNEL); if (unlikely(!tctx)) return -ENOMEM; + ret = percpu_counter_init(&tctx->inflight, 0, GFP_KERNEL); + if (unlikely(ret)) { + kfree(tctx); + return ret; + } + xa_init(&tctx->xa); init_waitqueue_head(&tctx->wait); tctx->last = NULL; tctx->in_idle = 0; - atomic_long_set(&tctx->req_issue, 0); - atomic_long_set(&tctx->req_complete, 0); io_init_identity(&tctx->__identity); tctx->identity = &tctx->__identity; task->io_uring = tctx; @@ -7712,6 +7723,7 @@ void __io_uring_free(struct task_struct *tsk) WARN_ON_ONCE(refcount_read(&tctx->identity->count) != 1); if (tctx->identity != &tctx->__identity) kfree(tctx->identity); + percpu_counter_destroy(&tctx->inflight); kfree(tctx); tsk->io_uring = NULL; } @@ -8696,12 +8708,6 @@ void __io_uring_files_cancel(struct files_struct *files) } } -static inline bool io_uring_task_idle(struct io_uring_task *tctx) -{ - return atomic_long_read(&tctx->req_issue) == - atomic_long_read(&tctx->req_complete); -} - /* * Find any io_uring fd that this task has registered or done IO on, and cancel * requests.
@@ -8710,14 +8716,16 @@ void __io_uring_task_cancel(void) { struct io_uring_task *tctx = current->io_uring; DEFINE_WAIT(wait); - long completions; + s64 inflight; /* make sure overflow events are dropped */ tctx->in_idle = true; - while (!io_uring_task_idle(tctx)) { + do { /* read completions before cancelations */ - completions = atomic_long_read(&tctx->req_complete); + inflight = percpu_counter_sum(&tctx->inflight); + if (!inflight) + break; __io_uring_files_cancel(NULL); prepare_to_wait(&tctx->wait, &wait, TASK_UNINTERRUPTIBLE); @@ -8726,12 +8734,10 @@ void __io_uring_task_cancel(void) * If we've seen completions, retry. This avoids a race where * a completion comes in before we did prepare_to_wait(). */ - if (completions != atomic_long_read(&tctx->req_complete)) + if (inflight != percpu_counter_sum(&tctx->inflight)) continue; - if (io_uring_task_idle(tctx)) - break; schedule(); - } + } while (1); finish_wait(&tctx->wait, &wait); tctx->in_idle = false; diff --git a/include/linux/io_uring.h b/include/linux/io_uring.h index 607d14f61132..28939820b6b0 100644 --- a/include/linux/io_uring.h +++ b/include/linux/io_uring.h @@ -23,13 +23,10 @@ struct io_uring_task { struct xarray xa; struct wait_queue_head wait; struct file *last; - atomic_long_t req_issue; + struct percpu_counter inflight; struct io_identity __identity; struct io_identity *identity; - - /* completion side */ - bool in_idle ____cacheline_aligned_in_smp; - atomic_long_t req_complete; + bool in_idle; }; #if defined(CONFIG_IO_URING) From 4ea33a976bfe79293965d0815e1914e4b6e58967 Mon Sep 17 00:00:00 2001 From: Jens Axboe Date: Thu, 15 Oct 2020 13:46:44 -0600 Subject: [PATCH 230/243] io-wq: inherit audit loginuid and sessionid Make sure the async io-wq workers inherit the loginuid and sessionid from the original task, and restore them to unset once we're done with the async work item. While at it, disable the ability for kernel threads to write to their own loginuid. 
Signed-off-by: Jens Axboe --- fs/io-wq.c | 10 ++++++++++ fs/io_uring.c | 24 +++++++++++++++++++++++- fs/proc/base.c | 4 ++++ include/linux/io_uring.h | 4 ++++ 4 files changed, 41 insertions(+), 1 deletion(-) diff --git a/fs/io-wq.c b/fs/io-wq.c index 0c852b75384d..7cb3b4cb9b11 100644 --- a/fs/io-wq.c +++ b/fs/io-wq.c @@ -18,6 +18,7 @@ #include #include #include +#include #include "io-wq.h" @@ -484,6 +485,10 @@ static void io_impersonate_work(struct io_worker *worker, io_wq_switch_creds(worker, work); current->signal->rlim[RLIMIT_FSIZE].rlim_cur = work->identity->fsize; io_wq_switch_blkcg(worker, work); +#ifdef CONFIG_AUDIT + current->loginuid = work->identity->loginuid; + current->sessionid = work->identity->sessionid; +#endif } static void io_assign_current_work(struct io_worker *worker, @@ -496,6 +501,11 @@ static void io_assign_current_work(struct io_worker *worker, cond_resched(); } +#ifdef CONFIG_AUDIT + current->loginuid = KUIDT_INIT(AUDIT_UID_UNSET); + current->sessionid = AUDIT_SID_UNSET; +#endif + spin_lock_irq(&worker->lock); worker->cur_work = work; spin_unlock_irq(&worker->lock); diff --git a/fs/io_uring.c b/fs/io_uring.c index 58c445b95085..b9ffe98f18bc 100644 --- a/fs/io_uring.c +++ b/fs/io_uring.c @@ -81,6 +81,7 @@ #include #include #include +#include #define CREATE_TRACE_POINTS #include @@ -327,6 +328,11 @@ struct io_ring_ctx { const struct cred *creds; +#ifdef CONFIG_AUDIT + kuid_t loginuid; + unsigned int sessionid; +#endif + struct completion ref_comp; struct completion sq_thread_comp; @@ -1057,6 +1063,10 @@ static void io_init_identity(struct io_identity *id) id->nsproxy = current->nsproxy; id->fs = current->fs; id->fsize = rlimit(RLIMIT_FSIZE); +#ifdef CONFIG_AUDIT + id->loginuid = current->loginuid; + id->sessionid = current->sessionid; +#endif refcount_set(&id->count, 1); } @@ -1316,6 +1326,11 @@ static bool io_grab_identity(struct io_kiocb *req) get_cred(id->creds); req->work.flags |= IO_WQ_WORK_CREDS; } +#ifdef CONFIG_AUDIT + if 
(!uid_eq(current->loginuid, id->loginuid) || + current->sessionid != id->sessionid) + return false; +#endif if (!(req->work.flags & IO_WQ_WORK_FS) && (def->work_flags & IO_WQ_WORK_FS)) { if (current->fs != id->fs) @@ -6755,6 +6770,10 @@ static int io_sq_thread(void *data) old_cred = override_creds(ctx->creds); } io_sq_thread_associate_blkcg(ctx, &cur_css); +#ifdef CONFIG_AUDIT + current->loginuid = ctx->loginuid; + current->sessionid = ctx->sessionid; +#endif ret |= __io_sq_thread(ctx, start_jiffies, cap_entries); @@ -9203,7 +9222,10 @@ static int io_uring_create(unsigned entries, struct io_uring_params *p, ctx->compat = in_compat_syscall(); ctx->user = user; ctx->creds = get_current_cred(); - +#ifdef CONFIG_AUDIT + ctx->loginuid = current->loginuid; + ctx->sessionid = current->sessionid; +#endif ctx->sqo_task = get_task_struct(current); /* diff --git a/fs/proc/base.c b/fs/proc/base.c index aa69c35d904c..0f707003dda5 100644 --- a/fs/proc/base.c +++ b/fs/proc/base.c @@ -1268,6 +1268,10 @@ static ssize_t proc_loginuid_write(struct file * file, const char __user * buf, kuid_t kloginuid; int rv; + /* Don't let kthreads write their own loginuid */ + if (current->flags & PF_KTHREAD) + return -EPERM; + rcu_read_lock(); if (current != pid_task(proc_pid(inode), PIDTYPE_PID)) { rcu_read_unlock(); diff --git a/include/linux/io_uring.h b/include/linux/io_uring.h index 28939820b6b0..868364cea3b7 100644 --- a/include/linux/io_uring.h +++ b/include/linux/io_uring.h @@ -15,6 +15,10 @@ struct io_identity { struct nsproxy *nsproxy; struct fs_struct *fs; unsigned long fsize; +#ifdef CONFIG_AUDIT + kuid_t loginuid; + unsigned int sessionid; +#endif refcount_t count; }; From 58852d4d673760cf7c88b9360b3c24a041bec298 Mon Sep 17 00:00:00 2001 From: Pavel Begunkov Date: Fri, 16 Oct 2020 20:55:56 +0100 Subject: [PATCH 231/243] io_uring: fix double poll mask init __io_queue_proc() is used by both, poll reqs and apoll. 
Don't use req->poll.events to copy poll mask because for apoll it aliases with private data of the request. Signed-off-by: Pavel Begunkov Signed-off-by: Jens Axboe --- fs/io_uring.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/fs/io_uring.c b/fs/io_uring.c index b9ffe98f18bc..e1726f457461 100644 --- a/fs/io_uring.c +++ b/fs/io_uring.c @@ -5008,6 +5008,8 @@ static void __io_queue_proc(struct io_poll_iocb *poll, struct io_poll_table *pt, * for write). Setup a separate io_poll_iocb if this happens. */ if (unlikely(poll->head)) { + struct io_poll_iocb *poll_one = poll; + /* already have a 2nd entry, fail a third attempt */ if (*poll_ptr) { pt->error = -EINVAL; @@ -5018,7 +5020,7 @@ static void __io_queue_proc(struct io_poll_iocb *poll, struct io_poll_table *pt, pt->error = -ENOMEM; return; } - io_init_poll_iocb(poll, req->poll.events, io_poll_double_wake); + io_init_poll_iocb(poll, poll_one->events, io_poll_double_wake); refcount_inc(&req->refs); poll->wait.private = req; *poll_ptr = poll; From 13bd691421bc191a402d2e0d3da5f248d170a632 Mon Sep 17 00:00:00 2001 From: Jens Axboe Date: Sat, 17 Oct 2020 08:31:29 -0600 Subject: [PATCH 232/243] mm: mark async iocb read as NOWAIT once some data has been copied Once we've copied some data for an iocb that is marked with IOCB_WAITQ, we should no longer attempt to async lock a new page. Instead make sure we return the copied amount, and let the caller retry, instead of returning -EIOCBQUEUED for a new page. This should only be possible with read-ahead disabled on the below device, and multiple threads racing on the same file. Haven't been able to reproduce on anything else. 
Cc: stable@vger.kernel.org # v5.9 Fixes: 1a0a7853b901 ("mm: support async buffered reads in generic_file_buffered_read()") Reported-by: Kent Overstreet Signed-off-by: Jens Axboe --- mm/filemap.c | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/mm/filemap.c b/mm/filemap.c index 1a6beaf69f49..e4101b5bfa82 100644 --- a/mm/filemap.c +++ b/mm/filemap.c @@ -2199,6 +2199,14 @@ ssize_t generic_file_buffered_read(struct kiocb *iocb, last_index = (*ppos + iter->count + PAGE_SIZE-1) >> PAGE_SHIFT; offset = *ppos & ~PAGE_MASK; + /* + * If we've already successfully copied some data, then we + * can no longer safely return -EIOCBQUEUED. Hence mark + * an async read NOWAIT at that point. + */ + if (written && (iocb->ki_flags & IOCB_WAITQ)) + iocb->ki_flags |= IOCB_NOWAIT; + for (;;) { struct page *page; pgoff_t end_index; From 324bcf54c449c7b5b7024c9fa4549fbaaae1935d Mon Sep 17 00:00:00 2001 From: Jens Axboe Date: Sat, 17 Oct 2020 09:25:52 -0600 Subject: [PATCH 233/243] mm: use limited read-ahead to satisfy read For the case where read-ahead is disabled on the file, or if the cgroup is congested, ensure that we can at least do 1 page of read-ahead to make progress on the read in an async fashion. This could potentially be larger, but it's not needed in terms of functionality, so let's error on the side of caution as larger counts of pages may run into reclaim issues (particularly if we're congested). This makes sure we're not hitting the potentially sync ->readpage() path for IO that is marked IOCB_WAITQ, which could cause us to block. It also means we'll use the same path for IO, regardless of whether or not read-ahead happens to be disabled on the lower level device. 
Acked-by: Johannes Weiner Reported-by: Matthew Wilcox (Oracle) Reported-by: Hao_Xu [axboe: updated for new ractl API] Signed-off-by: Jens Axboe --- mm/readahead.c | 20 ++++++++++++++------ 1 file changed, 14 insertions(+), 6 deletions(-) diff --git a/mm/readahead.c b/mm/readahead.c index c6ffb76827da..c5b0457415be 100644 --- a/mm/readahead.c +++ b/mm/readahead.c @@ -552,15 +552,23 @@ static void ondemand_readahead(struct readahead_control *ractl, void page_cache_sync_ra(struct readahead_control *ractl, struct file_ra_state *ra, unsigned long req_count) { - /* no read-ahead */ - if (!ra->ra_pages) - return; + bool do_forced_ra = ractl->file && (ractl->file->f_mode & FMODE_RANDOM); - if (blk_cgroup_congested()) - return; + /* + * Even if read-ahead is disabled, issue this request as read-ahead + * as we'll need it to satisfy the requested range. The forced + * read-ahead will do the right thing and limit the read to just the + * requested range, which we'll set to 1 page for this case. + */ + if (!ra->ra_pages || blk_cgroup_congested()) { + if (!ractl->file) + return; + req_count = 1; + do_forced_ra = true; + } /* be dumb */ - if (ractl->file && (ractl->file->f_mode & FMODE_RANDOM)) { + if (do_forced_ra) { force_page_cache_ra(ractl, ra, req_count); return; } From 9ba0d0c81284f4ec0b24529bdba2fc68b9d6a09a Mon Sep 17 00:00:00 2001 From: Jeffle Xu Date: Mon, 19 Oct 2020 16:59:42 +0800 Subject: [PATCH 234/243] io_uring: use blk_queue_nowait() to check if NOWAIT supported commit 021a24460dc2 ("block: add QUEUE_FLAG_NOWAIT") adds a new helper function blk_queue_nowait() to check if the bdev supports handling of REQ_NOWAIT or not. Since then bio-based dm device can also support REQ_NOWAIT, and currently only dm-linear supports that since commit 6abc49468eea ("dm: add support for REQ_NOWAIT and enable it for linear target"). 
Signed-off-by: Jeffle Xu Signed-off-by: Jens Axboe --- fs/io_uring.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fs/io_uring.c b/fs/io_uring.c index e1726f457461..0f4a9c45061d 100644 --- a/fs/io_uring.c +++ b/fs/io_uring.c @@ -2693,7 +2693,7 @@ static struct file *__io_file_get(struct io_submit_state *state, int fd) static bool io_bdev_nowait(struct block_device *bdev) { #ifdef CONFIG_BLOCK - return !bdev || queue_is_mq(bdev_get_queue(bdev)); + return !bdev || blk_queue_nowait(bdev_get_queue(bdev)); #else return true; #endif From 61ee0674bcfd80eaa0173215d38839a48b08b56b Mon Sep 17 00:00:00 2001 From: Alexandre Belloni Date: Tue, 13 Oct 2020 16:41:08 +0200 Subject: [PATCH 235/243] dt-bindings: rtc: add trickle-voltage-millivolt Some RTCs have a trickle charge that is able to output different voltages depending on the type of the connected auxiliary power (battery, supercap, ...). Add a property allowing to specify the necessary voltage. Signed-off-by: Alexandre Belloni Reviewed-by: Rob Herring Link: https://lore.kernel.org/r/20201013144110.1942218-1-alexandre.belloni@bootlin.com --- Documentation/devicetree/bindings/rtc/rtc.yaml | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/Documentation/devicetree/bindings/rtc/rtc.yaml b/Documentation/devicetree/bindings/rtc/rtc.yaml index 6b8a11325691..b1a45d2fc59a 100644 --- a/Documentation/devicetree/bindings/rtc/rtc.yaml +++ b/Documentation/devicetree/bindings/rtc/rtc.yaml @@ -52,6 +52,12 @@ properties: Selected resistor for trickle charger. Should be given if trickle charger should be enabled. + trickle-voltage-millivolt: + description: + Selected voltage for trickle charger. Should be given + if trickle charger should be enabled and the trickle voltage is different + from the RTC main power supply. 
+ wakeup-source: $ref: /schemas/types.yaml#/definitions/flag description: From 5ebe59a50593b1978f16a9bf4a7a887dc7a82b7b Mon Sep 17 00:00:00 2001 From: Alexandre Belloni Date: Tue, 13 Oct 2020 16:41:09 +0200 Subject: [PATCH 236/243] dt-bindings: rtc: rv3032: add RV-3032 bindings Document the Microcrystal RV-3032 device tree bindings Signed-off-by: Alexandre Belloni Reviewed-by: Rob Herring Link: https://lore.kernel.org/r/20201013144110.1942218-2-alexandre.belloni@bootlin.com --- .../bindings/rtc/microcrystal,rv3032.yaml | 64 +++++++++++++++++++ 1 file changed, 64 insertions(+) create mode 100644 Documentation/devicetree/bindings/rtc/microcrystal,rv3032.yaml diff --git a/Documentation/devicetree/bindings/rtc/microcrystal,rv3032.yaml b/Documentation/devicetree/bindings/rtc/microcrystal,rv3032.yaml new file mode 100644 index 000000000000..a2c55303810d --- /dev/null +++ b/Documentation/devicetree/bindings/rtc/microcrystal,rv3032.yaml @@ -0,0 +1,64 @@ +# SPDX-License-Identifier: GPL-2.0-only OR BSD-2-Clause +%YAML 1.2 +--- +$id: http://devicetree.org/schemas/rtc/microcrystal,rv3032.yaml# +$schema: http://devicetree.org/meta-schemas/core.yaml# + +title: Micro Crystal RV-3032 RTC Device Tree Bindings + +allOf: + - $ref: "rtc.yaml#" + +maintainers: + - Alexandre Belloni + +properties: + compatible: + const: microcrystal,rv3032 + + reg: + maxItems: 1 + + interrupts: + maxItems: 1 + + start-year: true + + trickle-resistor-ohms: + enum: + - 1000 + - 2000 + - 7000 + - 11000 + + trickle-voltage-millivolt: + enum: + - 1750 + - 3000 + - 4400 + +required: + - compatible + - reg + +additionalProperties: false + +examples: + - | + #include + i2c { + #address-cells = <1>; + #size-cells = <0>; + + rtc@51 { + compatible = "microcrystal,rv3032"; + reg = <0x51>; + status = "okay"; + pinctrl-0 = <&rtc_nint_pins>; + interrupts-extended = <&gpio1 16 IRQ_TYPE_LEVEL_HIGH>; + trickle-resistor-ohms = <7000>; + trickle-voltage-millivolt = <1750>; + }; + }; + +...
From 2eeaa532accab8810ca9fe21f52d149713561235 Mon Sep 17 00:00:00 2001 From: Alexandre Belloni Date: Tue, 13 Oct 2020 16:41:10 +0200 Subject: [PATCH 237/243] rtc: rv3032: Add a driver for Microcrystal RV-3032 New driver for the Microcrystal RV-3032, including support for: - Date/time - Alarms - Low voltage detection - Trickle charge - Trimming - Clkout - RAM - EEPROM - Temperature sensor Signed-off-by: Alexandre Belloni Link: https://lore.kernel.org/r/20201013144110.1942218-3-alexandre.belloni@bootlin.com --- drivers/rtc/Kconfig | 10 + drivers/rtc/Makefile | 1 + drivers/rtc/rtc-rv3032.c | 925 +++++++++++++++++++++++++++++++++++++++ 3 files changed, 936 insertions(+) create mode 100644 drivers/rtc/rtc-rv3032.c diff --git a/drivers/rtc/Kconfig b/drivers/rtc/Kconfig index 48c536acd777..65ad9d0b47ab 100644 --- a/drivers/rtc/Kconfig +++ b/drivers/rtc/Kconfig @@ -669,6 +669,16 @@ config RTC_DRV_RV3028 This driver can also be built as a module. If so, the module will be called rtc-rv3028. +config RTC_DRV_RV3032 + tristate "Micro Crystal RV3032" + select REGMAP_I2C + help + If you say yes here you get support for the Micro Crystal + RV3032. + + This driver can also be built as a module. If so, the module + will be called rtc-rv3032. 
+ config RTC_DRV_RV8803 tristate "Micro Crystal RV8803, Epson RX8900" help diff --git a/drivers/rtc/Makefile b/drivers/rtc/Makefile index 880e08a409c3..bfb57464118d 100644 --- a/drivers/rtc/Makefile +++ b/drivers/rtc/Makefile @@ -141,6 +141,7 @@ obj-$(CONFIG_RTC_DRV_RS5C372) += rtc-rs5c372.o obj-$(CONFIG_RTC_DRV_RTD119X) += rtc-rtd119x.o obj-$(CONFIG_RTC_DRV_RV3028) += rtc-rv3028.o obj-$(CONFIG_RTC_DRV_RV3029C2) += rtc-rv3029c2.o +obj-$(CONFIG_RTC_DRV_RV3032) += rtc-rv3032.o obj-$(CONFIG_RTC_DRV_RV8803) += rtc-rv8803.o obj-$(CONFIG_RTC_DRV_RX4581) += rtc-rx4581.o obj-$(CONFIG_RTC_DRV_RX6110) += rtc-rx6110.o diff --git a/drivers/rtc/rtc-rv3032.c b/drivers/rtc/rtc-rv3032.c new file mode 100644 index 000000000000..3e67f71f4261 --- /dev/null +++ b/drivers/rtc/rtc-rv3032.c @@ -0,0 +1,925 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * RTC driver for the Micro Crystal RV3032 + * + * Copyright (C) 2020 Micro Crystal SA + * + * Alexandre Belloni + * + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#define RV3032_SEC 0x01 +#define RV3032_MIN 0x02 +#define RV3032_HOUR 0x03 +#define RV3032_WDAY 0x04 +#define RV3032_DAY 0x05 +#define RV3032_MONTH 0x06 +#define RV3032_YEAR 0x07 +#define RV3032_ALARM_MIN 0x08 +#define RV3032_ALARM_HOUR 0x09 +#define RV3032_ALARM_DAY 0x0A +#define RV3032_STATUS 0x0D +#define RV3032_TLSB 0x0E +#define RV3032_TMSB 0x0F +#define RV3032_CTRL1 0x10 +#define RV3032_CTRL2 0x11 +#define RV3032_CTRL3 0x12 +#define RV3032_TS_CTRL 0x13 +#define RV3032_CLK_IRQ 0x14 +#define RV3032_EEPROM_ADDR 0x3D +#define RV3032_EEPROM_DATA 0x3E +#define RV3032_EEPROM_CMD 0x3F +#define RV3032_RAM1 0x40 +#define RV3032_PMU 0xC0 +#define RV3032_OFFSET 0xC1 +#define RV3032_CLKOUT1 0xC2 +#define RV3032_CLKOUT2 0xC3 +#define RV3032_TREF0 0xC4 +#define RV3032_TREF1 0xC5 + +#define RV3032_STATUS_VLF BIT(0) +#define RV3032_STATUS_PORF BIT(1) +#define RV3032_STATUS_EVF BIT(2) 
+#define RV3032_STATUS_AF BIT(3) +#define RV3032_STATUS_TF BIT(4) +#define RV3032_STATUS_UF BIT(5) +#define RV3032_STATUS_TLF BIT(6) +#define RV3032_STATUS_THF BIT(7) + +#define RV3032_TLSB_CLKF BIT(1) +#define RV3032_TLSB_EEBUSY BIT(2) +#define RV3032_TLSB_TEMP GENMASK(7, 4) + +#define RV3032_CLKOUT2_HFD_MSK GENMASK(4, 0) +#define RV3032_CLKOUT2_FD_MSK GENMASK(6, 5) +#define RV3032_CLKOUT2_OS BIT(7) + +#define RV3032_CTRL1_EERD BIT(3) +#define RV3032_CTRL1_WADA BIT(5) + +#define RV3032_CTRL2_STOP BIT(0) +#define RV3032_CTRL2_EIE BIT(2) +#define RV3032_CTRL2_AIE BIT(3) +#define RV3032_CTRL2_TIE BIT(4) +#define RV3032_CTRL2_UIE BIT(5) +#define RV3032_CTRL2_CLKIE BIT(6) +#define RV3032_CTRL2_TSE BIT(7) + +#define RV3032_PMU_TCM GENMASK(1, 0) +#define RV3032_PMU_TCR GENMASK(3, 2) +#define RV3032_PMU_BSM GENMASK(5, 4) +#define RV3032_PMU_NCLKE BIT(6) + +#define RV3032_PMU_BSM_DSM 1 +#define RV3032_PMU_BSM_LSM 2 + +#define RV3032_OFFSET_MSK GENMASK(5, 0) + +#define RV3032_EVT_CTRL_TSR BIT(2) + +#define RV3032_EEPROM_CMD_UPDATE 0x11 +#define RV3032_EEPROM_CMD_WRITE 0x21 +#define RV3032_EEPROM_CMD_READ 0x22 + +#define RV3032_EEPROM_USER 0xCB + +#define RV3032_EEBUSY_POLL 10000 +#define RV3032_EEBUSY_TIMEOUT 100000 + +#define OFFSET_STEP_PPT 238419 + +struct rv3032_data { + struct regmap *regmap; + struct rtc_device *rtc; +#ifdef CONFIG_COMMON_CLK + struct clk_hw clkout_hw; +#endif +}; + +static u16 rv3032_trickle_resistors[] = {1000, 2000, 7000, 11000}; +static u16 rv3032_trickle_voltages[] = {0, 1750, 3000, 4400}; + +static int rv3032_exit_eerd(struct rv3032_data *rv3032, u32 eerd) +{ + if (eerd) + return 0; + + return regmap_update_bits(rv3032->regmap, RV3032_CTRL1, RV3032_CTRL1_EERD, 0); +} + +static int rv3032_enter_eerd(struct rv3032_data *rv3032, u32 *eerd) +{ + u32 ctrl1, status; + int ret; + + ret = regmap_read(rv3032->regmap, RV3032_CTRL1, &ctrl1); + if (ret) + return ret; + + *eerd = ctrl1 & RV3032_CTRL1_EERD; + if (*eerd) + return 0; + + ret = 
regmap_update_bits(rv3032->regmap, RV3032_CTRL1, + RV3032_CTRL1_EERD, RV3032_CTRL1_EERD); + if (ret) + return ret; + + ret = regmap_read_poll_timeout(rv3032->regmap, RV3032_TLSB, status, + !(status & RV3032_TLSB_EEBUSY), + RV3032_EEBUSY_POLL, RV3032_EEBUSY_TIMEOUT); + if (ret) { + rv3032_exit_eerd(rv3032, *eerd); + + return ret; + } + + return 0; +} + +static int rv3032_update_cfg(struct rv3032_data *rv3032, unsigned int reg, + unsigned int mask, unsigned int val) +{ + u32 status, eerd; + int ret; + + ret = rv3032_enter_eerd(rv3032, &eerd); + if (ret) + return ret; + + ret = regmap_update_bits(rv3032->regmap, reg, mask, val); + if (ret) + goto exit_eerd; + + ret = regmap_write(rv3032->regmap, RV3032_EEPROM_CMD, RV3032_EEPROM_CMD_UPDATE); + if (ret) + goto exit_eerd; + + usleep_range(46000, RV3032_EEBUSY_TIMEOUT); + + ret = regmap_read_poll_timeout(rv3032->regmap, RV3032_TLSB, status, + !(status & RV3032_TLSB_EEBUSY), + RV3032_EEBUSY_POLL, RV3032_EEBUSY_TIMEOUT); + +exit_eerd: + rv3032_exit_eerd(rv3032, eerd); + + return ret; +} + +static irqreturn_t rv3032_handle_irq(int irq, void *dev_id) +{ + struct rv3032_data *rv3032 = dev_id; + unsigned long events = 0; + u32 status = 0, ctrl = 0; + + if (regmap_read(rv3032->regmap, RV3032_STATUS, &status) < 0 || + status == 0) { + return IRQ_NONE; + } + + if (status & RV3032_STATUS_TF) { + status |= RV3032_STATUS_TF; + ctrl |= RV3032_CTRL2_TIE; + events |= RTC_PF; + } + + if (status & RV3032_STATUS_AF) { + status |= RV3032_STATUS_AF; + ctrl |= RV3032_CTRL2_AIE; + events |= RTC_AF; + } + + if (status & RV3032_STATUS_UF) { + status |= RV3032_STATUS_UF; + ctrl |= RV3032_CTRL2_UIE; + events |= RTC_UF; + } + + if (events) { + rtc_update_irq(rv3032->rtc, 1, events); + regmap_update_bits(rv3032->regmap, RV3032_STATUS, status, 0); + regmap_update_bits(rv3032->regmap, RV3032_CTRL2, ctrl, 0); + } + + return IRQ_HANDLED; +} + +static int rv3032_get_time(struct device *dev, struct rtc_time *tm) +{ + struct rv3032_data *rv3032 = 
dev_get_drvdata(dev); + u8 date[7]; + int ret, status; + + ret = regmap_read(rv3032->regmap, RV3032_STATUS, &status); + if (ret < 0) + return ret; + + if (status & (RV3032_STATUS_PORF | RV3032_STATUS_VLF)) + return -EINVAL; + + ret = regmap_bulk_read(rv3032->regmap, RV3032_SEC, date, sizeof(date)); + if (ret) + return ret; + + tm->tm_sec = bcd2bin(date[0] & 0x7f); + tm->tm_min = bcd2bin(date[1] & 0x7f); + tm->tm_hour = bcd2bin(date[2] & 0x3f); + tm->tm_wday = date[3] & 0x7; + tm->tm_mday = bcd2bin(date[4] & 0x3f); + tm->tm_mon = bcd2bin(date[5] & 0x1f) - 1; + tm->tm_year = bcd2bin(date[6]) + 100; + + return 0; +} + +static int rv3032_set_time(struct device *dev, struct rtc_time *tm) +{ + struct rv3032_data *rv3032 = dev_get_drvdata(dev); + u8 date[7]; + int ret; + + date[0] = bin2bcd(tm->tm_sec); + date[1] = bin2bcd(tm->tm_min); + date[2] = bin2bcd(tm->tm_hour); + date[3] = tm->tm_wday; + date[4] = bin2bcd(tm->tm_mday); + date[5] = bin2bcd(tm->tm_mon + 1); + date[6] = bin2bcd(tm->tm_year - 100); + + ret = regmap_bulk_write(rv3032->regmap, RV3032_SEC, date, + sizeof(date)); + if (ret) + return ret; + + ret = regmap_update_bits(rv3032->regmap, RV3032_STATUS, + RV3032_STATUS_PORF | RV3032_STATUS_VLF, 0); + + return ret; +} + +static int rv3032_get_alarm(struct device *dev, struct rtc_wkalrm *alrm) +{ + struct rv3032_data *rv3032 = dev_get_drvdata(dev); + u8 alarmvals[3]; + int status, ctrl, ret; + + ret = regmap_bulk_read(rv3032->regmap, RV3032_ALARM_MIN, alarmvals, + sizeof(alarmvals)); + if (ret) + return ret; + + ret = regmap_read(rv3032->regmap, RV3032_STATUS, &status); + if (ret < 0) + return ret; + + ret = regmap_read(rv3032->regmap, RV3032_CTRL2, &ctrl); + if (ret < 0) + return ret; + + alrm->time.tm_sec = 0; + alrm->time.tm_min = bcd2bin(alarmvals[0] & 0x7f); + alrm->time.tm_hour = bcd2bin(alarmvals[1] & 0x3f); + alrm->time.tm_mday = bcd2bin(alarmvals[2] & 0x3f); + + alrm->enabled = !!(ctrl & RV3032_CTRL2_AIE); + alrm->pending = (status & RV3032_STATUS_AF) && 
alrm->enabled; + + return 0; +} + +static int rv3032_set_alarm(struct device *dev, struct rtc_wkalrm *alrm) +{ + struct rv3032_data *rv3032 = dev_get_drvdata(dev); + u8 alarmvals[3]; + u8 ctrl = 0; + int ret; + + /* The alarm has no seconds, round up to nearest minute */ + if (alrm->time.tm_sec) { + time64_t alarm_time = rtc_tm_to_time64(&alrm->time); + + alarm_time += 60 - alrm->time.tm_sec; + rtc_time64_to_tm(alarm_time, &alrm->time); + } + + ret = regmap_update_bits(rv3032->regmap, RV3032_CTRL2, + RV3032_CTRL2_AIE | RV3032_CTRL2_UIE, 0); + if (ret) + return ret; + + alarmvals[0] = bin2bcd(alrm->time.tm_min); + alarmvals[1] = bin2bcd(alrm->time.tm_hour); + alarmvals[2] = bin2bcd(alrm->time.tm_mday); + + ret = regmap_update_bits(rv3032->regmap, RV3032_STATUS, + RV3032_STATUS_AF, 0); + if (ret) + return ret; + + ret = regmap_bulk_write(rv3032->regmap, RV3032_ALARM_MIN, alarmvals, + sizeof(alarmvals)); + if (ret) + return ret; + + if (alrm->enabled) { + if (rv3032->rtc->uie_rtctimer.enabled) + ctrl |= RV3032_CTRL2_UIE; + if (rv3032->rtc->aie_timer.enabled) + ctrl |= RV3032_CTRL2_AIE; + } + + ret = regmap_update_bits(rv3032->regmap, RV3032_CTRL2, + RV3032_CTRL2_UIE | RV3032_CTRL2_AIE, ctrl); + + return ret; +} + +static int rv3032_alarm_irq_enable(struct device *dev, unsigned int enabled) +{ + struct rv3032_data *rv3032 = dev_get_drvdata(dev); + int ctrl = 0, ret; + + if (enabled) { + if (rv3032->rtc->uie_rtctimer.enabled) + ctrl |= RV3032_CTRL2_UIE; + if (rv3032->rtc->aie_timer.enabled) + ctrl |= RV3032_CTRL2_AIE; + } + + ret = regmap_update_bits(rv3032->regmap, RV3032_STATUS, + RV3032_STATUS_AF | RV3032_STATUS_UF, 0); + if (ret) + return ret; + + ret = regmap_update_bits(rv3032->regmap, RV3032_CTRL2, + RV3032_CTRL2_UIE | RV3032_CTRL2_AIE, ctrl); + if (ret) + return ret; + + return 0; +} + +static int rv3032_read_offset(struct device *dev, long *offset) +{ + struct rv3032_data *rv3032 = dev_get_drvdata(dev); + int ret, value, steps; + + ret = 
regmap_read(rv3032->regmap, RV3032_OFFSET, &value); + if (ret < 0) + return ret; + + steps = sign_extend32(FIELD_GET(RV3032_OFFSET_MSK, value), 5); + + *offset = DIV_ROUND_CLOSEST(steps * OFFSET_STEP_PPT, 1000); + + return 0; +} + +static int rv3032_set_offset(struct device *dev, long offset) +{ + struct rv3032_data *rv3032 = dev_get_drvdata(dev); + + offset = clamp(offset, -7629L, 7391L) * 1000; + offset = DIV_ROUND_CLOSEST(offset, OFFSET_STEP_PPT); + + return rv3032_update_cfg(rv3032, RV3032_OFFSET, RV3032_OFFSET_MSK, + FIELD_PREP(RV3032_OFFSET_MSK, offset)); +} + +static int rv3032_ioctl(struct device *dev, unsigned int cmd, unsigned long arg) +{ + struct rv3032_data *rv3032 = dev_get_drvdata(dev); + int status, val = 0, ret = 0; + + switch (cmd) { + case RTC_VL_READ: + ret = regmap_read(rv3032->regmap, RV3032_STATUS, &status); + if (ret < 0) + return ret; + + if (status & (RV3032_STATUS_PORF | RV3032_STATUS_VLF)) + val = RTC_VL_DATA_INVALID; + return put_user(val, (unsigned int __user *)arg); + + default: + return -ENOIOCTLCMD; + } +} + +static int rv3032_nvram_write(void *priv, unsigned int offset, void *val, size_t bytes) +{ + return regmap_bulk_write(priv, RV3032_RAM1 + offset, val, bytes); +} + +static int rv3032_nvram_read(void *priv, unsigned int offset, void *val, size_t bytes) +{ + return regmap_bulk_read(priv, RV3032_RAM1 + offset, val, bytes); +} + +static int rv3032_eeprom_write(void *priv, unsigned int offset, void *val, size_t bytes) +{ + struct rv3032_data *rv3032 = priv; + u32 status, eerd; + int i, ret; + u8 *buf = val; + + ret = rv3032_enter_eerd(rv3032, &eerd); + if (ret) + return ret; + + for (i = 0; i < bytes; i++) { + ret = regmap_write(rv3032->regmap, RV3032_EEPROM_ADDR, + RV3032_EEPROM_USER + offset + i); + if (ret) + goto exit_eerd; + + ret = regmap_write(rv3032->regmap, RV3032_EEPROM_DATA, buf[i]); + if (ret) + goto exit_eerd; + + ret = regmap_write(rv3032->regmap, RV3032_EEPROM_CMD, + RV3032_EEPROM_CMD_WRITE); + if (ret) + goto 
exit_eerd; + + usleep_range(RV3032_EEBUSY_POLL, RV3032_EEBUSY_TIMEOUT); + + ret = regmap_read_poll_timeout(rv3032->regmap, RV3032_TLSB, status, + !(status & RV3032_TLSB_EEBUSY), + RV3032_EEBUSY_POLL, RV3032_EEBUSY_TIMEOUT); + if (ret) + goto exit_eerd; + } + +exit_eerd: + rv3032_exit_eerd(rv3032, eerd); + + return ret; +} + +static int rv3032_eeprom_read(void *priv, unsigned int offset, void *val, size_t bytes) +{ + struct rv3032_data *rv3032 = priv; + u32 status, eerd, data; + int i, ret; + u8 *buf = val; + + ret = rv3032_enter_eerd(rv3032, &eerd); + if (ret) + return ret; + + for (i = 0; i < bytes; i++) { + ret = regmap_write(rv3032->regmap, RV3032_EEPROM_ADDR, + RV3032_EEPROM_USER + offset + i); + if (ret) + goto exit_eerd; + + ret = regmap_write(rv3032->regmap, RV3032_EEPROM_CMD, + RV3032_EEPROM_CMD_READ); + if (ret) + goto exit_eerd; + + ret = regmap_read_poll_timeout(rv3032->regmap, RV3032_TLSB, status, + !(status & RV3032_TLSB_EEBUSY), + RV3032_EEBUSY_POLL, RV3032_EEBUSY_TIMEOUT); + if (ret) + goto exit_eerd; + + ret = regmap_read(rv3032->regmap, RV3032_EEPROM_DATA, &data); + if (ret) + goto exit_eerd; + buf[i] = data; + } + +exit_eerd: + rv3032_exit_eerd(rv3032, eerd); + + return ret; +} + +static int rv3032_trickle_charger_setup(struct device *dev, struct rv3032_data *rv3032) +{ + u32 val, ohms, voltage; + int i; + + val = FIELD_PREP(RV3032_PMU_TCM, 1) | FIELD_PREP(RV3032_PMU_BSM, RV3032_PMU_BSM_DSM); + if (!device_property_read_u32(dev, "trickle-voltage-millivolt", &voltage)) { + for (i = 0; i < ARRAY_SIZE(rv3032_trickle_voltages); i++) + if (voltage == rv3032_trickle_voltages[i]) + break; + if (i < ARRAY_SIZE(rv3032_trickle_voltages)) + val = FIELD_PREP(RV3032_PMU_TCM, i) | + FIELD_PREP(RV3032_PMU_BSM, RV3032_PMU_BSM_LSM); + } + + if (device_property_read_u32(dev, "trickle-resistor-ohms", &ohms)) + return 0; + + for (i = 0; i < ARRAY_SIZE(rv3032_trickle_resistors); i++) + if (ohms == rv3032_trickle_resistors[i]) + break; + + if (i >= 
ARRAY_SIZE(rv3032_trickle_resistors)) { + dev_warn(dev, "invalid trickle resistor value\n"); + + return 0; + } + + return rv3032_update_cfg(rv3032, RV3032_PMU, + RV3032_PMU_TCR | RV3032_PMU_TCM | RV3032_PMU_BSM, + val | FIELD_PREP(RV3032_PMU_TCR, i)); +} + +#ifdef CONFIG_COMMON_CLK +#define clkout_hw_to_rv3032(hw) container_of(hw, struct rv3032_data, clkout_hw) + +static int clkout_xtal_rates[] = { + 32768, + 1024, + 64, + 1, +}; + +#define RV3032_HFD_STEP 8192 + +static unsigned long rv3032_clkout_recalc_rate(struct clk_hw *hw, + unsigned long parent_rate) +{ + int clkout, ret; + struct rv3032_data *rv3032 = clkout_hw_to_rv3032(hw); + + ret = regmap_read(rv3032->regmap, RV3032_CLKOUT2, &clkout); + if (ret < 0) + return 0; + + if (clkout & RV3032_CLKOUT2_OS) { + unsigned long rate = FIELD_GET(RV3032_CLKOUT2_HFD_MSK, clkout) << 8; + + ret = regmap_read(rv3032->regmap, RV3032_CLKOUT1, &clkout); + if (ret < 0) + return 0; + + rate += clkout + 1; + + return rate * RV3032_HFD_STEP; + } + + return clkout_xtal_rates[FIELD_GET(RV3032_CLKOUT2_FD_MSK, clkout)]; +} + +static long rv3032_clkout_round_rate(struct clk_hw *hw, unsigned long rate, + unsigned long *prate) +{ + int i, hfd; + + if (rate < RV3032_HFD_STEP) + for (i = 0; i < ARRAY_SIZE(clkout_xtal_rates); i++) + if (clkout_xtal_rates[i] <= rate) + return clkout_xtal_rates[i]; + + hfd = DIV_ROUND_CLOSEST(rate, RV3032_HFD_STEP); + + return RV3032_HFD_STEP * clamp(hfd, 0, 8192); +} + +static int rv3032_clkout_set_rate(struct clk_hw *hw, unsigned long rate, + unsigned long parent_rate) +{ + struct rv3032_data *rv3032 = clkout_hw_to_rv3032(hw); + u32 status, eerd; + int i, hfd, ret; + + for (i = 0; i < ARRAY_SIZE(clkout_xtal_rates); i++) { + if (clkout_xtal_rates[i] == rate) { + return rv3032_update_cfg(rv3032, RV3032_CLKOUT2, 0xff, + FIELD_PREP(RV3032_CLKOUT2_FD_MSK, i)); + } + } + + hfd = DIV_ROUND_CLOSEST(rate, RV3032_HFD_STEP); + hfd = clamp(hfd, 1, 8192) - 1; + + ret = rv3032_enter_eerd(rv3032, &eerd); + if (ret) + 
goto exit_eerd; + + ret = regmap_write(rv3032->regmap, RV3032_CLKOUT1, hfd & 0xff); + if (ret) + return ret; + + ret = regmap_write(rv3032->regmap, RV3032_CLKOUT2, RV3032_CLKOUT2_OS | + FIELD_PREP(RV3032_CLKOUT2_HFD_MSK, hfd >> 8)); + if (ret) + goto exit_eerd; + + ret = regmap_write(rv3032->regmap, RV3032_EEPROM_CMD, RV3032_EEPROM_CMD_UPDATE); + if (ret) + goto exit_eerd; + + usleep_range(46000, RV3032_EEBUSY_TIMEOUT); + + ret = regmap_read_poll_timeout(rv3032->regmap, RV3032_TLSB, status, + !(status & RV3032_TLSB_EEBUSY), + RV3032_EEBUSY_POLL, RV3032_EEBUSY_TIMEOUT); + +exit_eerd: + rv3032_exit_eerd(rv3032, eerd); + + return ret; +} + +static int rv3032_clkout_prepare(struct clk_hw *hw) +{ + struct rv3032_data *rv3032 = clkout_hw_to_rv3032(hw); + + return rv3032_update_cfg(rv3032, RV3032_PMU, RV3032_PMU_NCLKE, 0); +} + +static void rv3032_clkout_unprepare(struct clk_hw *hw) +{ + struct rv3032_data *rv3032 = clkout_hw_to_rv3032(hw); + + rv3032_update_cfg(rv3032, RV3032_PMU, RV3032_PMU_NCLKE, RV3032_PMU_NCLKE); +} + +static int rv3032_clkout_is_prepared(struct clk_hw *hw) +{ + int val, ret; + struct rv3032_data *rv3032 = clkout_hw_to_rv3032(hw); + + ret = regmap_read(rv3032->regmap, RV3032_PMU, &val); + if (ret < 0) + return ret; + + return !(val & RV3032_PMU_NCLKE); +} + +static const struct clk_ops rv3032_clkout_ops = { + .prepare = rv3032_clkout_prepare, + .unprepare = rv3032_clkout_unprepare, + .is_prepared = rv3032_clkout_is_prepared, + .recalc_rate = rv3032_clkout_recalc_rate, + .round_rate = rv3032_clkout_round_rate, + .set_rate = rv3032_clkout_set_rate, +}; + +static int rv3032_clkout_register_clk(struct rv3032_data *rv3032, + struct i2c_client *client) +{ + int ret; + struct clk *clk; + struct clk_init_data init; + struct device_node *node = client->dev.of_node; + + ret = regmap_update_bits(rv3032->regmap, RV3032_TLSB, RV3032_TLSB_CLKF, 0); + if (ret < 0) + return ret; + + ret = regmap_update_bits(rv3032->regmap, RV3032_CTRL2, RV3032_CTRL2_CLKIE, 0); + if 
(ret < 0) + return ret; + + ret = regmap_write(rv3032->regmap, RV3032_CLK_IRQ, 0); + if (ret < 0) + return ret; + + init.name = "rv3032-clkout"; + init.ops = &rv3032_clkout_ops; + init.flags = 0; + init.parent_names = NULL; + init.num_parents = 0; + rv3032->clkout_hw.init = &init; + + of_property_read_string(node, "clock-output-names", &init.name); + + clk = devm_clk_register(&client->dev, &rv3032->clkout_hw); + if (!IS_ERR(clk)) + of_clk_add_provider(node, of_clk_src_simple_get, clk); + + return 0; +} +#endif + +static int rv3032_hwmon_read_temp(struct device *dev, long *mC) +{ + struct rv3032_data *rv3032 = dev_get_drvdata(dev); + u8 buf[2]; + int temp, prev = 0; + int ret; + + ret = regmap_bulk_read(rv3032->regmap, RV3032_TLSB, buf, sizeof(buf)); + if (ret) + return ret; + + temp = sign_extend32(buf[1], 7) << 4; + temp |= FIELD_GET(RV3032_TLSB_TEMP, buf[0]); + + /* No blocking or shadowing on RV3032_TLSB and RV3032_TMSB */ + do { + prev = temp; + + ret = regmap_bulk_read(rv3032->regmap, RV3032_TLSB, buf, sizeof(buf)); + if (ret) + return ret; + + temp = sign_extend32(buf[1], 7) << 4; + temp |= FIELD_GET(RV3032_TLSB_TEMP, buf[0]); + } while (temp != prev); + + *mC = (temp * 1000) / 16; + + return 0; +} + +static umode_t rv3032_hwmon_is_visible(const void *data, enum hwmon_sensor_types type, + u32 attr, int channel) +{ + if (type != hwmon_temp) + return 0; + + switch (attr) { + case hwmon_temp_input: + return 0444; + default: + return 0; + } +} + +static int rv3032_hwmon_read(struct device *dev, enum hwmon_sensor_types type, + u32 attr, int channel, long *temp) +{ + int err; + + switch (attr) { + case hwmon_temp_input: + err = rv3032_hwmon_read_temp(dev, temp); + break; + default: + err = -EOPNOTSUPP; + break; + } + + return err; +} + +static const struct hwmon_channel_info *rv3032_hwmon_info[] = { + HWMON_CHANNEL_INFO(chip, HWMON_C_REGISTER_TZ), + HWMON_CHANNEL_INFO(temp, HWMON_T_INPUT | HWMON_T_MAX | HWMON_T_MAX_HYST), + NULL +}; + +static const struct hwmon_ops 
rv3032_hwmon_hwmon_ops = { + .is_visible = rv3032_hwmon_is_visible, + .read = rv3032_hwmon_read, +}; + +static const struct hwmon_chip_info rv3032_hwmon_chip_info = { + .ops = &rv3032_hwmon_hwmon_ops, + .info = rv3032_hwmon_info, +}; + +static void rv3032_hwmon_register(struct device *dev) +{ + struct rv3032_data *rv3032 = dev_get_drvdata(dev); + + if (!IS_REACHABLE(CONFIG_HWMON)) + return; + + devm_hwmon_device_register_with_info(dev, "rv3032", rv3032, &rv3032_hwmon_chip_info, NULL); +} + +static struct rtc_class_ops rv3032_rtc_ops = { + .read_time = rv3032_get_time, + .set_time = rv3032_set_time, + .read_offset = rv3032_read_offset, + .set_offset = rv3032_set_offset, + .ioctl = rv3032_ioctl, +}; + +static const struct regmap_config regmap_config = { + .reg_bits = 8, + .val_bits = 8, + .max_register = 0xCA, +}; + +static int rv3032_probe(struct i2c_client *client) +{ + struct rv3032_data *rv3032; + int ret, status; + struct nvmem_config nvmem_cfg = { + .name = "rv3032_nvram", + .word_size = 1, + .stride = 1, + .size = 16, + .type = NVMEM_TYPE_BATTERY_BACKED, + .reg_read = rv3032_nvram_read, + .reg_write = rv3032_nvram_write, + }; + struct nvmem_config eeprom_cfg = { + .name = "rv3032_eeprom", + .word_size = 1, + .stride = 1, + .size = 32, + .type = NVMEM_TYPE_EEPROM, + .reg_read = rv3032_eeprom_read, + .reg_write = rv3032_eeprom_write, + }; + + rv3032 = devm_kzalloc(&client->dev, sizeof(struct rv3032_data), + GFP_KERNEL); + if (!rv3032) + return -ENOMEM; + + rv3032->regmap = devm_regmap_init_i2c(client, ®map_config); + if (IS_ERR(rv3032->regmap)) + return PTR_ERR(rv3032->regmap); + + i2c_set_clientdata(client, rv3032); + + ret = regmap_read(rv3032->regmap, RV3032_STATUS, &status); + if (ret < 0) + return ret; + + rv3032->rtc = devm_rtc_allocate_device(&client->dev); + if (IS_ERR(rv3032->rtc)) + return PTR_ERR(rv3032->rtc); + + if (client->irq > 0) { + ret = devm_request_threaded_irq(&client->dev, client->irq, + NULL, rv3032_handle_irq, + IRQF_TRIGGER_LOW | 
IRQF_ONESHOT, + "rv3032", rv3032); + if (ret) { + dev_warn(&client->dev, "unable to request IRQ, alarms disabled\n"); + client->irq = 0; + } else { + rv3032_rtc_ops.read_alarm = rv3032_get_alarm; + rv3032_rtc_ops.set_alarm = rv3032_set_alarm; + rv3032_rtc_ops.alarm_irq_enable = rv3032_alarm_irq_enable; + } + } + + ret = regmap_update_bits(rv3032->regmap, RV3032_CTRL1, + RV3032_CTRL1_WADA, RV3032_CTRL1_WADA); + if (ret) + return ret; + + rv3032_trickle_charger_setup(&client->dev, rv3032); + + rv3032->rtc->range_min = RTC_TIMESTAMP_BEGIN_2000; + rv3032->rtc->range_max = RTC_TIMESTAMP_END_2099; + rv3032->rtc->ops = &rv3032_rtc_ops; + ret = rtc_register_device(rv3032->rtc); + if (ret) + return ret; + + nvmem_cfg.priv = rv3032; + rtc_nvmem_register(rv3032->rtc, &nvmem_cfg); + eeprom_cfg.priv = rv3032; + rtc_nvmem_register(rv3032->rtc, &eeprom_cfg); + + rv3032->rtc->max_user_freq = 1; + +#ifdef CONFIG_COMMON_CLK + rv3032_clkout_register_clk(rv3032, client); +#endif + + rv3032_hwmon_register(&client->dev); + + return 0; +} + +static const struct of_device_id rv3032_of_match[] = { + { .compatible = "microcrystal,rv3032", }, + { } +}; +MODULE_DEVICE_TABLE(of, rv3032_of_match); + +static struct i2c_driver rv3032_driver = { + .driver = { + .name = "rtc-rv3032", + .of_match_table = of_match_ptr(rv3032_of_match), + }, + .probe_new = rv3032_probe, +}; +module_i2c_driver(rv3032_driver); + +MODULE_AUTHOR("Alexandre Belloni "); +MODULE_DESCRIPTION("Micro Crystal RV3032 RTC driver"); +MODULE_LICENSE("GPL v2"); From 92c6dcfbd1eb803d4669c82b3d8a0fcbb803e3f9 Mon Sep 17 00:00:00 2001 From: Alexandre Belloni Date: Thu, 15 Oct 2020 21:11:30 +0200 Subject: [PATCH 238/243] rtc: r9701: remove leftover comment Commit 22652ba72453 ("rtc: stop validating rtc_time in .read_time") removed the code but not the associated comment. 
Signed-off-by: Alexandre Belloni Link: https://lore.kernel.org/r/20201015191135.471249-1-alexandre.belloni@bootlin.com --- drivers/rtc/rtc-r9701.c | 4 ---- 1 file changed, 4 deletions(-) diff --git a/drivers/rtc/rtc-r9701.c b/drivers/rtc/rtc-r9701.c index 84f0d25259ae..eb00879f7c9a 100644 --- a/drivers/rtc/rtc-r9701.c +++ b/drivers/rtc/rtc-r9701.c @@ -85,10 +85,6 @@ static int r9701_get_datetime(struct device *dev, struct rtc_time *dt) dt->tm_mon = bcd2bin(buf[4]) - 1; /* RMONCNT */ dt->tm_year = bcd2bin(buf[5]) + 100; /* RYRCNT */ - /* the rtc device may contain illegal values on power up - * according to the data sheet. make sure they are valid. - */ - return 0; } From 7390bec4ed5d510d1a637257ff75e9ab49030411 Mon Sep 17 00:00:00 2001 From: Alexandre Belloni Date: Thu, 15 Oct 2020 21:11:31 +0200 Subject: [PATCH 239/243] rtc: r9701: stop setting a default time It doesn't make sense to set the RTC to a default value at probe time. Let the core handle invalid date and time. Signed-off-by: Alexandre Belloni Link: https://lore.kernel.org/r/20201015191135.471249-2-alexandre.belloni@bootlin.com --- drivers/rtc/rtc-r9701.c | 22 ---------------------- 1 file changed, 22 deletions(-) diff --git a/drivers/rtc/rtc-r9701.c b/drivers/rtc/rtc-r9701.c index eb00879f7c9a..f8f7044ff808 100644 --- a/drivers/rtc/rtc-r9701.c +++ b/drivers/rtc/rtc-r9701.c @@ -115,7 +115,6 @@ static const struct rtc_class_ops r9701_rtc_ops = { static int r9701_probe(struct spi_device *spi) { struct rtc_device *rtc; - struct rtc_time dt; unsigned char tmp; int res; @@ -126,27 +125,6 @@ static int r9701_probe(struct spi_device *spi) return -ENODEV; } - /* - * The device seems to be present. Now check if the registers - * contain invalid values. 
If so, try to write a default date: - * 2000/1/1 00:00:00 - */ - if (r9701_get_datetime(&spi->dev, &dt)) { - dev_info(&spi->dev, "trying to repair invalid date/time\n"); - dt.tm_sec = 0; - dt.tm_min = 0; - dt.tm_hour = 0; - dt.tm_mday = 1; - dt.tm_mon = 0; - dt.tm_year = 100; - - if (r9701_set_datetime(&spi->dev, &dt) || - r9701_get_datetime(&spi->dev, &dt)) { - dev_err(&spi->dev, "cannot repair RTC register\n"); - return -ENODEV; - } - } - rtc = devm_rtc_device_register(&spi->dev, "r9701", &r9701_rtc_ops, THIS_MODULE); if (IS_ERR(rtc)) From 2a8f3380c9e50a36b99a92febfd78f7c7afd29b0 Mon Sep 17 00:00:00 2001 From: Alexandre Belloni Date: Thu, 15 Oct 2020 21:11:32 +0200 Subject: [PATCH 240/243] rtc: r9701: remove useless memset The RTC core already sets to zero the struct rtc_time it passes to the driver; avoid doing it a second time. Signed-off-by: Alexandre Belloni Link: https://lore.kernel.org/r/20201015191135.471249-3-alexandre.belloni@bootlin.com --- drivers/rtc/rtc-r9701.c | 2 -- 1 file changed, 2 deletions(-) diff --git a/drivers/rtc/rtc-r9701.c b/drivers/rtc/rtc-r9701.c index f8f7044ff808..4b688e9c4192 100644 --- a/drivers/rtc/rtc-r9701.c +++ b/drivers/rtc/rtc-r9701.c @@ -75,8 +75,6 @@ static int r9701_get_datetime(struct device *dev, struct rtc_time *dt) if (ret) return ret; - memset(dt, 0, sizeof(*dt)); - dt->tm_sec = bcd2bin(buf[0]); /* RSECCNT */ dt->tm_min = bcd2bin(buf[1]); /* RMINCNT */ dt->tm_hour = bcd2bin(buf[2]); /* RHRCNT */ From 8b34134907e7d70b8b51fa56ecd4f8c50c46692c Mon Sep 17 00:00:00 2001 From: Alexandre Belloni Date: Thu, 15 Oct 2020 21:11:33 +0200 Subject: [PATCH 241/243] rtc: r9701: stop setting RWKCNT tm_wday is never checked for validity and it is not read back in r9701_get_datetime.
Avoid setting it to stop tripping static checkers: drivers/rtc/rtc-r9701.c:109 r9701_set_datetime() error: undefined (user controlled) shift '1 << dt->tm_wday' Reported-by: Dan Carpenter Signed-off-by: Alexandre Belloni Link: https://lore.kernel.org/r/20201015191135.471249-4-alexandre.belloni@bootlin.com --- drivers/rtc/rtc-r9701.c | 1 - 1 file changed, 1 deletion(-) diff --git a/drivers/rtc/rtc-r9701.c b/drivers/rtc/rtc-r9701.c index 4b688e9c4192..183c5a0fe78c 100644 --- a/drivers/rtc/rtc-r9701.c +++ b/drivers/rtc/rtc-r9701.c @@ -100,7 +100,6 @@ static int r9701_set_datetime(struct device *dev, struct rtc_time *dt) ret = ret ? ret : write_reg(dev, RDAYCNT, bin2bcd(dt->tm_mday)); ret = ret ? ret : write_reg(dev, RMONCNT, bin2bcd(dt->tm_mon + 1)); ret = ret ? ret : write_reg(dev, RYRCNT, bin2bcd(dt->tm_year - 100)); - ret = ret ? ret : write_reg(dev, RWKCNT, 1 << dt->tm_wday); return ret; } From dfe13cf2ae5a7cdb131e61a8aae4fb27cd379bd4 Mon Sep 17 00:00:00 2001 From: Alexandre Belloni Date: Thu, 15 Oct 2020 21:11:34 +0200 Subject: [PATCH 242/243] rtc: r9701: convert to devm_rtc_allocate_device This allows further improvement of the driver. 
Signed-off-by: Alexandre Belloni Link: https://lore.kernel.org/r/20201015191135.471249-5-alexandre.belloni@bootlin.com --- drivers/rtc/rtc-r9701.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/drivers/rtc/rtc-r9701.c b/drivers/rtc/rtc-r9701.c index 183c5a0fe78c..9165c180b0e6 100644 --- a/drivers/rtc/rtc-r9701.c +++ b/drivers/rtc/rtc-r9701.c @@ -122,14 +122,14 @@ static int r9701_probe(struct spi_device *spi) return -ENODEV; } - rtc = devm_rtc_device_register(&spi->dev, "r9701", - &r9701_rtc_ops, THIS_MODULE); + rtc = devm_rtc_allocate_device(&spi->dev); if (IS_ERR(rtc)) return PTR_ERR(rtc); spi_set_drvdata(spi, rtc); + rtc->ops = &r9701_rtc_ops; - return 0; + return rtc_register_device(rtc); } static struct spi_driver r9701_driver = { From 35331b506f6c67a0b4042fac1ae2785cef9ac8c3 Mon Sep 17 00:00:00 2001 From: Alexandre Belloni Date: Thu, 15 Oct 2020 21:11:35 +0200 Subject: [PATCH 243/243] rtc: r9701: set range Set range and remove the set_time check. This is a classic BCD RTC. Signed-off-by: Alexandre Belloni Link: https://lore.kernel.org/r/20201015191135.471249-6-alexandre.belloni@bootlin.com --- drivers/rtc/rtc-r9701.c | 8 +++----- 1 file changed, 3 insertions(+), 5 deletions(-) diff --git a/drivers/rtc/rtc-r9701.c b/drivers/rtc/rtc-r9701.c index 9165c180b0e6..7ceb968f0e44 100644 --- a/drivers/rtc/rtc-r9701.c +++ b/drivers/rtc/rtc-r9701.c @@ -88,11 +88,7 @@ static int r9701_get_datetime(struct device *dev, struct rtc_time *dt) static int r9701_set_datetime(struct device *dev, struct rtc_time *dt) { - int ret, year; - - year = dt->tm_year + 1900; - if (year >= 2100 || year < 2000) - return -EINVAL; + int ret; ret = write_reg(dev, RHRCNT, bin2bcd(dt->tm_hour)); ret = ret ? 
ret : write_reg(dev, RMINCNT, bin2bcd(dt->tm_min)); @@ -128,6 +124,8 @@ static int r9701_probe(struct spi_device *spi) spi_set_drvdata(spi, rtc); rtc->ops = &r9701_rtc_ops; + rtc->range_min = RTC_TIMESTAMP_BEGIN_2000; + rtc->range_max = RTC_TIMESTAMP_END_2099; return rtc_register_device(rtc); }