diff --git a/Documentation/devicetree/bindings/eeprom/at24.yaml b/Documentation/devicetree/bindings/eeprom/at24.yaml index 4cee72d53318..6edfa705b486 100644 --- a/Documentation/devicetree/bindings/eeprom/at24.yaml +++ b/Documentation/devicetree/bindings/eeprom/at24.yaml @@ -114,6 +114,9 @@ properties: - const: renesas,r1ex24128 - const: atmel,24c128 + label: + description: Descriptive name of the EEPROM. + reg: maxItems: 1 diff --git a/Documentation/devicetree/bindings/i2c/i2c-imx-lpi2c.yaml b/Documentation/devicetree/bindings/i2c/i2c-imx-lpi2c.yaml index ac0bc5dd64d6..29b9447f3b84 100644 --- a/Documentation/devicetree/bindings/i2c/i2c-imx-lpi2c.yaml +++ b/Documentation/devicetree/bindings/i2c/i2c-imx-lpi2c.yaml @@ -9,12 +9,18 @@ title: Freescale Low Power Inter IC (LPI2C) for i.MX maintainers: - Anson Huang +allOf: + - $ref: /schemas/i2c/i2c-controller.yaml# + properties: compatible: - enum: - - fsl,imx7ulp-lpi2c - - fsl,imx8qxp-lpi2c - - fsl,imx8qm-lpi2c + oneOf: + - enum: + - fsl,imx7ulp-lpi2c + - fsl,imx8qm-lpi2c + - items: + - const: fsl,imx8qxp-lpi2c + - const: fsl,imx7ulp-lpi2c reg: maxItems: 1 @@ -22,23 +28,34 @@ properties: interrupts: maxItems: 1 + assigned-clock-parents: true + assigned-clock-rates: true + assigned-clocks: true + clock-frequency: true + + clock-names: + maxItems: 1 + clocks: maxItems: 1 + power-domains: + maxItems: 1 + required: - compatible - reg - interrupts - clocks -additionalProperties: false +unevaluatedProperties: false examples: - | #include #include - lpi2c7@40a50000 { + i2c@40a50000 { compatible = "fsl,imx7ulp-lpi2c"; reg = <0x40A50000 0x10000>; interrupt-parent = <&intc>; diff --git a/Documentation/devicetree/bindings/i2c/i2c-imx.yaml b/Documentation/devicetree/bindings/i2c/i2c-imx.yaml index 810536953177..f23966b0d6c6 100644 --- a/Documentation/devicetree/bindings/i2c/i2c-imx.yaml +++ b/Documentation/devicetree/bindings/i2c/i2c-imx.yaml @@ -9,6 +9,9 @@ title: Freescale Inter IC (I2C) and High Speed Inter IC (HS-I2C) for i.MX maintainers: - Wolfram Sang +allOf: + - $ref: /schemas/i2c/i2c-controller.yaml# + properties: compatible: oneOf: @@ -18,6 +21,9 @@ properties: - items: - const: fsl,imx35-i2c - const: fsl,imx1-i2c + - items: + - const: fsl,imx7d-i2c + - const: fsl,imx21-i2c - items: - enum: - fsl,imx25-i2c @@ -75,7 +81,7 @@ required: - interrupts - clocks -additionalProperties: false +unevaluatedProperties: false examples: - | diff --git a/Documentation/devicetree/bindings/i2c/i2c.txt b/Documentation/devicetree/bindings/i2c/i2c.txt index a21c359b9f02..df41f72afc87 100644 --- a/Documentation/devicetree/bindings/i2c/i2c.txt +++ b/Documentation/devicetree/bindings/i2c/i2c.txt @@ -87,6 +87,11 @@ wants to support one of the below features, it should adapt these bindings. this information to detect a stalled bus more reliably, for example. Can not be combined with 'multi-master'. +- smbus + states that additional SMBus restrictions and features apply to this bus. + Examples of features are SMBusHostNotify and SMBusAlert. Examples of + restrictions are more reserved addresses and timeout definitions. + Required properties (per child device) -------------------------------------- diff --git a/Documentation/devicetree/bindings/i2c/ingenic,i2c.yaml b/Documentation/devicetree/bindings/i2c/ingenic,i2c.yaml index 682ed1bbf5c6..0e7b4b8a7e48 100644 --- a/Documentation/devicetree/bindings/i2c/ingenic,i2c.yaml +++ b/Documentation/devicetree/bindings/i2c/ingenic,i2c.yaml @@ -17,9 +17,13 @@ properties: pattern: "^i2c@[0-9a-f]+$" compatible: - enum: - - ingenic,jz4780-i2c - - ingenic,x1000-i2c + oneOf: + - enum: + - ingenic,jz4770-i2c + - ingenic,x1000-i2c + - items: + - const: ingenic,jz4780-i2c + - const: ingenic,jz4770-i2c reg: maxItems: 1 @@ -60,7 +64,7 @@ examples: #include #include i2c@10054000 { - compatible = "ingenic,jz4780-i2c"; + compatible = "ingenic,jz4780-i2c", "ingenic,jz4770-i2c"; #address-cells = <1>; #size-cells = <0>; reg = <0x10054000 0x1000>; diff --git a/Documentation/devicetree/bindings/i2c/mellanox,i2c-mlxbf.txt b/Documentation/devicetree/bindings/i2c/mellanox,i2c-mlxbf.txt new file mode 100644 index 000000000000..566ea861aa00 --- /dev/null +++ b/Documentation/devicetree/bindings/i2c/mellanox,i2c-mlxbf.txt @@ -0,0 +1,42 @@ +Device tree configuration for the Mellanox I2C SMBus on BlueField SoCs + +Required Properties: + +- compatible : should be "mellanox,i2c-mlxbf1" or "mellanox,i2c-mlxbf2". + +- reg : address offset and length of the device registers. The + registers consist of the following set of resources: + 1) Smbus block registers. + 2) Cause master registers. + 3) Cause slave registers. + 4) Cause coalesce registers (if compatible isn't set + to "mellanox,i2c-mlxbf1"). + +- interrupts : interrupt number. + +Optional Properties: + +- clock-frequency : bus frequency used to configure timing registers; + allowed values are 100000, 400000 and 1000000; + those are expressed in Hz. Default is 100000. + +Example: + +i2c@2804000 { + compatible = "mellanox,i2c-mlxbf1"; + reg = <0x02804000 0x800>, + <0x02801200 0x020>, + <0x02801260 0x020>; + interrupts = <57>; + clock-frequency = <100000>; +}; + +i2c@2808800 { + compatible = "mellanox,i2c-mlxbf2"; + reg = <0x02808800 0x600>, + <0x02808e00 0x020>, + <0x02808e20 0x020>, + <0x02808e40 0x010>; + interrupts = <57>; + clock-frequency = <400000>; +}; diff --git a/Documentation/devicetree/bindings/rtc/microcrystal,rv3032.yaml b/Documentation/devicetree/bindings/rtc/microcrystal,rv3032.yaml new file mode 100644 index 000000000000..a2c55303810d --- /dev/null +++ b/Documentation/devicetree/bindings/rtc/microcrystal,rv3032.yaml @@ -0,0 +1,64 @@ +# SPDX-License-Identifier: GPL-2.0-only OR BSD-2-Clause +%YAML 1.2 +--- +$id: http://devicetree.org/schemas/rtc/microcrystal,rv3032.yaml# +$schema: http://devicetree.org/meta-schemas/core.yaml# + +title: Microchip RV-3032 RTC Device Tree Bindings + +allOf: + - $ref: "rtc.yaml#" + +maintainers: + - Alexandre Belloni + +properties: + compatible: + const: microcrystal,rv3032 + + reg: + maxItems: 1 + + interrupts: + maxItems: 1 + + start-year: true + + trickle-resistor-ohms: + enum: + - 1000 + - 2000 + - 7000 + - 11000 + + trickle-voltage-millivolt: + enum: + - 1750 + - 3000 + - 4400 + +required: + - compatible + - reg + +additionalProperties: false + +examples: + - | + #include + i2c { + #address-cells = <1>; + #size-cells = <0>; + + rtc@51 { + compatible = "microcrystal,rv3032"; + reg = <0x51>; + status = "okay"; + pinctrl-0 = <&rtc_nint_pins>; + interrupts-extended = <&gpio1 16 IRQ_TYPE_LEVEL_HIGH>; + trickle-resistor-ohms = <7000>; + trickle-voltage-millivolt = <1750>; + }; + }; + +... diff --git a/Documentation/devicetree/bindings/rtc/rtc-ds1307.txt b/Documentation/devicetree/bindings/rtc/rtc-ds1307.txt index 66f0a31ae9ce..36f610bb051e 100644 --- a/Documentation/devicetree/bindings/rtc/rtc-ds1307.txt +++ b/Documentation/devicetree/bindings/rtc/rtc-ds1307.txt @@ -31,9 +31,16 @@ Optional properties: Selected resistor for trickle charger Possible values are 250, 2000, 4000 Should be given if trickle charger should be enabled -- trickle-diode-disable : ds1339, ds1340 and ds 1388 only +- aux-voltage-chargeable: ds1339, ds1340, ds1388 and rx8130 only + Tells whether the battery/supercap of the RTC (if any) is + chargeable or not. + Possible values are 0 (not chargeable), 1 (chargeable) + +Deprecated properties: +- trickle-diode-disable : ds1339, ds1340 and ds1388 only Do not use internal trickle charger diode Should be given if internal trickle charger diode should be disabled + (superseded by aux-voltage-chargeable) Example: ds1339: rtc@68 { diff --git a/Documentation/devicetree/bindings/rtc/rtc.yaml b/Documentation/devicetree/bindings/rtc/rtc.yaml index 2d055e37e6f7..8acd2de3de3a 100644 --- a/Documentation/devicetree/bindings/rtc/rtc.yaml +++ b/Documentation/devicetree/bindings/rtc/rtc.yaml @@ -17,6 +17,15 @@ properties: $nodename: pattern: "^rtc(@.*|-[0-9a-f])*$" + aux-voltage-chargeable: + $ref: /schemas/types.yaml#/definitions/uint32 + enum: [0, 1] + description: | + Tells whether the battery/supercap of the RTC (if any) is + chargeable or not: + 0: not chargeable + 1: chargeable + quartz-load-femtofarads: $ref: /schemas/types.yaml#/definitions/uint32 description: @@ -35,6 +44,7 @@ properties: description: Do not use internal trickle charger diode. Should be given if internal trickle charger diode should be disabled. + deprecated: true trickle-resistor-ohms: $ref: /schemas/types.yaml#/definitions/uint32 @@ -42,6 +52,12 @@ properties: Selected resistor for trickle charger. Should be given if trickle charger should be enabled. + trickle-voltage-millivolt: + description: + Selected voltage for trickle charger. Should be given + if trickle charger should be enabled and the trickle voltage is different + from the RTC main power supply. + wakeup-source: $ref: /schemas/types.yaml#/definitions/flag description: diff --git a/Documentation/filesystems/ceph.rst b/Documentation/filesystems/ceph.rst index 0aa70750df0f..7d2ef4e27273 100644 --- a/Documentation/filesystems/ceph.rst +++ b/Documentation/filesystems/ceph.rst @@ -163,14 +163,14 @@ Mount Options to the default VFS implementation if this option is used. recover_session= - Set auto reconnect mode in the case where the client is blacklisted. The + Set auto reconnect mode in the case where the client is blocklisted. The available modes are "no" and "clean". The default is "no". * no: never attempt to reconnect when client detects that it has been - blacklisted. Operations will generally fail after being blacklisted. + blocklisted. Operations will generally fail after being blocklisted. * clean: client reconnects to the ceph cluster automatically when it - detects that it has been blacklisted. During reconnect, client drops + detects that it has been blocklisted. During reconnect, client drops dirty data/metadata, invalidates page caches and writable file handles. After reconnect, file locks become stale because the MDS loses track of them. If an inode contains any stale file locks, read/write on the diff --git a/Documentation/i2c/busses/i2c-i801.rst b/Documentation/i2c/busses/i2c-i801.rst index faf32330c335..42bbdd6e7fd8 100644 --- a/Documentation/i2c/busses/i2c-i801.rst +++ b/Documentation/i2c/busses/i2c-i801.rst @@ -44,6 +44,7 @@ Supported adapters: * Intel Tiger Lake (PCH) * Intel Jasper Lake (SOC) * Intel Emmitsburg (PCH) + * Intel Alder Lake (PCH) Datasheets: Publicly available at the Intel website diff --git a/Documentation/i2c/index.rst b/Documentation/i2c/index.rst index 8a2ad3845191..8b76217e370a 100644 --- a/Documentation/i2c/index.rst +++ b/Documentation/i2c/index.rst @@ -47,6 +47,7 @@ Slave I2C slave-interface slave-eeprom-backend + slave-testunit-backend Advanced topics =============== diff --git a/Documentation/i2c/slave-testunit-backend.rst b/Documentation/i2c/slave-testunit-backend.rst new file mode 100644 index 000000000000..2c38e64f0bac --- /dev/null +++ b/Documentation/i2c/slave-testunit-backend.rst @@ -0,0 +1,69 @@ +.. SPDX-License-Identifier: GPL-2.0 + +================================ +Linux I2C slave testunit backend +================================ + +by Wolfram Sang in 2020 + +This backend can be used to trigger test cases for I2C bus masters which +require a remote device with certain capabilities (and which are usually not so +easy to obtain). Examples include multi-master testing, and SMBus Host Notify +testing. For some tests, the I2C slave controller must be able to switch +between master and slave mode because it needs to send data, too. + +Note that this is a device for testing and debugging. It should not be enabled +in a production build. And while there is some versioning and we try hard to +keep backward compatibility, there is no stable ABI guaranteed! + +Instantiating the device is regular. Example for bus 0, address 0x30: + +# echo "slave-testunit 0x1030" > /sys/bus/i2c/devices/i2c-0/new_device + +After that, you will have a write-only device listening. Reads will just return +an 8-bit version number of the testunit. When writing, the device consists of 4 +8-bit registers and all must be written to start a testcase, i.e. you must +always write 4 bytes to the device. The registers are: + +0x00 CMD - which test to trigger +0x01 DATAL - configuration byte 1 for the test +0x02 DATAH - configuration byte 2 for the test +0x03 DELAY - delay in n * 10ms until test is started + +Using 'i2cset' from the i2c-tools package, the generic command looks like: + +# i2cset -y i + +DELAY is a generic parameter which will delay the execution of the test in CMD. +While a command is running (including the delay), new commands will not be +acknowledged. You need to wait until the old one is completed. + +The commands are described in the following section. An invalid command will +result in the transfer not being acknowledged. + +Commands +-------- + +0x00 NOOP (reserved for future use) + +0x01 READ_BYTES (also needs master mode) + DATAL - address to read data from (lower 7 bits, highest bit currently unused) + DATAH - number of bytes to read + +This is useful to test if your bus master driver is handling multi-master +correctly. You can trigger the testunit to read bytes from another device on +the bus. If the bus master under test also wants to access the bus at the same +time, the bus will be busy. Example to read 128 bytes from device 0x50 after +50ms of delay: + +# i2cset -y 0 0x30 0x01 0x50 0x80 0x05 i + +0x02 SMBUS_HOST_NOTIFY (also needs master mode) + DATAL - low byte of the status word to send + DATAH - high byte of the status word to send + +This test will send an SMBUS_HOST_NOTIFY message to the host. Note that the +status word is currently ignored in the Linux Kernel. Example to send a +notification after 10ms: + +# i2cset -y 0 0x30 0x02 0x42 0x64 0x01 i diff --git a/MAINTAINERS b/MAINTAINERS index ddc2be1d1381..a4326ade6684 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -11166,6 +11166,12 @@ W: http://www.melfas.com F: Documentation/devicetree/bindings/input/touchscreen/melfas_mip4.txt F: drivers/input/touchscreen/melfas_mip4.c +MELLANOX BLUEFIELD I2C DRIVER +M: Khalil Blaiech +L: linux-i2c@vger.kernel.org +S: Supported +F: drivers/i2c/busses/i2c-mlxbf.c + MELLANOX ETHERNET DRIVER (mlx4_en) M: Tariq Toukan L: netdev@vger.kernel.org diff --git a/drivers/block/rbd.c b/drivers/block/rbd.c index 3e89b5d48ee6..f84128abade3 100644 --- a/drivers/block/rbd.c +++ b/drivers/block/rbd.c @@ -4010,10 +4010,10 @@ static int rbd_try_lock(struct rbd_device *rbd_dev) rbd_warn(rbd_dev, "breaking header lock owned by %s%llu", ENTITY_NAME(lockers[0].id.name)); - ret = ceph_monc_blacklist_add(&client->monc, + ret = ceph_monc_blocklist_add(&client->monc, &lockers[0].info.addr); if (ret) { - rbd_warn(rbd_dev, "blacklist of %s%llu failed: %d", + rbd_warn(rbd_dev, "blocklist of %s%llu failed: %d", ENTITY_NAME(lockers[0].id.name), ret); goto out; } @@ -4077,7 +4077,7 @@ static int rbd_try_acquire_lock(struct rbd_device *rbd_dev) ret = rbd_try_lock(rbd_dev); if (ret < 0) { rbd_warn(rbd_dev, "failed to lock header: %d", ret); - if (ret == -EBLACKLISTED) + if (ret == -EBLOCKLISTED) goto out; ret = 1; /* request lock anyway */ @@ -4613,7 +4613,7 @@ static void rbd_reregister_watch(struct work_struct *work) ret = __rbd_register_watch(rbd_dev); if (ret) { rbd_warn(rbd_dev, "failed to reregister watch: %d", ret); - if (ret != -EBLACKLISTED && ret != -ENOENT) { + if (ret != -EBLOCKLISTED && ret != -ENOENT) { queue_delayed_work(rbd_dev->task_wq, &rbd_dev->watch_dwork, RBD_RETRY_DELAY); diff --git a/drivers/firmware/dmi_scan.c b/drivers/firmware/dmi_scan.c index 5066d1f1d687..d51ca0428bb8 100644 --- a/drivers/firmware/dmi_scan.c +++ b/drivers/firmware/dmi_scan.c @@ -21,7 +21,7 @@ EXPORT_SYMBOL_GPL(dmi_kobj); /* * DMI stands for "Desktop Management Interface". It is part * of and an antecedent to, SMBIOS, which stands for System - * Management BIOS. See further: http://www.dmtf.org/standards + * Management BIOS. See further: https://www.dmtf.org/standards */ static const char dmi_empty_string[] = ""; diff --git a/drivers/i2c/Kconfig b/drivers/i2c/Kconfig index bae1dc08ec9a..438905e2a1d0 100644 --- a/drivers/i2c/Kconfig +++ b/drivers/i2c/Kconfig @@ -101,7 +101,6 @@ source "drivers/i2c/busses/Kconfig" config I2C_STUB tristate "I2C/SMBus Test Stub" depends on m - default 'n' help This module may be useful to developers of SMBus client drivers, especially for certain kinds of sensor chips. @@ -126,6 +125,14 @@ config I2C_SLAVE_EEPROM This backend makes Linux behave like an I2C EEPROM. Please read Documentation/i2c/slave-eeprom-backend.rst for further details. +config I2C_SLAVE_TESTUNIT + tristate "I2C eeprom testunit driver" + help + This backend can be used to trigger test cases for I2C bus masters + which require a remote device with certain capabilities, e.g. + multi-master, SMBus Host Notify, etc. Please read + Documentation/i2c/slave-testunit-backend.rst for further details. + endif config I2C_DEBUG_CORE diff --git a/drivers/i2c/Makefile b/drivers/i2c/Makefile index bed6ba63c983..c1d493dc9bac 100644 --- a/drivers/i2c/Makefile +++ b/drivers/i2c/Makefile @@ -16,5 +16,6 @@ obj-$(CONFIG_I2C_MUX) += i2c-mux.o obj-y += algos/ busses/ muxes/ obj-$(CONFIG_I2C_STUB) += i2c-stub.o obj-$(CONFIG_I2C_SLAVE_EEPROM) += i2c-slave-eeprom.o +obj-$(CONFIG_I2C_SLAVE_TESTUNIT) += i2c-slave-testunit.o ccflags-$(CONFIG_I2C_DEBUG_CORE) := -DDEBUG diff --git a/drivers/i2c/busses/Kconfig b/drivers/i2c/busses/Kconfig index 293e7a0760e7..a4f473ef4e5c 100644 --- a/drivers/i2c/busses/Kconfig +++ b/drivers/i2c/busses/Kconfig @@ -147,6 +147,7 @@ config I2C_I801 Tiger Lake (PCH) Jasper Lake (SOC) Emmitsburg (PCH) + Alder Lake (PCH) This driver can also be built as a module. If so, the module will be called i2c-i801. @@ -730,6 +731,19 @@ config I2C_LPC2K This driver can also be built as a module. If so, the module will be called i2c-lpc2k. +config I2C_MLXBF + tristate "Mellanox BlueField I2C controller" + depends on ARM64 + help + Enabling this option will add I2C SMBus support for Mellanox BlueField + system. + + This driver can also be built as a module. If so, the module will be + called i2c-mlxbf. + + This driver implements an I2C SMBus host controller and enables both + master and slave functions. + config I2C_MESON tristate "Amlogic Meson I2C controller" depends on ARCH_MESON || COMPILE_TEST @@ -840,7 +854,6 @@ config I2C_PASEMI config I2C_PCA_PLATFORM tristate "PCA9564/PCA9665 as platform device" select I2C_ALGOPCA - default n help This driver supports a memory mapped Philips PCA9564/PCA9665 parallel bus to I2C bus controller. @@ -1026,6 +1039,7 @@ config I2C_STM32F7 tristate "STMicroelectronics STM32F7 I2C support" depends on ARCH_STM32 || COMPILE_TEST select I2C_SLAVE + select I2C_SMBUS help Enable this option to add support for STM32 I2C controller embedded in STM32F7 SoCs. @@ -1181,6 +1195,8 @@ config I2C_RCAR tristate "Renesas R-Car I2C Controller" depends on ARCH_RENESAS || COMPILE_TEST select I2C_SLAVE + select I2C_SMBUS + select RESET_CONTROLLER if ARCH_RCAR_GEN3 help If you say yes to this option, support will be included for the R-Car I2C controller. @@ -1240,7 +1256,6 @@ config I2C_TAOS_EVM depends on TTY select SERIO select SERIO_SERPORT - default n help This supports TAOS evaluation modules on serial port. In order to use this driver, you will need the inputattach tool, which is part @@ -1324,7 +1339,6 @@ config I2C_PCA_ISA tristate "PCA9564/PCA9665 on an ISA bus" depends on ISA select I2C_ALGOPCA - default n help This driver supports ISA boards using the Philips PCA9564/PCA9665 parallel bus to I2C bus controller. diff --git a/drivers/i2c/busses/Makefile b/drivers/i2c/busses/Makefile index 19aff0e45cb5..683c49faca05 100644 --- a/drivers/i2c/busses/Makefile +++ b/drivers/i2c/busses/Makefile @@ -140,6 +140,7 @@ obj-$(CONFIG_I2C_BRCMSTB) += i2c-brcmstb.o obj-$(CONFIG_I2C_CROS_EC_TUNNEL) += i2c-cros-ec-tunnel.o obj-$(CONFIG_I2C_ELEKTOR) += i2c-elektor.o obj-$(CONFIG_I2C_ICY) += i2c-icy.o +obj-$(CONFIG_I2C_MLXBF) += i2c-mlxbf.o obj-$(CONFIG_I2C_MLXCPLD) += i2c-mlxcpld.o obj-$(CONFIG_I2C_OPAL) += i2c-opal.o obj-$(CONFIG_I2C_PCA_ISA) += i2c-pca-isa.o diff --git a/drivers/i2c/busses/i2c-amd-mp2-plat.c b/drivers/i2c/busses/i2c-amd-mp2-plat.c index 17df9e8845b6..506433bc0ff2 100644 --- a/drivers/i2c/busses/i2c-amd-mp2-plat.c +++ b/drivers/i2c/busses/i2c-amd-mp2-plat.c @@ -155,7 +155,7 @@ static int i2c_amd_xfer(struct i2c_adapter *adap, struct i2c_msg *msgs, int num) struct amd_i2c_dev *i2c_dev = i2c_get_adapdata(adap); int i; struct i2c_msg *pmsg; - int err; + int err = 0; /* the adapter might have been deleted while waiting for the bus lock */ if (unlikely(!i2c_dev->common.mp2_dev)) diff --git a/drivers/i2c/busses/i2c-bcm2835.c b/drivers/i2c/busses/i2c-bcm2835.c index 5dc519516292..37443edbf754 100644 --- a/drivers/i2c/busses/i2c-bcm2835.c +++ b/drivers/i2c/busses/i2c-bcm2835.c @@ -421,11 +421,9 @@ static int bcm2835_i2c_probe(struct platform_device *pdev) return PTR_ERR(i2c_dev->regs); mclk = devm_clk_get(&pdev->dev, NULL); - if (IS_ERR(mclk)) { - if (PTR_ERR(mclk) != -EPROBE_DEFER) - dev_err(&pdev->dev, "Could not get clock\n"); - return PTR_ERR(mclk); - } + if (IS_ERR(mclk)) + return dev_err_probe(&pdev->dev, PTR_ERR(mclk), + "Could not get clock\n"); i2c_dev->bus_clk = bcm2835_i2c_register_div(&pdev->dev, mclk, i2c_dev); diff --git a/drivers/i2c/busses/i2c-efm32.c b/drivers/i2c/busses/i2c-efm32.c index 838ce0947191..f6e13ceeb2b3 100644 --- a/drivers/i2c/busses/i2c-efm32.c +++ b/drivers/i2c/busses/i2c-efm32.c @@ -332,21 +332,15 @@ static int efm32_i2c_probe(struct platform_device *pdev) return ret; } - res = platform_get_resource(pdev, IORESOURCE_MEM, 0); - if (!res) { - dev_err(&pdev->dev, "failed to determine base address\n"); - return -ENODEV; - } + ddata->base = devm_platform_get_and_ioremap_resource(pdev, 0, &res); + if (IS_ERR(ddata->base)) + return PTR_ERR(ddata->base); if (resource_size(res) < 0x42) { dev_err(&pdev->dev, "memory resource too small\n"); return -EINVAL; } - ddata->base = devm_ioremap_resource(&pdev->dev, res); - if (IS_ERR(ddata->base)) - return PTR_ERR(ddata->base); - ret = platform_get_irq(pdev, 0); if (ret <= 0) { if (!ret) diff --git a/drivers/i2c/busses/i2c-i801.c b/drivers/i2c/busses/i2c-i801.c index bffca729e1c7..ae90713443fa 100644 --- a/drivers/i2c/busses/i2c-i801.c +++ b/drivers/i2c/busses/i2c-i801.c @@ -71,6 +71,7 @@ * Tiger Lake-H (PCH) 0x43a3 32 hard yes yes yes * Jasper Lake (SOC) 0x4da3 32 hard yes yes yes * Comet Lake-V (PCH) 0xa3a3 32 hard yes yes yes + * Alder Lake-S (PCH) 0x7aa3 32 hard yes yes yes * * Features supported by this driver: * Software PEC no @@ -228,6 +229,7 @@ #define PCI_DEVICE_ID_INTEL_ELKHART_LAKE_SMBUS 0x4b23 #define PCI_DEVICE_ID_INTEL_JASPER_LAKE_SMBUS 0x4da3 #define PCI_DEVICE_ID_INTEL_BROXTON_SMBUS 0x5ad4 +#define PCI_DEVICE_ID_INTEL_ALDER_LAKE_S_SMBUS 0x7aa3 #define PCI_DEVICE_ID_INTEL_LYNXPOINT_SMBUS 0x8c22 #define PCI_DEVICE_ID_INTEL_WILDCATPOINT_SMBUS 0x8ca2 #define PCI_DEVICE_ID_INTEL_WELLSBURG_SMBUS 0x8d22 @@ -1081,6 +1083,7 @@ static const struct pci_device_id i801_ids[] = { { PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_TIGERLAKE_LP_SMBUS) }, { PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_TIGERLAKE_H_SMBUS) }, { PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_JASPER_LAKE_SMBUS) }, + { PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_ALDER_LAKE_S_SMBUS) }, { 0, } }; @@ -1274,6 +1277,7 @@ static const struct { /* * Additional individual entries were added after verification. */ + { "Latitude 5480", 0x29 }, { "Vostro V131", 0x1d }, }; @@ -1767,6 +1771,7 @@ static int i801_probe(struct pci_dev *dev, const struct pci_device_id *id) case PCI_DEVICE_ID_INTEL_TIGERLAKE_H_SMBUS: case PCI_DEVICE_ID_INTEL_JASPER_LAKE_SMBUS: case PCI_DEVICE_ID_INTEL_EBG_SMBUS: + case PCI_DEVICE_ID_INTEL_ALDER_LAKE_S_SMBUS: priv->features |= FEATURE_BLOCK_PROC; priv->features |= FEATURE_I2C_BLOCK_READ; priv->features |= FEATURE_IRQ; diff --git a/drivers/i2c/busses/i2c-imx.c b/drivers/i2c/busses/i2c-imx.c index 0ab5381aa012..c98529c76348 100644 --- a/drivers/i2c/busses/i2c-imx.c +++ b/drivers/i2c/busses/i2c-imx.c @@ -1159,11 +1159,9 @@ static int i2c_imx_probe(struct platform_device *pdev) /* Get I2C clock */ i2c_imx->clk = devm_clk_get(&pdev->dev, NULL); - if (IS_ERR(i2c_imx->clk)) { - if (PTR_ERR(i2c_imx->clk) != -EPROBE_DEFER) - dev_err(&pdev->dev, "can't get I2C clock\n"); - return PTR_ERR(i2c_imx->clk); - } + if (IS_ERR(i2c_imx->clk)) + return dev_err_probe(&pdev->dev, PTR_ERR(i2c_imx->clk), + "can't get I2C clock\n"); ret = clk_prepare_enable(i2c_imx->clk); if (ret) { @@ -1171,14 +1169,6 @@ static int i2c_imx_probe(struct platform_device *pdev) return ret; } - /* Request IRQ */ - ret = devm_request_irq(&pdev->dev, irq, i2c_imx_isr, IRQF_SHARED, - pdev->name, i2c_imx); - if (ret) { - dev_err(&pdev->dev, "can't claim irq %d\n", irq); - goto clk_disable; - } - /* Init queue */ init_waitqueue_head(&i2c_imx->queue); @@ -1197,6 +1187,14 @@ static int i2c_imx_probe(struct platform_device *pdev) if (ret < 0) goto rpm_disable; + /* Request IRQ */ + ret = request_threaded_irq(irq, i2c_imx_isr, NULL, IRQF_SHARED, + pdev->name, i2c_imx); + if (ret) { + dev_err(&pdev->dev, "can't claim irq %d\n", irq); + goto rpm_disable; + } + /* Set up clock divider */ i2c_imx->bitrate = I2C_MAX_STANDARD_MODE_FREQ; ret = of_property_read_u32(pdev->dev.of_node, @@ -1239,13 +1237,12 @@ static int i2c_imx_probe(struct platform_device *pdev) clk_notifier_unregister: clk_notifier_unregister(i2c_imx->clk, &i2c_imx->clk_change_nb); + free_irq(irq, i2c_imx); rpm_disable: pm_runtime_put_noidle(&pdev->dev); pm_runtime_disable(&pdev->dev); pm_runtime_set_suspended(&pdev->dev); pm_runtime_dont_use_autosuspend(&pdev->dev); - -clk_disable: clk_disable_unprepare(i2c_imx->clk); return ret; } @@ -1253,7 +1250,7 @@ static int i2c_imx_probe(struct platform_device *pdev) static int i2c_imx_remove(struct platform_device *pdev) { struct imx_i2c_struct *i2c_imx = platform_get_drvdata(pdev); - int ret; + int irq, ret; ret = pm_runtime_get_sync(&pdev->dev); if (ret < 0) @@ -1273,6 +1270,9 @@ static int i2c_imx_remove(struct platform_device *pdev) imx_i2c_write_reg(0, i2c_imx, IMX_I2C_I2SR); clk_notifier_unregister(i2c_imx->clk, &i2c_imx->clk_change_nb); + irq = platform_get_irq(pdev, 0); + if (irq >= 0) + free_irq(irq, i2c_imx); clk_disable_unprepare(i2c_imx->clk); pm_runtime_put_noidle(&pdev->dev); diff --git a/drivers/i2c/busses/i2c-ismt.c b/drivers/i2c/busses/i2c-ismt.c index 2f95e25a10f7..a35a27c320e7 100644 --- a/drivers/i2c/busses/i2c-ismt.c +++ b/drivers/i2c/busses/i2c-ismt.c @@ -77,6 +77,7 @@ #define PCI_DEVICE_ID_INTEL_S1200_SMT1 0x0c5a #define PCI_DEVICE_ID_INTEL_CDF_SMT 0x18ac #define PCI_DEVICE_ID_INTEL_DNV_SMT 0x19ac +#define PCI_DEVICE_ID_INTEL_EBG_SMT 0x1bff #define PCI_DEVICE_ID_INTEL_AVOTON_SMT 0x1f15 #define ISMT_DESC_ENTRIES 2 /* number of descriptor entries */ @@ -176,14 +177,12 @@ struct ismt_priv { u8 buffer[I2C_SMBUS_BLOCK_MAX + 16]; /* temp R/W data buffer */ }; -/** - * ismt_ids - PCI device IDs supported by this driver - */ static const struct pci_device_id ismt_ids[] = { { PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_S1200_SMT0) }, { PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_S1200_SMT1) }, { PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_CDF_SMT) }, { PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_DNV_SMT) }, + { PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_EBG_SMT) }, { PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_AVOTON_SMT) }, { 0, } }; @@ -197,6 +196,8 @@ MODULE_PARM_DESC(bus_speed, "Bus Speed in kHz (0 = BIOS default)"); /** * __ismt_desc_dump() - dump the contents of a specific descriptor + * @dev: the iSMT device + * @desc: the iSMT hardware descriptor */ static void __ismt_desc_dump(struct device *dev, const struct ismt_desc *desc) { @@ -628,11 +629,6 @@ static u32 ismt_func(struct i2c_adapter *adap) I2C_FUNC_SMBUS_PEC; } -/** - * smbus_algorithm - the adapter algorithm and supported functionality - * @smbus_xfer: the adapter algorithm - * @functionality: functionality supported by the adapter - */ static const struct i2c_algorithm smbus_algorithm = { .smbus_xfer = ismt_access, .functionality = ismt_func, diff --git a/drivers/i2c/busses/i2c-jz4780.c b/drivers/i2c/busses/i2c-jz4780.c index ba831df6661e..cb4a25ebb890 100644 --- a/drivers/i2c/busses/i2c-jz4780.c +++ b/drivers/i2c/busses/i2c-jz4780.c @@ -752,6 +752,7 @@ static const struct ingenic_i2c_config x1000_i2c_config = { }; static const struct of_device_id jz4780_i2c_of_matches[] = { + { .compatible = "ingenic,jz4770-i2c", .data = &jz4780_i2c_config }, { .compatible = "ingenic,jz4780-i2c", .data = &jz4780_i2c_config }, { .compatible = "ingenic,x1000-i2c", .data = &x1000_i2c_config }, { /* sentinel */ } @@ -856,7 +857,7 @@ static struct platform_driver jz4780_i2c_driver = { .remove = jz4780_i2c_remove, .driver = { .name = "jz4780-i2c", - .of_match_table = of_match_ptr(jz4780_i2c_of_matches), + .of_match_table = jz4780_i2c_of_matches, }, }; diff --git a/drivers/i2c/busses/i2c-mlxbf.c b/drivers/i2c/busses/i2c-mlxbf.c new file mode 100644 index 000000000000..ee59e0da082d --- /dev/null +++ b/drivers/i2c/busses/i2c-mlxbf.c @@ -0,0 +1,2506 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Mellanox BlueField I2C bus driver + * + * Copyright (C) 2020 Mellanox Technologies, Ltd. + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +/* Defines what functionality is present. */ +#define MLXBF_I2C_FUNC_SMBUS_BLOCK \ + (I2C_FUNC_SMBUS_BLOCK_DATA | I2C_FUNC_SMBUS_BLOCK_PROC_CALL) + +#define MLXBF_I2C_FUNC_SMBUS_DEFAULT \ + (I2C_FUNC_SMBUS_BYTE | I2C_FUNC_SMBUS_BYTE_DATA | \ + I2C_FUNC_SMBUS_WORD_DATA | I2C_FUNC_SMBUS_I2C_BLOCK | \ + I2C_FUNC_SMBUS_PROC_CALL) + +#define MLXBF_I2C_FUNC_ALL \ + (MLXBF_I2C_FUNC_SMBUS_DEFAULT | MLXBF_I2C_FUNC_SMBUS_BLOCK | \ + I2C_FUNC_SMBUS_QUICK | I2C_FUNC_SLAVE) + +#define MLXBF_I2C_SMBUS_MAX 3 + +/* Shared resources info in BlueField platforms. */ + +#define MLXBF_I2C_COALESCE_TYU_ADDR 0x02801300 +#define MLXBF_I2C_COALESCE_TYU_SIZE 0x010 + +#define MLXBF_I2C_GPIO_TYU_ADDR 0x02802000 +#define MLXBF_I2C_GPIO_TYU_SIZE 0x100 + +#define MLXBF_I2C_COREPLL_TYU_ADDR 0x02800358 +#define MLXBF_I2C_COREPLL_TYU_SIZE 0x008 + +#define MLXBF_I2C_COREPLL_YU_ADDR 0x02800c30 +#define MLXBF_I2C_COREPLL_YU_SIZE 0x00c + +#define MLXBF_I2C_SHARED_RES_MAX 3 + +/* + * Note that the following SMBus, CAUSE, GPIO and PLL register addresses + * refer to their respective offsets relative to the corresponding + * memory-mapped region whose addresses are specified in either the DT or + * the ACPI tables or above. + */ + +/* + * SMBus Master core clock frequency. Timing configurations are + * strongly dependent on the core clock frequency of the SMBus + * Master. Default value is set to 400MHz. + */ +#define MLXBF_I2C_TYU_PLL_OUT_FREQ (400 * 1000 * 1000) +/* Reference clock for Bluefield 1 - 156 MHz. */ +#define MLXBF_I2C_TYU_PLL_IN_FREQ (156 * 1000 * 1000) +/* Reference clock for BlueField 2 - 200 MHz. */ +#define MLXBF_I2C_YU_PLL_IN_FREQ (200 * 1000 * 1000) + +/* Constant used to determine the PLL frequency. */ +#define MLNXBF_I2C_COREPLL_CONST 16384 + +/* PLL registers. */ +#define MLXBF_I2C_CORE_PLL_REG0 0x0 +#define MLXBF_I2C_CORE_PLL_REG1 0x4 +#define MLXBF_I2C_CORE_PLL_REG2 0x8 + +/* OR cause register. */ +#define MLXBF_I2C_CAUSE_OR_EVTEN0 0x14 +#define MLXBF_I2C_CAUSE_OR_CLEAR 0x18 + +/* Arbiter Cause Register. */ +#define MLXBF_I2C_CAUSE_ARBITER 0x1c + +/* + * Cause Status flags. Note that those bits might be considered + * as interrupt enabled bits. + */ + +/* Transaction ended with STOP. */ +#define MLXBF_I2C_CAUSE_TRANSACTION_ENDED BIT(0) +/* Master arbitration lost. */ +#define MLXBF_I2C_CAUSE_M_ARBITRATION_LOST BIT(1) +/* Unexpected start detected. */ +#define MLXBF_I2C_CAUSE_UNEXPECTED_START BIT(2) +/* Unexpected stop detected. */ +#define MLXBF_I2C_CAUSE_UNEXPECTED_STOP BIT(3) +/* Wait for transfer continuation. */ +#define MLXBF_I2C_CAUSE_WAIT_FOR_FW_DATA BIT(4) +/* Failed to generate STOP. */ +#define MLXBF_I2C_CAUSE_PUT_STOP_FAILED BIT(5) +/* Failed to generate START. */ +#define MLXBF_I2C_CAUSE_PUT_START_FAILED BIT(6) +/* Clock toggle completed. */ +#define MLXBF_I2C_CAUSE_CLK_TOGGLE_DONE BIT(7) +/* Transfer timeout occurred. */ +#define MLXBF_I2C_CAUSE_M_FW_TIMEOUT BIT(8) +/* Master busy bit reset. */ +#define MLXBF_I2C_CAUSE_M_GW_BUSY_FALL BIT(9) + +#define MLXBF_I2C_CAUSE_MASTER_ARBITER_BITS_MASK GENMASK(9, 0) + +#define MLXBF_I2C_CAUSE_MASTER_STATUS_ERROR \ + (MLXBF_I2C_CAUSE_M_ARBITRATION_LOST | \ + MLXBF_I2C_CAUSE_UNEXPECTED_START | \ + MLXBF_I2C_CAUSE_UNEXPECTED_STOP | \ + MLXBF_I2C_CAUSE_PUT_STOP_FAILED | \ + MLXBF_I2C_CAUSE_PUT_START_FAILED | \ + MLXBF_I2C_CAUSE_CLK_TOGGLE_DONE | \ + MLXBF_I2C_CAUSE_M_FW_TIMEOUT) + +/* + * Slave cause status flags. Note that those bits might be considered + * as interrupt enabled bits. + */ + +/* Write transaction received successfully. */ +#define MLXBF_I2C_CAUSE_WRITE_SUCCESS BIT(0) +/* Read transaction received, waiting for response. */ +#define MLXBF_I2C_CAUSE_READ_WAIT_FW_RESPONSE BIT(13) +/* Slave busy bit reset. */ +#define MLXBF_I2C_CAUSE_S_GW_BUSY_FALL BIT(18) + +#define MLXBF_I2C_CAUSE_SLAVE_ARBITER_BITS_MASK GENMASK(20, 0) + +/* Cause coalesce registers. */ +#define MLXBF_I2C_CAUSE_COALESCE_0 0x00 +#define MLXBF_I2C_CAUSE_COALESCE_1 0x04 +#define MLXBF_I2C_CAUSE_COALESCE_2 0x08 + +#define MLXBF_I2C_CAUSE_TYU_SLAVE_BIT MLXBF_I2C_SMBUS_MAX +#define MLXBF_I2C_CAUSE_YU_SLAVE_BIT 1 + +/* Functional enable register. */ +#define MLXBF_I2C_GPIO_0_FUNC_EN_0 0x28 +/* Force OE enable register. */ +#define MLXBF_I2C_GPIO_0_FORCE_OE_EN 0x30 +/* + * Note that Smbus GWs are on GPIOs 30:25. Two pins are used to control + * SDA/SCL lines: + * + * SMBUS GW0 -> bits[26:25] + * SMBUS GW1 -> bits[28:27] + * SMBUS GW2 -> bits[30:29] + */ +#define MLXBF_I2C_GPIO_SMBUS_GW_PINS(num) (25 + ((num) << 1)) + +/* Note that gw_id can be 0,1 or 2. */ +#define MLXBF_I2C_GPIO_SMBUS_GW_MASK(num) \ + (0xffffffff & (~(0x3 << MLXBF_I2C_GPIO_SMBUS_GW_PINS(num)))) + +#define MLXBF_I2C_GPIO_SMBUS_GW_RESET_PINS(num, val) \ + ((val) & MLXBF_I2C_GPIO_SMBUS_GW_MASK(num)) + +#define MLXBF_I2C_GPIO_SMBUS_GW_ASSERT_PINS(num, val) \ + ((val) | (0x3 << MLXBF_I2C_GPIO_SMBUS_GW_PINS(num))) + +/* SMBus timing parameters. */ +#define MLXBF_I2C_SMBUS_TIMER_SCL_LOW_SCL_HIGH 0x00 +#define MLXBF_I2C_SMBUS_TIMER_FALL_RISE_SPIKE 0x04 +#define MLXBF_I2C_SMBUS_TIMER_THOLD 0x08 +#define MLXBF_I2C_SMBUS_TIMER_TSETUP_START_STOP 0x0c +#define MLXBF_I2C_SMBUS_TIMER_TSETUP_DATA 0x10 +#define MLXBF_I2C_SMBUS_THIGH_MAX_TBUF 0x14 +#define MLXBF_I2C_SMBUS_SCL_LOW_TIMEOUT 0x18 + +enum { + MLXBF_I2C_TIMING_100KHZ = 100000, + MLXBF_I2C_TIMING_400KHZ = 400000, + MLXBF_I2C_TIMING_1000KHZ = 1000000, +}; + +/* + * Defines SMBus operating frequency and core clock frequency. + * According to ADB files, default values are compliant to 100KHz SMBus + * @ 400MHz core clock. The driver should be able to calculate core + * frequency based on PLL parameters. + */ +#define MLXBF_I2C_COREPLL_FREQ MLXBF_I2C_TYU_PLL_OUT_FREQ + +/* Core PLL TYU configuration. */ +#define MLXBF_I2C_COREPLL_CORE_F_TYU_MASK GENMASK(12, 0) +#define MLXBF_I2C_COREPLL_CORE_OD_TYU_MASK GENMASK(3, 0) +#define MLXBF_I2C_COREPLL_CORE_R_TYU_MASK GENMASK(5, 0) + +#define MLXBF_I2C_COREPLL_CORE_F_TYU_SHIFT 3 +#define MLXBF_I2C_COREPLL_CORE_OD_TYU_SHIFT 16 +#define MLXBF_I2C_COREPLL_CORE_R_TYU_SHIFT 20 + +/* Core PLL YU configuration. */ +#define MLXBF_I2C_COREPLL_CORE_F_YU_MASK GENMASK(25, 0) +#define MLXBF_I2C_COREPLL_CORE_OD_YU_MASK GENMASK(3, 0) +#define MLXBF_I2C_COREPLL_CORE_R_YU_MASK GENMASK(5, 0) + +#define MLXBF_I2C_COREPLL_CORE_F_YU_SHIFT 0 +#define MLXBF_I2C_COREPLL_CORE_OD_YU_SHIFT 1 +#define MLXBF_I2C_COREPLL_CORE_R_YU_SHIFT 26 + +/* Core PLL frequency. */ +static u64 mlxbf_i2c_corepll_frequency; + +/* SMBus Master GW. */ +#define MLXBF_I2C_SMBUS_MASTER_GW 0x200 +/* Number of bytes received and sent. */ +#define MLXBF_I2C_SMBUS_RS_BYTES 0x300 +/* Packet error check (PEC) value. */ +#define MLXBF_I2C_SMBUS_MASTER_PEC 0x304 +/* Status bits (ACK/NACK/FW Timeout). */ +#define MLXBF_I2C_SMBUS_MASTER_STATUS 0x308 +/* SMbus Master Finite State Machine. */ +#define MLXBF_I2C_SMBUS_MASTER_FSM 0x310 + +/* + * When enabled, the master will issue a stop condition in case of + * timeout while waiting for FW response. + */ +#define MLXBF_I2C_SMBUS_EN_FW_TIMEOUT 0x31c + +/* SMBus master GW control bits offset in MLXBF_I2C_SMBUS_MASTER_GW[31:3]. */ +#define MLXBF_I2C_MASTER_LOCK_BIT BIT(31) /* Lock bit. */ +#define MLXBF_I2C_MASTER_BUSY_BIT BIT(30) /* Busy bit. */ +#define MLXBF_I2C_MASTER_START_BIT BIT(29) /* Control start. */ +#define MLXBF_I2C_MASTER_CTL_WRITE_BIT BIT(28) /* Control write phase. */ +#define MLXBF_I2C_MASTER_CTL_READ_BIT BIT(19) /* Control read phase. */ +#define MLXBF_I2C_MASTER_STOP_BIT BIT(3) /* Control stop. */ + +#define MLXBF_I2C_MASTER_ENABLE \ + (MLXBF_I2C_MASTER_LOCK_BIT | MLXBF_I2C_MASTER_BUSY_BIT | \ + MLXBF_I2C_MASTER_START_BIT | MLXBF_I2C_MASTER_STOP_BIT) + +#define MLXBF_I2C_MASTER_ENABLE_WRITE \ + (MLXBF_I2C_MASTER_ENABLE | MLXBF_I2C_MASTER_CTL_WRITE_BIT) + +#define MLXBF_I2C_MASTER_ENABLE_READ \ + (MLXBF_I2C_MASTER_ENABLE | MLXBF_I2C_MASTER_CTL_READ_BIT) + +#define MLXBF_I2C_MASTER_SLV_ADDR_SHIFT 12 /* Slave address shift. */ +#define MLXBF_I2C_MASTER_WRITE_SHIFT 21 /* Control write bytes shift. */ +#define MLXBF_I2C_MASTER_SEND_PEC_SHIFT 20 /* Send PEC byte shift. */ +#define MLXBF_I2C_MASTER_PARSE_EXP_SHIFT 11 /* Parse expected bytes shift. */ +#define MLXBF_I2C_MASTER_READ_SHIFT 4 /* Control read bytes shift. */ + +/* SMBus master GW Data descriptor. */ +#define MLXBF_I2C_MASTER_DATA_DESC_ADDR 0x280 +#define MLXBF_I2C_MASTER_DATA_DESC_SIZE 0x80 /* Size in bytes. */ + +/* Maximum bytes to read/write per SMBus transaction. */ +#define MLXBF_I2C_MASTER_DATA_R_LENGTH MLXBF_I2C_MASTER_DATA_DESC_SIZE +#define MLXBF_I2C_MASTER_DATA_W_LENGTH (MLXBF_I2C_MASTER_DATA_DESC_SIZE - 1) + +/* All bytes were transmitted. */ +#define MLXBF_I2C_SMBUS_STATUS_BYTE_CNT_DONE BIT(0) +/* NACK received. */ +#define MLXBF_I2C_SMBUS_STATUS_NACK_RCV BIT(1) +/* Slave's byte count >128 bytes. */ +#define MLXBF_I2C_SMBUS_STATUS_READ_ERR BIT(2) +/* Timeout occurred. */ +#define MLXBF_I2C_SMBUS_STATUS_FW_TIMEOUT BIT(3) + +#define MLXBF_I2C_SMBUS_MASTER_STATUS_MASK GENMASK(3, 0) + +#define MLXBF_I2C_SMBUS_MASTER_STATUS_ERROR \ + (MLXBF_I2C_SMBUS_STATUS_NACK_RCV | \ + MLXBF_I2C_SMBUS_STATUS_READ_ERR | \ + MLXBF_I2C_SMBUS_STATUS_FW_TIMEOUT) + +#define MLXBF_I2C_SMBUS_MASTER_FSM_STOP_MASK BIT(31) +#define MLXBF_I2C_SMBUS_MASTER_FSM_PS_STATE_MASK BIT(15) + +/* SMBus slave GW. */ +#define MLXBF_I2C_SMBUS_SLAVE_GW 0x400 +/* Number of bytes received and sent from/to master. */ +#define MLXBF_I2C_SMBUS_SLAVE_RS_MASTER_BYTES 0x500 +/* Packet error check (PEC) value. */ +#define MLXBF_I2C_SMBUS_SLAVE_PEC 0x504 +/* SMBus slave Finite State Machine (FSM). */ +#define MLXBF_I2C_SMBUS_SLAVE_FSM 0x510 +/* + * Should be set when all raised causes handled, and cleared by HW on + * every new cause. + */ +#define MLXBF_I2C_SMBUS_SLAVE_READY 0x52c + +/* SMBus slave GW control bits offset in MLXBF_I2C_SMBUS_SLAVE_GW[31:19]. */ +#define MLXBF_I2C_SLAVE_BUSY_BIT BIT(30) /* Busy bit. */ +#define MLXBF_I2C_SLAVE_WRITE_BIT BIT(29) /* Control write enable. */ + +#define MLXBF_I2C_SLAVE_ENABLE \ + (MLXBF_I2C_SLAVE_BUSY_BIT | MLXBF_I2C_SLAVE_WRITE_BIT) + +#define MLXBF_I2C_SLAVE_WRITE_BYTES_SHIFT 22 /* Number of bytes to write. */ +#define MLXBF_I2C_SLAVE_SEND_PEC_SHIFT 21 /* Send PEC byte shift. */ + +/* SMBus slave GW Data descriptor. */ +#define MLXBF_I2C_SLAVE_DATA_DESC_ADDR 0x480 +#define MLXBF_I2C_SLAVE_DATA_DESC_SIZE 0x80 /* Size in bytes. */ + +/* SMbus slave configuration registers. */ +#define MLXBF_I2C_SMBUS_SLAVE_ADDR_CFG 0x514 +#define MLXBF_I2C_SMBUS_SLAVE_ADDR_CNT 16 +#define MLXBF_I2C_SMBUS_SLAVE_ADDR_EN_BIT 7 +#define MLXBF_I2C_SMBUS_SLAVE_ADDR_MASK GENMASK(6, 0) + +#define MLXBF_I2C_SLAVE_ADDR_ENABLED(addr) \ + ((addr) & (1 << MLXBF_I2C_SMBUS_SLAVE_ADDR_EN_BIT)) + +/* + * Timeout is given in microsends. Note also that timeout handling is not + * exact. + */ +#define MLXBF_I2C_SMBUS_TIMEOUT (300 * 1000) /* 300ms */ + +/* Encapsulates timing parameters. */ +struct mlxbf_i2c_timings { + u16 scl_high; /* Clock high period. */ + u16 scl_low; /* Clock low period. */ + u8 sda_rise; /* Data rise time. */ + u8 sda_fall; /* Data fall time. */ + u8 scl_rise; /* Clock rise time. */ + u8 scl_fall; /* Clock fall time. */ + u16 hold_start; /* Hold time after (REPEATED) START. */ + u16 hold_data; /* Data hold time. */ + u16 setup_start; /* REPEATED START condition setup time. */ + u16 setup_stop; /* STOP condition setup time. */ + u16 setup_data; /* Data setup time. */ + u16 pad; /* Padding. */ + u16 buf; /* Bus free time between STOP and START. */ + u16 thigh_max; /* Thigh max. */ + u32 timeout; /* Detect clock low timeout. */ +}; + +enum { + MLXBF_I2C_F_READ = BIT(0), + MLXBF_I2C_F_WRITE = BIT(1), + MLXBF_I2C_F_NORESTART = BIT(3), + MLXBF_I2C_F_SMBUS_OPERATION = BIT(4), + MLXBF_I2C_F_SMBUS_BLOCK = BIT(5), + MLXBF_I2C_F_SMBUS_PEC = BIT(6), + MLXBF_I2C_F_SMBUS_PROCESS_CALL = BIT(7), +}; + +struct mlxbf_i2c_smbus_operation { + u32 flags; + u32 length; /* Buffer length in bytes. */ + u8 *buffer; +}; + +#define MLXBF_I2C_SMBUS_OP_CNT_1 1 +#define MLXBF_I2C_SMBUS_OP_CNT_2 2 +#define MLXBF_I2C_SMBUS_OP_CNT_3 3 +#define MLXBF_I2C_SMBUS_MAX_OP_CNT MLXBF_I2C_SMBUS_OP_CNT_3 + +struct mlxbf_i2c_smbus_request { + u8 slave; + u8 operation_cnt; + struct mlxbf_i2c_smbus_operation operation[MLXBF_I2C_SMBUS_MAX_OP_CNT]; +}; + +struct mlxbf_i2c_resource { + void __iomem *io; + struct resource *params; + struct mutex *lock; /* Mutex to protect mlxbf_i2c_resource. */ + u8 type; +}; + +/* List of chip resources that are being accessed by the driver. */ +enum { + MLXBF_I2C_SMBUS_RES, + MLXBF_I2C_MST_CAUSE_RES, + MLXBF_I2C_SLV_CAUSE_RES, + MLXBF_I2C_COALESCE_RES, + MLXBF_I2C_COREPLL_RES, + MLXBF_I2C_GPIO_RES, + MLXBF_I2C_END_RES, +}; + +/* Helper macro to define an I2C resource parameters. */ +#define MLXBF_I2C_RES_PARAMS(addr, size, str) \ + { \ + .start = (addr), \ + .end = (addr) + (size) - 1, \ + .name = (str) \ + } + +static struct resource mlxbf_i2c_coalesce_tyu_params = + MLXBF_I2C_RES_PARAMS(MLXBF_I2C_COALESCE_TYU_ADDR, + MLXBF_I2C_COALESCE_TYU_SIZE, + "COALESCE_MEM"); +static struct resource mlxbf_i2c_corepll_tyu_params = + MLXBF_I2C_RES_PARAMS(MLXBF_I2C_COREPLL_TYU_ADDR, + MLXBF_I2C_COREPLL_TYU_SIZE, + "COREPLL_MEM"); +static struct resource mlxbf_i2c_corepll_yu_params = + MLXBF_I2C_RES_PARAMS(MLXBF_I2C_COREPLL_YU_ADDR, + MLXBF_I2C_COREPLL_YU_SIZE, + "COREPLL_MEM"); +static struct resource mlxbf_i2c_gpio_tyu_params = + MLXBF_I2C_RES_PARAMS(MLXBF_I2C_GPIO_TYU_ADDR, + MLXBF_I2C_GPIO_TYU_SIZE, + "GPIO_MEM"); + +static struct mutex mlxbf_i2c_coalesce_lock; +static struct mutex mlxbf_i2c_corepll_lock; +static struct mutex mlxbf_i2c_gpio_lock; + +/* Mellanox BlueField chip type. */ +enum mlxbf_i2c_chip_type { + MLXBF_I2C_CHIP_TYPE_1, /* Mellanox BlueField-1 chip. */ + MLXBF_I2C_CHIP_TYPE_2, /* Mallanox BlueField-2 chip. */ +}; + +struct mlxbf_i2c_chip_info { + enum mlxbf_i2c_chip_type type; + /* Chip shared resources that are being used by the I2C controller. */ + struct mlxbf_i2c_resource *shared_res[MLXBF_I2C_SHARED_RES_MAX]; + + /* Callback to calculate the core PLL frequency. */ + u64 (*calculate_freq)(struct mlxbf_i2c_resource *corepll_res); +}; + +struct mlxbf_i2c_priv { + const struct mlxbf_i2c_chip_info *chip; + struct i2c_adapter adap; + struct mlxbf_i2c_resource *smbus; + struct mlxbf_i2c_resource *mst_cause; + struct mlxbf_i2c_resource *slv_cause; + struct mlxbf_i2c_resource *coalesce; + u64 frequency; /* Core frequency in Hz. */ + int bus; /* Physical bus identifier. */ + int irq; + struct i2c_client *slave; +}; + +static struct mlxbf_i2c_resource mlxbf_i2c_coalesce_res[] = { + [MLXBF_I2C_CHIP_TYPE_1] = { + .params = &mlxbf_i2c_coalesce_tyu_params, + .lock = &mlxbf_i2c_coalesce_lock, + .type = MLXBF_I2C_COALESCE_RES + }, + {} +}; + +static struct mlxbf_i2c_resource mlxbf_i2c_corepll_res[] = { + [MLXBF_I2C_CHIP_TYPE_1] = { + .params = &mlxbf_i2c_corepll_tyu_params, + .lock = &mlxbf_i2c_corepll_lock, + .type = MLXBF_I2C_COREPLL_RES + }, + [MLXBF_I2C_CHIP_TYPE_2] = { + .params = &mlxbf_i2c_corepll_yu_params, + .lock = &mlxbf_i2c_corepll_lock, + .type = MLXBF_I2C_COREPLL_RES, + } +}; + +static struct mlxbf_i2c_resource mlxbf_i2c_gpio_res[] = { + [MLXBF_I2C_CHIP_TYPE_1] = { + .params = &mlxbf_i2c_gpio_tyu_params, + .lock = &mlxbf_i2c_gpio_lock, + .type = MLXBF_I2C_GPIO_RES + }, + {} +}; + +static u8 mlxbf_i2c_bus_count; + +static struct mutex mlxbf_i2c_bus_lock; + +/* Polling frequency in microseconds. */ +#define MLXBF_I2C_POLL_FREQ_IN_USEC 200 + +#define MLXBF_I2C_SHIFT_0 0 +#define MLXBF_I2C_SHIFT_8 8 +#define MLXBF_I2C_SHIFT_16 16 +#define MLXBF_I2C_SHIFT_24 24 + +#define MLXBF_I2C_MASK_8 GENMASK(7, 0) +#define MLXBF_I2C_MASK_16 GENMASK(15, 0) + +#define MLXBF_I2C_FREQUENCY_1GHZ 1000000000 + +static void mlxbf_i2c_write(void __iomem *io, int reg, u32 val) +{ + writel(val, io + reg); +} + +static u32 mlxbf_i2c_read(void __iomem *io, int reg) +{ + return readl(io + reg); +} + +/* + * This function is used to read data from Master GW Data Descriptor. + * Data bytes in the Master GW Data Descriptor are shifted left so the + * data starts at the MSB of the descriptor registers as set by the + * underlying hardware. TYU_READ_DATA enables byte swapping while + * reading data bytes, and MUST be called by the SMBus read routines + * to copy data from the 32 * 32-bit HW Data registers a.k.a Master GW + * Data Descriptor. + */ +static u32 mlxbf_i2c_read_data(void __iomem *io, int reg) +{ + return (u32)be32_to_cpu(mlxbf_i2c_read(io, reg)); +} + +/* + * This function is used to write data to the Master GW Data Descriptor. + * Data copied to the Master GW Data Descriptor MUST be shifted left so + * the data starts at the MSB of the descriptor registers as required by + * the underlying hardware. TYU_WRITE_DATA enables byte swapping when + * writing data bytes, and MUST be called by the SMBus write routines to + * copy data to the 32 * 32-bit HW Data registers a.k.a Master GW Data + * Descriptor. + */ +static void mlxbf_i2c_write_data(void __iomem *io, int reg, u32 val) +{ + mlxbf_i2c_write(io, reg, (u32)cpu_to_be32(val)); +} + +/* + * Function to poll a set of bits at a specific address; it checks whether + * the bits are equal to zero when eq_zero is set to 'true', and not equal + * to zero when eq_zero is set to 'false'. + * Note that the timeout is given in microseconds. + */ +static u32 mlxbf_smbus_poll(void __iomem *io, u32 addr, u32 mask, + bool eq_zero, u32 timeout) +{ + u32 bits; + + timeout = (timeout / MLXBF_I2C_POLL_FREQ_IN_USEC) + 1; + + do { + bits = mlxbf_i2c_read(io, addr) & mask; + if (eq_zero ? bits == 0 : bits != 0) + return eq_zero ? 1 : bits; + udelay(MLXBF_I2C_POLL_FREQ_IN_USEC); + } while (timeout-- != 0); + + return 0; +} + +/* + * SW must make sure that the SMBus Master GW is idle before starting + * a transaction. Accordingly, this function polls the Master FSM stop + * bit; it returns false when the bit is asserted, true if not. + */ +static bool mlxbf_smbus_master_wait_for_idle(struct mlxbf_i2c_priv *priv) +{ + u32 mask = MLXBF_I2C_SMBUS_MASTER_FSM_STOP_MASK; + u32 addr = MLXBF_I2C_SMBUS_MASTER_FSM; + u32 timeout = MLXBF_I2C_SMBUS_TIMEOUT; + + if (mlxbf_smbus_poll(priv->smbus->io, addr, mask, true, timeout)) + return true; + + return false; +} + +static bool mlxbf_i2c_smbus_transaction_success(u32 master_status, + u32 cause_status) +{ + /* + * When transaction ended with STOP, all bytes were transmitted, + * and no NACK received, then the transaction ended successfully. + * On the other hand, when the GW is configured with the stop bit + * de-asserted then the SMBus expects the following GW configuration + * for transfer continuation. + */ + if ((cause_status & MLXBF_I2C_CAUSE_WAIT_FOR_FW_DATA) || + ((cause_status & MLXBF_I2C_CAUSE_TRANSACTION_ENDED) && + (master_status & MLXBF_I2C_SMBUS_STATUS_BYTE_CNT_DONE) && + !(master_status & MLXBF_I2C_SMBUS_STATUS_NACK_RCV))) + return true; + + return false; +} + +/* + * Poll SMBus master status and return transaction status, + * i.e. whether succeeded or failed. I2C and SMBus fault codes + * are returned as negative numbers from most calls, with zero + * or some positive number indicating a non-fault return. + */ +static int mlxbf_i2c_smbus_check_status(struct mlxbf_i2c_priv *priv) +{ + u32 master_status_bits; + u32 cause_status_bits; + + /* + * GW busy bit is raised by the driver and cleared by the HW + * when the transaction is completed. The busy bit is a good + * indicator of transaction status. So poll the busy bit, and + * then read the cause and master status bits to determine if + * errors occurred during the transaction. + */ + mlxbf_smbus_poll(priv->smbus->io, MLXBF_I2C_SMBUS_MASTER_GW, + MLXBF_I2C_MASTER_BUSY_BIT, true, + MLXBF_I2C_SMBUS_TIMEOUT); + + /* Read cause status bits. */ + cause_status_bits = mlxbf_i2c_read(priv->mst_cause->io, + MLXBF_I2C_CAUSE_ARBITER); + cause_status_bits &= MLXBF_I2C_CAUSE_MASTER_ARBITER_BITS_MASK; + + /* + * Parse both Cause and Master GW bits, then return transaction status. + */ + + master_status_bits = mlxbf_i2c_read(priv->smbus->io, + MLXBF_I2C_SMBUS_MASTER_STATUS); + master_status_bits &= MLXBF_I2C_SMBUS_MASTER_STATUS_MASK; + + if (mlxbf_i2c_smbus_transaction_success(master_status_bits, + cause_status_bits)) + return 0; + + /* + * In case of timeout on GW busy, the ISR will clear busy bit but + * transaction ended bits cause will not be set so the transaction + * fails. Then, we must check Master GW status bits. + */ + if ((master_status_bits & MLXBF_I2C_SMBUS_MASTER_STATUS_ERROR) && + (cause_status_bits & (MLXBF_I2C_CAUSE_TRANSACTION_ENDED | + MLXBF_I2C_CAUSE_M_GW_BUSY_FALL))) + return -EIO; + + if (cause_status_bits & MLXBF_I2C_CAUSE_MASTER_STATUS_ERROR) + return -EAGAIN; + + return -ETIMEDOUT; +} + +static void mlxbf_i2c_smbus_write_data(struct mlxbf_i2c_priv *priv, + const u8 *data, u8 length, u32 addr) +{ + u8 offset, aligned_length; + u32 data32; + + aligned_length = round_up(length, 4); + + /* Copy data bytes from 4-byte aligned source buffer. */ + for (offset = 0; offset < aligned_length; offset += sizeof(u32)) { + data32 = *((u32 *)(data + offset)); + mlxbf_i2c_write_data(priv->smbus->io, addr + offset, data32); + } +} + +static void mlxbf_i2c_smbus_read_data(struct mlxbf_i2c_priv *priv, + u8 *data, u8 length, u32 addr) +{ + u32 data32, mask; + u8 byte, offset; + + mask = sizeof(u32) - 1; + + for (offset = 0; offset < (length & ~mask); offset += sizeof(u32)) { + data32 = mlxbf_i2c_read_data(priv->smbus->io, addr + offset); + *((u32 *)(data + offset)) = data32; + } + + if (!(length & mask)) + return; + + data32 = mlxbf_i2c_read_data(priv->smbus->io, addr + offset); + + for (byte = 0; byte < (length & mask); byte++) { + data[offset + byte] = data32 & GENMASK(7, 0); + data32 = ror32(data32, MLXBF_I2C_SHIFT_8); + } +} + +static int mlxbf_i2c_smbus_enable(struct mlxbf_i2c_priv *priv, u8 slave, + u8 len, u8 block_en, u8 pec_en, bool read) +{ + u32 command; + + /* Set Master GW control word. */ + if (read) { + command = MLXBF_I2C_MASTER_ENABLE_READ; + command |= rol32(len, MLXBF_I2C_MASTER_READ_SHIFT); + } else { + command = MLXBF_I2C_MASTER_ENABLE_WRITE; + command |= rol32(len, MLXBF_I2C_MASTER_WRITE_SHIFT); + } + command |= rol32(slave, MLXBF_I2C_MASTER_SLV_ADDR_SHIFT); + command |= rol32(block_en, MLXBF_I2C_MASTER_PARSE_EXP_SHIFT); + command |= rol32(pec_en, MLXBF_I2C_MASTER_SEND_PEC_SHIFT); + + /* Clear status bits. */ + mlxbf_i2c_write(priv->smbus->io, MLXBF_I2C_SMBUS_MASTER_STATUS, 0x0); + /* Set the cause data. */ + mlxbf_i2c_write(priv->smbus->io, MLXBF_I2C_CAUSE_OR_CLEAR, ~0x0); + /* Zero PEC byte. */ + mlxbf_i2c_write(priv->smbus->io, MLXBF_I2C_SMBUS_MASTER_PEC, 0x0); + /* Zero byte count. */ + mlxbf_i2c_write(priv->smbus->io, MLXBF_I2C_SMBUS_RS_BYTES, 0x0); + + /* GW activation. */ + mlxbf_i2c_write(priv->smbus->io, MLXBF_I2C_SMBUS_MASTER_GW, command); + + /* + * Poll master status and check status bits. An ACK is sent when + * completing writing data to the bus (Master 'byte_count_done' bit + * is set to 1). + */ + return mlxbf_i2c_smbus_check_status(priv); +} + +static int +mlxbf_i2c_smbus_start_transaction(struct mlxbf_i2c_priv *priv, + struct mlxbf_i2c_smbus_request *request) +{ + u8 data_desc[MLXBF_I2C_MASTER_DATA_DESC_SIZE] = { 0 }; + u8 op_idx, data_idx, data_len, write_len, read_len; + struct mlxbf_i2c_smbus_operation *operation; + u8 read_en, write_en, block_en, pec_en; + u8 slave, flags, addr; + u8 *read_buf; + int ret = 0; + + if (request->operation_cnt > MLXBF_I2C_SMBUS_MAX_OP_CNT) + return -EINVAL; + + read_buf = NULL; + data_idx = 0; + read_en = 0; + write_en = 0; + write_len = 0; + read_len = 0; + block_en = 0; + pec_en = 0; + slave = request->slave & GENMASK(6, 0); + addr = slave << 1; + + /* First of all, check whether the HW is idle. */ + if (WARN_ON(!mlxbf_smbus_master_wait_for_idle(priv))) + return -EBUSY; + + /* Set first byte. */ + data_desc[data_idx++] = addr; + + for (op_idx = 0; op_idx < request->operation_cnt; op_idx++) { + operation = &request->operation[op_idx]; + flags = operation->flags; + + /* + * Note that read and write operations might be handled by a + * single command. If the MLXBF_I2C_F_SMBUS_OPERATION is set + * then write command byte and set the optional SMBus specific + * bits such as block_en and pec_en. These bits MUST be + * submitted by the first operation only. + */ + if (op_idx == 0 && flags & MLXBF_I2C_F_SMBUS_OPERATION) { + block_en = flags & MLXBF_I2C_F_SMBUS_BLOCK; + pec_en = flags & MLXBF_I2C_F_SMBUS_PEC; + } + + if (flags & MLXBF_I2C_F_WRITE) { + write_en = 1; + write_len += operation->length; + memcpy(data_desc + data_idx, + operation->buffer, operation->length); + data_idx += operation->length; + } + /* + * We assume that read operations are performed only once per + * SMBus transaction. *TBD* protect this statement so it won't + * be executed twice? or return an error if we try to read more + * than once? + */ + if (flags & MLXBF_I2C_F_READ) { + read_en = 1; + /* Subtract 1 as required by HW. */ + read_len = operation->length - 1; + read_buf = operation->buffer; + } + } + + /* Set Master GW data descriptor. */ + data_len = write_len + 1; /* Add one byte of the slave address. */ + /* + * Note that data_len cannot be 0. Indeed, the slave address byte + * must be written to the data registers. + */ + mlxbf_i2c_smbus_write_data(priv, (const u8 *)data_desc, data_len, + MLXBF_I2C_MASTER_DATA_DESC_ADDR); + + if (write_en) { + ret = mlxbf_i2c_smbus_enable(priv, slave, write_len, block_en, + pec_en, 0); + if (ret) + return ret; + } + + if (read_en) { + /* Write slave address to Master GW data descriptor. */ + mlxbf_i2c_smbus_write_data(priv, (const u8 *)&addr, 1, + MLXBF_I2C_MASTER_DATA_DESC_ADDR); + ret = mlxbf_i2c_smbus_enable(priv, slave, read_len, block_en, + pec_en, 1); + if (!ret) { + /* Get Master GW data descriptor. */ + mlxbf_i2c_smbus_read_data(priv, data_desc, read_len + 1, + MLXBF_I2C_MASTER_DATA_DESC_ADDR); + + /* Get data from Master GW data descriptor. */ + memcpy(read_buf, data_desc, read_len + 1); + } + + /* + * After a read operation the SMBus FSM ps (present state) + * needs to be 'manually' reset. This should be removed in + * next tag integration. + */ + mlxbf_i2c_write(priv->smbus->io, MLXBF_I2C_SMBUS_MASTER_FSM, + MLXBF_I2C_SMBUS_MASTER_FSM_PS_STATE_MASK); + } + + return ret; +} + +/* I2C SMBus protocols. */ + +static void +mlxbf_i2c_smbus_quick_command(struct mlxbf_i2c_smbus_request *request, + u8 read) +{ + request->operation_cnt = MLXBF_I2C_SMBUS_OP_CNT_1; + + request->operation[0].length = 0; + request->operation[0].flags = MLXBF_I2C_F_WRITE; + request->operation[0].flags |= read ? MLXBF_I2C_F_READ : 0; +} + +static void mlxbf_i2c_smbus_byte_func(struct mlxbf_i2c_smbus_request *request, + u8 *data, bool read, bool pec_check) +{ + request->operation_cnt = MLXBF_I2C_SMBUS_OP_CNT_1; + + request->operation[0].length = 1; + request->operation[0].length += pec_check; + + request->operation[0].flags = MLXBF_I2C_F_SMBUS_OPERATION; + request->operation[0].flags |= read ? + MLXBF_I2C_F_READ : MLXBF_I2C_F_WRITE; + request->operation[0].flags |= pec_check ? MLXBF_I2C_F_SMBUS_PEC : 0; + + request->operation[0].buffer = data; +} + +static void +mlxbf_i2c_smbus_data_byte_func(struct mlxbf_i2c_smbus_request *request, + u8 *command, u8 *data, bool read, bool pec_check) +{ + request->operation_cnt = MLXBF_I2C_SMBUS_OP_CNT_2; + + request->operation[0].length = 1; + request->operation[0].flags = + MLXBF_I2C_F_SMBUS_OPERATION | MLXBF_I2C_F_WRITE; + request->operation[0].flags |= pec_check ? MLXBF_I2C_F_SMBUS_PEC : 0; + request->operation[0].buffer = command; + + request->operation[1].length = 1; + request->operation[1].length += pec_check; + request->operation[1].flags = read ? + MLXBF_I2C_F_READ : MLXBF_I2C_F_WRITE; + request->operation[1].buffer = data; +} + +static void +mlxbf_i2c_smbus_data_word_func(struct mlxbf_i2c_smbus_request *request, + u8 *command, u8 *data, bool read, bool pec_check) +{ + request->operation_cnt = MLXBF_I2C_SMBUS_OP_CNT_2; + + request->operation[0].length = 1; + request->operation[0].flags = + MLXBF_I2C_F_SMBUS_OPERATION | MLXBF_I2C_F_WRITE; + request->operation[0].flags |= pec_check ? MLXBF_I2C_F_SMBUS_PEC : 0; + request->operation[0].buffer = command; + + request->operation[1].length = 2; + request->operation[1].length += pec_check; + request->operation[1].flags = read ? + MLXBF_I2C_F_READ : MLXBF_I2C_F_WRITE; + request->operation[1].buffer = data; +} + +static void +mlxbf_i2c_smbus_i2c_block_func(struct mlxbf_i2c_smbus_request *request, + u8 *command, u8 *data, u8 *data_len, bool read, + bool pec_check) +{ + request->operation_cnt = MLXBF_I2C_SMBUS_OP_CNT_2; + + request->operation[0].length = 1; + request->operation[0].flags = + MLXBF_I2C_F_SMBUS_OPERATION | MLXBF_I2C_F_WRITE; + request->operation[0].flags |= pec_check ? MLXBF_I2C_F_SMBUS_PEC : 0; + request->operation[0].buffer = command; + + /* + * As specified in the standard, the max number of bytes to read/write + * per block operation is 32 bytes. In Golan code, the controller can + * read up to 128 bytes and write up to 127 bytes. + */ + request->operation[1].length = + (*data_len + pec_check > I2C_SMBUS_BLOCK_MAX) ? + I2C_SMBUS_BLOCK_MAX : *data_len + pec_check; + request->operation[1].flags = read ? + MLXBF_I2C_F_READ : MLXBF_I2C_F_WRITE; + /* + * Skip the first data byte, which corresponds to the number of bytes + * to read/write. + */ + request->operation[1].buffer = data + 1; + + *data_len = request->operation[1].length; + + /* Set the number of byte to read. This will be used by userspace. */ + if (read) + data[0] = *data_len; +} + +static void mlxbf_i2c_smbus_block_func(struct mlxbf_i2c_smbus_request *request, + u8 *command, u8 *data, u8 *data_len, + bool read, bool pec_check) +{ + request->operation_cnt = MLXBF_I2C_SMBUS_OP_CNT_2; + + request->operation[0].length = 1; + request->operation[0].flags = + MLXBF_I2C_F_SMBUS_OPERATION | MLXBF_I2C_F_WRITE; + request->operation[0].flags |= MLXBF_I2C_F_SMBUS_BLOCK; + request->operation[0].flags |= pec_check ? MLXBF_I2C_F_SMBUS_PEC : 0; + request->operation[0].buffer = command; + + request->operation[1].length = + (*data_len + pec_check > I2C_SMBUS_BLOCK_MAX) ? + I2C_SMBUS_BLOCK_MAX : *data_len + pec_check; + request->operation[1].flags = read ? + MLXBF_I2C_F_READ : MLXBF_I2C_F_WRITE; + request->operation[1].buffer = data + 1; + + *data_len = request->operation[1].length; + + /* Set the number of bytes to read. This will be used by userspace. */ + if (read) + data[0] = *data_len; +} + +static void +mlxbf_i2c_smbus_process_call_func(struct mlxbf_i2c_smbus_request *request, + u8 *command, u8 *data, bool pec_check) +{ + request->operation_cnt = MLXBF_I2C_SMBUS_OP_CNT_3; + + request->operation[0].length = 1; + request->operation[0].flags = + MLXBF_I2C_F_SMBUS_OPERATION | MLXBF_I2C_F_WRITE; + request->operation[0].flags |= MLXBF_I2C_F_SMBUS_BLOCK; + request->operation[0].flags |= pec_check ? MLXBF_I2C_F_SMBUS_PEC : 0; + request->operation[0].buffer = command; + + request->operation[1].length = 2; + request->operation[1].flags = MLXBF_I2C_F_WRITE; + request->operation[1].buffer = data; + + request->operation[2].length = 3; + request->operation[2].flags = MLXBF_I2C_F_READ; + request->operation[2].buffer = data; +} + +static void +mlxbf_i2c_smbus_blk_process_call_func(struct mlxbf_i2c_smbus_request *request, + u8 *command, u8 *data, u8 *data_len, + bool pec_check) +{ + u32 length; + + request->operation_cnt = MLXBF_I2C_SMBUS_OP_CNT_3; + + request->operation[0].length = 1; + request->operation[0].flags = + MLXBF_I2C_F_SMBUS_OPERATION | MLXBF_I2C_F_WRITE; + request->operation[0].flags |= MLXBF_I2C_F_SMBUS_BLOCK; + request->operation[0].flags |= (pec_check) ? MLXBF_I2C_F_SMBUS_PEC : 0; + request->operation[0].buffer = command; + + length = (*data_len + pec_check > I2C_SMBUS_BLOCK_MAX) ? + I2C_SMBUS_BLOCK_MAX : *data_len + pec_check; + + request->operation[1].length = length - pec_check; + request->operation[1].flags = MLXBF_I2C_F_WRITE; + request->operation[1].buffer = data; + + request->operation[2].length = length; + request->operation[2].flags = MLXBF_I2C_F_READ; + request->operation[2].buffer = data; + + *data_len = length; /* including PEC byte. */ +} + +/* Initialization functions. */ + +static bool mlxbf_i2c_has_chip_type(struct mlxbf_i2c_priv *priv, u8 type) +{ + return priv->chip->type == type; +} + +static struct mlxbf_i2c_resource * +mlxbf_i2c_get_shared_resource(struct mlxbf_i2c_priv *priv, u8 type) +{ + const struct mlxbf_i2c_chip_info *chip = priv->chip; + struct mlxbf_i2c_resource *res; + u8 res_idx = 0; + + for (res_idx = 0; res_idx < MLXBF_I2C_SHARED_RES_MAX; res_idx++) { + res = chip->shared_res[res_idx]; + if (res && res->type == type) + return res; + } + + return NULL; +} + +static int mlxbf_i2c_init_resource(struct platform_device *pdev, + struct mlxbf_i2c_resource **res, + u8 type) +{ + struct mlxbf_i2c_resource *tmp_res; + struct device *dev = &pdev->dev; + + if (!res || *res || type >= MLXBF_I2C_END_RES) + return -EINVAL; + + tmp_res = devm_kzalloc(dev, sizeof(struct mlxbf_i2c_resource), + GFP_KERNEL); + if (!tmp_res) + return -ENOMEM; + + tmp_res->params = platform_get_resource(pdev, IORESOURCE_MEM, type); + if (!tmp_res->params) { + devm_kfree(dev, tmp_res); + return -EIO; + } + + tmp_res->io = devm_ioremap_resource(dev, tmp_res->params); + if (IS_ERR(tmp_res->io)) { + devm_kfree(dev, tmp_res); + return PTR_ERR(tmp_res->io); + } + + tmp_res->type = type; + + *res = tmp_res; + + return 0; +} + +static u32 mlxbf_i2c_get_ticks(struct mlxbf_i2c_priv *priv, u64 nanoseconds, + bool minimum) +{ + u64 frequency; + u32 ticks; + + /* + * Compute ticks as follow: + * + * Ticks + * Time = --------- x 10^9 => Ticks = Time x Frequency x 10^-9 + * Frequency + */ + frequency = priv->frequency; + ticks = (nanoseconds * frequency) / MLXBF_I2C_FREQUENCY_1GHZ; + /* + * The number of ticks is rounded down and if minimum is equal to 1 + * then add one tick. + */ + if (minimum) + ticks++; + + return ticks; +} + +static u32 mlxbf_i2c_set_timer(struct mlxbf_i2c_priv *priv, u64 nsec, bool opt, + u32 mask, u8 shift) +{ + u32 val = (mlxbf_i2c_get_ticks(priv, nsec, opt) & mask) << shift; + + return val; +} + +static void mlxbf_i2c_set_timings(struct mlxbf_i2c_priv *priv, + const struct mlxbf_i2c_timings *timings) +{ + u32 timer; + + timer = mlxbf_i2c_set_timer(priv, timings->scl_high, + false, MLXBF_I2C_MASK_16, + MLXBF_I2C_SHIFT_0); + timer |= mlxbf_i2c_set_timer(priv, timings->scl_low, + false, MLXBF_I2C_MASK_16, + MLXBF_I2C_SHIFT_16); + mlxbf_i2c_write(priv->smbus->io, MLXBF_I2C_SMBUS_TIMER_SCL_LOW_SCL_HIGH, + timer); + + timer = mlxbf_i2c_set_timer(priv, timings->sda_rise, false, + MLXBF_I2C_MASK_8, MLXBF_I2C_SHIFT_0); + timer |= mlxbf_i2c_set_timer(priv, timings->sda_fall, false, + MLXBF_I2C_MASK_8, MLXBF_I2C_SHIFT_8); + timer |= mlxbf_i2c_set_timer(priv, timings->scl_rise, false, + MLXBF_I2C_MASK_8, MLXBF_I2C_SHIFT_16); + timer |= mlxbf_i2c_set_timer(priv, timings->scl_fall, false, + MLXBF_I2C_MASK_8, MLXBF_I2C_SHIFT_24); + mlxbf_i2c_write(priv->smbus->io, MLXBF_I2C_SMBUS_TIMER_FALL_RISE_SPIKE, + timer); + + timer = mlxbf_i2c_set_timer(priv, timings->hold_start, true, + MLXBF_I2C_MASK_16, MLXBF_I2C_SHIFT_0); + timer |= mlxbf_i2c_set_timer(priv, timings->hold_data, true, + MLXBF_I2C_MASK_16, MLXBF_I2C_SHIFT_16); + mlxbf_i2c_write(priv->smbus->io, MLXBF_I2C_SMBUS_TIMER_THOLD, timer); + + timer = mlxbf_i2c_set_timer(priv, timings->setup_start, true, + MLXBF_I2C_MASK_16, MLXBF_I2C_SHIFT_0); + timer |= mlxbf_i2c_set_timer(priv, timings->setup_stop, true, + MLXBF_I2C_MASK_16, MLXBF_I2C_SHIFT_16); + mlxbf_i2c_write(priv->smbus->io, + MLXBF_I2C_SMBUS_TIMER_TSETUP_START_STOP, timer); + + timer = mlxbf_i2c_set_timer(priv, timings->setup_data, true, + MLXBF_I2C_MASK_16, MLXBF_I2C_SHIFT_0); + mlxbf_i2c_write(priv->smbus->io, MLXBF_I2C_SMBUS_TIMER_TSETUP_DATA, + timer); + + timer = mlxbf_i2c_set_timer(priv, timings->buf, false, + MLXBF_I2C_MASK_16, MLXBF_I2C_SHIFT_0); + timer |= mlxbf_i2c_set_timer(priv, timings->thigh_max, false, + MLXBF_I2C_MASK_16, MLXBF_I2C_SHIFT_16); + mlxbf_i2c_write(priv->smbus->io, MLXBF_I2C_SMBUS_THIGH_MAX_TBUF, + timer); + + timer = timings->timeout; + mlxbf_i2c_write(priv->smbus->io, MLXBF_I2C_SMBUS_SCL_LOW_TIMEOUT, + timer); +} + +enum mlxbf_i2c_timings_config { + MLXBF_I2C_TIMING_CONFIG_100KHZ, + MLXBF_I2C_TIMING_CONFIG_400KHZ, + MLXBF_I2C_TIMING_CONFIG_1000KHZ, +}; + +/* + * Note that the mlxbf_i2c_timings->timeout value is not related to the + * bus frequency, it is impacted by the time it takes the driver to + * complete data transmission before transaction abort. + */ +static const struct mlxbf_i2c_timings mlxbf_i2c_timings[] = { + [MLXBF_I2C_TIMING_CONFIG_100KHZ] = { + .scl_high = 4810, + .scl_low = 5000, + .hold_start = 4000, + .setup_start = 4800, + .setup_stop = 4000, + .setup_data = 250, + .sda_rise = 50, + .sda_fall = 50, + .scl_rise = 50, + .scl_fall = 50, + .hold_data = 300, + .buf = 20000, + .thigh_max = 5000, + .timeout = 106500 + }, + [MLXBF_I2C_TIMING_CONFIG_400KHZ] = { + .scl_high = 1011, + .scl_low = 1300, + .hold_start = 600, + .setup_start = 700, + .setup_stop = 600, + .setup_data = 100, + .sda_rise = 50, + .sda_fall = 50, + .scl_rise = 50, + .scl_fall = 50, + .hold_data = 300, + .buf = 20000, + .thigh_max = 5000, + .timeout = 106500 + }, + [MLXBF_I2C_TIMING_CONFIG_1000KHZ] = { + .scl_high = 600, + .scl_low = 1300, + .hold_start = 600, + .setup_start = 600, + .setup_stop = 600, + .setup_data = 100, + .sda_rise = 50, + .sda_fall = 50, + .scl_rise = 50, + .scl_fall = 50, + .hold_data = 300, + .buf = 20000, + .thigh_max = 5000, + .timeout = 106500 + } +}; + +static int mlxbf_i2c_init_timings(struct platform_device *pdev, + struct mlxbf_i2c_priv *priv) +{ + enum mlxbf_i2c_timings_config config_idx; + struct device *dev = &pdev->dev; + u32 config_khz; + + int ret; + + ret = device_property_read_u32(dev, "clock-frequency", &config_khz); + if (ret < 0) + config_khz = MLXBF_I2C_TIMING_100KHZ; + + switch (config_khz) { + default: + /* Default settings is 100 KHz. */ + pr_warn("Illegal value %d: defaulting to 100 KHz\n", + config_khz); + fallthrough; + case MLXBF_I2C_TIMING_100KHZ: + config_idx = MLXBF_I2C_TIMING_CONFIG_100KHZ; + break; + + case MLXBF_I2C_TIMING_400KHZ: + config_idx = MLXBF_I2C_TIMING_CONFIG_400KHZ; + break; + + case MLXBF_I2C_TIMING_1000KHZ: + config_idx = MLXBF_I2C_TIMING_CONFIG_1000KHZ; + break; + } + + mlxbf_i2c_set_timings(priv, &mlxbf_i2c_timings[config_idx]); + + return 0; +} + +static int mlxbf_i2c_get_gpio(struct platform_device *pdev, + struct mlxbf_i2c_priv *priv) +{ + struct mlxbf_i2c_resource *gpio_res; + struct device *dev = &pdev->dev; + struct resource *params; + resource_size_t size; + + gpio_res = mlxbf_i2c_get_shared_resource(priv, MLXBF_I2C_GPIO_RES); + if (!gpio_res) + return -EPERM; + + /* + * The GPIO region in TYU space is shared among I2C busses. + * This function MUST be serialized to avoid racing when + * claiming the memory region and/or setting up the GPIO. + */ + lockdep_assert_held(gpio_res->lock); + + /* Check whether the memory map exist. */ + if (gpio_res->io) + return 0; + + params = gpio_res->params; + size = resource_size(params); + + if (!devm_request_mem_region(dev, params->start, size, params->name)) + return -EFAULT; + + gpio_res->io = devm_ioremap(dev, params->start, size); + if (IS_ERR(gpio_res->io)) { + devm_release_mem_region(dev, params->start, size); + return PTR_ERR(gpio_res->io); + } + + return 0; +} + +static int mlxbf_i2c_release_gpio(struct platform_device *pdev, + struct mlxbf_i2c_priv *priv) +{ + struct mlxbf_i2c_resource *gpio_res; + struct device *dev = &pdev->dev; + struct resource *params; + + gpio_res = mlxbf_i2c_get_shared_resource(priv, MLXBF_I2C_GPIO_RES); + if (!gpio_res) + return 0; + + mutex_lock(gpio_res->lock); + + if (gpio_res->io) { + /* Release the GPIO resource. */ + params = gpio_res->params; + devm_iounmap(dev, gpio_res->io); + devm_release_mem_region(dev, params->start, + resource_size(params)); + } + + mutex_unlock(gpio_res->lock); + + return 0; +} + +static int mlxbf_i2c_get_corepll(struct platform_device *pdev, + struct mlxbf_i2c_priv *priv) +{ + struct mlxbf_i2c_resource *corepll_res; + struct device *dev = &pdev->dev; + struct resource *params; + resource_size_t size; + + corepll_res = mlxbf_i2c_get_shared_resource(priv, + MLXBF_I2C_COREPLL_RES); + if (!corepll_res) + return -EPERM; + + /* + * The COREPLL region in TYU space is shared among I2C busses. + * This function MUST be serialized to avoid racing when + * claiming the memory region. + */ + lockdep_assert_held(corepll_res->lock); + + /* Check whether the memory map exist. */ + if (corepll_res->io) + return 0; + + params = corepll_res->params; + size = resource_size(params); + + if (!devm_request_mem_region(dev, params->start, size, params->name)) + return -EFAULT; + + corepll_res->io = devm_ioremap(dev, params->start, size); + if (IS_ERR(corepll_res->io)) { + devm_release_mem_region(dev, params->start, size); + return PTR_ERR(corepll_res->io); + } + + return 0; +} + +static int mlxbf_i2c_release_corepll(struct platform_device *pdev, + struct mlxbf_i2c_priv *priv) +{ + struct mlxbf_i2c_resource *corepll_res; + struct device *dev = &pdev->dev; + struct resource *params; + + corepll_res = mlxbf_i2c_get_shared_resource(priv, + MLXBF_I2C_COREPLL_RES); + + mutex_lock(corepll_res->lock); + + if (corepll_res->io) { + /* Release the CorePLL resource. */ + params = corepll_res->params; + devm_iounmap(dev, corepll_res->io); + devm_release_mem_region(dev, params->start, + resource_size(params)); + } + + mutex_unlock(corepll_res->lock); + + return 0; +} + +static int mlxbf_i2c_init_master(struct platform_device *pdev, + struct mlxbf_i2c_priv *priv) +{ + struct mlxbf_i2c_resource *gpio_res; + struct device *dev = &pdev->dev; + u32 config_reg; + int ret; + + /* This configuration is only needed for BlueField 1. */ + if (!mlxbf_i2c_has_chip_type(priv, MLXBF_I2C_CHIP_TYPE_1)) + return 0; + + gpio_res = mlxbf_i2c_get_shared_resource(priv, MLXBF_I2C_GPIO_RES); + if (!gpio_res) + return -EPERM; + + /* + * The GPIO region in TYU space is shared among I2C busses. + * This function MUST be serialized to avoid racing when + * claiming the memory region and/or setting up the GPIO. + */ + + mutex_lock(gpio_res->lock); + + ret = mlxbf_i2c_get_gpio(pdev, priv); + if (ret < 0) { + dev_err(dev, "Failed to get gpio resource"); + mutex_unlock(gpio_res->lock); + return ret; + } + + /* + * TYU - Configuration for GPIO pins. Those pins must be asserted in + * MLXBF_I2C_GPIO_0_FUNC_EN_0, i.e. GPIO 0 is controlled by HW, and must + * be reset in MLXBF_I2C_GPIO_0_FORCE_OE_EN, i.e. GPIO_OE will be driven + * instead of HW_OE. + * For now, we do not reset the GPIO state when the driver is removed. + * First, it is not necessary to disable the bus since we are using + * the same busses. Then, some busses might be shared among Linux and + * platform firmware; disabling the bus might compromise the system + * functionality. + */ + config_reg = mlxbf_i2c_read(gpio_res->io, + MLXBF_I2C_GPIO_0_FUNC_EN_0); + config_reg = MLXBF_I2C_GPIO_SMBUS_GW_ASSERT_PINS(priv->bus, + config_reg); + mlxbf_i2c_write(gpio_res->io, MLXBF_I2C_GPIO_0_FUNC_EN_0, + config_reg); + + config_reg = mlxbf_i2c_read(gpio_res->io, + MLXBF_I2C_GPIO_0_FORCE_OE_EN); + config_reg = MLXBF_I2C_GPIO_SMBUS_GW_RESET_PINS(priv->bus, + config_reg); + mlxbf_i2c_write(gpio_res->io, MLXBF_I2C_GPIO_0_FORCE_OE_EN, + config_reg); + + mutex_unlock(gpio_res->lock); + + return 0; +} + +static u64 mlxbf_calculate_freq_from_tyu(struct mlxbf_i2c_resource *corepll_res) +{ + u64 core_frequency, pad_frequency; + u8 core_od, core_r; + u32 corepll_val; + u16 core_f; + + pad_frequency = MLXBF_I2C_TYU_PLL_IN_FREQ; + + corepll_val = mlxbf_i2c_read(corepll_res->io, + MLXBF_I2C_CORE_PLL_REG1); + + /* Get Core PLL configuration bits. */ + core_f = rol32(corepll_val, MLXBF_I2C_COREPLL_CORE_F_TYU_SHIFT) & + MLXBF_I2C_COREPLL_CORE_F_TYU_MASK; + core_od = rol32(corepll_val, MLXBF_I2C_COREPLL_CORE_OD_TYU_SHIFT) & + MLXBF_I2C_COREPLL_CORE_OD_TYU_MASK; + core_r = rol32(corepll_val, MLXBF_I2C_COREPLL_CORE_R_TYU_SHIFT) & + MLXBF_I2C_COREPLL_CORE_R_TYU_MASK; + + /* + * Compute PLL output frequency as follow: + * + * CORE_F + 1 + * PLL_OUT_FREQ = PLL_IN_FREQ * ---------------------------- + * (CORE_R + 1) * (CORE_OD + 1) + * + * Where PLL_OUT_FREQ and PLL_IN_FREQ refer to CoreFrequency + * and PadFrequency, respectively. + */ + core_frequency = pad_frequency * (++core_f); + core_frequency /= (++core_r) * (++core_od); + + return core_frequency; +} + +static u64 mlxbf_calculate_freq_from_yu(struct mlxbf_i2c_resource *corepll_res) +{ + u32 corepll_reg1_val, corepll_reg2_val; + u64 corepll_frequency, pad_frequency; + u8 core_od, core_r; + u32 core_f; + + pad_frequency = MLXBF_I2C_YU_PLL_IN_FREQ; + + corepll_reg1_val = mlxbf_i2c_read(corepll_res->io, + MLXBF_I2C_CORE_PLL_REG1); + corepll_reg2_val = mlxbf_i2c_read(corepll_res->io, + MLXBF_I2C_CORE_PLL_REG2); + + /* Get Core PLL configuration bits */ + core_f = rol32(corepll_reg1_val, MLXBF_I2C_COREPLL_CORE_F_YU_SHIFT) & + MLXBF_I2C_COREPLL_CORE_F_YU_MASK; + core_r = rol32(corepll_reg1_val, MLXBF_I2C_COREPLL_CORE_R_YU_SHIFT) & + MLXBF_I2C_COREPLL_CORE_R_YU_MASK; + core_od = rol32(corepll_reg2_val, MLXBF_I2C_COREPLL_CORE_OD_YU_SHIFT) & + MLXBF_I2C_COREPLL_CORE_OD_YU_MASK; + + /* + * Compute PLL output frequency as follow: + * + * CORE_F / 16384 + * PLL_OUT_FREQ = PLL_IN_FREQ * ---------------------------- + * (CORE_R + 1) * (CORE_OD + 1) + * + * Where PLL_OUT_FREQ and PLL_IN_FREQ refer to CoreFrequency + * and PadFrequency, respectively. + */ + corepll_frequency = (pad_frequency * core_f) / MLNXBF_I2C_COREPLL_CONST; + corepll_frequency /= (++core_r) * (++core_od); + + return corepll_frequency; +} + +static int mlxbf_i2c_calculate_corepll_freq(struct platform_device *pdev, + struct mlxbf_i2c_priv *priv) +{ + const struct mlxbf_i2c_chip_info *chip = priv->chip; + struct mlxbf_i2c_resource *corepll_res; + struct device *dev = &pdev->dev; + u64 *freq = &priv->frequency; + int ret; + + corepll_res = mlxbf_i2c_get_shared_resource(priv, + MLXBF_I2C_COREPLL_RES); + if (!corepll_res) + return -EPERM; + + /* + * First, check whether the TYU core Clock frequency is set. + * The TYU core frequency is the same for all I2C busses; when + * the first device gets probed the frequency is determined and + * stored into a globally visible variable. So, first of all, + * check whether the frequency is already set. Here, we assume + * that the frequency is expected to be greater than 0. + */ + mutex_lock(corepll_res->lock); + if (!mlxbf_i2c_corepll_frequency) { + if (!chip->calculate_freq) { + mutex_unlock(corepll_res->lock); + return -EPERM; + } + + ret = mlxbf_i2c_get_corepll(pdev, priv); + if (ret < 0) { + dev_err(dev, "Failed to get corePLL resource"); + mutex_unlock(corepll_res->lock); + return ret; + } + + mlxbf_i2c_corepll_frequency = chip->calculate_freq(corepll_res); + } + mutex_unlock(corepll_res->lock); + + *freq = mlxbf_i2c_corepll_frequency; + + return 0; +} + +static int mlxbf_slave_enable(struct mlxbf_i2c_priv *priv, u8 addr) +{ + u32 slave_reg, slave_reg_tmp, slave_reg_avail, slave_addr_mask; + u8 reg, reg_cnt, byte, addr_tmp, reg_avail, byte_avail; + bool avail, disabled; + + disabled = false; + avail = false; + + if (!priv) + return -EPERM; + + reg_cnt = MLXBF_I2C_SMBUS_SLAVE_ADDR_CNT >> 2; + slave_addr_mask = MLXBF_I2C_SMBUS_SLAVE_ADDR_MASK; + + /* + * Read the slave registers. There are 4 * 32-bit slave registers. + * Each slave register can hold up to 4 * 8-bit slave configuration + * (7-bit address, 1 status bit (1 if enabled, 0 if not)). + */ + for (reg = 0; reg < reg_cnt; reg++) { + slave_reg = mlxbf_i2c_read(priv->smbus->io, + MLXBF_I2C_SMBUS_SLAVE_ADDR_CFG + reg * 0x4); + /* + * Each register holds 4 slave addresses. So, we have to keep + * the byte order consistent with the value read in order to + * update the register correctly, if needed. + */ + slave_reg_tmp = slave_reg; + for (byte = 0; byte < 4; byte++) { + addr_tmp = slave_reg_tmp & GENMASK(7, 0); + + /* + * Mark the first available slave address slot, i.e. its + * enabled bit should be unset. This slot might be used + * later on to register our slave. + */ + if (!avail && !MLXBF_I2C_SLAVE_ADDR_ENABLED(addr_tmp)) { + avail = true; + reg_avail = reg; + byte_avail = byte; + slave_reg_avail = slave_reg; + } + + /* + * Parse slave address bytes and check whether the + * slave address already exists and it's enabled, + * i.e. most significant bit is set. + */ + if ((addr_tmp & slave_addr_mask) == addr) { + if (MLXBF_I2C_SLAVE_ADDR_ENABLED(addr_tmp)) + return 0; + disabled = true; + break; + } + + /* Parse next byte. */ + slave_reg_tmp >>= 8; + } + + /* Exit the loop if the slave address is found. */ + if (disabled) + break; + } + + if (!avail && !disabled) + return -EINVAL; /* No room for a new slave address. */ + + if (avail && !disabled) { + reg = reg_avail; + byte = byte_avail; + /* Set the slave address. */ + slave_reg_avail &= ~(slave_addr_mask << (byte * 8)); + slave_reg_avail |= addr << (byte * 8); + slave_reg = slave_reg_avail; + } + + /* Enable the slave address and update the register. */ + slave_reg |= (1 << MLXBF_I2C_SMBUS_SLAVE_ADDR_EN_BIT) << (byte * 8); + mlxbf_i2c_write(priv->smbus->io, + MLXBF_I2C_SMBUS_SLAVE_ADDR_CFG + reg * 0x4, slave_reg); + + return 0; +} + +static int mlxbf_slave_disable(struct mlxbf_i2c_priv *priv) +{ + u32 slave_reg, slave_reg_tmp, slave_addr_mask; + u8 addr, addr_tmp, reg, reg_cnt, slave_byte; + struct i2c_client *client = priv->slave; + bool exist; + + exist = false; + + addr = client->addr; + reg_cnt = MLXBF_I2C_SMBUS_SLAVE_ADDR_CNT >> 2; + slave_addr_mask = MLXBF_I2C_SMBUS_SLAVE_ADDR_MASK; + + /* + * Read the slave registers. There are 4 * 32-bit slave registers. + * Each slave register can hold up to 4 * 8-bit slave configuration + * (7-bit address, 1 status bit (1 if enabled, 0 if not)). + */ + for (reg = 0; reg < reg_cnt; reg++) { + slave_reg = mlxbf_i2c_read(priv->smbus->io, + MLXBF_I2C_SMBUS_SLAVE_ADDR_CFG + reg * 0x4); + + /* Check whether the address slots are empty. */ + if (slave_reg == 0) + continue; + + /* + * Each register holds 4 slave addresses. So, we have to keep + * the byte order consistent with the value read in order to + * update the register correctly, if needed. + */ + slave_reg_tmp = slave_reg; + slave_byte = 0; + while (slave_reg_tmp != 0) { + addr_tmp = slave_reg_tmp & slave_addr_mask; + /* + * Parse slave address bytes and check whether the + * slave address already exists. + */ + if (addr_tmp == addr) { + exist = true; + break; + } + + /* Parse next byte. */ + slave_reg_tmp >>= 8; + slave_byte += 1; + } + + /* Exit the loop if the slave address is found. */ + if (exist) + break; + } + + if (!exist) + return 0; /* Slave is not registered, nothing to do. */ + + /* Cleanup the slave address slot. */ + slave_reg &= ~(GENMASK(7, 0) << (slave_byte * 8)); + mlxbf_i2c_write(priv->smbus->io, + MLXBF_I2C_SMBUS_SLAVE_ADDR_CFG + reg * 0x4, slave_reg); + + return 0; +} + +static int mlxbf_i2c_init_coalesce(struct platform_device *pdev, + struct mlxbf_i2c_priv *priv) +{ + struct mlxbf_i2c_resource *coalesce_res; + struct resource *params; + resource_size_t size; + int ret = 0; + + /* + * Unlike BlueField-1 platform, the coalesce registers is a dedicated + * resource in the next generations of BlueField. + */ + if (mlxbf_i2c_has_chip_type(priv, MLXBF_I2C_CHIP_TYPE_1)) { + coalesce_res = mlxbf_i2c_get_shared_resource(priv, + MLXBF_I2C_COALESCE_RES); + if (!coalesce_res) + return -EPERM; + + /* + * The Cause Coalesce group in TYU space is shared among + * I2C busses. This function MUST be serialized to avoid + * racing when claiming the memory region. + */ + lockdep_assert_held(mlxbf_i2c_gpio_res->lock); + + /* Check whether the memory map exist. */ + if (coalesce_res->io) { + priv->coalesce = coalesce_res; + return 0; + } + + params = coalesce_res->params; + size = resource_size(params); + + if (!request_mem_region(params->start, size, params->name)) + return -EFAULT; + + coalesce_res->io = ioremap(params->start, size); + if (IS_ERR(coalesce_res->io)) { + release_mem_region(params->start, size); + return PTR_ERR(coalesce_res->io); + } + + priv->coalesce = coalesce_res; + + } else { + ret = mlxbf_i2c_init_resource(pdev, &priv->coalesce, + MLXBF_I2C_COALESCE_RES); + } + + return ret; +} + +static int mlxbf_i2c_release_coalesce(struct platform_device *pdev, + struct mlxbf_i2c_priv *priv) +{ + struct mlxbf_i2c_resource *coalesce_res; + struct device *dev = &pdev->dev; + struct resource *params; + resource_size_t size; + + coalesce_res = priv->coalesce; + + if (coalesce_res->io) { + params = coalesce_res->params; + size = resource_size(params); + if (mlxbf_i2c_has_chip_type(priv, MLXBF_I2C_CHIP_TYPE_1)) { + mutex_lock(coalesce_res->lock); + iounmap(coalesce_res->io); + release_mem_region(params->start, size); + mutex_unlock(coalesce_res->lock); + } else { + devm_release_mem_region(dev, params->start, size); + } + } + + return 0; +} + +static int mlxbf_i2c_init_slave(struct platform_device *pdev, + struct mlxbf_i2c_priv *priv) +{ + struct device *dev = &pdev->dev; + u32 int_reg; + int ret; + + /* Reset FSM. */ + mlxbf_i2c_write(priv->smbus->io, MLXBF_I2C_SMBUS_SLAVE_FSM, 0); + + /* + * Enable slave cause interrupt bits. Drive + * MLXBF_I2C_CAUSE_READ_WAIT_FW_RESPONSE and + * MLXBF_I2C_CAUSE_WRITE_SUCCESS, these are enabled when an external + * masters issue a Read and Write, respectively. But, clear all + * interrupts first. + */ + mlxbf_i2c_write(priv->slv_cause->io, + MLXBF_I2C_CAUSE_OR_CLEAR, ~0); + int_reg = MLXBF_I2C_CAUSE_READ_WAIT_FW_RESPONSE; + int_reg |= MLXBF_I2C_CAUSE_WRITE_SUCCESS; + mlxbf_i2c_write(priv->slv_cause->io, + MLXBF_I2C_CAUSE_OR_EVTEN0, int_reg); + + /* Finally, set the 'ready' bit to start handling transactions. */ + mlxbf_i2c_write(priv->smbus->io, MLXBF_I2C_SMBUS_SLAVE_READY, 0x1); + + /* Initialize the cause coalesce resource. */ + ret = mlxbf_i2c_init_coalesce(pdev, priv); + if (ret < 0) { + dev_err(dev, "failed to initialize cause coalesce\n"); + return ret; + } + + return 0; +} + +static bool mlxbf_i2c_has_coalesce(struct mlxbf_i2c_priv *priv, bool *read, + bool *write) +{ + const struct mlxbf_i2c_chip_info *chip = priv->chip; + u32 coalesce0_reg, cause_reg; + u8 slave_shift, is_set; + + *write = false; + *read = false; + + slave_shift = chip->type != MLXBF_I2C_CHIP_TYPE_1 ? + MLXBF_I2C_CAUSE_YU_SLAVE_BIT : + priv->bus + MLXBF_I2C_CAUSE_TYU_SLAVE_BIT; + + coalesce0_reg = mlxbf_i2c_read(priv->coalesce->io, + MLXBF_I2C_CAUSE_COALESCE_0); + is_set = coalesce0_reg & (1 << slave_shift); + + if (!is_set) + return false; + + /* Check the source of the interrupt, i.e. whether a Read or Write. */ + cause_reg = mlxbf_i2c_read(priv->slv_cause->io, + MLXBF_I2C_CAUSE_ARBITER); + if (cause_reg & MLXBF_I2C_CAUSE_READ_WAIT_FW_RESPONSE) + *read = true; + else if (cause_reg & MLXBF_I2C_CAUSE_WRITE_SUCCESS) + *write = true; + + /* Clear cause bits. */ + mlxbf_i2c_write(priv->slv_cause->io, MLXBF_I2C_CAUSE_OR_CLEAR, ~0x0); + + return true; +} + +static bool mlxbf_smbus_slave_wait_for_idle(struct mlxbf_i2c_priv *priv, + u32 timeout) +{ + u32 mask = MLXBF_I2C_CAUSE_S_GW_BUSY_FALL; + u32 addr = MLXBF_I2C_CAUSE_ARBITER; + + if (mlxbf_smbus_poll(priv->slv_cause->io, addr, mask, false, timeout)) + return true; + + return false; +} + +/* Send byte to 'external' smbus master. */ +static int mlxbf_smbus_irq_send(struct mlxbf_i2c_priv *priv, u8 recv_bytes) +{ + u8 data_desc[MLXBF_I2C_SLAVE_DATA_DESC_SIZE] = { 0 }; + u8 write_size, pec_en, addr, byte, value, byte_cnt, desc_size; + struct i2c_client *slave = priv->slave; + u32 control32, data32; + int ret; + + if (!slave) + return -EINVAL; + + addr = 0; + byte = 0; + desc_size = MLXBF_I2C_SLAVE_DATA_DESC_SIZE; + + /* + * Read bytes received from the external master. These bytes should + * be located in the first data descriptor register of the slave GW. + * These bytes are the slave address byte and the internal register + * address, if supplied. + */ + if (recv_bytes > 0) { + data32 = mlxbf_i2c_read_data(priv->smbus->io, + MLXBF_I2C_SLAVE_DATA_DESC_ADDR); + + /* Parse the received bytes. */ + switch (recv_bytes) { + case 2: + byte = (data32 >> 8) & GENMASK(7, 0); + fallthrough; + case 1: + addr = (data32 & GENMASK(7, 0)) >> 1; + } + + /* Check whether it's our slave address. */ + if (slave->addr != addr) + return -EINVAL; + } + + /* + * I2C read transactions may start by a WRITE followed by a READ. + * Indeed, most slave devices would expect the internal address + * following the slave address byte. So, write that byte first, + * and then, send the requested data bytes to the master. + */ + if (recv_bytes > 1) { + i2c_slave_event(slave, I2C_SLAVE_WRITE_REQUESTED, &value); + value = byte; + ret = i2c_slave_event(slave, I2C_SLAVE_WRITE_RECEIVED, + &value); + i2c_slave_event(slave, I2C_SLAVE_STOP, &value); + + if (ret < 0) + return ret; + } + + /* + * Now, send data to the master; currently, the driver supports + * READ_BYTE, READ_WORD and BLOCK READ protocols. Note that the + * hardware can send up to 128 bytes per transfer. That is the + * size of its data registers. + */ + i2c_slave_event(slave, I2C_SLAVE_READ_REQUESTED, &value); + + for (byte_cnt = 0; byte_cnt < desc_size; byte_cnt++) { + data_desc[byte_cnt] = value; + i2c_slave_event(slave, I2C_SLAVE_READ_PROCESSED, &value); + } + + /* Send a stop condition to the backend. */ + i2c_slave_event(slave, I2C_SLAVE_STOP, &value); + + /* Handle the actual transfer. */ + + /* Set the number of bytes to write to master. */ + write_size = (byte_cnt - 1) & 0x7f; + + /* Write data to Slave GW data descriptor. */ + mlxbf_i2c_smbus_write_data(priv, data_desc, byte_cnt, + MLXBF_I2C_SLAVE_DATA_DESC_ADDR); + + pec_en = 0; /* Disable PEC since it is not supported. */ + + /* Prepare control word. */ + control32 = MLXBF_I2C_SLAVE_ENABLE; + control32 |= rol32(write_size, MLXBF_I2C_SLAVE_WRITE_BYTES_SHIFT); + control32 |= rol32(pec_en, MLXBF_I2C_SLAVE_SEND_PEC_SHIFT); + + mlxbf_i2c_write(priv->smbus->io, MLXBF_I2C_SMBUS_SLAVE_GW, control32); + + /* + * Wait until the transfer is completed; the driver will wait + * until the GW is idle, a cause will rise on fall of GW busy. + */ + mlxbf_smbus_slave_wait_for_idle(priv, MLXBF_I2C_SMBUS_TIMEOUT); + + /* Release the Slave GW. */ + mlxbf_i2c_write(priv->smbus->io, + MLXBF_I2C_SMBUS_SLAVE_RS_MASTER_BYTES, 0x0); + mlxbf_i2c_write(priv->smbus->io, MLXBF_I2C_SMBUS_SLAVE_PEC, 0x0); + mlxbf_i2c_write(priv->smbus->io, MLXBF_I2C_SMBUS_SLAVE_READY, 0x1); + + return 0; +} + +/* Receive bytes from 'external' smbus master. */ +static int mlxbf_smbus_irq_recv(struct mlxbf_i2c_priv *priv, u8 recv_bytes) +{ + u8 data_desc[MLXBF_I2C_SLAVE_DATA_DESC_SIZE] = { 0 }; + struct i2c_client *slave = priv->slave; + u8 value, byte, addr; + int ret = 0; + + if (!slave) + return -EINVAL; + + /* Read data from Slave GW data descriptor. */ + mlxbf_i2c_smbus_read_data(priv, data_desc, recv_bytes, + MLXBF_I2C_SLAVE_DATA_DESC_ADDR); + + /* Check whether its our slave address. */ + addr = data_desc[0] >> 1; + if (slave->addr != addr) + return -EINVAL; + + /* + * Notify the slave backend; another I2C master wants to write data + * to us. This event is sent once the slave address and the write bit + * is detected. + */ + i2c_slave_event(slave, I2C_SLAVE_WRITE_REQUESTED, &value); + + /* Send the received data to the slave backend. */ + for (byte = 1; byte < recv_bytes; byte++) { + value = data_desc[byte]; + ret = i2c_slave_event(slave, I2C_SLAVE_WRITE_RECEIVED, + &value); + if (ret < 0) + break; + } + + /* Send a stop condition to the backend. */ + i2c_slave_event(slave, I2C_SLAVE_STOP, &value); + + /* Release the Slave GW. */ + mlxbf_i2c_write(priv->smbus->io, + MLXBF_I2C_SMBUS_SLAVE_RS_MASTER_BYTES, 0x0); + mlxbf_i2c_write(priv->smbus->io, MLXBF_I2C_SMBUS_SLAVE_PEC, 0x0); + mlxbf_i2c_write(priv->smbus->io, MLXBF_I2C_SMBUS_SLAVE_READY, 0x1); + + return ret; +} + +static irqreturn_t mlxbf_smbus_irq(int irq, void *ptr) +{ + struct mlxbf_i2c_priv *priv = ptr; + bool read, write, irq_is_set; + u32 rw_bytes_reg; + u8 recv_bytes; + + /* + * Read TYU interrupt register and determine the source of the + * interrupt. Based on the source of the interrupt one of the + * following actions are performed: + * - Receive data and send response to master. + * - Send data and release slave GW. + * + * Handle read/write transaction only. CRmaster and Iarp requests + * are ignored for now. + */ + irq_is_set = mlxbf_i2c_has_coalesce(priv, &read, &write); + if (!irq_is_set || (!read && !write)) { + /* Nothing to do here, interrupt was not from this device. */ + return IRQ_NONE; + } + + /* + * The MLXBF_I2C_SMBUS_SLAVE_RS_MASTER_BYTES includes the number of + * bytes from/to master. These are defined by 8-bits each. If the lower + * 8 bits are set, then the master expect to read N bytes from the + * slave, if the higher 8 bits are sent then the slave expect N bytes + * from the master. + */ + rw_bytes_reg = mlxbf_i2c_read(priv->smbus->io, + MLXBF_I2C_SMBUS_SLAVE_RS_MASTER_BYTES); + recv_bytes = (rw_bytes_reg >> 8) & GENMASK(7, 0); + + /* + * For now, the slave supports 128 bytes transfer. Discard remaining + * data bytes if the master wrote more than + * MLXBF_I2C_SLAVE_DATA_DESC_SIZE, i.e, the actual size of the slave + * data descriptor. + * + * Note that we will never expect to transfer more than 128 bytes; as + * specified in the SMBus standard, block transactions cannot exceed + * 32 bytes. + */ + recv_bytes = recv_bytes > MLXBF_I2C_SLAVE_DATA_DESC_SIZE ? + MLXBF_I2C_SLAVE_DATA_DESC_SIZE : recv_bytes; + + if (read) + mlxbf_smbus_irq_send(priv, recv_bytes); + else + mlxbf_smbus_irq_recv(priv, recv_bytes); + + return IRQ_HANDLED; +} + +/* Return negative errno on error. */ +static s32 mlxbf_i2c_smbus_xfer(struct i2c_adapter *adap, u16 addr, + unsigned short flags, char read_write, + u8 command, int size, + union i2c_smbus_data *data) +{ + struct mlxbf_i2c_smbus_request request = { 0 }; + struct mlxbf_i2c_priv *priv; + bool read, pec; + u8 byte_cnt; + + request.slave = addr; + + read = (read_write == I2C_SMBUS_READ); + pec = flags & I2C_FUNC_SMBUS_PEC; + + switch (size) { + case I2C_SMBUS_QUICK: + mlxbf_i2c_smbus_quick_command(&request, read); + dev_dbg(&adap->dev, "smbus quick, slave 0x%02x\n", addr); + break; + + case I2C_SMBUS_BYTE: + mlxbf_i2c_smbus_byte_func(&request, + read ? &data->byte : &command, read, + pec); + dev_dbg(&adap->dev, "smbus %s byte, slave 0x%02x.\n", + read ? "read" : "write", addr); + break; + + case I2C_SMBUS_BYTE_DATA: + mlxbf_i2c_smbus_data_byte_func(&request, &command, &data->byte, + read, pec); + dev_dbg(&adap->dev, "smbus %s byte data at 0x%02x, slave 0x%02x.\n", + read ? "read" : "write", command, addr); + break; + + case I2C_SMBUS_WORD_DATA: + mlxbf_i2c_smbus_data_word_func(&request, &command, + (u8 *)&data->word, read, pec); + dev_dbg(&adap->dev, "smbus %s word data at 0x%02x, slave 0x%02x.\n", + read ? "read" : "write", command, addr); + break; + + case I2C_SMBUS_I2C_BLOCK_DATA: + byte_cnt = data->block[0]; + mlxbf_i2c_smbus_i2c_block_func(&request, &command, data->block, + &byte_cnt, read, pec); + dev_dbg(&adap->dev, "i2c %s block data, %d bytes at 0x%02x, slave 0x%02x.\n", + read ? "read" : "write", byte_cnt, command, addr); + break; + + case I2C_SMBUS_BLOCK_DATA: + byte_cnt = read ? I2C_SMBUS_BLOCK_MAX : data->block[0]; + mlxbf_i2c_smbus_block_func(&request, &command, data->block, + &byte_cnt, read, pec); + dev_dbg(&adap->dev, "smbus %s block data, %d bytes at 0x%02x, slave 0x%02x.\n", + read ? "read" : "write", byte_cnt, command, addr); + break; + + case I2C_FUNC_SMBUS_PROC_CALL: + mlxbf_i2c_smbus_process_call_func(&request, &command, + (u8 *)&data->word, pec); + dev_dbg(&adap->dev, "process call, wr/rd at 0x%02x, slave 0x%02x.\n", + command, addr); + break; + + case I2C_FUNC_SMBUS_BLOCK_PROC_CALL: + byte_cnt = data->block[0]; + mlxbf_i2c_smbus_blk_process_call_func(&request, &command, + data->block, &byte_cnt, + pec); + dev_dbg(&adap->dev, "block process call, wr/rd %d bytes, slave 0x%02x.\n", + byte_cnt, addr); + break; + + default: + dev_dbg(&adap->dev, "Unsupported I2C/SMBus command %d\n", + size); + return -EOPNOTSUPP; + } + + priv = i2c_get_adapdata(adap); + + return mlxbf_i2c_smbus_start_transaction(priv, &request); +} + +static int mlxbf_i2c_reg_slave(struct i2c_client *slave) +{ + struct mlxbf_i2c_priv *priv = i2c_get_adapdata(slave->adapter); + int ret; + + if (priv->slave) + return -EBUSY; + + /* + * Do not support ten bit chip address and do not use Packet Error + * Checking (PEC). + */ + if (slave->flags & (I2C_CLIENT_TEN | I2C_CLIENT_PEC)) + return -EAFNOSUPPORT; + + ret = mlxbf_slave_enable(priv, slave->addr); + if (ret < 0) + return ret; + + priv->slave = slave; + + return 0; +} + +static int mlxbf_i2c_unreg_slave(struct i2c_client *slave) +{ + struct mlxbf_i2c_priv *priv = i2c_get_adapdata(slave->adapter); + int ret; + + WARN_ON(!priv->slave); + + /* Unregister slave, i.e. disable the slave address in hardware. */ + ret = mlxbf_slave_disable(priv); + if (ret < 0) + return ret; + + priv->slave = NULL; + + return 0; +} + +static u32 mlxbf_i2c_functionality(struct i2c_adapter *adap) +{ + return MLXBF_I2C_FUNC_ALL; +} + +static struct mlxbf_i2c_chip_info mlxbf_i2c_chip[] = { + [MLXBF_I2C_CHIP_TYPE_1] = { + .type = MLXBF_I2C_CHIP_TYPE_1, + .shared_res = { + [0] = &mlxbf_i2c_coalesce_res[MLXBF_I2C_CHIP_TYPE_1], + [1] = &mlxbf_i2c_corepll_res[MLXBF_I2C_CHIP_TYPE_1], + [2] = &mlxbf_i2c_gpio_res[MLXBF_I2C_CHIP_TYPE_1] + }, + .calculate_freq = mlxbf_calculate_freq_from_tyu + }, + [MLXBF_I2C_CHIP_TYPE_2] = { + .type = MLXBF_I2C_CHIP_TYPE_2, + .shared_res = { + [0] = &mlxbf_i2c_corepll_res[MLXBF_I2C_CHIP_TYPE_2] + }, + .calculate_freq = mlxbf_calculate_freq_from_yu + } +}; + +static const struct i2c_algorithm mlxbf_i2c_algo = { + .smbus_xfer = mlxbf_i2c_smbus_xfer, + .functionality = mlxbf_i2c_functionality, + .reg_slave = mlxbf_i2c_reg_slave, + .unreg_slave = mlxbf_i2c_unreg_slave, +}; + +static struct i2c_adapter_quirks mlxbf_i2c_quirks = { + .max_read_len = MLXBF_I2C_MASTER_DATA_R_LENGTH, + .max_write_len = MLXBF_I2C_MASTER_DATA_W_LENGTH, +}; + +static const struct of_device_id mlxbf_i2c_dt_ids[] = { + { + .compatible = "mellanox,i2c-mlxbf1", + .data = &mlxbf_i2c_chip[MLXBF_I2C_CHIP_TYPE_1] + }, + { + .compatible = "mellanox,i2c-mlxbf2", + .data = &mlxbf_i2c_chip[MLXBF_I2C_CHIP_TYPE_2] + }, + {}, +}; + +MODULE_DEVICE_TABLE(of, mlxbf_i2c_dt_ids); + +static const struct acpi_device_id mlxbf_i2c_acpi_ids[] = { + { "MLNXBF03", (kernel_ulong_t)&mlxbf_i2c_chip[MLXBF_I2C_CHIP_TYPE_1] }, + { "MLNXBF23", (kernel_ulong_t)&mlxbf_i2c_chip[MLXBF_I2C_CHIP_TYPE_2] }, + {}, +}; + +MODULE_DEVICE_TABLE(acpi, mlxbf_i2c_acpi_ids); + +static int mlxbf_i2c_acpi_probe(struct device *dev, struct mlxbf_i2c_priv *priv) +{ + const struct acpi_device_id *aid; + struct acpi_device *adev; + unsigned long bus_id = 0; + const char *uid; + int ret; + + if (acpi_disabled) + return -ENOENT; + + adev = ACPI_COMPANION(dev); + if (!adev) + return -ENXIO; + + aid = acpi_match_device(mlxbf_i2c_acpi_ids, dev); + if (!aid) + return -ENODEV; + + priv->chip = (struct mlxbf_i2c_chip_info *)aid->driver_data; + + uid = acpi_device_uid(adev); + if (!uid || !(*uid)) { + dev_err(dev, "Cannot retrieve UID\n"); + return -ENODEV; + } + + ret = kstrtoul(uid, 0, &bus_id); + if (!ret) + priv->bus = bus_id; + + return ret; +} + +static int mlxbf_i2c_of_probe(struct device *dev, struct mlxbf_i2c_priv *priv) +{ + const struct of_device_id *oid; + int bus_id = -1; + + if (IS_ENABLED(CONFIG_OF) && dev->of_node) { + oid = of_match_node(mlxbf_i2c_dt_ids, dev->of_node); + if (!oid) + return -ENODEV; + + priv->chip = oid->data; + + bus_id = of_alias_get_id(dev->of_node, "i2c"); + if (bus_id >= 0) + priv->bus = bus_id; + } + + if (bus_id < 0) { + dev_err(dev, "Cannot get bus id"); + return bus_id; + } + + return 0; +} + +static int mlxbf_i2c_probe(struct platform_device *pdev) +{ + struct device *dev = &pdev->dev; + struct mlxbf_i2c_priv *priv; + struct i2c_adapter *adap; + int irq, ret; + + priv = devm_kzalloc(dev, sizeof(struct mlxbf_i2c_priv), GFP_KERNEL); + if (!priv) + return -ENOMEM; + + ret = mlxbf_i2c_acpi_probe(dev, priv); + if (ret < 0 && ret != -ENOENT && ret != -ENXIO) + ret = mlxbf_i2c_of_probe(dev, priv); + + if (ret < 0) + return ret; + + ret = mlxbf_i2c_init_resource(pdev, &priv->smbus, + MLXBF_I2C_SMBUS_RES); + if (ret < 0) { + dev_err(dev, "Cannot fetch smbus resource info"); + return ret; + } + + ret = mlxbf_i2c_init_resource(pdev, &priv->mst_cause, + MLXBF_I2C_MST_CAUSE_RES); + if (ret < 0) { + dev_err(dev, "Cannot fetch cause master resource info"); + return ret; + } + + ret = mlxbf_i2c_init_resource(pdev, &priv->slv_cause, + MLXBF_I2C_SLV_CAUSE_RES); + if (ret < 0) { + dev_err(dev, "Cannot fetch cause slave resource info"); + return ret; + } + + adap = &priv->adap; + adap->owner = THIS_MODULE; + adap->class = I2C_CLASS_HWMON; + adap->algo = &mlxbf_i2c_algo; + adap->quirks = &mlxbf_i2c_quirks; + adap->dev.parent = dev; + adap->dev.of_node = dev->of_node; + adap->nr = priv->bus; + + snprintf(adap->name, sizeof(adap->name), "i2c%d", adap->nr); + i2c_set_adapdata(adap, priv); + + /* Read Core PLL frequency. */ + ret = mlxbf_i2c_calculate_corepll_freq(pdev, priv); + if (ret < 0) { + dev_err(dev, "cannot get core clock frequency\n"); + /* Set to default value. */ + priv->frequency = MLXBF_I2C_COREPLL_FREQ; + } + + /* + * Initialize master. + * Note that a physical bus might be shared among Linux and firmware + * (e.g., ATF). Thus, the bus should be initialized and ready and + * bus initialization would be unnecessary. This requires additional + * knowledge about physical busses. But, since an extra initialization + * does not really hurt, then keep the code as is. + */ + ret = mlxbf_i2c_init_master(pdev, priv); + if (ret < 0) { + dev_err(dev, "failed to initialize smbus master %d", + priv->bus); + return ret; + } + + mlxbf_i2c_init_timings(pdev, priv); + + mlxbf_i2c_init_slave(pdev, priv); + + irq = platform_get_irq(pdev, 0); + ret = devm_request_irq(dev, irq, mlxbf_smbus_irq, + IRQF_ONESHOT | IRQF_SHARED | IRQF_PROBE_SHARED, + dev_name(dev), priv); + if (ret < 0) { + dev_err(dev, "Cannot get irq %d\n", irq); + return ret; + } + + priv->irq = irq; + + platform_set_drvdata(pdev, priv); + + ret = i2c_add_numbered_adapter(adap); + if (ret < 0) + return ret; + + mutex_lock(&mlxbf_i2c_bus_lock); + mlxbf_i2c_bus_count++; + mutex_unlock(&mlxbf_i2c_bus_lock); + + return 0; +} + +static int mlxbf_i2c_remove(struct platform_device *pdev) +{ + struct mlxbf_i2c_priv *priv = platform_get_drvdata(pdev); + struct device *dev = &pdev->dev; + struct resource *params; + + params = priv->smbus->params; + devm_release_mem_region(dev, params->start, resource_size(params)); + + params = priv->mst_cause->params; + devm_release_mem_region(dev, params->start, resource_size(params)); + + params = priv->slv_cause->params; + devm_release_mem_region(dev, params->start, resource_size(params)); + + /* + * Release shared resources. This should be done when releasing + * the I2C controller. + */ + mutex_lock(&mlxbf_i2c_bus_lock); + if (--mlxbf_i2c_bus_count == 0) { + mlxbf_i2c_release_coalesce(pdev, priv); + mlxbf_i2c_release_corepll(pdev, priv); + mlxbf_i2c_release_gpio(pdev, priv); + } + mutex_unlock(&mlxbf_i2c_bus_lock); + + devm_free_irq(dev, priv->irq, priv); + + i2c_del_adapter(&priv->adap); + + return 0; +} + +static struct platform_driver mlxbf_i2c_driver = { + .probe = mlxbf_i2c_probe, + .remove = mlxbf_i2c_remove, + .driver = { + .name = "i2c-mlxbf", + .of_match_table = mlxbf_i2c_dt_ids, + .acpi_match_table = ACPI_PTR(mlxbf_i2c_acpi_ids), + }, +}; + +static int __init mlxbf_i2c_init(void) +{ + mutex_init(&mlxbf_i2c_coalesce_lock); + mutex_init(&mlxbf_i2c_corepll_lock); + mutex_init(&mlxbf_i2c_gpio_lock); + + mutex_init(&mlxbf_i2c_bus_lock); + + return platform_driver_register(&mlxbf_i2c_driver); +} +module_init(mlxbf_i2c_init); + +static void __exit mlxbf_i2c_exit(void) +{ + platform_driver_unregister(&mlxbf_i2c_driver); + + mutex_destroy(&mlxbf_i2c_bus_lock); + + mutex_destroy(&mlxbf_i2c_gpio_lock); + mutex_destroy(&mlxbf_i2c_corepll_lock); + mutex_destroy(&mlxbf_i2c_coalesce_lock); +} +module_exit(mlxbf_i2c_exit); + +MODULE_DESCRIPTION("Mellanox BlueField I2C bus driver"); +MODULE_AUTHOR("Khalil Blaiech "); +MODULE_LICENSE("GPL v2"); diff --git a/drivers/i2c/busses/i2c-mv64xxx.c b/drivers/i2c/busses/i2c-mv64xxx.c index 8d9d4ffdcd24..e0e45fc19b8f 100644 --- a/drivers/i2c/busses/i2c-mv64xxx.c +++ b/drivers/i2c/busses/i2c-mv64xxx.c @@ -496,11 +496,10 @@ static irqreturn_t mv64xxx_i2c_intr(int irq, void *dev_id) { struct mv64xxx_i2c_data *drv_data = dev_id; - unsigned long flags; u32 status; irqreturn_t rc = IRQ_NONE; - spin_lock_irqsave(&drv_data->lock, flags); + spin_lock(&drv_data->lock); if (drv_data->offload_enabled) rc = mv64xxx_i2c_intr_offload(drv_data); @@ -517,7 +516,7 @@ mv64xxx_i2c_intr(int irq, void *dev_id) rc = IRQ_HANDLED; } - spin_unlock_irqrestore(&drv_data->lock, flags); + spin_unlock(&drv_data->lock); return rc; } diff --git a/drivers/i2c/busses/i2c-nvidia-gpu.c b/drivers/i2c/busses/i2c-nvidia-gpu.c index f480105000b8..f9a69b109e5c 100644 --- a/drivers/i2c/busses/i2c-nvidia-gpu.c +++ b/drivers/i2c/busses/i2c-nvidia-gpu.c @@ -125,8 +125,7 @@ static int gpu_i2c_read(struct gpu_i2c_dev *i2cd, u8 *data, u16 len) put_unaligned_be16(val, data); break; case 3: - put_unaligned_be16(val >> 8, data); - data[2] = val; + put_unaligned_be24(val, data); break; case 4: put_unaligned_be32(val, data); diff --git a/drivers/i2c/busses/i2c-owl.c b/drivers/i2c/busses/i2c-owl.c index a163b8f308c1..9918b2a0b909 100644 --- a/drivers/i2c/busses/i2c-owl.c +++ b/drivers/i2c/busses/i2c-owl.c @@ -165,10 +165,9 @@ static irqreturn_t owl_i2c_interrupt(int irq, void *_dev) { struct owl_i2c_dev *i2c_dev = _dev; struct i2c_msg *msg = i2c_dev->msg; - unsigned long flags; unsigned int stat, fifostat; - spin_lock_irqsave(&i2c_dev->lock, flags); + spin_lock(&i2c_dev->lock); i2c_dev->err = 0; @@ -214,7 +213,7 @@ static irqreturn_t owl_i2c_interrupt(int irq, void *_dev) OWL_I2C_STAT_IRQP, true); complete_all(&i2c_dev->msg_complete); - spin_unlock_irqrestore(&i2c_dev->lock, flags); + spin_unlock(&i2c_dev->lock); return IRQ_HANDLED; } diff --git a/drivers/i2c/busses/i2c-qcom-geni.c b/drivers/i2c/busses/i2c-qcom-geni.c index dead5db3315a..8b4c35f47a70 100644 --- a/drivers/i2c/busses/i2c-qcom-geni.c +++ b/drivers/i2c/busses/i2c-qcom-geni.c @@ -210,9 +210,8 @@ static irqreturn_t geni_i2c_irq(int irq, void *dev) u32 dma; u32 val; struct i2c_msg *cur; - unsigned long flags; - spin_lock_irqsave(&gi2c->lock, flags); + spin_lock(&gi2c->lock); m_stat = readl_relaxed(base + SE_GENI_M_IRQ_STATUS); rx_st = readl_relaxed(base + SE_GENI_RX_FIFO_STATUS); dm_tx_st = readl_relaxed(base + SE_DMA_TX_IRQ_STAT); @@ -294,7 +293,7 @@ static irqreturn_t geni_i2c_irq(int irq, void *dev) dm_rx_st & RX_DMA_DONE || dm_rx_st & RX_RESET_DONE) complete(&gi2c->done); - spin_unlock_irqrestore(&gi2c->lock, flags); + spin_unlock(&gi2c->lock); return IRQ_HANDLED; } diff --git a/drivers/i2c/busses/i2c-rcar.c b/drivers/i2c/busses/i2c-rcar.c index c7c543483b08..217def2d7cb4 100644 --- a/drivers/i2c/busses/i2c-rcar.c +++ b/drivers/i2c/busses/i2c-rcar.c @@ -19,7 +19,9 @@ #include #include #include +#include #include +#include #include #include #include @@ -105,10 +107,11 @@ #define ID_ARBLOST (1 << 3) #define ID_NACK (1 << 4) /* persistent flags */ +#define ID_P_HOST_NOTIFY BIT(28) #define ID_P_REP_AFTER_RD BIT(29) #define ID_P_NO_RXDMA BIT(30) /* HW forbids RXDMA sometimes */ #define ID_P_PM_BLOCKED BIT(31) -#define ID_P_MASK GENMASK(31, 29) +#define ID_P_MASK GENMASK(31, 28) enum rcar_i2c_type { I2C_RCAR_GEN1, @@ -140,14 +143,13 @@ struct rcar_i2c_priv { struct reset_control *rstc; int irq; + + struct i2c_client *host_notify_client; }; #define rcar_i2c_priv_to_dev(p) ((p)->adap.dev.parent) #define rcar_i2c_is_recv(p) ((p)->msg->flags & I2C_M_RD) -#define LOOP_TIMEOUT 1024 - - static void rcar_i2c_write(struct rcar_i2c_priv *priv, int reg, u32 val) { writel(val, priv->io + reg); @@ -221,18 +223,18 @@ static void rcar_i2c_init(struct rcar_i2c_priv *priv) static int rcar_i2c_bus_barrier(struct rcar_i2c_priv *priv) { - int i; + int ret; + u32 val; - for (i = 0; i < LOOP_TIMEOUT; i++) { - /* make sure that bus is not busy */ - if (!(rcar_i2c_read(priv, ICMCR) & FSDA)) - return 0; - udelay(1); + ret = readl_poll_timeout(priv->io + ICMCR, val, !(val & FSDA), 10, + priv->adap.timeout); + if (ret) { + /* Waiting did not help, try to recover */ + priv->recovery_icmcr = MDBS | OBPC | FSDA | FSCL; + ret = i2c_recover_bus(&priv->adap); } - /* Waiting did not help, try to recover */ - priv->recovery_icmcr = MDBS | OBPC | FSDA | FSCL; - return i2c_recover_bus(&priv->adap); + return ret; } static int rcar_i2c_clock_calculate(struct rcar_i2c_priv *priv) @@ -760,20 +762,14 @@ static void rcar_i2c_release_dma(struct rcar_i2c_priv *priv) /* I2C is a special case, we need to poll the status of a reset */ static int rcar_i2c_do_reset(struct rcar_i2c_priv *priv) { - int i, ret; + int ret; ret = reset_control_reset(priv->rstc); if (ret) return ret; - for (i = 0; i < LOOP_TIMEOUT; i++) { - ret = reset_control_status(priv->rstc); - if (ret == 0) - return 0; - udelay(1); - } - - return -ETIMEDOUT; + return read_poll_timeout_atomic(reset_control_status, ret, ret == 0, 1, + 100, false, priv->rstc); } static int rcar_i2c_master_xfer(struct i2c_adapter *adap, @@ -884,14 +880,21 @@ static int rcar_unreg_slave(struct i2c_client *slave) static u32 rcar_i2c_func(struct i2c_adapter *adap) { + struct rcar_i2c_priv *priv = i2c_get_adapdata(adap); + /* * This HW can't do: * I2C_SMBUS_QUICK (setting FSB during START didn't work) * I2C_M_NOSTART (automatically sends address after START) * I2C_M_IGNORE_NAK (automatically sends STOP after NAK) */ - return I2C_FUNC_I2C | I2C_FUNC_SLAVE | - (I2C_FUNC_SMBUS_EMUL & ~I2C_FUNC_SMBUS_QUICK); + u32 func = I2C_FUNC_I2C | I2C_FUNC_SLAVE | + (I2C_FUNC_SMBUS_EMUL & ~I2C_FUNC_SMBUS_QUICK); + + if (priv->flags & ID_P_HOST_NOTIFY) + func |= I2C_FUNC_SMBUS_HOST_NOTIFY; + + return func; } static const struct i2c_algorithm rcar_i2c_algo = { @@ -991,6 +994,8 @@ static int rcar_i2c_probe(struct platform_device *pdev) else pm_runtime_put(dev); + if (of_property_read_bool(dev->of_node, "smbus")) + priv->flags |= ID_P_HOST_NOTIFY; priv->irq = platform_get_irq(pdev, 0); ret = devm_request_irq(dev, priv->irq, rcar_i2c_irq, 0, dev_name(dev), priv); @@ -1005,10 +1010,20 @@ static int rcar_i2c_probe(struct platform_device *pdev) if (ret < 0) goto out_pm_disable; + if (priv->flags & ID_P_HOST_NOTIFY) { + priv->host_notify_client = i2c_new_slave_host_notify_device(adap); + if (IS_ERR(priv->host_notify_client)) { + ret = PTR_ERR(priv->host_notify_client); + goto out_del_device; + } + } + dev_info(dev, "probed\n"); return 0; + out_del_device: + i2c_del_adapter(&priv->adap); out_pm_put: pm_runtime_put(dev); out_pm_disable: @@ -1021,6 +1036,8 @@ static int rcar_i2c_remove(struct platform_device *pdev) struct rcar_i2c_priv *priv = platform_get_drvdata(pdev); struct device *dev = &pdev->dev; + if (priv->host_notify_client) + i2c_free_slave_host_notify_device(priv->host_notify_client); i2c_del_adapter(&priv->adap); rcar_i2c_release_dma(priv); if (priv->flags & ID_P_PM_BLOCKED) diff --git a/drivers/i2c/busses/i2c-rk3x.c b/drivers/i2c/busses/i2c-rk3x.c index 8e3cc85d1921..819ab4ee517e 100644 --- a/drivers/i2c/busses/i2c-rk3x.c +++ b/drivers/i2c/busses/i2c-rk3x.c @@ -1312,18 +1312,13 @@ static int rk3x_i2c_probe(struct platform_device *pdev) i2c->pclk = devm_clk_get(&pdev->dev, "pclk"); } - if (IS_ERR(i2c->clk)) { - ret = PTR_ERR(i2c->clk); - if (ret != -EPROBE_DEFER) - dev_err(&pdev->dev, "Can't get bus clk: %d\n", ret); - return ret; - } - if (IS_ERR(i2c->pclk)) { - ret = PTR_ERR(i2c->pclk); - if (ret != -EPROBE_DEFER) - dev_err(&pdev->dev, "Can't get periph clk: %d\n", ret); - return ret; - } + if (IS_ERR(i2c->clk)) + return dev_err_probe(&pdev->dev, PTR_ERR(i2c->clk), + "Can't get bus clk\n"); + + if (IS_ERR(i2c->pclk)) + return dev_err_probe(&pdev->dev, PTR_ERR(i2c->pclk), + "Can't get periph clk\n"); ret = clk_prepare(i2c->clk); if (ret < 0) { diff --git a/drivers/i2c/busses/i2c-stm32.c b/drivers/i2c/busses/i2c-stm32.c index 3f69a3bb6119..157c64e27d0b 100644 --- a/drivers/i2c/busses/i2c-stm32.c +++ b/drivers/i2c/busses/i2c-stm32.c @@ -26,8 +26,9 @@ struct stm32_i2c_dma *stm32_i2c_dma_request(struct device *dev, dma->chan_tx = dma_request_chan(dev, "tx"); if (IS_ERR(dma->chan_tx)) { ret = PTR_ERR(dma->chan_tx); - if (ret != -EPROBE_DEFER) - dev_err(dev, "can't request DMA tx channel\n"); + if (ret != -ENODEV) + ret = dev_err_probe(dev, ret, + "can't request DMA tx channel\n"); goto fail_al; } @@ -46,8 +47,9 @@ struct stm32_i2c_dma *stm32_i2c_dma_request(struct device *dev, dma->chan_rx = dma_request_chan(dev, "rx"); if (IS_ERR(dma->chan_rx)) { ret = PTR_ERR(dma->chan_rx); - if (ret != -EPROBE_DEFER) - dev_err(dev, "can't request DMA rx channel\n"); + if (ret != -ENODEV) + ret = dev_err_probe(dev, ret, + "can't request DMA rx channel\n"); goto fail_tx; } @@ -76,8 +78,6 @@ struct stm32_i2c_dma *stm32_i2c_dma_request(struct device *dev, dma_release_channel(dma->chan_tx); fail_al: devm_kfree(dev, dma); - if (ret != -EPROBE_DEFER) - dev_info(dev, "can't use DMA\n"); return ERR_PTR(ret); } diff --git a/drivers/i2c/busses/i2c-stm32f4.c b/drivers/i2c/busses/i2c-stm32f4.c index 48e269284369..937c2c8fd349 100644 --- a/drivers/i2c/busses/i2c-stm32f4.c +++ b/drivers/i2c/busses/i2c-stm32f4.c @@ -797,10 +797,8 @@ static int stm32f4_i2c_probe(struct platform_device *pdev) rst = devm_reset_control_get_exclusive(&pdev->dev, NULL); if (IS_ERR(rst)) { - ret = PTR_ERR(rst); - if (ret != -EPROBE_DEFER) - dev_err(&pdev->dev, "Error: Missing reset ctrl\n"); - + ret = dev_err_probe(&pdev->dev, PTR_ERR(rst), + "Error: Missing reset ctrl\n"); goto clk_free; } reset_control_assert(rst); diff --git a/drivers/i2c/busses/i2c-stm32f7.c b/drivers/i2c/busses/i2c-stm32f7.c index bff3479fe122..f41f51a176a1 100644 --- a/drivers/i2c/busses/i2c-stm32f7.c +++ b/drivers/i2c/busses/i2c-stm32f7.c @@ -18,6 +18,7 @@ #include #include #include +#include #include #include #include @@ -50,6 +51,7 @@ /* STM32F7 I2C control 1 */ #define STM32F7_I2C_CR1_PECEN BIT(23) +#define STM32F7_I2C_CR1_SMBHEN BIT(20) #define STM32F7_I2C_CR1_WUPEN BIT(18) #define STM32F7_I2C_CR1_SBC BIT(16) #define STM32F7_I2C_CR1_RXDMAEN BIT(15) @@ -150,7 +152,12 @@ #define STM32F7_I2C_MAX_LEN 0xff #define STM32F7_I2C_DMA_LEN_MIN 0x16 -#define STM32F7_I2C_MAX_SLAVE 0x2 +enum { + STM32F7_SLAVE_HOSTNOTIFY, + STM32F7_SLAVE_7_10_BITS_ADDR, + STM32F7_SLAVE_7_BITS_ADDR, + STM32F7_I2C_MAX_SLAVE +}; #define STM32F7_I2C_DNF_DEFAULT 0 #define STM32F7_I2C_DNF_MAX 16 @@ -301,6 +308,8 @@ struct stm32f7_i2c_msg { * @fmp_creg: register address for clearing Fast Mode Plus bits * @fmp_mask: mask for Fast Mode Plus bits in set register * @wakeup_src: boolean to know if the device is a wakeup source + * @smbus_mode: states that the controller is configured in SMBus mode + * @host_notify_client: SMBus host-notify client */ struct stm32f7_i2c_dev { struct i2c_adapter adap; @@ -327,6 +336,8 @@ struct stm32f7_i2c_dev { u32 fmp_creg; u32 fmp_mask; bool wakeup_src; + bool smbus_mode; + struct i2c_client *host_notify_client; }; /* @@ -1321,11 +1332,20 @@ static int stm32f7_i2c_get_free_slave_id(struct stm32f7_i2c_dev *i2c_dev, int i; /* - * slave[0] supports 7-bit and 10-bit slave address - * slave[1] supports 7-bit slave address only + * slave[STM32F7_SLAVE_HOSTNOTIFY] support only SMBus Host address (0x8) + * slave[STM32F7_SLAVE_7_10_BITS_ADDR] supports 7-bit and 10-bit slave address + * slave[STM32F7_SLAVE_7_BITS_ADDR] supports 7-bit slave address only */ - for (i = STM32F7_I2C_MAX_SLAVE - 1; i >= 0; i--) { - if (i == 1 && (slave->flags & I2C_CLIENT_TEN)) + if (i2c_dev->smbus_mode && (slave->addr == 0x08)) { + if (i2c_dev->slave[STM32F7_SLAVE_HOSTNOTIFY]) + goto fail; + *id = STM32F7_SLAVE_HOSTNOTIFY; + return 0; + } + + for (i = STM32F7_I2C_MAX_SLAVE - 1; i > STM32F7_SLAVE_HOSTNOTIFY; i--) { + if ((i == STM32F7_SLAVE_7_BITS_ADDR) && + (slave->flags & I2C_CLIENT_TEN)) continue; if (!i2c_dev->slave[i]) { *id = i; @@ -1333,6 +1353,7 @@ static int stm32f7_i2c_get_free_slave_id(struct stm32f7_i2c_dev *i2c_dev, } } +fail: dev_err(dev, "Slave 0x%x could not be registered\n", slave->addr); return -EINVAL; @@ -1776,7 +1797,13 @@ static int stm32f7_i2c_reg_slave(struct i2c_client *slave) if (!stm32f7_i2c_is_slave_registered(i2c_dev)) stm32f7_i2c_enable_wakeup(i2c_dev, true); - if (id == 0) { + switch (id) { + case 0: + /* Slave SMBus Host */ + i2c_dev->slave[id] = slave; + break; + + case 1: /* Configure Own Address 1 */ oar1 = readl_relaxed(i2c_dev->base + STM32F7_I2C_OAR1); oar1 &= ~STM32F7_I2C_OAR1_MASK; @@ -1789,7 +1816,9 @@ static int stm32f7_i2c_reg_slave(struct i2c_client *slave) oar1 |= STM32F7_I2C_OAR1_OA1EN; i2c_dev->slave[id] = slave; writel_relaxed(oar1, i2c_dev->base + STM32F7_I2C_OAR1); - } else if (id == 1) { + break; + + case 2: /* Configure Own Address 2 */ oar2 = readl_relaxed(i2c_dev->base + STM32F7_I2C_OAR2); oar2 &= ~STM32F7_I2C_OAR2_MASK; @@ -1802,7 +1831,10 @@ static int stm32f7_i2c_reg_slave(struct i2c_client *slave) oar2 |= STM32F7_I2C_OAR2_OA2EN; i2c_dev->slave[id] = slave; writel_relaxed(oar2, i2c_dev->base + STM32F7_I2C_OAR2); - } else { + break; + + default: + dev_err(dev, "I2C slave id not supported\n"); ret = -ENODEV; goto pm_free; } @@ -1843,10 +1875,10 @@ static int stm32f7_i2c_unreg_slave(struct i2c_client *slave) if (ret < 0) return ret; - if (id == 0) { + if (id == 1) { mask = STM32F7_I2C_OAR1_OA1EN; stm32f7_i2c_clr_bits(base + STM32F7_I2C_OAR1, mask); - } else { + } else if (id == 2) { mask = STM32F7_I2C_OAR2_OA2EN; stm32f7_i2c_clr_bits(base + STM32F7_I2C_OAR2, mask); } @@ -1911,14 +1943,51 @@ static int stm32f7_i2c_setup_fm_plus_bits(struct platform_device *pdev, &i2c_dev->fmp_mask); } +static int stm32f7_i2c_enable_smbus_host(struct stm32f7_i2c_dev *i2c_dev) +{ + struct i2c_adapter *adap = &i2c_dev->adap; + void __iomem *base = i2c_dev->base; + struct i2c_client *client; + + client = i2c_new_slave_host_notify_device(adap); + if (IS_ERR(client)) + return PTR_ERR(client); + + i2c_dev->host_notify_client = client; + + /* Enable SMBus Host address */ + stm32f7_i2c_set_bits(base + STM32F7_I2C_CR1, STM32F7_I2C_CR1_SMBHEN); + + return 0; +} + +static void stm32f7_i2c_disable_smbus_host(struct stm32f7_i2c_dev *i2c_dev) +{ + void __iomem *base = i2c_dev->base; + + if (i2c_dev->host_notify_client) { + /* Disable SMBus Host address */ + stm32f7_i2c_clr_bits(base + STM32F7_I2C_CR1, + STM32F7_I2C_CR1_SMBHEN); + i2c_free_slave_host_notify_device(i2c_dev->host_notify_client); + } +} + static u32 stm32f7_i2c_func(struct i2c_adapter *adap) { - return I2C_FUNC_I2C | I2C_FUNC_10BIT_ADDR | I2C_FUNC_SLAVE | - I2C_FUNC_SMBUS_QUICK | I2C_FUNC_SMBUS_BYTE | - I2C_FUNC_SMBUS_BYTE_DATA | I2C_FUNC_SMBUS_WORD_DATA | - I2C_FUNC_SMBUS_BLOCK_DATA | I2C_FUNC_SMBUS_BLOCK_PROC_CALL | - I2C_FUNC_SMBUS_PROC_CALL | I2C_FUNC_SMBUS_PEC | - I2C_FUNC_SMBUS_I2C_BLOCK; + struct stm32f7_i2c_dev *i2c_dev = i2c_get_adapdata(adap); + + u32 func = I2C_FUNC_I2C | I2C_FUNC_10BIT_ADDR | I2C_FUNC_SLAVE | + I2C_FUNC_SMBUS_QUICK | I2C_FUNC_SMBUS_BYTE | + I2C_FUNC_SMBUS_BYTE_DATA | I2C_FUNC_SMBUS_WORD_DATA | + I2C_FUNC_SMBUS_BLOCK_DATA | I2C_FUNC_SMBUS_BLOCK_PROC_CALL | + I2C_FUNC_SMBUS_PROC_CALL | I2C_FUNC_SMBUS_PEC | + I2C_FUNC_SMBUS_I2C_BLOCK; + + if (i2c_dev->smbus_mode) + func |= I2C_FUNC_SMBUS_HOST_NOTIFY; + + return func; } static const struct i2c_algorithm stm32f7_i2c_algo = { @@ -1968,11 +2037,9 @@ static int stm32f7_i2c_probe(struct platform_device *pdev) "wakeup-source"); i2c_dev->clk = devm_clk_get(&pdev->dev, NULL); - if (IS_ERR(i2c_dev->clk)) { - if (PTR_ERR(i2c_dev->clk) != -EPROBE_DEFER) - dev_err(&pdev->dev, "Failed to get controller clock\n"); - return PTR_ERR(i2c_dev->clk); - } + if (IS_ERR(i2c_dev->clk)) + return dev_err_probe(&pdev->dev, PTR_ERR(i2c_dev->clk), + "Failed to get controller clock\n"); ret = clk_prepare_enable(i2c_dev->clk); if (ret) { @@ -1982,10 +2049,8 @@ static int stm32f7_i2c_probe(struct platform_device *pdev) rst = devm_reset_control_get(&pdev->dev, NULL); if (IS_ERR(rst)) { - ret = PTR_ERR(rst); - if (ret != -EPROBE_DEFER) - dev_err(&pdev->dev, "Error: Missing reset ctrl\n"); - + ret = dev_err_probe(&pdev->dev, PTR_ERR(rst), + "Error: Missing reset ctrl\n"); goto clk_free; } reset_control_assert(rst); @@ -2052,14 +2117,13 @@ static int stm32f7_i2c_probe(struct platform_device *pdev) i2c_dev->dma = stm32_i2c_dma_request(i2c_dev->dev, phy_addr, STM32F7_I2C_TXDR, STM32F7_I2C_RXDR); - if (PTR_ERR(i2c_dev->dma) == -ENODEV) - i2c_dev->dma = NULL; - else if (IS_ERR(i2c_dev->dma)) { + if (IS_ERR(i2c_dev->dma)) { ret = PTR_ERR(i2c_dev->dma); - if (ret != -EPROBE_DEFER) - dev_err(&pdev->dev, - "Failed to request dma error %i\n", ret); - goto fmp_clear; + /* DMA support is optional, only report other errors */ + if (ret != -ENODEV) + goto fmp_clear; + dev_dbg(i2c_dev->dev, "No DMA option: fallback using interrupts\n"); + i2c_dev->dma = NULL; } if (i2c_dev->wakeup_src) { @@ -2084,10 +2148,22 @@ static int stm32f7_i2c_probe(struct platform_device *pdev) stm32f7_i2c_hw_config(i2c_dev); + i2c_dev->smbus_mode = of_property_read_bool(pdev->dev.of_node, "smbus"); + ret = i2c_add_adapter(adap); if (ret) goto pm_disable; + if (i2c_dev->smbus_mode) { + ret = stm32f7_i2c_enable_smbus_host(i2c_dev); + if (ret) { + dev_err(i2c_dev->dev, + "failed to enable SMBus Host-Notify protocol (%d)\n", + ret); + goto i2c_adapter_remove; + } + } + dev_info(i2c_dev->dev, "STM32F7 I2C-%d bus adapter\n", adap->nr); pm_runtime_mark_last_busy(i2c_dev->dev); @@ -2095,6 +2171,9 @@ static int stm32f7_i2c_probe(struct platform_device *pdev) return 0; +i2c_adapter_remove: + i2c_del_adapter(adap); + pm_disable: pm_runtime_put_noidle(i2c_dev->dev); pm_runtime_disable(i2c_dev->dev); @@ -2126,6 +2205,8 @@ static int stm32f7_i2c_remove(struct platform_device *pdev) { struct stm32f7_i2c_dev *i2c_dev = platform_get_drvdata(pdev); + stm32f7_i2c_disable_smbus_host(i2c_dev); + i2c_del_adapter(&i2c_dev->adap); pm_runtime_get_sync(i2c_dev->dev); diff --git a/drivers/i2c/busses/i2c-tegra.c b/drivers/i2c/busses/i2c-tegra.c index 00d3e4d7a01e..6f08c0c3238d 100644 --- a/drivers/i2c/busses/i2c-tegra.c +++ b/drivers/i2c/busses/i2c-tegra.c @@ -136,7 +136,7 @@ /* configuration load timeout in microseconds */ #define I2C_CONFIG_LOAD_TIMEOUT 1000000 -/* Packet header size in bytes */ +/* packet header size in bytes */ #define I2C_PACKET_HEADER_SIZE 12 /* @@ -148,11 +148,10 @@ #define I2C_PIO_MODE_PREFERRED_LEN 32 /* - * msg_end_type: The bus control which need to be send at end of transfer. - * @MSG_END_STOP: Send stop pulse at end of transfer. - * @MSG_END_REPEAT_START: Send repeat start at end of transfer. - * @MSG_END_CONTINUE: The following on message is coming and so do not send - * stop or repeat start. + * msg_end_type: The bus control which needs to be sent at end of transfer. + * @MSG_END_STOP: Send stop pulse. + * @MSG_END_REPEAT_START: Send repeat-start. + * @MSG_END_CONTINUE: Don't send stop or repeat-start. */ enum msg_end_type { MSG_END_STOP, @@ -161,13 +160,10 @@ enum msg_end_type { }; /** - * struct tegra_i2c_hw_feature : Different HW support on Tegra - * @has_continue_xfer_support: Continue transfer supports. + * struct tegra_i2c_hw_feature : per hardware generation features + * @has_continue_xfer_support: continue-transfer supported * @has_per_pkt_xfer_complete_irq: Has enable/disable capability for transfer - * complete interrupt per packet basis. - * @has_single_clk_source: The I2C controller has single clock source. Tegra30 - * and earlier SoCs have two clock sources i.e. div-clk and - * fast-clk. + * completion interrupt on per packet basis. * @has_config_load_reg: Has the config load register to load the new * configuration. * @clk_divisor_hs_mode: Clock divisor in HS mode. @@ -187,7 +183,7 @@ enum msg_end_type { * @has_mst_fifo: The I2C controller contains the new MST FIFO interface that * provides additional features and allows for longer messages to * be transferred in one go. - * @quirks: i2c adapter quirks for limiting write/read transfer size and not + * @quirks: I2C adapter quirks for limiting write/read transfer size and not * allowing 0 length transfers. * @supports_bus_clear: Bus Clear support to recover from bus hang during * SDA stuck low from device for some unknown reasons. @@ -208,22 +204,21 @@ enum msg_end_type { struct tegra_i2c_hw_feature { bool has_continue_xfer_support; bool has_per_pkt_xfer_complete_irq; - bool has_single_clk_source; bool has_config_load_reg; - int clk_divisor_hs_mode; - int clk_divisor_std_mode; - int clk_divisor_fast_mode; - u16 clk_divisor_fast_plus_mode; + u32 clk_divisor_hs_mode; + u32 clk_divisor_std_mode; + u32 clk_divisor_fast_mode; + u32 clk_divisor_fast_plus_mode; bool has_multi_master_mode; bool has_slcg_override_reg; bool has_mst_fifo; const struct i2c_adapter_quirks *quirks; bool supports_bus_clear; bool has_apb_dma; - u8 tlow_std_mode; - u8 thigh_std_mode; - u8 tlow_fast_fastplus_mode; - u8 thigh_fast_fastplus_mode; + u32 tlow_std_mode; + u32 thigh_std_mode; + u32 tlow_fast_fastplus_mode; + u32 thigh_fast_fastplus_mode; u32 setup_hold_time_std_mode; u32 setup_hold_time_fast_fast_plus_mode; u32 setup_hold_time_hs_mode; @@ -236,7 +231,8 @@ struct tegra_i2c_hw_feature { * @hw: Tegra I2C HW feature * @adapter: core I2C layer adapter information * @div_clk: clock reference for div clock of I2C controller - * @fast_clk: clock reference for fast clock of I2C controller + * @clocks: array of I2C controller clocks + * @nclocks: number of clocks in the array * @rst: reset control for the I2C controller * @base: ioremapped registers cookie * @base_phys: physical base address of the I2C controller @@ -248,101 +244,103 @@ struct tegra_i2c_hw_feature { * @msg_err: error code for completed message * @msg_buf: pointer to current message data * @msg_buf_remaining: size of unsent data in the message buffer - * @msg_read: identifies read transfers + * @msg_read: indicates that the transfer is a read access * @bus_clk_rate: current I2C bus clock rate - * @clk_divisor_non_hs_mode: clock divider for non-high-speed modes - * @is_multimaster_mode: track if I2C controller is in multi-master mode + * @multimaster_mode: indicates that I2C controller is in multi-master mode * @tx_dma_chan: DMA transmit channel * @rx_dma_chan: DMA receive channel * @dma_phys: handle to DMA resources * @dma_buf: pointer to allocated DMA buffer * @dma_buf_size: DMA buffer size - * @is_curr_dma_xfer: indicates active DMA transfer + * @dma_mode: indicates active DMA transfer * @dma_complete: DMA completion notifier - * @is_curr_atomic_xfer: indicates active atomic transfer + * @atomic_mode: indicates active atomic transfer */ struct tegra_i2c_dev { struct device *dev; - const struct tegra_i2c_hw_feature *hw; struct i2c_adapter adapter; - struct clk *div_clk; - struct clk *fast_clk; - struct clk *slow_clk; + + const struct tegra_i2c_hw_feature *hw; struct reset_control *rst; - void __iomem *base; + unsigned int cont_id; + unsigned int irq; + phys_addr_t base_phys; - int cont_id; - int irq; - int is_dvc; - bool is_vi; + void __iomem *base; + + struct clk_bulk_data clocks[2]; + unsigned int nclocks; + + struct clk *div_clk; + u32 bus_clk_rate; + struct completion msg_complete; + size_t msg_buf_remaining; int msg_err; u8 *msg_buf; - size_t msg_buf_remaining; - int msg_read; - u32 bus_clk_rate; - u16 clk_divisor_non_hs_mode; - bool is_multimaster_mode; + + struct completion dma_complete; struct dma_chan *tx_dma_chan; struct dma_chan *rx_dma_chan; - dma_addr_t dma_phys; - u32 *dma_buf; unsigned int dma_buf_size; - bool is_curr_dma_xfer; - struct completion dma_complete; - bool is_curr_atomic_xfer; + dma_addr_t dma_phys; + void *dma_buf; + + bool multimaster_mode; + bool atomic_mode; + bool dma_mode; + bool msg_read; + bool is_dvc; + bool is_vi; }; -static int tegra_i2c_init(struct tegra_i2c_dev *i2c_dev, bool clk_reinit); - static void dvc_writel(struct tegra_i2c_dev *i2c_dev, u32 val, - unsigned long reg) + unsigned int reg) { writel_relaxed(val, i2c_dev->base + reg); } -static u32 dvc_readl(struct tegra_i2c_dev *i2c_dev, unsigned long reg) +static u32 dvc_readl(struct tegra_i2c_dev *i2c_dev, unsigned int reg) { return readl_relaxed(i2c_dev->base + reg); } /* - * i2c_writel and i2c_readl will offset the register if necessary to talk - * to the I2C block inside the DVC block + * If necessary, i2c_writel() and i2c_readl() will offset the register + * in order to talk to the I2C block inside the DVC block. */ -static unsigned long tegra_i2c_reg_addr(struct tegra_i2c_dev *i2c_dev, - unsigned long reg) +static u32 tegra_i2c_reg_addr(struct tegra_i2c_dev *i2c_dev, unsigned int reg) { if (i2c_dev->is_dvc) reg += (reg >= I2C_TX_FIFO) ? 0x10 : 0x40; else if (i2c_dev->is_vi) reg = 0xc00 + (reg << 2); + return reg; } -static void i2c_writel(struct tegra_i2c_dev *i2c_dev, u32 val, - unsigned long reg) +static void i2c_writel(struct tegra_i2c_dev *i2c_dev, u32 val, unsigned int reg) { writel_relaxed(val, i2c_dev->base + tegra_i2c_reg_addr(i2c_dev, reg)); - /* Read back register to make sure that register writes completed */ + /* read back register to make sure that register writes completed */ if (reg != I2C_TX_FIFO) readl_relaxed(i2c_dev->base + tegra_i2c_reg_addr(i2c_dev, reg)); } -static u32 i2c_readl(struct tegra_i2c_dev *i2c_dev, unsigned long reg) +static u32 i2c_readl(struct tegra_i2c_dev *i2c_dev, unsigned int reg) { return readl_relaxed(i2c_dev->base + tegra_i2c_reg_addr(i2c_dev, reg)); } static void i2c_writesl(struct tegra_i2c_dev *i2c_dev, void *data, - unsigned long reg, int len) + unsigned int reg, unsigned int len) { writesl(i2c_dev->base + tegra_i2c_reg_addr(i2c_dev, reg), data, len); } static void i2c_readsl(struct tegra_i2c_dev *i2c_dev, void *data, - unsigned long reg, int len) + unsigned int reg, unsigned int len) { readsl(i2c_dev->base + tegra_i2c_reg_addr(i2c_dev, reg), data, len); } @@ -377,21 +375,27 @@ static int tegra_i2c_dma_submit(struct tegra_i2c_dev *i2c_dev, size_t len) struct dma_chan *chan; dev_dbg(i2c_dev->dev, "starting DMA for length: %zu\n", len); + reinit_completion(&i2c_dev->dma_complete); + dir = i2c_dev->msg_read ? DMA_DEV_TO_MEM : DMA_MEM_TO_DEV; chan = i2c_dev->msg_read ? i2c_dev->rx_dma_chan : i2c_dev->tx_dma_chan; + dma_desc = dmaengine_prep_slave_single(chan, i2c_dev->dma_phys, len, dir, DMA_PREP_INTERRUPT | DMA_CTRL_ACK); if (!dma_desc) { - dev_err(i2c_dev->dev, "failed to get DMA descriptor\n"); + dev_err(i2c_dev->dev, "failed to get %s DMA descriptor\n", + i2c_dev->msg_read ? "RX" : "TX"); return -EINVAL; } dma_desc->callback = tegra_i2c_dma_complete; dma_desc->callback_param = i2c_dev; + dmaengine_submit(dma_desc); dma_async_issue_pending(chan); + return 0; } @@ -417,15 +421,15 @@ static void tegra_i2c_release_dma(struct tegra_i2c_dev *i2c_dev) static int tegra_i2c_init_dma(struct tegra_i2c_dev *i2c_dev) { struct dma_chan *chan; - u32 *dma_buf; dma_addr_t dma_phys; + u32 *dma_buf; int err; if (!i2c_dev->hw->has_apb_dma || i2c_dev->is_vi) return 0; if (!IS_ENABLED(CONFIG_TEGRA20_APB_DMA)) { - dev_dbg(i2c_dev->dev, "Support for APB DMA not enabled!\n"); + dev_dbg(i2c_dev->dev, "DMA support not enabled\n"); return 0; } @@ -445,16 +449,20 @@ static int tegra_i2c_init_dma(struct tegra_i2c_dev *i2c_dev) i2c_dev->tx_dma_chan = chan; + i2c_dev->dma_buf_size = i2c_dev->hw->quirks->max_write_len + + I2C_PACKET_HEADER_SIZE; + dma_buf = dma_alloc_coherent(i2c_dev->dev, i2c_dev->dma_buf_size, &dma_phys, GFP_KERNEL | __GFP_NOWARN); if (!dma_buf) { - dev_err(i2c_dev->dev, "failed to allocate the DMA buffer\n"); + dev_err(i2c_dev->dev, "failed to allocate DMA buffer\n"); err = -ENOMEM; goto err_out; } i2c_dev->dma_buf = dma_buf; i2c_dev->dma_phys = dma_phys; + return 0; err_out: @@ -468,171 +476,12 @@ static int tegra_i2c_init_dma(struct tegra_i2c_dev *i2c_dev) return err; } -static int tegra_i2c_flush_fifos(struct tegra_i2c_dev *i2c_dev) -{ - unsigned long timeout = jiffies + HZ; - unsigned int offset; - u32 mask, val; - - if (i2c_dev->hw->has_mst_fifo) { - mask = I2C_MST_FIFO_CONTROL_TX_FLUSH | - I2C_MST_FIFO_CONTROL_RX_FLUSH; - offset = I2C_MST_FIFO_CONTROL; - } else { - mask = I2C_FIFO_CONTROL_TX_FLUSH | - I2C_FIFO_CONTROL_RX_FLUSH; - offset = I2C_FIFO_CONTROL; - } - - val = i2c_readl(i2c_dev, offset); - val |= mask; - i2c_writel(i2c_dev, val, offset); - - while (i2c_readl(i2c_dev, offset) & mask) { - if (time_after(jiffies, timeout)) { - dev_warn(i2c_dev->dev, "timeout waiting for fifo flush\n"); - return -ETIMEDOUT; - } - usleep_range(1000, 2000); - } - return 0; -} - -static int tegra_i2c_empty_rx_fifo(struct tegra_i2c_dev *i2c_dev) -{ - u32 val; - int rx_fifo_avail; - u8 *buf = i2c_dev->msg_buf; - size_t buf_remaining = i2c_dev->msg_buf_remaining; - int words_to_transfer; - - /* - * Catch overflow due to message fully sent - * before the check for RX FIFO availability. - */ - if (WARN_ON_ONCE(!(i2c_dev->msg_buf_remaining))) - return -EINVAL; - - if (i2c_dev->hw->has_mst_fifo) { - val = i2c_readl(i2c_dev, I2C_MST_FIFO_STATUS); - rx_fifo_avail = FIELD_GET(I2C_MST_FIFO_STATUS_RX, val); - } else { - val = i2c_readl(i2c_dev, I2C_FIFO_STATUS); - rx_fifo_avail = FIELD_GET(I2C_FIFO_STATUS_RX, val); - } - - /* Rounds down to not include partial word at the end of buf */ - words_to_transfer = buf_remaining / BYTES_PER_FIFO_WORD; - if (words_to_transfer > rx_fifo_avail) - words_to_transfer = rx_fifo_avail; - - i2c_readsl(i2c_dev, buf, I2C_RX_FIFO, words_to_transfer); - - buf += words_to_transfer * BYTES_PER_FIFO_WORD; - buf_remaining -= words_to_transfer * BYTES_PER_FIFO_WORD; - rx_fifo_avail -= words_to_transfer; - - /* - * If there is a partial word at the end of buf, handle it manually to - * prevent overwriting past the end of buf - */ - if (rx_fifo_avail > 0 && buf_remaining > 0) { - /* - * buf_remaining > 3 check not needed as rx_fifo_avail == 0 - * when (words_to_transfer was > rx_fifo_avail) earlier - * in this function. - */ - val = i2c_readl(i2c_dev, I2C_RX_FIFO); - val = cpu_to_le32(val); - memcpy(buf, &val, buf_remaining); - buf_remaining = 0; - rx_fifo_avail--; - } - - /* RX FIFO must be drained, otherwise it's an Overflow case. */ - if (WARN_ON_ONCE(rx_fifo_avail)) - return -EINVAL; - - i2c_dev->msg_buf_remaining = buf_remaining; - i2c_dev->msg_buf = buf; - - return 0; -} - -static int tegra_i2c_fill_tx_fifo(struct tegra_i2c_dev *i2c_dev) -{ - u32 val; - int tx_fifo_avail; - u8 *buf = i2c_dev->msg_buf; - size_t buf_remaining = i2c_dev->msg_buf_remaining; - int words_to_transfer; - - if (i2c_dev->hw->has_mst_fifo) { - val = i2c_readl(i2c_dev, I2C_MST_FIFO_STATUS); - tx_fifo_avail = FIELD_GET(I2C_MST_FIFO_STATUS_TX, val); - } else { - val = i2c_readl(i2c_dev, I2C_FIFO_STATUS); - tx_fifo_avail = FIELD_GET(I2C_FIFO_STATUS_TX, val); - } - - /* Rounds down to not include partial word at the end of buf */ - words_to_transfer = buf_remaining / BYTES_PER_FIFO_WORD; - - /* It's very common to have < 4 bytes, so optimize that case. */ - if (words_to_transfer) { - if (words_to_transfer > tx_fifo_avail) - words_to_transfer = tx_fifo_avail; - - /* - * Update state before writing to FIFO. If this casues us - * to finish writing all bytes (AKA buf_remaining goes to 0) we - * have a potential for an interrupt (PACKET_XFER_COMPLETE is - * not maskable). We need to make sure that the isr sees - * buf_remaining as 0 and doesn't call us back re-entrantly. - */ - buf_remaining -= words_to_transfer * BYTES_PER_FIFO_WORD; - tx_fifo_avail -= words_to_transfer; - i2c_dev->msg_buf_remaining = buf_remaining; - i2c_dev->msg_buf = buf + - words_to_transfer * BYTES_PER_FIFO_WORD; - barrier(); - - i2c_writesl(i2c_dev, buf, I2C_TX_FIFO, words_to_transfer); - - buf += words_to_transfer * BYTES_PER_FIFO_WORD; - } - - /* - * If there is a partial word at the end of buf, handle it manually to - * prevent reading past the end of buf, which could cross a page - * boundary and fault. - */ - if (tx_fifo_avail > 0 && buf_remaining > 0) { - /* - * buf_remaining > 3 check not needed as tx_fifo_avail == 0 - * when (words_to_transfer was > tx_fifo_avail) earlier - * in this function for non-zero words_to_transfer. - */ - memcpy(&val, buf, buf_remaining); - val = le32_to_cpu(val); - - /* Again update before writing to FIFO to make sure isr sees. */ - i2c_dev->msg_buf_remaining = 0; - i2c_dev->msg_buf = NULL; - barrier(); - - i2c_writel(i2c_dev, val, I2C_TX_FIFO); - } - - return 0; -} - /* * One of the Tegra I2C blocks is inside the DVC (Digital Voltage Controller) * block. This block is identical to the rest of the I2C blocks, except that * it only supports master mode, it has registers moved around, and it needs * some extra init to get it into I2C mode. The register moves are handled - * by i2c_readl and i2c_writel + * by i2c_readl() and i2c_writel(). */ static void tegra_dvc_init(struct tegra_i2c_dev *i2c_dev) { @@ -648,100 +497,6 @@ static void tegra_dvc_init(struct tegra_i2c_dev *i2c_dev) dvc_writel(i2c_dev, val, DVC_CTRL_REG1); } -static int __maybe_unused tegra_i2c_runtime_resume(struct device *dev) -{ - struct tegra_i2c_dev *i2c_dev = dev_get_drvdata(dev); - int ret; - - ret = pinctrl_pm_select_default_state(i2c_dev->dev); - if (ret) - return ret; - - ret = clk_enable(i2c_dev->fast_clk); - if (ret < 0) { - dev_err(i2c_dev->dev, - "Enabling fast clk failed, err %d\n", ret); - return ret; - } - - ret = clk_enable(i2c_dev->slow_clk); - if (ret < 0) { - dev_err(dev, "failed to enable slow clock: %d\n", ret); - goto disable_fast_clk; - } - - ret = clk_enable(i2c_dev->div_clk); - if (ret < 0) { - dev_err(i2c_dev->dev, - "Enabling div clk failed, err %d\n", ret); - goto disable_slow_clk; - } - - /* - * VI I2C device is attached to VE power domain which goes through - * power ON/OFF during PM runtime resume/suspend. So, controller - * should go through reset and need to re-initialize after power - * domain ON. - */ - if (i2c_dev->is_vi) { - ret = tegra_i2c_init(i2c_dev, true); - if (ret) - goto disable_div_clk; - } - - return 0; - -disable_div_clk: - clk_disable(i2c_dev->div_clk); -disable_slow_clk: - clk_disable(i2c_dev->slow_clk); -disable_fast_clk: - clk_disable(i2c_dev->fast_clk); - return ret; -} - -static int __maybe_unused tegra_i2c_runtime_suspend(struct device *dev) -{ - struct tegra_i2c_dev *i2c_dev = dev_get_drvdata(dev); - - clk_disable(i2c_dev->div_clk); - clk_disable(i2c_dev->slow_clk); - clk_disable(i2c_dev->fast_clk); - - return pinctrl_pm_select_idle_state(i2c_dev->dev); -} - -static int tegra_i2c_wait_for_config_load(struct tegra_i2c_dev *i2c_dev) -{ - unsigned long reg_offset; - void __iomem *addr; - u32 val; - int err; - - if (i2c_dev->hw->has_config_load_reg) { - reg_offset = tegra_i2c_reg_addr(i2c_dev, I2C_CONFIG_LOAD); - addr = i2c_dev->base + reg_offset; - i2c_writel(i2c_dev, I2C_MSTR_CONFIG_LOAD, I2C_CONFIG_LOAD); - - if (i2c_dev->is_curr_atomic_xfer) - err = readl_relaxed_poll_timeout_atomic( - addr, val, val == 0, 1000, - I2C_CONFIG_LOAD_TIMEOUT); - else - err = readl_relaxed_poll_timeout( - addr, val, val == 0, 1000, - I2C_CONFIG_LOAD_TIMEOUT); - - if (err) { - dev_warn(i2c_dev->dev, - "timeout waiting for config load\n"); - return err; - } - } - - return 0; -} - static void tegra_i2c_vi_init(struct tegra_i2c_dev *i2c_dev) { u32 value; @@ -771,17 +526,83 @@ static void tegra_i2c_vi_init(struct tegra_i2c_dev *i2c_dev) i2c_writel(i2c_dev, 0x0, I2C_TLOW_SEXT); } -static int tegra_i2c_init(struct tegra_i2c_dev *i2c_dev, bool clk_reinit) +static int tegra_i2c_poll_register(struct tegra_i2c_dev *i2c_dev, + u32 reg, u32 mask, u32 delay_us, + u32 timeout_us) { + void __iomem *addr = i2c_dev->base + tegra_i2c_reg_addr(i2c_dev, reg); u32 val; - int err; - u32 clk_divisor, clk_multiplier; - u32 tsu_thd; - u8 tlow, thigh; - reset_control_assert(i2c_dev->rst); - udelay(2); - reset_control_deassert(i2c_dev->rst); + if (!i2c_dev->atomic_mode) + return readl_relaxed_poll_timeout(addr, val, !(val & mask), + delay_us, timeout_us); + + return readl_relaxed_poll_timeout_atomic(addr, val, !(val & mask), + delay_us, timeout_us); +} + +static int tegra_i2c_flush_fifos(struct tegra_i2c_dev *i2c_dev) +{ + u32 mask, val, offset; + int err; + + if (i2c_dev->hw->has_mst_fifo) { + mask = I2C_MST_FIFO_CONTROL_TX_FLUSH | + I2C_MST_FIFO_CONTROL_RX_FLUSH; + offset = I2C_MST_FIFO_CONTROL; + } else { + mask = I2C_FIFO_CONTROL_TX_FLUSH | + I2C_FIFO_CONTROL_RX_FLUSH; + offset = I2C_FIFO_CONTROL; + } + + val = i2c_readl(i2c_dev, offset); + val |= mask; + i2c_writel(i2c_dev, val, offset); + + err = tegra_i2c_poll_register(i2c_dev, offset, mask, 1000, 1000000); + if (err) { + dev_err(i2c_dev->dev, "failed to flush FIFO\n"); + return err; + } + + return 0; +} + +static int tegra_i2c_wait_for_config_load(struct tegra_i2c_dev *i2c_dev) +{ + int err; + + if (!i2c_dev->hw->has_config_load_reg) + return 0; + + i2c_writel(i2c_dev, I2C_MSTR_CONFIG_LOAD, I2C_CONFIG_LOAD); + + err = tegra_i2c_poll_register(i2c_dev, I2C_CONFIG_LOAD, 0xffffffff, + 1000, I2C_CONFIG_LOAD_TIMEOUT); + if (err) { + dev_err(i2c_dev->dev, "failed to load config\n"); + return err; + } + + return 0; +} + +static int tegra_i2c_init(struct tegra_i2c_dev *i2c_dev) +{ + u32 val, clk_divisor, clk_multiplier, tsu_thd, tlow, thigh, non_hs_mode; + int err; + + /* + * The reset shouldn't ever fail in practice. The failure will be a + * sign of a severe problem that needs to be resolved. Still we don't + * want to fail the initialization completely because this may break + * kernel boot up since voltage regulators use I2C. Hence, we will + * emit a noisy warning on error, which won't stay unnoticed and + * won't hose machine entirely. + */ + err = reset_control_reset(i2c_dev->rst); + WARN_ON_ONCE(err); if (i2c_dev->is_dvc) tegra_dvc_init(i2c_dev); @@ -798,24 +619,33 @@ static int tegra_i2c_init(struct tegra_i2c_dev *i2c_dev, bool clk_reinit) if (i2c_dev->is_vi) tegra_i2c_vi_init(i2c_dev); - /* Make sure clock divisor programmed correctly */ - clk_divisor = FIELD_PREP(I2C_CLK_DIVISOR_HSMODE, - i2c_dev->hw->clk_divisor_hs_mode) | - FIELD_PREP(I2C_CLK_DIVISOR_STD_FAST_MODE, - i2c_dev->clk_divisor_non_hs_mode); - i2c_writel(i2c_dev, clk_divisor, I2C_CLK_DIVISOR); - - if (i2c_dev->bus_clk_rate > I2C_MAX_STANDARD_MODE_FREQ && - i2c_dev->bus_clk_rate <= I2C_MAX_FAST_MODE_PLUS_FREQ) { + switch (i2c_dev->bus_clk_rate) { + case I2C_MAX_STANDARD_MODE_FREQ + 1 ... I2C_MAX_FAST_MODE_PLUS_FREQ: + default: tlow = i2c_dev->hw->tlow_fast_fastplus_mode; thigh = i2c_dev->hw->thigh_fast_fastplus_mode; tsu_thd = i2c_dev->hw->setup_hold_time_fast_fast_plus_mode; - } else { + + if (i2c_dev->bus_clk_rate > I2C_MAX_FAST_MODE_FREQ) + non_hs_mode = i2c_dev->hw->clk_divisor_fast_plus_mode; + else + non_hs_mode = i2c_dev->hw->clk_divisor_fast_mode; + break; + + case 0 ... I2C_MAX_STANDARD_MODE_FREQ: tlow = i2c_dev->hw->tlow_std_mode; thigh = i2c_dev->hw->thigh_std_mode; tsu_thd = i2c_dev->hw->setup_hold_time_std_mode; + non_hs_mode = i2c_dev->hw->clk_divisor_std_mode; + break; } + /* make sure clock divisor programmed correctly */ + clk_divisor = FIELD_PREP(I2C_CLK_DIVISOR_HSMODE, + i2c_dev->hw->clk_divisor_hs_mode) | + FIELD_PREP(I2C_CLK_DIVISOR_STD_FAST_MODE, non_hs_mode); + i2c_writel(i2c_dev, clk_divisor, I2C_CLK_DIVISOR); + if (i2c_dev->hw->has_interface_timing_reg) { val = FIELD_PREP(I2C_INTERFACE_TIMING_THIGH, thigh) | FIELD_PREP(I2C_INTERFACE_TIMING_TLOW, tlow); @@ -823,22 +653,19 @@ static int tegra_i2c_init(struct tegra_i2c_dev *i2c_dev, bool clk_reinit) } /* - * configure setup and hold times only when tsu_thd is non-zero. - * otherwise, preserve the chip default values + * Configure setup and hold times only when tsu_thd is non-zero. + * Otherwise, preserve the chip default values. */ if (i2c_dev->hw->has_interface_timing_reg && tsu_thd) i2c_writel(i2c_dev, tsu_thd, I2C_INTERFACE_TIMING_1); - if (!clk_reinit) { - clk_multiplier = (tlow + thigh + 2); - clk_multiplier *= (i2c_dev->clk_divisor_non_hs_mode + 1); - err = clk_set_rate(i2c_dev->div_clk, - i2c_dev->bus_clk_rate * clk_multiplier); - if (err) { - dev_err(i2c_dev->dev, - "failed changing clock rate: %d\n", err); - return err; - } + clk_multiplier = (tlow + thigh + 2) * (non_hs_mode + 1); + + err = clk_set_rate(i2c_dev->div_clk, + i2c_dev->bus_clk_rate * clk_multiplier); + if (err) { + dev_err(i2c_dev->dev, "failed to set div-clk rate: %d\n", err); + return err; } if (!i2c_dev->is_dvc && !i2c_dev->is_vi) { @@ -854,7 +681,7 @@ static int tegra_i2c_init(struct tegra_i2c_dev *i2c_dev, bool clk_reinit) if (err) return err; - if (i2c_dev->is_multimaster_mode && i2c_dev->hw->has_slcg_override_reg) + if (i2c_dev->multimaster_mode && i2c_dev->hw->has_slcg_override_reg) i2c_writel(i2c_dev, I2C_MST_CORE_CLKEN_OVR, I2C_CLKEN_OVERRIDE); err = tegra_i2c_wait_for_config_load(i2c_dev); @@ -870,7 +697,7 @@ static int tegra_i2c_disable_packet_mode(struct tegra_i2c_dev *i2c_dev) /* * NACK interrupt is generated before the I2C controller generates - * the STOP condition on the bus. So wait for 2 clock periods + * the STOP condition on the bus. So, wait for 2 clock periods * before disabling the controller so that the STOP condition has * been delivered properly. */ @@ -883,16 +710,145 @@ static int tegra_i2c_disable_packet_mode(struct tegra_i2c_dev *i2c_dev) return tegra_i2c_wait_for_config_load(i2c_dev); } +static int tegra_i2c_empty_rx_fifo(struct tegra_i2c_dev *i2c_dev) +{ + size_t buf_remaining = i2c_dev->msg_buf_remaining; + unsigned int words_to_transfer, rx_fifo_avail; + u8 *buf = i2c_dev->msg_buf; + u32 val; + + /* + * Catch overflow due to message fully sent before the check for + * RX FIFO availability. + */ + if (WARN_ON_ONCE(!(i2c_dev->msg_buf_remaining))) + return -EINVAL; + + if (i2c_dev->hw->has_mst_fifo) { + val = i2c_readl(i2c_dev, I2C_MST_FIFO_STATUS); + rx_fifo_avail = FIELD_GET(I2C_MST_FIFO_STATUS_RX, val); + } else { + val = i2c_readl(i2c_dev, I2C_FIFO_STATUS); + rx_fifo_avail = FIELD_GET(I2C_FIFO_STATUS_RX, val); + } + + /* round down to exclude partial word at the end of buffer */ + words_to_transfer = buf_remaining / BYTES_PER_FIFO_WORD; + if (words_to_transfer > rx_fifo_avail) + words_to_transfer = rx_fifo_avail; + + i2c_readsl(i2c_dev, buf, I2C_RX_FIFO, words_to_transfer); + + buf += words_to_transfer * BYTES_PER_FIFO_WORD; + buf_remaining -= words_to_transfer * BYTES_PER_FIFO_WORD; + rx_fifo_avail -= words_to_transfer; + + /* + * If there is a partial word at the end of buffer, handle it + * manually to prevent overwriting past the end of buffer. + */ + if (rx_fifo_avail > 0 && buf_remaining > 0) { + /* + * buf_remaining > 3 check not needed as rx_fifo_avail == 0 + * when (words_to_transfer was > rx_fifo_avail) earlier + * in this function. + */ + val = i2c_readl(i2c_dev, I2C_RX_FIFO); + val = cpu_to_le32(val); + memcpy(buf, &val, buf_remaining); + buf_remaining = 0; + rx_fifo_avail--; + } + + /* RX FIFO must be drained, otherwise it's an Overflow case. */ + if (WARN_ON_ONCE(rx_fifo_avail)) + return -EINVAL; + + i2c_dev->msg_buf_remaining = buf_remaining; + i2c_dev->msg_buf = buf; + + return 0; +} + +static int tegra_i2c_fill_tx_fifo(struct tegra_i2c_dev *i2c_dev) +{ + size_t buf_remaining = i2c_dev->msg_buf_remaining; + unsigned int words_to_transfer, tx_fifo_avail; + u8 *buf = i2c_dev->msg_buf; + u32 val; + + if (i2c_dev->hw->has_mst_fifo) { + val = i2c_readl(i2c_dev, I2C_MST_FIFO_STATUS); + tx_fifo_avail = FIELD_GET(I2C_MST_FIFO_STATUS_TX, val); + } else { + val = i2c_readl(i2c_dev, I2C_FIFO_STATUS); + tx_fifo_avail = FIELD_GET(I2C_FIFO_STATUS_TX, val); + } + + /* round down to exclude partial word at the end of buffer */ + words_to_transfer = buf_remaining / BYTES_PER_FIFO_WORD; + + /* + * This hunk pushes 4 bytes at a time into the TX FIFO. + * + * It's very common to have < 4 bytes, hence there is no word + * to push if we have less than 4 bytes to transfer. + */ + if (words_to_transfer) { + if (words_to_transfer > tx_fifo_avail) + words_to_transfer = tx_fifo_avail; + + /* + * Update state before writing to FIFO. Note that this may + * cause us to finish writing all bytes (AKA buf_remaining + * goes to 0), hence we have a potential for an interrupt + * (PACKET_XFER_COMPLETE is not maskable), but GIC interrupt + * is disabled at this point. + */ + buf_remaining -= words_to_transfer * BYTES_PER_FIFO_WORD; + tx_fifo_avail -= words_to_transfer; + + i2c_dev->msg_buf_remaining = buf_remaining; + i2c_dev->msg_buf = buf + words_to_transfer * BYTES_PER_FIFO_WORD; + + i2c_writesl(i2c_dev, buf, I2C_TX_FIFO, words_to_transfer); + + buf += words_to_transfer * BYTES_PER_FIFO_WORD; + } + + /* + * If there is a partial word at the end of buffer, handle it manually + * to prevent reading past the end of buffer, which could cross a page + * boundary and fault. + */ + if (tx_fifo_avail > 0 && buf_remaining > 0) { + /* + * buf_remaining > 3 check not needed as tx_fifo_avail == 0 + * when (words_to_transfer was > tx_fifo_avail) earlier + * in this function for non-zero words_to_transfer. + */ + memcpy(&val, buf, buf_remaining); + val = le32_to_cpu(val); + + i2c_dev->msg_buf_remaining = 0; + i2c_dev->msg_buf = NULL; + + i2c_writel(i2c_dev, val, I2C_TX_FIFO); + } + + return 0; +} + static irqreturn_t tegra_i2c_isr(int irq, void *dev_id) { - u32 status; const u32 status_err = I2C_INT_NO_ACK | I2C_INT_ARBITRATION_LOST; struct tegra_i2c_dev *i2c_dev = dev_id; + u32 status; status = i2c_readl(i2c_dev, I2C_INT_STATUS); if (status == 0) { - dev_warn(i2c_dev->dev, "irq status 0 %08x %08x %08x\n", + dev_warn(i2c_dev->dev, "IRQ status 0 %08x %08x %08x\n", i2c_readl(i2c_dev, I2C_PACKET_TRANSFER_STATUS), i2c_readl(i2c_dev, I2C_STATUS), i2c_readl(i2c_dev, I2C_CNFG)); @@ -900,7 +856,7 @@ static irqreturn_t tegra_i2c_isr(int irq, void *dev_id) goto err; } - if (unlikely(status & status_err)) { + if (status & status_err) { tegra_i2c_disable_packet_mode(i2c_dev); if (status & I2C_INT_NO_ACK) i2c_dev->msg_err |= I2C_ERR_NO_ACK; @@ -910,13 +866,13 @@ static irqreturn_t tegra_i2c_isr(int irq, void *dev_id) } /* - * I2C transfer is terminated during the bus clear so skip + * I2C transfer is terminated during the bus clear, so skip * processing the other interrupts. */ if (i2c_dev->hw->supports_bus_clear && (status & I2C_INT_BUS_CLR_DONE)) goto err; - if (!i2c_dev->is_curr_dma_xfer) { + if (!i2c_dev->dma_mode) { if (i2c_dev->msg_read && (status & I2C_INT_RX_FIFO_DATA_REQ)) { if (tegra_i2c_empty_rx_fifo(i2c_dev)) { /* @@ -946,11 +902,12 @@ static irqreturn_t tegra_i2c_isr(int irq, void *dev_id) * During message read XFER_COMPLETE interrupt is triggered prior to * DMA completion and during message write XFER_COMPLETE interrupt is * triggered after DMA completion. - * PACKETS_XFER_COMPLETE indicates completion of all bytes of transfer. + * + * PACKETS_XFER_COMPLETE indicates completion of all bytes of transfer, * so forcing msg_buf_remaining to 0 in DMA mode. */ if (status & I2C_INT_PACKET_XFER_COMPLETE) { - if (i2c_dev->is_curr_dma_xfer) + if (i2c_dev->dma_mode) i2c_dev->msg_buf_remaining = 0; /* * Underflow error condition: XFER_COMPLETE before message @@ -964,17 +921,23 @@ static irqreturn_t tegra_i2c_isr(int irq, void *dev_id) } goto done; err: - /* An error occurred, mask all interrupts */ - tegra_i2c_mask_irq(i2c_dev, I2C_INT_NO_ACK | I2C_INT_ARBITRATION_LOST | - I2C_INT_PACKET_XFER_COMPLETE | I2C_INT_TX_FIFO_DATA_REQ | - I2C_INT_RX_FIFO_DATA_REQ); + /* mask all interrupts on error */ + tegra_i2c_mask_irq(i2c_dev, + I2C_INT_NO_ACK | + I2C_INT_ARBITRATION_LOST | + I2C_INT_PACKET_XFER_COMPLETE | + I2C_INT_TX_FIFO_DATA_REQ | + I2C_INT_RX_FIFO_DATA_REQ); + if (i2c_dev->hw->supports_bus_clear) tegra_i2c_mask_irq(i2c_dev, I2C_INT_BUS_CLR_DONE); + i2c_writel(i2c_dev, status, I2C_INT_STATUS); + if (i2c_dev->is_dvc) dvc_writel(i2c_dev, DVC_STATUS_I2C_DONE_INTR, DVC_STATUS); - if (i2c_dev->is_curr_dma_xfer) { + if (i2c_dev->dma_mode) { if (i2c_dev->msg_read) dmaengine_terminate_async(i2c_dev->rx_dma_chan); else @@ -991,19 +954,17 @@ static irqreturn_t tegra_i2c_isr(int irq, void *dev_id) static void tegra_i2c_config_fifo_trig(struct tegra_i2c_dev *i2c_dev, size_t len) { - u32 val, reg; - u8 dma_burst; struct dma_slave_config slv_config = {0}; + u32 val, reg, dma_burst, reg_offset; struct dma_chan *chan; - int ret; - unsigned long reg_offset; + int err; if (i2c_dev->hw->has_mst_fifo) reg = I2C_MST_FIFO_CONTROL; else reg = I2C_FIFO_CONTROL; - if (i2c_dev->is_curr_dma_xfer) { + if (i2c_dev->dma_mode) { if (len & 0xF) dma_burst = 1; else if (len & 0x10) @@ -1014,6 +975,7 @@ static void tegra_i2c_config_fifo_trig(struct tegra_i2c_dev *i2c_dev, if (i2c_dev->msg_read) { chan = i2c_dev->rx_dma_chan; reg_offset = tegra_i2c_reg_addr(i2c_dev, I2C_RX_FIFO); + slv_config.src_addr = i2c_dev->base_phys + reg_offset; slv_config.src_addr_width = DMA_SLAVE_BUSWIDTH_4_BYTES; slv_config.src_maxburst = dma_burst; @@ -1025,6 +987,7 @@ static void tegra_i2c_config_fifo_trig(struct tegra_i2c_dev *i2c_dev, } else { chan = i2c_dev->tx_dma_chan; reg_offset = tegra_i2c_reg_addr(i2c_dev, I2C_TX_FIFO); + slv_config.dst_addr = i2c_dev->base_phys + reg_offset; slv_config.dst_addr_width = DMA_SLAVE_BUSWIDTH_4_BYTES; slv_config.dst_maxburst = dma_burst; @@ -1036,13 +999,13 @@ static void tegra_i2c_config_fifo_trig(struct tegra_i2c_dev *i2c_dev, } slv_config.device_fc = true; - ret = dmaengine_slave_config(chan, &slv_config); - if (ret < 0) { - dev_err(i2c_dev->dev, "DMA slave config failed: %d\n", - ret); + err = dmaengine_slave_config(chan, &slv_config); + if (err) { + dev_err(i2c_dev->dev, "DMA config failed: %d\n", err); dev_err(i2c_dev->dev, "falling back to PIO\n"); + tegra_i2c_release_dma(i2c_dev); - i2c_dev->is_curr_dma_xfer = false; + i2c_dev->dma_mode = false; } else { goto out; } @@ -1058,10 +1021,9 @@ static void tegra_i2c_config_fifo_trig(struct tegra_i2c_dev *i2c_dev, i2c_writel(i2c_dev, val, reg); } -static unsigned long -tegra_i2c_poll_completion_timeout(struct tegra_i2c_dev *i2c_dev, - struct completion *complete, - unsigned int timeout_ms) +static unsigned long tegra_i2c_poll_completion(struct tegra_i2c_dev *i2c_dev, + struct completion *complete, + unsigned int timeout_ms) { ktime_t ktime = ktime_get(); ktime_t ktimeout = ktime_add_ms(ktime, timeout_ms); @@ -1085,16 +1047,14 @@ tegra_i2c_poll_completion_timeout(struct tegra_i2c_dev *i2c_dev, return 0; } -static unsigned long -tegra_i2c_wait_completion_timeout(struct tegra_i2c_dev *i2c_dev, - struct completion *complete, - unsigned int timeout_ms) +static unsigned long tegra_i2c_wait_completion(struct tegra_i2c_dev *i2c_dev, + struct completion *complete, + unsigned int timeout_ms) { unsigned long ret; - if (i2c_dev->is_curr_atomic_xfer) { - ret = tegra_i2c_poll_completion_timeout(i2c_dev, complete, - timeout_ms); + if (i2c_dev->atomic_mode) { + ret = tegra_i2c_poll_completion(i2c_dev, complete, timeout_ms); } else { enable_irq(i2c_dev->irq); ret = wait_for_completion_timeout(complete, @@ -1112,8 +1072,7 @@ tegra_i2c_wait_completion_timeout(struct tegra_i2c_dev *i2c_dev, * needs to be checked after timeout. */ if (ret == 0) - ret = tegra_i2c_poll_completion_timeout(i2c_dev, - complete, 0); + ret = tegra_i2c_poll_completion(i2c_dev, complete, 0); } return ret; @@ -1122,60 +1081,134 @@ tegra_i2c_wait_completion_timeout(struct tegra_i2c_dev *i2c_dev, static int tegra_i2c_issue_bus_clear(struct i2c_adapter *adap) { struct tegra_i2c_dev *i2c_dev = i2c_get_adapdata(adap); + u32 val, time_left; int err; - unsigned long time_left; - u32 reg; reinit_completion(&i2c_dev->msg_complete); - reg = FIELD_PREP(I2C_BC_SCLK_THRESHOLD, 9) | I2C_BC_STOP_COND | - I2C_BC_TERMINATE; - i2c_writel(i2c_dev, reg, I2C_BUS_CLEAR_CNFG); - if (i2c_dev->hw->has_config_load_reg) { - err = tegra_i2c_wait_for_config_load(i2c_dev); - if (err) - return err; - } - reg |= I2C_BC_ENABLE; - i2c_writel(i2c_dev, reg, I2C_BUS_CLEAR_CNFG); + val = FIELD_PREP(I2C_BC_SCLK_THRESHOLD, 9) | I2C_BC_STOP_COND | + I2C_BC_TERMINATE; + i2c_writel(i2c_dev, val, I2C_BUS_CLEAR_CNFG); + + err = tegra_i2c_wait_for_config_load(i2c_dev); + if (err) + return err; + + val |= I2C_BC_ENABLE; + i2c_writel(i2c_dev, val, I2C_BUS_CLEAR_CNFG); tegra_i2c_unmask_irq(i2c_dev, I2C_INT_BUS_CLR_DONE); - time_left = tegra_i2c_wait_completion_timeout( - i2c_dev, &i2c_dev->msg_complete, 50); + time_left = tegra_i2c_wait_completion(i2c_dev, &i2c_dev->msg_complete, 50); + tegra_i2c_mask_irq(i2c_dev, I2C_INT_BUS_CLR_DONE); + if (time_left == 0) { - dev_err(i2c_dev->dev, "timed out for bus clear\n"); + dev_err(i2c_dev->dev, "failed to clear bus\n"); return -ETIMEDOUT; } - reg = i2c_readl(i2c_dev, I2C_BUS_CLEAR_STATUS); - if (!(reg & I2C_BC_STATUS)) { - dev_err(i2c_dev->dev, - "un-recovered arbitration lost\n"); + val = i2c_readl(i2c_dev, I2C_BUS_CLEAR_STATUS); + if (!(val & I2C_BC_STATUS)) { + dev_err(i2c_dev->dev, "un-recovered arbitration lost\n"); return -EIO; } return -EAGAIN; } +static void tegra_i2c_push_packet_header(struct tegra_i2c_dev *i2c_dev, + struct i2c_msg *msg, + enum msg_end_type end_state) +{ + u32 *dma_buf = i2c_dev->dma_buf; + u32 packet_header; + + packet_header = FIELD_PREP(PACKET_HEADER0_HEADER_SIZE, 0) | + FIELD_PREP(PACKET_HEADER0_PROTOCOL, + PACKET_HEADER0_PROTOCOL_I2C) | + FIELD_PREP(PACKET_HEADER0_CONT_ID, i2c_dev->cont_id) | + FIELD_PREP(PACKET_HEADER0_PACKET_ID, 1); + + if (i2c_dev->dma_mode && !i2c_dev->msg_read) + *dma_buf++ = packet_header; + else + i2c_writel(i2c_dev, packet_header, I2C_TX_FIFO); + + packet_header = msg->len - 1; + + if (i2c_dev->dma_mode && !i2c_dev->msg_read) + *dma_buf++ = packet_header; + else + i2c_writel(i2c_dev, packet_header, I2C_TX_FIFO); + + packet_header = I2C_HEADER_IE_ENABLE; + + if (end_state == MSG_END_CONTINUE) + packet_header |= I2C_HEADER_CONTINUE_XFER; + else if (end_state == MSG_END_REPEAT_START) + packet_header |= I2C_HEADER_REPEAT_START; + + if (msg->flags & I2C_M_TEN) { + packet_header |= msg->addr; + packet_header |= I2C_HEADER_10BIT_ADDR; + } else { + packet_header |= msg->addr << I2C_HEADER_SLAVE_ADDR_SHIFT; + } + + if (msg->flags & I2C_M_IGNORE_NAK) + packet_header |= I2C_HEADER_CONT_ON_NAK; + + if (msg->flags & I2C_M_RD) + packet_header |= I2C_HEADER_READ; + + if (i2c_dev->dma_mode && !i2c_dev->msg_read) + *dma_buf++ = packet_header; + else + i2c_writel(i2c_dev, packet_header, I2C_TX_FIFO); +} + +static int tegra_i2c_error_recover(struct tegra_i2c_dev *i2c_dev, + struct i2c_msg *msg) +{ + if (i2c_dev->msg_err == I2C_ERR_NONE) + return 0; + + tegra_i2c_init(i2c_dev); + + /* start recovery upon arbitration loss in single master mode */ + if (i2c_dev->msg_err == I2C_ERR_ARBITRATION_LOST) { + if (!i2c_dev->multimaster_mode) + return i2c_recover_bus(&i2c_dev->adapter); + + return -EAGAIN; + } + + if (i2c_dev->msg_err == I2C_ERR_NO_ACK) { + if (msg->flags & I2C_M_IGNORE_NAK) + return 0; + + return -EREMOTEIO; + } + + return -EIO; +} + static int tegra_i2c_xfer_msg(struct tegra_i2c_dev *i2c_dev, struct i2c_msg *msg, enum msg_end_type end_state) { - u32 packet_header; - u32 int_mask; - unsigned long time_left; + unsigned long time_left, xfer_time = 100; size_t xfer_size; - u32 *buffer = NULL; - int err = 0; - bool dma; - u16 xfer_time = 100; + u32 int_mask; + int err; - tegra_i2c_flush_fifos(i2c_dev); + err = tegra_i2c_flush_fifos(i2c_dev); + if (err) + return err; i2c_dev->msg_buf = msg->buf; i2c_dev->msg_buf_remaining = msg->len; i2c_dev->msg_err = I2C_ERR_NONE; - i2c_dev->msg_read = (msg->flags & I2C_M_RD); + i2c_dev->msg_read = !!(msg->flags & I2C_M_RD); reinit_completion(&i2c_dev->msg_complete); if (i2c_dev->msg_read) @@ -1184,93 +1217,52 @@ static int tegra_i2c_xfer_msg(struct tegra_i2c_dev *i2c_dev, xfer_size = msg->len + I2C_PACKET_HEADER_SIZE; xfer_size = ALIGN(xfer_size, BYTES_PER_FIFO_WORD); - i2c_dev->is_curr_dma_xfer = (xfer_size > I2C_PIO_MODE_PREFERRED_LEN) && - i2c_dev->dma_buf && - !i2c_dev->is_curr_atomic_xfer; + + i2c_dev->dma_mode = xfer_size > I2C_PIO_MODE_PREFERRED_LEN && + i2c_dev->dma_buf && !i2c_dev->atomic_mode; + tegra_i2c_config_fifo_trig(i2c_dev, xfer_size); - dma = i2c_dev->is_curr_dma_xfer; + /* * Transfer time in mSec = Total bits / transfer rate * Total bits = 9 bits per byte (including ACK bit) + Start & stop bits */ xfer_time += DIV_ROUND_CLOSEST(((xfer_size * 9) + 2) * MSEC_PER_SEC, - i2c_dev->bus_clk_rate); + i2c_dev->bus_clk_rate); int_mask = I2C_INT_NO_ACK | I2C_INT_ARBITRATION_LOST; tegra_i2c_unmask_irq(i2c_dev, int_mask); - if (dma) { + + if (i2c_dev->dma_mode) { if (i2c_dev->msg_read) { dma_sync_single_for_device(i2c_dev->dev, i2c_dev->dma_phys, - xfer_size, - DMA_FROM_DEVICE); - err = tegra_i2c_dma_submit(i2c_dev, xfer_size); - if (err < 0) { - dev_err(i2c_dev->dev, - "starting RX DMA failed, err %d\n", - err); - return err; - } + xfer_size, DMA_FROM_DEVICE); + err = tegra_i2c_dma_submit(i2c_dev, xfer_size); + if (err) + return err; } else { dma_sync_single_for_cpu(i2c_dev->dev, i2c_dev->dma_phys, - xfer_size, - DMA_TO_DEVICE); - buffer = i2c_dev->dma_buf; + xfer_size, DMA_TO_DEVICE); } } - packet_header = FIELD_PREP(PACKET_HEADER0_HEADER_SIZE, 0) | - FIELD_PREP(PACKET_HEADER0_PROTOCOL, - PACKET_HEADER0_PROTOCOL_I2C) | - FIELD_PREP(PACKET_HEADER0_CONT_ID, i2c_dev->cont_id) | - FIELD_PREP(PACKET_HEADER0_PACKET_ID, 1); - if (dma && !i2c_dev->msg_read) - *buffer++ = packet_header; - else - i2c_writel(i2c_dev, packet_header, I2C_TX_FIFO); - - packet_header = msg->len - 1; - if (dma && !i2c_dev->msg_read) - *buffer++ = packet_header; - else - i2c_writel(i2c_dev, packet_header, I2C_TX_FIFO); - - packet_header = I2C_HEADER_IE_ENABLE; - if (end_state == MSG_END_CONTINUE) - packet_header |= I2C_HEADER_CONTINUE_XFER; - else if (end_state == MSG_END_REPEAT_START) - packet_header |= I2C_HEADER_REPEAT_START; - if (msg->flags & I2C_M_TEN) { - packet_header |= msg->addr; - packet_header |= I2C_HEADER_10BIT_ADDR; - } else { - packet_header |= msg->addr << I2C_HEADER_SLAVE_ADDR_SHIFT; - } - if (msg->flags & I2C_M_IGNORE_NAK) - packet_header |= I2C_HEADER_CONT_ON_NAK; - if (msg->flags & I2C_M_RD) - packet_header |= I2C_HEADER_READ; - if (dma && !i2c_dev->msg_read) - *buffer++ = packet_header; - else - i2c_writel(i2c_dev, packet_header, I2C_TX_FIFO); + tegra_i2c_push_packet_header(i2c_dev, msg, end_state); if (!i2c_dev->msg_read) { - if (dma) { - memcpy(buffer, msg->buf, msg->len); + if (i2c_dev->dma_mode) { + memcpy(i2c_dev->dma_buf + I2C_PACKET_HEADER_SIZE, + msg->buf, msg->len); + dma_sync_single_for_device(i2c_dev->dev, i2c_dev->dma_phys, - xfer_size, - DMA_TO_DEVICE); + xfer_size, DMA_TO_DEVICE); + err = tegra_i2c_dma_submit(i2c_dev, xfer_size); - if (err < 0) { - dev_err(i2c_dev->dev, - "starting TX DMA failed, err %d\n", - err); + if (err) return err; - } } else { tegra_i2c_fill_tx_fifo(i2c_dev); } @@ -1278,7 +1270,8 @@ static int tegra_i2c_xfer_msg(struct tegra_i2c_dev *i2c_dev, if (i2c_dev->hw->has_per_pkt_xfer_complete_irq) int_mask |= I2C_INT_PACKET_XFER_COMPLETE; - if (!dma) { + + if (!i2c_dev->dma_mode) { if (msg->flags & I2C_M_RD) int_mask |= I2C_INT_RX_FIFO_DATA_REQ; else if (i2c_dev->msg_buf_remaining) @@ -1286,12 +1279,13 @@ static int tegra_i2c_xfer_msg(struct tegra_i2c_dev *i2c_dev, } tegra_i2c_unmask_irq(i2c_dev, int_mask); - dev_dbg(i2c_dev->dev, "unmasked irq: %02x\n", + dev_dbg(i2c_dev->dev, "unmasked IRQ: %02x\n", i2c_readl(i2c_dev, I2C_INT_MASK)); - if (dma) { - time_left = tegra_i2c_wait_completion_timeout( - i2c_dev, &i2c_dev->dma_complete, xfer_time); + if (i2c_dev->dma_mode) { + time_left = tegra_i2c_wait_completion(i2c_dev, + &i2c_dev->dma_complete, + xfer_time); /* * Synchronize DMA first, since dmaengine_terminate_sync() @@ -1307,29 +1301,28 @@ static int tegra_i2c_xfer_msg(struct tegra_i2c_dev *i2c_dev, i2c_dev->tx_dma_chan); if (!time_left && !completion_done(&i2c_dev->dma_complete)) { - dev_err(i2c_dev->dev, "DMA transfer timeout\n"); - tegra_i2c_init(i2c_dev, true); + dev_err(i2c_dev->dev, "DMA transfer timed out\n"); + tegra_i2c_init(i2c_dev); return -ETIMEDOUT; } if (i2c_dev->msg_read && i2c_dev->msg_err == I2C_ERR_NONE) { dma_sync_single_for_cpu(i2c_dev->dev, i2c_dev->dma_phys, - xfer_size, - DMA_FROM_DEVICE); - memcpy(i2c_dev->msg_buf, i2c_dev->dma_buf, - msg->len); + xfer_size, DMA_FROM_DEVICE); + + memcpy(i2c_dev->msg_buf, i2c_dev->dma_buf, msg->len); } } - time_left = tegra_i2c_wait_completion_timeout( - i2c_dev, &i2c_dev->msg_complete, xfer_time); + time_left = tegra_i2c_wait_completion(i2c_dev, &i2c_dev->msg_complete, + xfer_time); tegra_i2c_mask_irq(i2c_dev, int_mask); if (time_left == 0) { - dev_err(i2c_dev->dev, "i2c transfer timed out\n"); - tegra_i2c_init(i2c_dev, true); + dev_err(i2c_dev->dev, "I2C transfer timed out\n"); + tegra_i2c_init(i2c_dev); return -ETIMEDOUT; } @@ -1337,37 +1330,25 @@ static int tegra_i2c_xfer_msg(struct tegra_i2c_dev *i2c_dev, time_left, completion_done(&i2c_dev->msg_complete), i2c_dev->msg_err); - i2c_dev->is_curr_dma_xfer = false; - if (likely(i2c_dev->msg_err == I2C_ERR_NONE)) - return 0; + i2c_dev->dma_mode = false; - tegra_i2c_init(i2c_dev, true); - /* start recovery upon arbitration loss in single master mode */ - if (i2c_dev->msg_err == I2C_ERR_ARBITRATION_LOST) { - if (!i2c_dev->is_multimaster_mode) - return i2c_recover_bus(&i2c_dev->adapter); - return -EAGAIN; - } + err = tegra_i2c_error_recover(i2c_dev, msg); + if (err) + return err; - if (i2c_dev->msg_err == I2C_ERR_NO_ACK) { - if (msg->flags & I2C_M_IGNORE_NAK) - return 0; - return -EREMOTEIO; - } - - return -EIO; + return 0; } static int tegra_i2c_xfer(struct i2c_adapter *adap, struct i2c_msg msgs[], int num) { struct tegra_i2c_dev *i2c_dev = i2c_get_adapdata(adap); - int i; - int ret; + int i, ret; ret = pm_runtime_get_sync(i2c_dev->dev); if (ret < 0) { dev_err(i2c_dev->dev, "runtime resume failed %d\n", ret); + pm_runtime_put_noidle(i2c_dev->dev); return ret; } @@ -1375,6 +1356,7 @@ static int tegra_i2c_xfer(struct i2c_adapter *adap, struct i2c_msg msgs[], enum msg_end_type end_type = MSG_END_STOP; if (i < (num - 1)) { + /* check whether follow up message is coming */ if (msgs[i + 1].flags & I2C_M_NOSTART) end_type = MSG_END_CONTINUE; else @@ -1396,9 +1378,9 @@ static int tegra_i2c_xfer_atomic(struct i2c_adapter *adap, struct tegra_i2c_dev *i2c_dev = i2c_get_adapdata(adap); int ret; - i2c_dev->is_curr_atomic_xfer = true; + i2c_dev->atomic_mode = true; ret = tegra_i2c_xfer(adap, msgs, num); - i2c_dev->is_curr_atomic_xfer = false; + i2c_dev->atomic_mode = false; return ret; } @@ -1411,24 +1393,10 @@ static u32 tegra_i2c_func(struct i2c_adapter *adap) if (i2c_dev->hw->has_continue_xfer_support) ret |= I2C_FUNC_NOSTART; + return ret; } -static void tegra_i2c_parse_dt(struct tegra_i2c_dev *i2c_dev) -{ - struct device_node *np = i2c_dev->dev->of_node; - int ret; - bool multi_mode; - - ret = of_property_read_u32(np, "clock-frequency", - &i2c_dev->bus_clk_rate); - if (ret) - i2c_dev->bus_clk_rate = I2C_MAX_STANDARD_MODE_FREQ; /* default clock rate */ - - multi_mode = of_property_read_bool(np, "multi-master"); - i2c_dev->is_multimaster_mode = multi_mode; -} - static const struct i2c_algorithm tegra_i2c_algo = { .master_xfer = tegra_i2c_xfer, .master_xfer_atomic = tegra_i2c_xfer_atomic, @@ -1454,7 +1422,6 @@ static struct i2c_bus_recovery_info tegra_i2c_recovery_info = { static const struct tegra_i2c_hw_feature tegra20_i2c_hw = { .has_continue_xfer_support = false, .has_per_pkt_xfer_complete_irq = false, - .has_single_clk_source = false, .clk_divisor_hs_mode = 3, .clk_divisor_std_mode = 0, .clk_divisor_fast_mode = 0, @@ -1479,7 +1446,6 @@ static const struct tegra_i2c_hw_feature tegra20_i2c_hw = { static const struct tegra_i2c_hw_feature tegra30_i2c_hw = { .has_continue_xfer_support = true, .has_per_pkt_xfer_complete_irq = false, - .has_single_clk_source = false, .clk_divisor_hs_mode = 3, .clk_divisor_std_mode = 0, .clk_divisor_fast_mode = 0, @@ -1504,7 +1470,6 @@ static const struct tegra_i2c_hw_feature tegra30_i2c_hw = { static const struct tegra_i2c_hw_feature tegra114_i2c_hw = { .has_continue_xfer_support = true, .has_per_pkt_xfer_complete_irq = true, - .has_single_clk_source = true, .clk_divisor_hs_mode = 1, .clk_divisor_std_mode = 0x19, .clk_divisor_fast_mode = 0x19, @@ -1529,7 +1494,6 @@ static const struct tegra_i2c_hw_feature tegra114_i2c_hw = { static const struct tegra_i2c_hw_feature tegra124_i2c_hw = { .has_continue_xfer_support = true, .has_per_pkt_xfer_complete_irq = true, - .has_single_clk_source = true, .clk_divisor_hs_mode = 1, .clk_divisor_std_mode = 0x19, .clk_divisor_fast_mode = 0x19, @@ -1554,7 +1518,6 @@ static const struct tegra_i2c_hw_feature tegra124_i2c_hw = { static const struct tegra_i2c_hw_feature tegra210_i2c_hw = { .has_continue_xfer_support = true, .has_per_pkt_xfer_complete_irq = true, - .has_single_clk_source = true, .clk_divisor_hs_mode = 1, .clk_divisor_std_mode = 0x19, .clk_divisor_fast_mode = 0x19, @@ -1579,7 +1542,6 @@ static const struct tegra_i2c_hw_feature tegra210_i2c_hw = { static const struct tegra_i2c_hw_feature tegra186_i2c_hw = { .has_continue_xfer_support = true, .has_per_pkt_xfer_complete_irq = true, - .has_single_clk_source = true, .clk_divisor_hs_mode = 1, .clk_divisor_std_mode = 0x16, .clk_divisor_fast_mode = 0x19, @@ -1604,7 +1566,6 @@ static const struct tegra_i2c_hw_feature tegra186_i2c_hw = { static const struct tegra_i2c_hw_feature tegra194_i2c_hw = { .has_continue_xfer_support = true, .has_per_pkt_xfer_complete_irq = true, - .has_single_clk_source = true, .clk_divisor_hs_mode = 1, .clk_divisor_std_mode = 0x4f, .clk_divisor_fast_mode = 0x3c, @@ -1626,7 +1587,6 @@ static const struct tegra_i2c_hw_feature tegra194_i2c_hw = { .has_interface_timing_reg = true, }; -/* Match table for of_platform binding */ static const struct of_device_id tegra_i2c_of_match[] = { { .compatible = "nvidia,tegra194-i2c", .data = &tegra194_i2c_hw, }, { .compatible = "nvidia,tegra186-i2c", .data = &tegra186_i2c_hw, }, @@ -1641,223 +1601,196 @@ static const struct of_device_id tegra_i2c_of_match[] = { }; MODULE_DEVICE_TABLE(of, tegra_i2c_of_match); -static int tegra_i2c_probe(struct platform_device *pdev) +static void tegra_i2c_parse_dt(struct tegra_i2c_dev *i2c_dev) +{ + struct device_node *np = i2c_dev->dev->of_node; + bool multi_mode; + int err; + + err = of_property_read_u32(np, "clock-frequency", + &i2c_dev->bus_clk_rate); + if (err) + i2c_dev->bus_clk_rate = I2C_MAX_STANDARD_MODE_FREQ; + + multi_mode = of_property_read_bool(np, "multi-master"); + i2c_dev->multimaster_mode = multi_mode; + + if (of_device_is_compatible(np, "nvidia,tegra20-i2c-dvc")) + i2c_dev->is_dvc = true; + + if (of_device_is_compatible(np, "nvidia,tegra210-i2c-vi")) + i2c_dev->is_vi = true; +} + +static int tegra_i2c_init_clocks(struct tegra_i2c_dev *i2c_dev) +{ + int err; + + i2c_dev->clocks[i2c_dev->nclocks++].id = "div-clk"; + + if (i2c_dev->hw == &tegra20_i2c_hw || i2c_dev->hw == &tegra30_i2c_hw) + i2c_dev->clocks[i2c_dev->nclocks++].id = "fast-clk"; + + if (i2c_dev->is_vi) + i2c_dev->clocks[i2c_dev->nclocks++].id = "slow"; + + err = devm_clk_bulk_get(i2c_dev->dev, i2c_dev->nclocks, + i2c_dev->clocks); + if (err) + return err; + + err = clk_bulk_prepare(i2c_dev->nclocks, i2c_dev->clocks); + if (err) + return err; + + i2c_dev->div_clk = i2c_dev->clocks[0].clk; + + if (!i2c_dev->multimaster_mode) + return 0; + + err = clk_enable(i2c_dev->div_clk); + if (err) { + dev_err(i2c_dev->dev, "failed to enable div-clk: %d\n", err); + goto unprepare_clocks; + } + + return 0; + +unprepare_clocks: + clk_bulk_unprepare(i2c_dev->nclocks, i2c_dev->clocks); + + return err; +} + +static void tegra_i2c_release_clocks(struct tegra_i2c_dev *i2c_dev) +{ + if (i2c_dev->multimaster_mode) + clk_disable(i2c_dev->div_clk); + + clk_bulk_unprepare(i2c_dev->nclocks, i2c_dev->clocks); +} + +static int tegra_i2c_init_hardware(struct tegra_i2c_dev *i2c_dev) { - struct device *dev = &pdev->dev; - struct tegra_i2c_dev *i2c_dev; - struct resource *res; - struct clk *div_clk; - struct clk *fast_clk; - void __iomem *base; - phys_addr_t base_phys; - int irq; int ret; - res = platform_get_resource(pdev, IORESOURCE_MEM, 0); - base_phys = res->start; - base = devm_ioremap_resource(&pdev->dev, res); - if (IS_ERR(base)) - return PTR_ERR(base); + ret = pm_runtime_get_sync(i2c_dev->dev); + if (ret < 0) + dev_err(i2c_dev->dev, "runtime resume failed: %d\n", ret); + else + ret = tegra_i2c_init(i2c_dev); - res = platform_get_resource(pdev, IORESOURCE_IRQ, 0); - if (!res) { - dev_err(&pdev->dev, "no irq resource\n"); - return -EINVAL; - } - irq = res->start; + pm_runtime_put(i2c_dev->dev); - div_clk = devm_clk_get(&pdev->dev, "div-clk"); - if (IS_ERR(div_clk)) { - if (PTR_ERR(div_clk) != -EPROBE_DEFER) - dev_err(&pdev->dev, "missing controller clock\n"); + return ret; +} - return PTR_ERR(div_clk); - } +static int tegra_i2c_probe(struct platform_device *pdev) +{ + struct tegra_i2c_dev *i2c_dev; + struct resource *res; + int err; i2c_dev = devm_kzalloc(&pdev->dev, sizeof(*i2c_dev), GFP_KERNEL); if (!i2c_dev) return -ENOMEM; - i2c_dev->base = base; - i2c_dev->base_phys = base_phys; - i2c_dev->div_clk = div_clk; - i2c_dev->adapter.algo = &tegra_i2c_algo; - i2c_dev->adapter.retries = 1; - i2c_dev->adapter.timeout = 6 * HZ; - i2c_dev->irq = irq; + platform_set_drvdata(pdev, i2c_dev); + + init_completion(&i2c_dev->msg_complete); + init_completion(&i2c_dev->dma_complete); + + i2c_dev->hw = of_device_get_match_data(&pdev->dev); i2c_dev->cont_id = pdev->id; i2c_dev->dev = &pdev->dev; - i2c_dev->rst = devm_reset_control_get_exclusive(&pdev->dev, "i2c"); + i2c_dev->base = devm_platform_get_and_ioremap_resource(pdev, 0, &res); + if (IS_ERR(i2c_dev->base)) + return PTR_ERR(i2c_dev->base); + + i2c_dev->base_phys = res->start; + + err = platform_get_irq(pdev, 0); + if (err < 0) + return err; + + i2c_dev->irq = err; + + /* interrupt will be enabled during of transfer time */ + irq_set_status_flags(i2c_dev->irq, IRQ_NOAUTOEN); + + err = devm_request_irq(i2c_dev->dev, i2c_dev->irq, tegra_i2c_isr, + IRQF_NO_SUSPEND, dev_name(i2c_dev->dev), + i2c_dev); + if (err) + return err; + + i2c_dev->rst = devm_reset_control_get_exclusive(i2c_dev->dev, "i2c"); if (IS_ERR(i2c_dev->rst)) { - dev_err(&pdev->dev, "missing controller reset\n"); + dev_err_probe(i2c_dev->dev, PTR_ERR(i2c_dev->rst), + "failed to get reset control\n"); return PTR_ERR(i2c_dev->rst); } tegra_i2c_parse_dt(i2c_dev); - i2c_dev->hw = of_device_get_match_data(&pdev->dev); - i2c_dev->is_dvc = of_device_is_compatible(pdev->dev.of_node, - "nvidia,tegra20-i2c-dvc"); - i2c_dev->is_vi = of_device_is_compatible(dev->of_node, - "nvidia,tegra210-i2c-vi"); - i2c_dev->adapter.quirks = i2c_dev->hw->quirks; - i2c_dev->dma_buf_size = i2c_dev->adapter.quirks->max_write_len + - I2C_PACKET_HEADER_SIZE; - init_completion(&i2c_dev->msg_complete); - init_completion(&i2c_dev->dma_complete); + err = tegra_i2c_init_clocks(i2c_dev); + if (err) + return err; - if (!i2c_dev->hw->has_single_clk_source) { - fast_clk = devm_clk_get(&pdev->dev, "fast-clk"); - if (IS_ERR(fast_clk)) { - dev_err(&pdev->dev, "missing fast clock\n"); - return PTR_ERR(fast_clk); - } - i2c_dev->fast_clk = fast_clk; - } - - if (i2c_dev->is_vi) { - i2c_dev->slow_clk = devm_clk_get(dev, "slow"); - if (IS_ERR(i2c_dev->slow_clk)) { - if (PTR_ERR(i2c_dev->slow_clk) != -EPROBE_DEFER) - dev_err(dev, "failed to get slow clock: %ld\n", - PTR_ERR(i2c_dev->slow_clk)); - - return PTR_ERR(i2c_dev->slow_clk); - } - } - - platform_set_drvdata(pdev, i2c_dev); - - ret = clk_prepare(i2c_dev->fast_clk); - if (ret < 0) { - dev_err(i2c_dev->dev, "Clock prepare failed %d\n", ret); - return ret; - } - - ret = clk_prepare(i2c_dev->slow_clk); - if (ret < 0) { - dev_err(dev, "failed to prepare slow clock: %d\n", ret); - goto unprepare_fast_clk; - } - - if (i2c_dev->bus_clk_rate > I2C_MAX_FAST_MODE_FREQ && - i2c_dev->bus_clk_rate <= I2C_MAX_FAST_MODE_PLUS_FREQ) - i2c_dev->clk_divisor_non_hs_mode = - i2c_dev->hw->clk_divisor_fast_plus_mode; - else if (i2c_dev->bus_clk_rate > I2C_MAX_STANDARD_MODE_FREQ && - i2c_dev->bus_clk_rate <= I2C_MAX_FAST_MODE_FREQ) - i2c_dev->clk_divisor_non_hs_mode = - i2c_dev->hw->clk_divisor_fast_mode; - else - i2c_dev->clk_divisor_non_hs_mode = - i2c_dev->hw->clk_divisor_std_mode; - - ret = clk_prepare(i2c_dev->div_clk); - if (ret < 0) { - dev_err(i2c_dev->dev, "Clock prepare failed %d\n", ret); - goto unprepare_slow_clk; - } + err = tegra_i2c_init_dma(i2c_dev); + if (err) + goto release_clocks; /* - * VI I2C is in VE power domain which is not always on and not - * an IRQ safe. So, IRQ safe device can't be attached to a non-IRQ - * safe domain as it prevents powering off the PM domain. - * Also, VI I2C device don't need to use runtime IRQ safe as it will - * not be used for atomic transfers. + * VI I2C is in VE power domain which is not always ON and not + * IRQ-safe. Thus, IRQ-safe device shouldn't be attached to a + * non IRQ-safe domain because this prevents powering off the power + * domain. + * + * VI I2C device shouldn't be marked as IRQ-safe because VI I2C won't + * be used for atomic transfers. */ if (!i2c_dev->is_vi) - pm_runtime_irq_safe(&pdev->dev); - pm_runtime_enable(&pdev->dev); - if (!pm_runtime_enabled(&pdev->dev)) { - ret = tegra_i2c_runtime_resume(&pdev->dev); - if (ret < 0) { - dev_err(&pdev->dev, "runtime resume failed\n"); - goto unprepare_div_clk; - } - } else { - ret = pm_runtime_get_sync(i2c_dev->dev); - if (ret < 0) { - dev_err(&pdev->dev, "runtime resume failed\n"); - goto disable_rpm; - } - } + pm_runtime_irq_safe(i2c_dev->dev); - if (i2c_dev->is_multimaster_mode) { - ret = clk_enable(i2c_dev->div_clk); - if (ret < 0) { - dev_err(i2c_dev->dev, "div_clk enable failed %d\n", - ret); - goto put_rpm; - } - } + pm_runtime_enable(i2c_dev->dev); + + err = tegra_i2c_init_hardware(i2c_dev); + if (err) + goto release_rpm; + + i2c_set_adapdata(&i2c_dev->adapter, i2c_dev); + i2c_dev->adapter.dev.of_node = i2c_dev->dev->of_node; + i2c_dev->adapter.dev.parent = i2c_dev->dev; + i2c_dev->adapter.retries = 1; + i2c_dev->adapter.timeout = 6 * HZ; + i2c_dev->adapter.quirks = i2c_dev->hw->quirks; + i2c_dev->adapter.owner = THIS_MODULE; + i2c_dev->adapter.class = I2C_CLASS_DEPRECATED; + i2c_dev->adapter.algo = &tegra_i2c_algo; + i2c_dev->adapter.nr = pdev->id; if (i2c_dev->hw->supports_bus_clear) i2c_dev->adapter.bus_recovery_info = &tegra_i2c_recovery_info; - ret = tegra_i2c_init_dma(i2c_dev); - if (ret < 0) - goto disable_div_clk; - - ret = tegra_i2c_init(i2c_dev, false); - if (ret) { - dev_err(&pdev->dev, "Failed to initialize i2c controller\n"); - goto release_dma; - } - - irq_set_status_flags(i2c_dev->irq, IRQ_NOAUTOEN); - - ret = devm_request_irq(&pdev->dev, i2c_dev->irq, tegra_i2c_isr, - IRQF_NO_SUSPEND, dev_name(&pdev->dev), i2c_dev); - if (ret) { - dev_err(&pdev->dev, "Failed to request irq %i\n", i2c_dev->irq); - goto release_dma; - } - - i2c_set_adapdata(&i2c_dev->adapter, i2c_dev); - i2c_dev->adapter.owner = THIS_MODULE; - i2c_dev->adapter.class = I2C_CLASS_DEPRECATED; - strlcpy(i2c_dev->adapter.name, dev_name(&pdev->dev), + strlcpy(i2c_dev->adapter.name, dev_name(i2c_dev->dev), sizeof(i2c_dev->adapter.name)); - i2c_dev->adapter.dev.parent = &pdev->dev; - i2c_dev->adapter.nr = pdev->id; - i2c_dev->adapter.dev.of_node = pdev->dev.of_node; - ret = i2c_add_numbered_adapter(&i2c_dev->adapter); - if (ret) - goto release_dma; - - pm_runtime_put(&pdev->dev); + err = i2c_add_numbered_adapter(&i2c_dev->adapter); + if (err) + goto release_rpm; return 0; -release_dma: +release_rpm: + pm_runtime_disable(i2c_dev->dev); + tegra_i2c_release_dma(i2c_dev); +release_clocks: + tegra_i2c_release_clocks(i2c_dev); -disable_div_clk: - if (i2c_dev->is_multimaster_mode) - clk_disable(i2c_dev->div_clk); - -put_rpm: - if (pm_runtime_enabled(&pdev->dev)) - pm_runtime_put_sync(&pdev->dev); - else - tegra_i2c_runtime_suspend(&pdev->dev); - -disable_rpm: - if (pm_runtime_enabled(&pdev->dev)) - pm_runtime_disable(&pdev->dev); - -unprepare_div_clk: - clk_unprepare(i2c_dev->div_clk); - -unprepare_slow_clk: - clk_unprepare(i2c_dev->slow_clk); - -unprepare_fast_clk: - clk_unprepare(i2c_dev->fast_clk); - - return ret; + return err; } static int tegra_i2c_remove(struct platform_device *pdev) @@ -1865,33 +1798,69 @@ static int tegra_i2c_remove(struct platform_device *pdev) struct tegra_i2c_dev *i2c_dev = platform_get_drvdata(pdev); i2c_del_adapter(&i2c_dev->adapter); - - if (i2c_dev->is_multimaster_mode) - clk_disable(i2c_dev->div_clk); - - pm_runtime_disable(&pdev->dev); - if (!pm_runtime_status_suspended(&pdev->dev)) - tegra_i2c_runtime_suspend(&pdev->dev); - - clk_unprepare(i2c_dev->div_clk); - clk_unprepare(i2c_dev->slow_clk); - clk_unprepare(i2c_dev->fast_clk); + pm_runtime_disable(i2c_dev->dev); tegra_i2c_release_dma(i2c_dev); + tegra_i2c_release_clocks(i2c_dev); + return 0; } +static int __maybe_unused tegra_i2c_runtime_resume(struct device *dev) +{ + struct tegra_i2c_dev *i2c_dev = dev_get_drvdata(dev); + int err; + + err = pinctrl_pm_select_default_state(dev); + if (err) + return err; + + err = clk_bulk_enable(i2c_dev->nclocks, i2c_dev->clocks); + if (err) + return err; + + /* + * VI I2C device is attached to VE power domain which goes through + * power ON/OFF during runtime PM resume/suspend, meaning that + * controller needs to be re-initialized after power ON. + */ + if (i2c_dev->is_vi) { + err = tegra_i2c_init(i2c_dev); + if (err) + goto disable_clocks; + } + + return 0; + +disable_clocks: + clk_bulk_disable(i2c_dev->nclocks, i2c_dev->clocks); + + return err; +} + +static int __maybe_unused tegra_i2c_runtime_suspend(struct device *dev) +{ + struct tegra_i2c_dev *i2c_dev = dev_get_drvdata(dev); + + clk_bulk_disable(i2c_dev->nclocks, i2c_dev->clocks); + + return pinctrl_pm_select_idle_state(dev); +} + static int __maybe_unused tegra_i2c_suspend(struct device *dev) { struct tegra_i2c_dev *i2c_dev = dev_get_drvdata(dev); - int err = 0; + int err; i2c_mark_adapter_suspended(&i2c_dev->adapter); - if (!pm_runtime_status_suspended(dev)) + if (!pm_runtime_status_suspended(dev)) { err = tegra_i2c_runtime_suspend(dev); + if (err) + return err; + } - return err; + return 0; } static int __maybe_unused tegra_i2c_resume(struct device *dev) @@ -1907,7 +1876,7 @@ static int __maybe_unused tegra_i2c_resume(struct device *dev) if (err) return err; - err = tegra_i2c_init(i2c_dev, false); + err = tegra_i2c_init(i2c_dev); if (err) return err; @@ -1934,17 +1903,16 @@ static const struct dev_pm_ops tegra_i2c_pm = { }; static struct platform_driver tegra_i2c_driver = { - .probe = tegra_i2c_probe, - .remove = tegra_i2c_remove, - .driver = { - .name = "tegra-i2c", + .probe = tegra_i2c_probe, + .remove = tegra_i2c_remove, + .driver = { + .name = "tegra-i2c", .of_match_table = tegra_i2c_of_match, - .pm = &tegra_i2c_pm, + .pm = &tegra_i2c_pm, }, }; - module_platform_driver(tegra_i2c_driver); -MODULE_DESCRIPTION("nVidia Tegra2 I2C Bus Controller driver"); +MODULE_DESCRIPTION("NVIDIA Tegra I2C Bus Controller driver"); MODULE_AUTHOR("Colin Cross"); MODULE_LICENSE("GPL v2"); diff --git a/drivers/i2c/busses/i2c-xiic.c b/drivers/i2c/busses/i2c-xiic.c index 90c1c362394d..087b2951942e 100644 --- a/drivers/i2c/busses/i2c-xiic.c +++ b/drivers/i2c/busses/i2c-xiic.c @@ -46,34 +46,36 @@ enum xiic_endian { /** * struct xiic_i2c - Internal representation of the XIIC I2C bus - * @dev: Pointer to device structure - * @base: Memory base of the HW registers - * @wait: Wait queue for callers - * @adap: Kernel adapter representation - * @tx_msg: Messages from above to be sent - * @lock: Mutual exclusion - * @tx_pos: Current pos in TX message - * @nmsgs: Number of messages in tx_msg - * @state: See STATE_ - * @rx_msg: Current RX message - * @rx_pos: Position within current RX message + * @dev: Pointer to device structure + * @base: Memory base of the HW registers + * @wait: Wait queue for callers + * @adap: Kernel adapter representation + * @tx_msg: Messages from above to be sent + * @lock: Mutual exclusion + * @tx_pos: Current pos in TX message + * @nmsgs: Number of messages in tx_msg + * @rx_msg: Current RX message + * @rx_pos: Position within current RX message * @endianness: big/little-endian byte order - * @clk: Pointer to AXI4-lite input clock + * @clk: Pointer to AXI4-lite input clock + * @state: See STATE_ + * @singlemaster: Indicates bus is single master */ struct xiic_i2c { - struct device *dev; - void __iomem *base; - wait_queue_head_t wait; - struct i2c_adapter adap; - struct i2c_msg *tx_msg; - struct mutex lock; - unsigned int tx_pos; - unsigned int nmsgs; - enum xilinx_i2c_state state; - struct i2c_msg *rx_msg; - int rx_pos; - enum xiic_endian endianness; + struct device *dev; + void __iomem *base; + wait_queue_head_t wait; + struct i2c_adapter adap; + struct i2c_msg *tx_msg; + struct mutex lock; + unsigned int tx_pos; + unsigned int nmsgs; + struct i2c_msg *rx_msg; + int rx_pos; + enum xiic_endian endianness; struct clk *clk; + enum xilinx_i2c_state state; + bool singlemaster; }; @@ -526,6 +528,15 @@ static int xiic_busy(struct xiic_i2c *i2c) if (i2c->tx_msg) return -EBUSY; + /* In single master mode bus can only be busy, when in use by this + * driver. If the register indicates bus being busy for some reason we + * should ignore it, since bus will never be released and i2c will be + * stuck forever. + */ + if (i2c->singlemaster) { + return 0; + } + /* for instance if previous transfer was terminated due to TX error * it might be that the bus is on it's way to become available * give it at most 3 ms to wake @@ -811,6 +822,9 @@ static int xiic_i2c_probe(struct platform_device *pdev) goto err_clk_dis; } + i2c->singlemaster = + of_property_read_bool(pdev->dev.of_node, "single-master"); + /* * Detect endianness * Try to reset the TX FIFO. Then check the EMPTY flag. If it is not diff --git a/drivers/i2c/i2c-slave-testunit.c b/drivers/i2c/i2c-slave-testunit.c new file mode 100644 index 000000000000..c288102de324 --- /dev/null +++ b/drivers/i2c/i2c-slave-testunit.c @@ -0,0 +1,175 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* + * I2C slave mode testunit + * + * Copyright (C) 2020 by Wolfram Sang, Sang Engineering + * Copyright (C) 2020 by Renesas Electronics Corporation + */ + +#include +#include +#include +#include +#include +#include +#include /* FIXME: is system_long_wq the best choice? */ + +#define TU_CUR_VERSION 0x01 + +enum testunit_cmds { + TU_CMD_READ_BYTES = 1, /* save 0 for ABORT, RESET or similar */ + TU_CMD_HOST_NOTIFY, + TU_NUM_CMDS +}; + +enum testunit_regs { + TU_REG_CMD, + TU_REG_DATAL, + TU_REG_DATAH, + TU_REG_DELAY, + TU_NUM_REGS +}; + +enum testunit_flags { + TU_FLAG_IN_PROCESS, +}; + +struct testunit_data { + unsigned long flags; + u8 regs[TU_NUM_REGS]; + u8 reg_idx; + struct i2c_client *client; + struct delayed_work worker; +}; + +static void i2c_slave_testunit_work(struct work_struct *work) +{ + struct testunit_data *tu = container_of(work, struct testunit_data, worker.work); + struct i2c_msg msg; + u8 msgbuf[256]; + int ret = 0; + + msg.addr = I2C_CLIENT_END; + msg.buf = msgbuf; + + switch (tu->regs[TU_REG_CMD]) { + case TU_CMD_READ_BYTES: + msg.addr = tu->regs[TU_REG_DATAL]; + msg.flags = I2C_M_RD; + msg.len = tu->regs[TU_REG_DATAH]; + break; + + case TU_CMD_HOST_NOTIFY: + msg.addr = 0x08; + msg.flags = 0; + msg.len = 3; + msgbuf[0] = tu->client->addr; + msgbuf[1] = tu->regs[TU_REG_DATAL]; + msgbuf[2] = tu->regs[TU_REG_DATAH]; + break; + + default: + break; + } + + if (msg.addr != I2C_CLIENT_END) { + ret = i2c_transfer(tu->client->adapter, &msg, 1); + /* convert '0 msgs transferred' to errno */ + ret = (ret == 0) ? -EIO : ret; + } + + if (ret < 0) + dev_err(&tu->client->dev, "CMD%02X failed (%d)\n", tu->regs[TU_REG_CMD], ret); + + clear_bit(TU_FLAG_IN_PROCESS, &tu->flags); +} + +static int i2c_slave_testunit_slave_cb(struct i2c_client *client, + enum i2c_slave_event event, u8 *val) +{ + struct testunit_data *tu = i2c_get_clientdata(client); + int ret = 0; + + switch (event) { + case I2C_SLAVE_WRITE_RECEIVED: + if (test_bit(TU_FLAG_IN_PROCESS, &tu->flags)) + return -EBUSY; + + if (tu->reg_idx < TU_NUM_REGS) + tu->regs[tu->reg_idx] = *val; + else + ret = -EMSGSIZE; + + if (tu->reg_idx <= TU_NUM_REGS) + tu->reg_idx++; + + /* TU_REG_CMD always written at this point */ + if (tu->regs[TU_REG_CMD] >= TU_NUM_CMDS) + ret = -EINVAL; + + break; + + case I2C_SLAVE_STOP: + if (tu->reg_idx == TU_NUM_REGS) { + set_bit(TU_FLAG_IN_PROCESS, &tu->flags); + queue_delayed_work(system_long_wq, &tu->worker, + msecs_to_jiffies(10 * tu->regs[TU_REG_DELAY])); + } + fallthrough; + + case I2C_SLAVE_WRITE_REQUESTED: + tu->reg_idx = 0; + break; + + case I2C_SLAVE_READ_REQUESTED: + case I2C_SLAVE_READ_PROCESSED: + *val = TU_CUR_VERSION; + break; + } + + return ret; +} + +static int i2c_slave_testunit_probe(struct i2c_client *client) +{ + struct testunit_data *tu; + + tu = devm_kzalloc(&client->dev, sizeof(struct testunit_data), GFP_KERNEL); + if (!tu) + return -ENOMEM; + + tu->client = client; + i2c_set_clientdata(client, tu); + INIT_DELAYED_WORK(&tu->worker, i2c_slave_testunit_work); + + return i2c_slave_register(client, i2c_slave_testunit_slave_cb); +}; + +static int i2c_slave_testunit_remove(struct i2c_client *client) +{ + struct testunit_data *tu = i2c_get_clientdata(client); + + cancel_delayed_work_sync(&tu->worker); + i2c_slave_unregister(client); + return 0; +} + +static const struct i2c_device_id i2c_slave_testunit_id[] = { + { "slave-testunit", 0 }, + { } +}; +MODULE_DEVICE_TABLE(i2c, i2c_slave_testunit_id); + +static struct i2c_driver i2c_slave_testunit_driver = { + .driver = { + .name = "i2c-slave-testunit", + }, + .probe_new = i2c_slave_testunit_probe, + .remove = i2c_slave_testunit_remove, + .id_table = i2c_slave_testunit_id, +}; +module_i2c_driver(i2c_slave_testunit_driver); + +MODULE_AUTHOR("Wolfram Sang "); +MODULE_DESCRIPTION("I2C slave mode test unit"); +MODULE_LICENSE("GPL v2"); diff --git a/drivers/i2c/i2c-smbus.c b/drivers/i2c/i2c-smbus.c index dc0108287ccf..d3d06e3b4f3b 100644 --- a/drivers/i2c/i2c-smbus.c +++ b/drivers/i2c/i2c-smbus.c @@ -197,6 +197,113 @@ EXPORT_SYMBOL_GPL(i2c_handle_smbus_alert); module_i2c_driver(smbalert_driver); +#if IS_ENABLED(CONFIG_I2C_SLAVE) +#define SMBUS_HOST_NOTIFY_LEN 3 +struct i2c_slave_host_notify_status { + u8 index; + u8 addr; +}; + +static int i2c_slave_host_notify_cb(struct i2c_client *client, + enum i2c_slave_event event, u8 *val) +{ + struct i2c_slave_host_notify_status *status = client->dev.platform_data; + + switch (event) { + case I2C_SLAVE_WRITE_RECEIVED: + /* We only retrieve the first byte received (addr) + * since there is currently no support to retrieve the data + * parameter from the client. + */ + if (status->index == 0) + status->addr = *val; + if (status->index < U8_MAX) + status->index++; + break; + case I2C_SLAVE_STOP: + if (status->index == SMBUS_HOST_NOTIFY_LEN) + i2c_handle_smbus_host_notify(client->adapter, + status->addr); + fallthrough; + case I2C_SLAVE_WRITE_REQUESTED: + status->index = 0; + break; + case I2C_SLAVE_READ_REQUESTED: + case I2C_SLAVE_READ_PROCESSED: + *val = 0xff; + break; + } + + return 0; +} + +/** + * i2c_new_slave_host_notify_device - get a client for SMBus host-notify support + * @adapter: the target adapter + * Context: can sleep + * + * Setup handling of the SMBus host-notify protocol on a given I2C bus segment. + * + * Handling is done by creating a device and its callback and handling data + * received via the SMBus host-notify address (0x8) + * + * This returns the client, which should be ultimately freed using + * i2c_free_slave_host_notify_device(); or an ERRPTR to indicate an error. + */ +struct i2c_client *i2c_new_slave_host_notify_device(struct i2c_adapter *adapter) +{ + struct i2c_board_info host_notify_board_info = { + I2C_BOARD_INFO("smbus_host_notify", 0x08), + .flags = I2C_CLIENT_SLAVE, + }; + struct i2c_slave_host_notify_status *status; + struct i2c_client *client; + int ret; + + status = kzalloc(sizeof(struct i2c_slave_host_notify_status), + GFP_KERNEL); + if (!status) + return ERR_PTR(-ENOMEM); + + host_notify_board_info.platform_data = status; + + client = i2c_new_client_device(adapter, &host_notify_board_info); + if (IS_ERR(client)) { + kfree(status); + return client; + } + + ret = i2c_slave_register(client, i2c_slave_host_notify_cb); + if (ret) { + i2c_unregister_device(client); + kfree(status); + return ERR_PTR(ret); + } + + return client; +} +EXPORT_SYMBOL_GPL(i2c_new_slave_host_notify_device); + +/** + * i2c_free_slave_host_notify_device - free the client for SMBus host-notify + * support + * @client: the client to free + * Context: can sleep + * + * Free the i2c_client allocated via i2c_new_slave_host_notify_device + */ +void i2c_free_slave_host_notify_device(struct i2c_client *client) +{ + if (IS_ERR_OR_NULL(client)) + return; + + i2c_slave_unregister(client); + kfree(client->dev.platform_data); + i2c_unregister_device(client); +} +EXPORT_SYMBOL_GPL(i2c_free_slave_host_notify_device); +#endif + /* * SPD is not part of SMBus but we include it here for convenience as the * target systems are the same. diff --git a/drivers/i2c/muxes/i2c-mux-gpmux.c b/drivers/i2c/muxes/i2c-mux-gpmux.c index f830535cff12..d3acd8d66c32 100644 --- a/drivers/i2c/muxes/i2c-mux-gpmux.c +++ b/drivers/i2c/muxes/i2c-mux-gpmux.c @@ -85,18 +85,14 @@ static int i2c_mux_probe(struct platform_device *pdev) return -ENOMEM; mux->control = devm_mux_control_get(dev, NULL); - if (IS_ERR(mux->control)) { - if (PTR_ERR(mux->control) != -EPROBE_DEFER) - dev_err(dev, "failed to get control-mux\n"); - return PTR_ERR(mux->control); - } + if (IS_ERR(mux->control)) + return dev_err_probe(dev, PTR_ERR(mux->control), + "failed to get control-mux\n"); parent = mux_parent_adapter(dev); - if (IS_ERR(parent)) { - if (PTR_ERR(parent) != -EPROBE_DEFER) - dev_err(dev, "failed to get i2c-parent adapter\n"); - return PTR_ERR(parent); - } + if (IS_ERR(parent)) + return dev_err_probe(dev, PTR_ERR(parent), + "failed to get i2c-parent adapter\n"); children = of_get_child_count(np); diff --git a/drivers/i2c/muxes/i2c-mux-reg.c b/drivers/i2c/muxes/i2c-mux-reg.c index b59a62f8d7a6..0e0679f65cf7 100644 --- a/drivers/i2c/muxes/i2c-mux-reg.c +++ b/drivers/i2c/muxes/i2c-mux-reg.c @@ -171,13 +171,9 @@ static int i2c_mux_reg_probe(struct platform_device *pdev) sizeof(mux->data)); } else { ret = i2c_mux_reg_probe_dt(mux, pdev); - if (ret == -EPROBE_DEFER) - return ret; - - if (ret < 0) { - dev_err(&pdev->dev, "Error parsing device tree"); - return ret; - } + if (ret < 0) + return dev_err_probe(&pdev->dev, ret, + "Error parsing device tree"); } parent = i2c_get_adapter(mux->data.parent); diff --git a/drivers/misc/eeprom/at24.c b/drivers/misc/eeprom/at24.c index 26a23abc053d..1c0a41803bb6 100644 --- a/drivers/misc/eeprom/at24.c +++ b/drivers/misc/eeprom/at24.c @@ -8,6 +8,7 @@ #include #include +#include #include #include #include @@ -89,6 +90,7 @@ struct at24_data { struct nvmem_device *nvmem; struct regulator *vcc_reg; + void (*read_post)(unsigned int off, char *buf, size_t count); /* * Some chips tie up multiple I2C addresses; dummy devices reserve @@ -121,6 +123,7 @@ MODULE_PARM_DESC(at24_write_timeout, "Time (in ms) to try writes (default 25)"); struct at24_chip_data { u32 byte_len; u8 flags; + void (*read_post)(unsigned int off, char *buf, size_t count); }; #define AT24_CHIP_DATA(_name, _len, _flags) \ @@ -128,6 +131,32 @@ struct at24_chip_data { .byte_len = _len, .flags = _flags, \ } +#define AT24_CHIP_DATA_CB(_name, _len, _flags, _read_post) \ + static const struct at24_chip_data _name = { \ + .byte_len = _len, .flags = _flags, \ + .read_post = _read_post, \ + } + +static void at24_read_post_vaio(unsigned int off, char *buf, size_t count) +{ + int i; + + if (capable(CAP_SYS_ADMIN)) + return; + + /* + * Hide VAIO private settings to regular users: + * - BIOS passwords: bytes 0x00 to 0x0f + * - UUID: bytes 0x10 to 0x1f + * - Serial number: 0xc0 to 0xdf + */ + for (i = 0; i < count; i++) { + if ((off + i <= 0x1f) || + (off + i >= 0xc0 && off + i <= 0xdf)) + buf[i] = 0; + } +} + /* needs 8 addresses as A0-A2 are ignored */ AT24_CHIP_DATA(at24_data_24c00, 128 / 8, AT24_FLAG_TAKE8ADDR); /* old variants can't be handled with this generic entry! */ @@ -144,6 +173,10 @@ AT24_CHIP_DATA(at24_data_24mac602, 64 / 8, /* spd is a 24c02 in memory DIMMs */ AT24_CHIP_DATA(at24_data_spd, 2048 / 8, AT24_FLAG_READONLY | AT24_FLAG_IRUGO); +/* 24c02_vaio is a 24c02 on some Sony laptops */ +AT24_CHIP_DATA_CB(at24_data_24c02_vaio, 2048 / 8, + AT24_FLAG_READONLY | AT24_FLAG_IRUGO, + at24_read_post_vaio); AT24_CHIP_DATA(at24_data_24c04, 4096 / 8, 0); AT24_CHIP_DATA(at24_data_24cs04, 16, AT24_FLAG_SERIAL | AT24_FLAG_READONLY); @@ -177,6 +210,7 @@ static const struct i2c_device_id at24_ids[] = { { "24mac402", (kernel_ulong_t)&at24_data_24mac402 }, { "24mac602", (kernel_ulong_t)&at24_data_24mac602 }, { "spd", (kernel_ulong_t)&at24_data_spd }, + { "24c02-vaio", (kernel_ulong_t)&at24_data_24c02_vaio }, { "24c04", (kernel_ulong_t)&at24_data_24c04 }, { "24cs04", (kernel_ulong_t)&at24_data_24cs04 }, { "24c08", (kernel_ulong_t)&at24_data_24c08 }, @@ -388,7 +422,7 @@ static int at24_read(void *priv, unsigned int off, void *val, size_t count) struct at24_data *at24; struct device *dev; char *buf = val; - int ret; + int i, ret; at24 = priv; dev = at24_base_client_dev(at24); @@ -411,22 +445,22 @@ static int at24_read(void *priv, unsigned int off, void *val, size_t count) */ mutex_lock(&at24->lock); - while (count) { - ret = at24_regmap_read(at24, buf, off, count); + for (i = 0; count; i += ret, count -= ret) { + ret = at24_regmap_read(at24, buf + i, off + i, count); if (ret < 0) { mutex_unlock(&at24->lock); pm_runtime_put(dev); return ret; } - buf += ret; - off += ret; - count -= ret; } mutex_unlock(&at24->lock); pm_runtime_put(dev); + if (unlikely(at24->read_post)) + at24->read_post(off, buf, i); + return 0; } @@ -654,6 +688,7 @@ static int at24_probe(struct i2c_client *client) at24->byte_len = byte_len; at24->page_size = page_size; at24->flags = flags; + at24->read_post = cdata->read_post; at24->num_addresses = num_addresses; at24->offset_adj = at24_get_offset_adj(flags, byte_len); at24->client[0].client = client; @@ -678,8 +713,30 @@ static int at24_probe(struct i2c_client *client) return err; } - nvmem_config.name = dev_name(dev); + /* + * If the 'label' property is not present for the AT24 EEPROM, + * then nvmem_config.id is initialised to NVMEM_DEVID_AUTO, + * and this will append the 'devid' to the name of the NVMEM + * device. This is purely legacy and the AT24 driver has always + * defaulted to this. However, if the 'label' property is + * present then this means that the name is specified by the + * firmware and this name should be used verbatim and so it is + * not necessary to append the 'devid'. + */ + if (device_property_present(dev, "label")) { + nvmem_config.id = NVMEM_DEVID_NONE; + err = device_property_read_string(dev, "label", + &nvmem_config.name); + if (err) + return err; + } else { + nvmem_config.id = NVMEM_DEVID_AUTO; + nvmem_config.name = dev_name(dev); + } + + nvmem_config.type = NVMEM_TYPE_EEPROM; nvmem_config.dev = dev; + nvmem_config.id = NVMEM_DEVID_AUTO; nvmem_config.read_only = !writable; nvmem_config.root_only = !(flags & AT24_FLAG_IRUGO); nvmem_config.owner = THIS_MODULE; diff --git a/drivers/misc/eeprom/eeprom.c b/drivers/misc/eeprom/eeprom.c index 226b5efa6a77..34fa385dfd4b 100644 --- a/drivers/misc/eeprom/eeprom.c +++ b/drivers/misc/eeprom/eeprom.c @@ -76,7 +76,7 @@ static ssize_t eeprom_read(struct file *filp, struct kobject *kobj, struct bin_attribute *bin_attr, char *buf, loff_t off, size_t count) { - struct i2c_client *client = to_i2c_client(kobj_to_dev(kobj)); + struct i2c_client *client = kobj_to_i2c_client(kobj); struct eeprom_data *data = i2c_get_clientdata(client); u8 slice; diff --git a/drivers/rtc/Kconfig b/drivers/rtc/Kconfig index 48c536acd777..65ad9d0b47ab 100644 --- a/drivers/rtc/Kconfig +++ b/drivers/rtc/Kconfig @@ -669,6 +669,16 @@ config RTC_DRV_RV3028 This driver can also be built as a module. If so, the module will be called rtc-rv3028. +config RTC_DRV_RV3032 + tristate "Micro Crystal RV3032" + select REGMAP_I2C + help + If you say yes here you get support for the Micro Crystal + RV3032. + + This driver can also be built as a module. If so, the module + will be called rtc-rv3032. + config RTC_DRV_RV8803 tristate "Micro Crystal RV8803, Epson RX8900" help diff --git a/drivers/rtc/Makefile b/drivers/rtc/Makefile index 880e08a409c3..bfb57464118d 100644 --- a/drivers/rtc/Makefile +++ b/drivers/rtc/Makefile @@ -141,6 +141,7 @@ obj-$(CONFIG_RTC_DRV_RS5C372) += rtc-rs5c372.o obj-$(CONFIG_RTC_DRV_RTD119X) += rtc-rtd119x.o obj-$(CONFIG_RTC_DRV_RV3028) += rtc-rv3028.o obj-$(CONFIG_RTC_DRV_RV3029C2) += rtc-rv3029c2.o +obj-$(CONFIG_RTC_DRV_RV3032) += rtc-rv3032.o obj-$(CONFIG_RTC_DRV_RV8803) += rtc-rv8803.o obj-$(CONFIG_RTC_DRV_RX4581) += rtc-rx4581.o obj-$(CONFIG_RTC_DRV_RX6110) += rtc-rx6110.o diff --git a/drivers/rtc/rtc-cmos.c b/drivers/rtc/rtc-cmos.c index bcc96ab7793f..c633319cdb91 100644 --- a/drivers/rtc/rtc-cmos.c +++ b/drivers/rtc/rtc-cmos.c @@ -1006,6 +1006,7 @@ static int cmos_suspend(struct device *dev) enable_irq_wake(cmos->irq); } + memset(&cmos->saved_wkalrm, 0, sizeof(struct rtc_wkalrm)); cmos_read_alarm(dev, &cmos->saved_wkalrm); dev_dbg(dev, "suspend%s, ctrl %02x\n", @@ -1054,6 +1055,7 @@ static void cmos_check_wkalrm(struct device *dev) return; } + memset(¤t_alarm, 0, sizeof(struct rtc_wkalrm)); cmos_read_alarm(dev, ¤t_alarm); t_current_expires = rtc_tm_to_time64(¤t_alarm.time); t_saved_expires = rtc_tm_to_time64(&cmos->saved_wkalrm.time); diff --git a/drivers/rtc/rtc-ds1307.c b/drivers/rtc/rtc-ds1307.c index 54c85cdd019d..9f5f54ca039d 100644 --- a/drivers/rtc/rtc-ds1307.c +++ b/drivers/rtc/rtc-ds1307.c @@ -122,6 +122,9 @@ enum ds_type { #define RX8130_REG_FLAG_AF BIT(3) #define RX8130_REG_CONTROL0 0x1e #define RX8130_REG_CONTROL0_AIE BIT(3) +#define RX8130_REG_CONTROL1 0x1f +#define RX8130_REG_CONTROL1_INIEN BIT(4) +#define RX8130_REG_CONTROL1_CHGEN BIT(5) #define MCP794XX_REG_CONTROL 0x07 # define MCP794XX_BIT_ALM0_EN 0x10 @@ -153,6 +156,7 @@ enum ds_type { #define DS1388_REG_CONTROL 0x0c # define DS1388_BIT_RST BIT(0) # define DS1388_BIT_WDE BIT(1) +# define DS1388_BIT_nEOSC BIT(7) /* negative offset step is -2.034ppm */ #define M41TXX_NEG_OFFSET_STEP_PPB 2034 @@ -190,6 +194,15 @@ struct chip_desc { u16 trickle_charger_reg; u8 (*do_trickle_setup)(struct ds1307 *, u32, bool); + /* Does the RTC require trickle-resistor-ohms to select the value of + * the resistor between Vcc and Vbackup? + */ + bool requires_trickle_resistor; + /* Some RTC's batteries and supercaps were charged by default, others + * allow charging but were not configured previously to do so. + * Remember this behavior to stay backwards compatible. + */ + bool charge_default; }; static const struct chip_desc chips[last_ds_type]; @@ -352,6 +365,10 @@ static int ds1307_set_time(struct device *dev, struct rtc_time *t) regmap_update_bits(ds1307->regmap, DS1340_REG_FLAG, DS1340_BIT_OSF, 0); break; + case ds_1388: + regmap_update_bits(ds1307->regmap, DS1388_REG_FLAG, + DS1388_BIT_OSF, 0); + break; case mcp794xx: /* * these bits were cleared when preparing the date/time @@ -507,6 +524,8 @@ static u8 do_trickle_setup_ds1339(struct ds1307 *ds1307, u32 ohms, bool diode) u8 setup = (diode) ? DS1307_TRICKLE_CHARGER_DIODE : DS1307_TRICKLE_CHARGER_NO_DIODE; + setup |= DS13XX_TRICKLE_CHARGER_MAGIC; + switch (ohms) { case 250: setup |= DS1307_TRICKLE_CHARGER_250_OHM; @@ -525,6 +544,16 @@ static u8 do_trickle_setup_ds1339(struct ds1307 *ds1307, u32 ohms, bool diode) return setup; } +static u8 do_trickle_setup_rx8130(struct ds1307 *ds1307, u32 ohms, bool diode) +{ + /* make sure that the backup battery is enabled */ + u8 setup = RX8130_REG_CONTROL1_INIEN; + if (diode) + setup |= RX8130_REG_CONTROL1_CHGEN; + + return setup; +} + static irqreturn_t rx8130_irq(int irq, void *dev_id) { struct ds1307 *ds1307 = dev_id; @@ -979,6 +1008,8 @@ static const struct chip_desc chips[last_ds_type] = { .bbsqi_bit = DS1339_BIT_BBSQI, .trickle_charger_reg = 0x10, .do_trickle_setup = &do_trickle_setup_ds1339, + .requires_trickle_resistor = true, + .charge_default = true, }, [ds_1340] = { .century_reg = DS1307_REG_HOUR, @@ -986,6 +1017,8 @@ static const struct chip_desc chips[last_ds_type] = { .century_bit = DS1340_BIT_CENTURY, .do_trickle_setup = &do_trickle_setup_ds1339, .trickle_charger_reg = 0x08, + .requires_trickle_resistor = true, + .charge_default = true, }, [ds_1341] = { .century_reg = DS1307_REG_MONTH, @@ -1009,6 +1042,8 @@ static const struct chip_desc chips[last_ds_type] = { .offset = 0x10, .irq_handler = rx8130_irq, .rtc_ops = &rx8130_rtc_ops, + .trickle_charger_reg = RX8130_REG_CONTROL1, + .do_trickle_setup = &do_trickle_setup_rx8130, }, [m41t0] = { .rtc_ops = &m41txx_rtc_ops, @@ -1293,18 +1328,37 @@ static int ds1307_nvram_write(void *priv, unsigned int offset, void *val, static u8 ds1307_trickle_init(struct ds1307 *ds1307, const struct chip_desc *chip) { - u32 ohms; - bool diode = true; + u32 ohms, chargeable; + bool diode = chip->charge_default; if (!chip->do_trickle_setup) return 0; if (device_property_read_u32(ds1307->dev, "trickle-resistor-ohms", - &ohms)) + &ohms) && chip->requires_trickle_resistor) return 0; - if (device_property_read_bool(ds1307->dev, "trickle-diode-disable")) + /* aux-voltage-chargeable takes precedence over the deprecated + * trickle-diode-disable + */ + if (!device_property_read_u32(ds1307->dev, "aux-voltage-chargeable", + &chargeable)) { + switch (chargeable) { + case 0: + diode = false; + break; + case 1: + diode = true; + break; + default: + dev_warn(ds1307->dev, + "unsupported aux-voltage-chargeable value\n"); + break; + } + } else if (device_property_read_bool(ds1307->dev, + "trickle-diode-disable")) { diode = false; + } return chip->do_trickle_setup(ds1307, ohms, diode); } @@ -1758,7 +1812,6 @@ static int ds1307_probe(struct i2c_client *client, trickle_charger_setup = pdata->trickle_charger_setup; if (trickle_charger_setup && chip->trickle_charger_reg) { - trickle_charger_setup |= DS13XX_TRICKLE_CHARGER_MAGIC; dev_dbg(ds1307->dev, "writing trickle charger info 0x%x to 0x%x\n", trickle_charger_setup, chip->trickle_charger_reg); @@ -1881,6 +1934,19 @@ static int ds1307_probe(struct i2c_client *client, DS1307_REG_HOUR << 4 | 0x08, hour); } break; + case ds_1388: + err = regmap_read(ds1307->regmap, DS1388_REG_CONTROL, &tmp); + if (err) { + dev_dbg(ds1307->dev, "read error %d\n", err); + goto exit; + } + + /* oscillator off? turn it on, so clock can tick. */ + if (tmp & DS1388_BIT_nEOSC) { + tmp &= ~DS1388_BIT_nEOSC; + regmap_write(ds1307->regmap, DS1388_REG_CONTROL, tmp); + } + break; default: break; } diff --git a/drivers/rtc/rtc-ds1685.c b/drivers/rtc/rtc-ds1685.c index 56c670af2e50..dfbd7b88b2b9 100644 --- a/drivers/rtc/rtc-ds1685.c +++ b/drivers/rtc/rtc-ds1685.c @@ -193,12 +193,12 @@ ds1685_rtc_begin_data_access(struct ds1685_priv *rtc) rtc->write(rtc, RTC_CTRL_B, (rtc->read(rtc, RTC_CTRL_B) | RTC_CTRL_B_SET)); + /* Switch to Bank 1 */ + ds1685_rtc_switch_to_bank1(rtc); + /* Read Ext Ctrl 4A and check the INCR bit to avoid a lockout. */ while (rtc->read(rtc, RTC_EXT_CTRL_4A) & RTC_CTRL_4A_INCR) cpu_relax(); - - /* Switch to Bank 1 */ - ds1685_rtc_switch_to_bank1(rtc); } /** @@ -213,7 +213,7 @@ static inline void ds1685_rtc_end_data_access(struct ds1685_priv *rtc) { /* Switch back to Bank 0 */ - ds1685_rtc_switch_to_bank1(rtc); + ds1685_rtc_switch_to_bank0(rtc); /* Clear the SET bit in Ctrl B */ rtc->write(rtc, RTC_CTRL_B, diff --git a/drivers/rtc/rtc-fsl-ftm-alarm.c b/drivers/rtc/rtc-fsl-ftm-alarm.c index 68f0a1801a2e..48d3b38ea348 100644 --- a/drivers/rtc/rtc-fsl-ftm-alarm.c +++ b/drivers/rtc/rtc-fsl-ftm-alarm.c @@ -3,7 +3,7 @@ * Freescale FlexTimer Module (FTM) alarm device driver. * * Copyright 2014 Freescale Semiconductor, Inc. - * Copyright 2019 NXP + * Copyright 2019-2020 NXP * */ @@ -312,7 +312,7 @@ static const struct of_device_id ftm_rtc_match[] = { }; static const struct acpi_device_id ftm_imx_acpi_ids[] = { - {"NXP0011",}, + {"NXP0014",}, { } }; MODULE_DEVICE_TABLE(acpi, ftm_imx_acpi_ids); diff --git a/drivers/rtc/rtc-meson-vrtc.c b/drivers/rtc/rtc-meson-vrtc.c index 89e5ba0dae69..e6bd0808a092 100644 --- a/drivers/rtc/rtc-meson-vrtc.c +++ b/drivers/rtc/rtc-meson-vrtc.c @@ -65,7 +65,6 @@ static const struct rtc_class_ops meson_vrtc_ops = { static int meson_vrtc_probe(struct platform_device *pdev) { struct meson_vrtc_data *vrtc; - int ret; vrtc = devm_kzalloc(&pdev->dev, sizeof(*vrtc), GFP_KERNEL); if (!vrtc) @@ -84,11 +83,7 @@ static int meson_vrtc_probe(struct platform_device *pdev) return PTR_ERR(vrtc->rtc); vrtc->rtc->ops = &meson_vrtc_ops; - ret = rtc_register_device(vrtc->rtc); - if (ret) - return ret; - - return 0; + return rtc_register_device(vrtc->rtc); } static int __maybe_unused meson_vrtc_suspend(struct device *dev) diff --git a/drivers/rtc/rtc-mt6397.c b/drivers/rtc/rtc-mt6397.c index f8b1353777ba..1894aded4c85 100644 --- a/drivers/rtc/rtc-mt6397.c +++ b/drivers/rtc/rtc-mt6397.c @@ -31,7 +31,8 @@ static int mtk_rtc_write_trigger(struct mt6397_rtc *rtc) MTK_RTC_POLL_DELAY_US, MTK_RTC_POLL_TIMEOUT); if (ret < 0) - dev_err(rtc->dev, "failed to write WRTGE: %d\n", ret); + dev_err(rtc->rtc_dev->dev.parent, + "failed to write WRTGR: %d\n", ret); return ret; } diff --git a/drivers/rtc/rtc-pcf2127.c b/drivers/rtc/rtc-pcf2127.c index ed6316992cbb..07a5630ec841 100644 --- a/drivers/rtc/rtc-pcf2127.c +++ b/drivers/rtc/rtc-pcf2127.c @@ -559,7 +559,7 @@ static int pcf2127_probe(struct device *dev, struct regmap *regmap, pcf2127->rtc->set_start_time = true; /* Sets actual start to 1970 */ pcf2127->rtc->uie_unsupported = 1; - if (alarm_irq >= 0) { + if (alarm_irq > 0) { ret = devm_request_threaded_irq(dev, alarm_irq, NULL, pcf2127_rtc_irq, IRQF_TRIGGER_LOW | IRQF_ONESHOT, @@ -570,7 +570,7 @@ static int pcf2127_probe(struct device *dev, struct regmap *regmap, } } - if (alarm_irq >= 0 || device_property_read_bool(dev, "wakeup-source")) { + if (alarm_irq > 0 || device_property_read_bool(dev, "wakeup-source")) { device_init_wakeup(dev, true); pcf2127->rtc->ops = &pcf2127_rtc_alrm_ops; } diff --git a/drivers/rtc/rtc-r9701.c b/drivers/rtc/rtc-r9701.c index 84f0d25259ae..7ceb968f0e44 100644 --- a/drivers/rtc/rtc-r9701.c +++ b/drivers/rtc/rtc-r9701.c @@ -75,8 +75,6 @@ static int r9701_get_datetime(struct device *dev, struct rtc_time *dt) if (ret) return ret; - memset(dt, 0, sizeof(*dt)); - dt->tm_sec = bcd2bin(buf[0]); /* RSECCNT */ dt->tm_min = bcd2bin(buf[1]); /* RMINCNT */ dt->tm_hour = bcd2bin(buf[2]); /* RHRCNT */ @@ -85,20 +83,12 @@ static int r9701_get_datetime(struct device *dev, struct rtc_time *dt) dt->tm_mon = bcd2bin(buf[4]) - 1; /* RMONCNT */ dt->tm_year = bcd2bin(buf[5]) + 100; /* RYRCNT */ - /* the rtc device may contain illegal values on power up - * according to the data sheet. make sure they are valid. - */ - return 0; } static int r9701_set_datetime(struct device *dev, struct rtc_time *dt) { - int ret, year; - - year = dt->tm_year + 1900; - if (year >= 2100 || year < 2000) - return -EINVAL; + int ret; ret = write_reg(dev, RHRCNT, bin2bcd(dt->tm_hour)); ret = ret ? ret : write_reg(dev, RMINCNT, bin2bcd(dt->tm_min)); @@ -106,7 +96,6 @@ static int r9701_set_datetime(struct device *dev, struct rtc_time *dt) ret = ret ? ret : write_reg(dev, RDAYCNT, bin2bcd(dt->tm_mday)); ret = ret ? ret : write_reg(dev, RMONCNT, bin2bcd(dt->tm_mon + 1)); ret = ret ? ret : write_reg(dev, RYRCNT, bin2bcd(dt->tm_year - 100)); - ret = ret ? ret : write_reg(dev, RWKCNT, 1 << dt->tm_wday); return ret; } @@ -119,7 +108,6 @@ static const struct rtc_class_ops r9701_rtc_ops = { static int r9701_probe(struct spi_device *spi) { struct rtc_device *rtc; - struct rtc_time dt; unsigned char tmp; int res; @@ -130,35 +118,16 @@ static int r9701_probe(struct spi_device *spi) return -ENODEV; } - /* - * The device seems to be present. Now check if the registers - * contain invalid values. If so, try to write a default date: - * 2000/1/1 00:00:00 - */ - if (r9701_get_datetime(&spi->dev, &dt)) { - dev_info(&spi->dev, "trying to repair invalid date/time\n"); - dt.tm_sec = 0; - dt.tm_min = 0; - dt.tm_hour = 0; - dt.tm_mday = 1; - dt.tm_mon = 0; - dt.tm_year = 100; - - if (r9701_set_datetime(&spi->dev, &dt) || - r9701_get_datetime(&spi->dev, &dt)) { - dev_err(&spi->dev, "cannot repair RTC register\n"); - return -ENODEV; - } - } - - rtc = devm_rtc_device_register(&spi->dev, "r9701", - &r9701_rtc_ops, THIS_MODULE); + rtc = devm_rtc_allocate_device(&spi->dev); if (IS_ERR(rtc)) return PTR_ERR(rtc); spi_set_drvdata(spi, rtc); + rtc->ops = &r9701_rtc_ops; + rtc->range_min = RTC_TIMESTAMP_BEGIN_2000; + rtc->range_max = RTC_TIMESTAMP_END_2099; - return 0; + return rtc_register_device(rtc); } static struct spi_driver r9701_driver = { diff --git a/drivers/rtc/rtc-rs5c313.c b/drivers/rtc/rtc-rs5c313.c index 89f38e3e917d..e98f85f34206 100644 --- a/drivers/rtc/rtc-rs5c313.c +++ b/drivers/rtc/rtc-rs5c313.c @@ -366,15 +366,15 @@ static const struct rtc_class_ops rs5c313_rtc_ops = { static int rs5c313_rtc_probe(struct platform_device *pdev) { - struct rtc_device *rtc = devm_rtc_device_register(&pdev->dev, "rs5c313", - &rs5c313_rtc_ops, THIS_MODULE); + struct rtc_device *rtc; - if (IS_ERR(rtc)) - return PTR_ERR(rtc); + rs5c313_init_port(); + rs5c313_check_xstp_bit(); - platform_set_drvdata(pdev, rtc); + rtc = devm_rtc_device_register(&pdev->dev, "rs5c313", &rs5c313_rtc_ops, + THIS_MODULE); - return 0; + return PTR_ERR_OR_ZERO(rtc); } static struct platform_driver rs5c313_rtc_platform_driver = { @@ -384,27 +384,7 @@ static struct platform_driver rs5c313_rtc_platform_driver = { .probe = rs5c313_rtc_probe, }; -static int __init rs5c313_rtc_init(void) -{ - int err; - - err = platform_driver_register(&rs5c313_rtc_platform_driver); - if (err) - return err; - - rs5c313_init_port(); - rs5c313_check_xstp_bit(); - - return 0; -} - -static void __exit rs5c313_rtc_exit(void) -{ - platform_driver_unregister(&rs5c313_rtc_platform_driver); -} - -module_init(rs5c313_rtc_init); -module_exit(rs5c313_rtc_exit); +module_platform_driver(rs5c313_rtc_platform_driver); MODULE_AUTHOR("kogiidena , Nobuhiro Iwamatsu "); MODULE_DESCRIPTION("Ricoh RS5C313 RTC device driver"); diff --git a/drivers/rtc/rtc-rv3028.c b/drivers/rtc/rtc-rv3028.c index ec84db0b3d7a..fa226f0fe67d 100644 --- a/drivers/rtc/rtc-rv3028.c +++ b/drivers/rtc/rtc-rv3028.c @@ -71,6 +71,7 @@ #define RV3028_EVT_CTRL_TSR BIT(2) +#define RV3028_EEPROM_CMD_UPDATE 0x11 #define RV3028_EEPROM_CMD_WRITE 0x21 #define RV3028_EEPROM_CMD_READ 0x22 @@ -95,7 +96,7 @@ struct rv3028_data { #endif }; -static u16 rv3028_trickle_resistors[] = {1000, 3000, 6000, 11000}; +static u16 rv3028_trickle_resistors[] = {3000, 5000, 9000, 15000}; static ssize_t timestamp0_store(struct device *dev, struct device_attribute *attr, @@ -171,6 +172,88 @@ static const struct attribute_group rv3028_attr_group = { .attrs = rv3028_attrs, }; +static int rv3028_exit_eerd(struct rv3028_data *rv3028, u32 eerd) +{ + if (eerd) + return 0; + + return regmap_update_bits(rv3028->regmap, RV3028_CTRL1, RV3028_CTRL1_EERD, 0); +} + +static int rv3028_enter_eerd(struct rv3028_data *rv3028, u32 *eerd) +{ + u32 ctrl1, status; + int ret; + + ret = regmap_read(rv3028->regmap, RV3028_CTRL1, &ctrl1); + if (ret) + return ret; + + *eerd = ctrl1 & RV3028_CTRL1_EERD; + if (*eerd) + return 0; + + ret = regmap_update_bits(rv3028->regmap, RV3028_CTRL1, + RV3028_CTRL1_EERD, RV3028_CTRL1_EERD); + if (ret) + return ret; + + ret = regmap_read_poll_timeout(rv3028->regmap, RV3028_STATUS, status, + !(status & RV3028_STATUS_EEBUSY), + RV3028_EEBUSY_POLL, RV3028_EEBUSY_TIMEOUT); + if (ret) { + rv3028_exit_eerd(rv3028, *eerd); + + return ret; + } + + return 0; +} + +static int rv3028_update_eeprom(struct rv3028_data *rv3028, u32 eerd) +{ + u32 status; + int ret; + + ret = regmap_write(rv3028->regmap, RV3028_EEPROM_CMD, 0x0); + if (ret) + goto exit_eerd; + + ret = regmap_write(rv3028->regmap, RV3028_EEPROM_CMD, RV3028_EEPROM_CMD_UPDATE); + if (ret) + goto exit_eerd; + + usleep_range(63000, RV3028_EEBUSY_TIMEOUT); + + ret = regmap_read_poll_timeout(rv3028->regmap, RV3028_STATUS, status, + !(status & RV3028_STATUS_EEBUSY), + RV3028_EEBUSY_POLL, RV3028_EEBUSY_TIMEOUT); + +exit_eerd: + rv3028_exit_eerd(rv3028, eerd); + + return ret; +} + +static int rv3028_update_cfg(struct rv3028_data *rv3028, unsigned int reg, + unsigned int mask, unsigned int val) +{ + u32 eerd; + int ret; + + ret = rv3028_enter_eerd(rv3028, &eerd); + if (ret) + return ret; + + ret = regmap_update_bits(rv3028->regmap, reg, mask, val); + if (ret) { + rv3028_exit_eerd(rv3028, eerd); + return ret; + } + + return rv3028_update_eeprom(rv3028, eerd); +} + static irqreturn_t rv3028_handle_irq(int irq, void *dev_id) { struct rv3028_data *rv3028 = dev_id; @@ -404,17 +487,32 @@ static int rv3028_read_offset(struct device *dev, long *offset) static int rv3028_set_offset(struct device *dev, long offset) { struct rv3028_data *rv3028 = dev_get_drvdata(dev); + u32 eerd; int ret; offset = clamp(offset, -244141L, 243187L) * 1000; offset = DIV_ROUND_CLOSEST(offset, OFFSET_STEP_PPT); - ret = regmap_write(rv3028->regmap, RV3028_OFFSET, offset >> 1); - if (ret < 0) + ret = rv3028_enter_eerd(rv3028, &eerd); + if (ret) return ret; - return regmap_update_bits(rv3028->regmap, RV3028_BACKUP, BIT(7), - offset << 7); + ret = regmap_write(rv3028->regmap, RV3028_OFFSET, offset >> 1); + if (ret < 0) + goto exit_eerd; + + ret = regmap_update_bits(rv3028->regmap, RV3028_BACKUP, BIT(7), + offset << 7); + if (ret < 0) + goto exit_eerd; + + return rv3028_update_eeprom(rv3028, eerd); + +exit_eerd: + rv3028_exit_eerd(rv3028, eerd); + + return ret; + } static int rv3028_ioctl(struct device *dev, unsigned int cmd, unsigned long arg) @@ -451,49 +549,36 @@ static int rv3028_nvram_read(void *priv, unsigned int offset, void *val, static int rv3028_eeprom_write(void *priv, unsigned int offset, void *val, size_t bytes) { - u32 status, ctrl1; - int i, ret, err; + struct rv3028_data *rv3028 = priv; + u32 status, eerd; + int i, ret; u8 *buf = val; - ret = regmap_read(priv, RV3028_CTRL1, &ctrl1); + ret = rv3028_enter_eerd(rv3028, &eerd); if (ret) return ret; - if (!(ctrl1 & RV3028_CTRL1_EERD)) { - ret = regmap_update_bits(priv, RV3028_CTRL1, - RV3028_CTRL1_EERD, RV3028_CTRL1_EERD); - if (ret) - return ret; - - ret = regmap_read_poll_timeout(priv, RV3028_STATUS, status, - !(status & RV3028_STATUS_EEBUSY), - RV3028_EEBUSY_POLL, - RV3028_EEBUSY_TIMEOUT); - if (ret) - goto restore_eerd; - } - for (i = 0; i < bytes; i++) { - ret = regmap_write(priv, RV3028_EEPROM_ADDR, offset + i); + ret = regmap_write(rv3028->regmap, RV3028_EEPROM_ADDR, offset + i); if (ret) goto restore_eerd; - ret = regmap_write(priv, RV3028_EEPROM_DATA, buf[i]); + ret = regmap_write(rv3028->regmap, RV3028_EEPROM_DATA, buf[i]); if (ret) goto restore_eerd; - ret = regmap_write(priv, RV3028_EEPROM_CMD, 0x0); + ret = regmap_write(rv3028->regmap, RV3028_EEPROM_CMD, 0x0); if (ret) goto restore_eerd; - ret = regmap_write(priv, RV3028_EEPROM_CMD, + ret = regmap_write(rv3028->regmap, RV3028_EEPROM_CMD, RV3028_EEPROM_CMD_WRITE); if (ret) goto restore_eerd; usleep_range(RV3028_EEBUSY_POLL, RV3028_EEBUSY_TIMEOUT); - ret = regmap_read_poll_timeout(priv, RV3028_STATUS, status, + ret = regmap_read_poll_timeout(rv3028->regmap, RV3028_STATUS, status, !(status & RV3028_STATUS_EEBUSY), RV3028_EEBUSY_POLL, RV3028_EEBUSY_TIMEOUT); @@ -502,13 +587,7 @@ static int rv3028_eeprom_write(void *priv, unsigned int offset, void *val, } restore_eerd: - if (!(ctrl1 & RV3028_CTRL1_EERD)) - { - err = regmap_update_bits(priv, RV3028_CTRL1, RV3028_CTRL1_EERD, - 0); - if (err && !ret) - ret = err; - } + rv3028_exit_eerd(rv3028, eerd); return ret; } @@ -516,63 +595,44 @@ static int rv3028_eeprom_write(void *priv, unsigned int offset, void *val, static int rv3028_eeprom_read(void *priv, unsigned int offset, void *val, size_t bytes) { - u32 status, ctrl1, data; - int i, ret, err; + struct rv3028_data *rv3028 = priv; + u32 status, eerd, data; + int i, ret; u8 *buf = val; - ret = regmap_read(priv, RV3028_CTRL1, &ctrl1); + ret = rv3028_enter_eerd(rv3028, &eerd); if (ret) return ret; - if (!(ctrl1 & RV3028_CTRL1_EERD)) { - ret = regmap_update_bits(priv, RV3028_CTRL1, - RV3028_CTRL1_EERD, RV3028_CTRL1_EERD); - if (ret) - return ret; - - ret = regmap_read_poll_timeout(priv, RV3028_STATUS, status, - !(status & RV3028_STATUS_EEBUSY), - RV3028_EEBUSY_POLL, - RV3028_EEBUSY_TIMEOUT); - if (ret) - goto restore_eerd; - } - for (i = 0; i < bytes; i++) { - ret = regmap_write(priv, RV3028_EEPROM_ADDR, offset + i); + ret = regmap_write(rv3028->regmap, RV3028_EEPROM_ADDR, offset + i); if (ret) goto restore_eerd; - ret = regmap_write(priv, RV3028_EEPROM_CMD, 0x0); + ret = regmap_write(rv3028->regmap, RV3028_EEPROM_CMD, 0x0); if (ret) goto restore_eerd; - ret = regmap_write(priv, RV3028_EEPROM_CMD, + ret = regmap_write(rv3028->regmap, RV3028_EEPROM_CMD, RV3028_EEPROM_CMD_READ); if (ret) goto restore_eerd; - ret = regmap_read_poll_timeout(priv, RV3028_STATUS, status, + ret = regmap_read_poll_timeout(rv3028->regmap, RV3028_STATUS, status, !(status & RV3028_STATUS_EEBUSY), RV3028_EEBUSY_POLL, RV3028_EEBUSY_TIMEOUT); if (ret) goto restore_eerd; - ret = regmap_read(priv, RV3028_EEPROM_DATA, &data); + ret = regmap_read(rv3028->regmap, RV3028_EEPROM_DATA, &data); if (ret) goto restore_eerd; buf[i] = data; } restore_eerd: - if (!(ctrl1 & RV3028_CTRL1_EERD)) - { - err = regmap_update_bits(priv, RV3028_CTRL1, RV3028_CTRL1_EERD, - 0); - if (err && !ret) - ret = err; - } + rv3028_exit_eerd(rv3028, eerd); return ret; } @@ -619,24 +679,23 @@ static int rv3028_clkout_set_rate(struct clk_hw *hw, unsigned long rate, unsigned long parent_rate) { int i, ret; + u32 enabled; struct rv3028_data *rv3028 = clkout_hw_to_rv3028(hw); + ret = regmap_read(rv3028->regmap, RV3028_CLKOUT, &enabled); + if (ret < 0) + return ret; + ret = regmap_write(rv3028->regmap, RV3028_CLKOUT, 0x0); if (ret < 0) return ret; - for (i = 0; i < ARRAY_SIZE(clkout_rates); i++) { - if (clkout_rates[i] == rate) { - ret = regmap_update_bits(rv3028->regmap, - RV3028_CLKOUT, - RV3028_CLKOUT_FD_MASK, i); - if (ret < 0) - return ret; + enabled &= RV3028_CLKOUT_CLKOE; - return regmap_write(rv3028->regmap, RV3028_CLKOUT, - RV3028_CLKOUT_CLKSY | RV3028_CLKOUT_CLKOE); - } - } + for (i = 0; i < ARRAY_SIZE(clkout_rates); i++) + if (clkout_rates[i] == rate) + return rv3028_update_cfg(rv3028, RV3028_CLKOUT, 0xff, + RV3028_CLKOUT_CLKSY | enabled | i); return -EINVAL; } @@ -811,10 +870,8 @@ static int rv3028_probe(struct i2c_client *client) break; if (i < ARRAY_SIZE(rv3028_trickle_resistors)) { - ret = regmap_update_bits(rv3028->regmap, RV3028_BACKUP, - RV3028_BACKUP_TCE | - RV3028_BACKUP_TCR_MASK, - RV3028_BACKUP_TCE | i); + ret = rv3028_update_cfg(rv3028, RV3028_BACKUP, RV3028_BACKUP_TCE | + RV3028_BACKUP_TCR_MASK, RV3028_BACKUP_TCE | i); if (ret) return ret; } else { @@ -835,7 +892,7 @@ static int rv3028_probe(struct i2c_client *client) nvmem_cfg.priv = rv3028->regmap; rtc_nvmem_register(rv3028->rtc, &nvmem_cfg); - eeprom_cfg.priv = rv3028->regmap; + eeprom_cfg.priv = rv3028; rtc_nvmem_register(rv3028->rtc, &eeprom_cfg); rv3028->rtc->max_user_freq = 1; diff --git a/drivers/rtc/rtc-rv3032.c b/drivers/rtc/rtc-rv3032.c new file mode 100644 index 000000000000..3e67f71f4261 --- /dev/null +++ b/drivers/rtc/rtc-rv3032.c @@ -0,0 +1,925 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * RTC driver for the Micro Crystal RV3032 + * + * Copyright (C) 2020 Micro Crystal SA + * + * Alexandre Belloni + * + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#define RV3032_SEC 0x01 +#define RV3032_MIN 0x02 +#define RV3032_HOUR 0x03 +#define RV3032_WDAY 0x04 +#define RV3032_DAY 0x05 +#define RV3032_MONTH 0x06 +#define RV3032_YEAR 0x07 +#define RV3032_ALARM_MIN 0x08 +#define RV3032_ALARM_HOUR 0x09 +#define RV3032_ALARM_DAY 0x0A +#define RV3032_STATUS 0x0D +#define RV3032_TLSB 0x0E +#define RV3032_TMSB 0x0F +#define RV3032_CTRL1 0x10 +#define RV3032_CTRL2 0x11 +#define RV3032_CTRL3 0x12 +#define RV3032_TS_CTRL 0x13 +#define RV3032_CLK_IRQ 0x14 +#define RV3032_EEPROM_ADDR 0x3D +#define RV3032_EEPROM_DATA 0x3E +#define RV3032_EEPROM_CMD 0x3F +#define RV3032_RAM1 0x40 +#define RV3032_PMU 0xC0 +#define RV3032_OFFSET 0xC1 +#define RV3032_CLKOUT1 0xC2 +#define RV3032_CLKOUT2 0xC3 +#define RV3032_TREF0 0xC4 +#define RV3032_TREF1 0xC5 + +#define RV3032_STATUS_VLF BIT(0) +#define RV3032_STATUS_PORF BIT(1) +#define RV3032_STATUS_EVF BIT(2) +#define RV3032_STATUS_AF BIT(3) +#define RV3032_STATUS_TF BIT(4) +#define RV3032_STATUS_UF BIT(5) +#define RV3032_STATUS_TLF BIT(6) +#define RV3032_STATUS_THF BIT(7) + +#define RV3032_TLSB_CLKF BIT(1) +#define RV3032_TLSB_EEBUSY BIT(2) +#define RV3032_TLSB_TEMP GENMASK(7, 4) + +#define RV3032_CLKOUT2_HFD_MSK GENMASK(4, 0) +#define RV3032_CLKOUT2_FD_MSK GENMASK(6, 5) +#define RV3032_CLKOUT2_OS BIT(7) + +#define RV3032_CTRL1_EERD BIT(3) +#define RV3032_CTRL1_WADA BIT(5) + +#define RV3032_CTRL2_STOP BIT(0) +#define RV3032_CTRL2_EIE BIT(2) +#define RV3032_CTRL2_AIE BIT(3) +#define RV3032_CTRL2_TIE BIT(4) +#define RV3032_CTRL2_UIE BIT(5) +#define RV3032_CTRL2_CLKIE BIT(6) +#define RV3032_CTRL2_TSE BIT(7) + +#define RV3032_PMU_TCM GENMASK(1, 0) +#define RV3032_PMU_TCR GENMASK(3, 2) +#define RV3032_PMU_BSM GENMASK(5, 4) +#define RV3032_PMU_NCLKE BIT(6) + +#define RV3032_PMU_BSM_DSM 1 +#define RV3032_PMU_BSM_LSM 2 + +#define RV3032_OFFSET_MSK GENMASK(5, 0) + +#define RV3032_EVT_CTRL_TSR BIT(2) + +#define RV3032_EEPROM_CMD_UPDATE 0x11 +#define RV3032_EEPROM_CMD_WRITE 0x21 +#define RV3032_EEPROM_CMD_READ 0x22 + +#define RV3032_EEPROM_USER 0xCB + +#define RV3032_EEBUSY_POLL 10000 +#define RV3032_EEBUSY_TIMEOUT 100000 + +#define OFFSET_STEP_PPT 238419 + +struct rv3032_data { + struct regmap *regmap; + struct rtc_device *rtc; +#ifdef CONFIG_COMMON_CLK + struct clk_hw clkout_hw; +#endif +}; + +static u16 rv3032_trickle_resistors[] = {1000, 2000, 7000, 11000}; +static u16 rv3032_trickle_voltages[] = {0, 1750, 3000, 4400}; + +static int rv3032_exit_eerd(struct rv3032_data *rv3032, u32 eerd) +{ + if (eerd) + return 0; + + return regmap_update_bits(rv3032->regmap, RV3032_CTRL1, RV3032_CTRL1_EERD, 0); +} + +static int rv3032_enter_eerd(struct rv3032_data *rv3032, u32 *eerd) +{ + u32 ctrl1, status; + int ret; + + ret = regmap_read(rv3032->regmap, RV3032_CTRL1, &ctrl1); + if (ret) + return ret; + + *eerd = ctrl1 & RV3032_CTRL1_EERD; + if (*eerd) + return 0; + + ret = regmap_update_bits(rv3032->regmap, RV3032_CTRL1, + RV3032_CTRL1_EERD, RV3032_CTRL1_EERD); + if (ret) + return ret; + + ret = regmap_read_poll_timeout(rv3032->regmap, RV3032_TLSB, status, + !(status & RV3032_TLSB_EEBUSY), + RV3032_EEBUSY_POLL, RV3032_EEBUSY_TIMEOUT); + if (ret) { + rv3032_exit_eerd(rv3032, *eerd); + + return ret; + } + + return 0; +} + +static int rv3032_update_cfg(struct rv3032_data *rv3032, unsigned int reg, + unsigned int mask, unsigned int val) +{ + u32 status, eerd; + int ret; + + ret = rv3032_enter_eerd(rv3032, &eerd); + if (ret) + return ret; + + ret = regmap_update_bits(rv3032->regmap, reg, mask, val); + if (ret) + goto exit_eerd; + + ret = regmap_write(rv3032->regmap, RV3032_EEPROM_CMD, RV3032_EEPROM_CMD_UPDATE); + if (ret) + goto exit_eerd; + + usleep_range(46000, RV3032_EEBUSY_TIMEOUT); + + ret = regmap_read_poll_timeout(rv3032->regmap, RV3032_TLSB, status, + !(status & RV3032_TLSB_EEBUSY), + RV3032_EEBUSY_POLL, RV3032_EEBUSY_TIMEOUT); + +exit_eerd: + rv3032_exit_eerd(rv3032, eerd); + + return ret; +} + +static irqreturn_t rv3032_handle_irq(int irq, void *dev_id) +{ + struct rv3032_data *rv3032 = dev_id; + unsigned long events = 0; + u32 status = 0, ctrl = 0; + + if (regmap_read(rv3032->regmap, RV3032_STATUS, &status) < 0 || + status == 0) { + return IRQ_NONE; + } + + if (status & RV3032_STATUS_TF) { + status |= RV3032_STATUS_TF; + ctrl |= RV3032_CTRL2_TIE; + events |= RTC_PF; + } + + if (status & RV3032_STATUS_AF) { + status |= RV3032_STATUS_AF; + ctrl |= RV3032_CTRL2_AIE; + events |= RTC_AF; + } + + if (status & RV3032_STATUS_UF) { + status |= RV3032_STATUS_UF; + ctrl |= RV3032_CTRL2_UIE; + events |= RTC_UF; + } + + if (events) { + rtc_update_irq(rv3032->rtc, 1, events); + regmap_update_bits(rv3032->regmap, RV3032_STATUS, status, 0); + regmap_update_bits(rv3032->regmap, RV3032_CTRL2, ctrl, 0); + } + + return IRQ_HANDLED; +} + +static int rv3032_get_time(struct device *dev, struct rtc_time *tm) +{ + struct rv3032_data *rv3032 = dev_get_drvdata(dev); + u8 date[7]; + int ret, status; + + ret = regmap_read(rv3032->regmap, RV3032_STATUS, &status); + if (ret < 0) + return ret; + + if (status & (RV3032_STATUS_PORF | RV3032_STATUS_VLF)) + return -EINVAL; + + ret = regmap_bulk_read(rv3032->regmap, RV3032_SEC, date, sizeof(date)); + if (ret) + return ret; + + tm->tm_sec = bcd2bin(date[0] & 0x7f); + tm->tm_min = bcd2bin(date[1] & 0x7f); + tm->tm_hour = bcd2bin(date[2] & 0x3f); + tm->tm_wday = date[3] & 0x7; + tm->tm_mday = bcd2bin(date[4] & 0x3f); + tm->tm_mon = bcd2bin(date[5] & 0x1f) - 1; + tm->tm_year = bcd2bin(date[6]) + 100; + + return 0; +} + +static int rv3032_set_time(struct device *dev, struct rtc_time *tm) +{ + struct rv3032_data *rv3032 = dev_get_drvdata(dev); + u8 date[7]; + int ret; + + date[0] = bin2bcd(tm->tm_sec); + date[1] = bin2bcd(tm->tm_min); + date[2] = bin2bcd(tm->tm_hour); + date[3] = tm->tm_wday; + date[4] = bin2bcd(tm->tm_mday); + date[5] = bin2bcd(tm->tm_mon + 1); + date[6] = bin2bcd(tm->tm_year - 100); + + ret = regmap_bulk_write(rv3032->regmap, RV3032_SEC, date, + sizeof(date)); + if (ret) + return ret; + + ret = regmap_update_bits(rv3032->regmap, RV3032_STATUS, + RV3032_STATUS_PORF | RV3032_STATUS_VLF, 0); + + return ret; +} + +static int rv3032_get_alarm(struct device *dev, struct rtc_wkalrm *alrm) +{ + struct rv3032_data *rv3032 = dev_get_drvdata(dev); + u8 alarmvals[3]; + int status, ctrl, ret; + + ret = regmap_bulk_read(rv3032->regmap, RV3032_ALARM_MIN, alarmvals, + sizeof(alarmvals)); + if (ret) + return ret; + + ret = regmap_read(rv3032->regmap, RV3032_STATUS, &status); + if (ret < 0) + return ret; + + ret = regmap_read(rv3032->regmap, RV3032_CTRL2, &ctrl); + if (ret < 0) + return ret; + + alrm->time.tm_sec = 0; + alrm->time.tm_min = bcd2bin(alarmvals[0] & 0x7f); + alrm->time.tm_hour = bcd2bin(alarmvals[1] & 0x3f); + alrm->time.tm_mday = bcd2bin(alarmvals[2] & 0x3f); + + alrm->enabled = !!(ctrl & RV3032_CTRL2_AIE); + alrm->pending = (status & RV3032_STATUS_AF) && alrm->enabled; + + return 0; +} + +static int rv3032_set_alarm(struct device *dev, struct rtc_wkalrm *alrm) +{ + struct rv3032_data *rv3032 = dev_get_drvdata(dev); + u8 alarmvals[3]; + u8 ctrl = 0; + int ret; + + /* The alarm has no seconds, round up to nearest minute */ + if (alrm->time.tm_sec) { + time64_t alarm_time = rtc_tm_to_time64(&alrm->time); + + alarm_time += 60 - alrm->time.tm_sec; + rtc_time64_to_tm(alarm_time, &alrm->time); + } + + ret = regmap_update_bits(rv3032->regmap, RV3032_CTRL2, + RV3032_CTRL2_AIE | RV3032_CTRL2_UIE, 0); + if (ret) + return ret; + + alarmvals[0] = bin2bcd(alrm->time.tm_min); + alarmvals[1] = bin2bcd(alrm->time.tm_hour); + alarmvals[2] = bin2bcd(alrm->time.tm_mday); + + ret = regmap_update_bits(rv3032->regmap, RV3032_STATUS, + RV3032_STATUS_AF, 0); + if (ret) + return ret; + + ret = regmap_bulk_write(rv3032->regmap, RV3032_ALARM_MIN, alarmvals, + sizeof(alarmvals)); + if (ret) + return ret; + + if (alrm->enabled) { + if (rv3032->rtc->uie_rtctimer.enabled) + ctrl |= RV3032_CTRL2_UIE; + if (rv3032->rtc->aie_timer.enabled) + ctrl |= RV3032_CTRL2_AIE; + } + + ret = regmap_update_bits(rv3032->regmap, RV3032_CTRL2, + RV3032_CTRL2_UIE | RV3032_CTRL2_AIE, ctrl); + + return ret; +} + +static int rv3032_alarm_irq_enable(struct device *dev, unsigned int enabled) +{ + struct rv3032_data *rv3032 = dev_get_drvdata(dev); + int ctrl = 0, ret; + + if (enabled) { + if (rv3032->rtc->uie_rtctimer.enabled) + ctrl |= RV3032_CTRL2_UIE; + if (rv3032->rtc->aie_timer.enabled) + ctrl |= RV3032_CTRL2_AIE; + } + + ret = regmap_update_bits(rv3032->regmap, RV3032_STATUS, + RV3032_STATUS_AF | RV3032_STATUS_UF, 0); + if (ret) + return ret; + + ret = regmap_update_bits(rv3032->regmap, RV3032_CTRL2, + RV3032_CTRL2_UIE | RV3032_CTRL2_AIE, ctrl); + if (ret) + return ret; + + return 0; +} + +static int rv3032_read_offset(struct device *dev, long *offset) +{ + struct rv3032_data *rv3032 = dev_get_drvdata(dev); + int ret, value, steps; + + ret = regmap_read(rv3032->regmap, RV3032_OFFSET, &value); + if (ret < 0) + return ret; + + steps = sign_extend32(FIELD_GET(RV3032_OFFSET_MSK, value), 5); + + *offset = DIV_ROUND_CLOSEST(steps * OFFSET_STEP_PPT, 1000); + + return 0; +} + +static int rv3032_set_offset(struct device *dev, long offset) +{ + struct rv3032_data *rv3032 = dev_get_drvdata(dev); + + offset = clamp(offset, -7629L, 7391L) * 1000; + offset = DIV_ROUND_CLOSEST(offset, OFFSET_STEP_PPT); + + return rv3032_update_cfg(rv3032, RV3032_OFFSET, RV3032_OFFSET_MSK, + FIELD_PREP(RV3032_OFFSET_MSK, offset)); +} + +static int rv3032_ioctl(struct device *dev, unsigned int cmd, unsigned long arg) +{ + struct rv3032_data *rv3032 = dev_get_drvdata(dev); + int status, val = 0, ret = 0; + + switch (cmd) { + case RTC_VL_READ: + ret = regmap_read(rv3032->regmap, RV3032_STATUS, &status); + if (ret < 0) + return ret; + + if (status & (RV3032_STATUS_PORF | RV3032_STATUS_VLF)) + val = RTC_VL_DATA_INVALID; + return put_user(val, (unsigned int __user *)arg); + + default: + return -ENOIOCTLCMD; + } +} + +static int rv3032_nvram_write(void *priv, unsigned int offset, void *val, size_t bytes) +{ + return regmap_bulk_write(priv, RV3032_RAM1 + offset, val, bytes); +} + +static int rv3032_nvram_read(void *priv, unsigned int offset, void *val, size_t bytes) +{ + return regmap_bulk_read(priv, RV3032_RAM1 + offset, val, bytes); +} + +static int rv3032_eeprom_write(void *priv, unsigned int offset, void *val, size_t bytes) +{ + struct rv3032_data *rv3032 = priv; + u32 status, eerd; + int i, ret; + u8 *buf = val; + + ret = rv3032_enter_eerd(rv3032, &eerd); + if (ret) + return ret; + + for (i = 0; i < bytes; i++) { + ret = regmap_write(rv3032->regmap, RV3032_EEPROM_ADDR, + RV3032_EEPROM_USER + offset + i); + if (ret) + goto exit_eerd; + + ret = regmap_write(rv3032->regmap, RV3032_EEPROM_DATA, buf[i]); + if (ret) + goto exit_eerd; + + ret = regmap_write(rv3032->regmap, RV3032_EEPROM_CMD, + RV3032_EEPROM_CMD_WRITE); + if (ret) + goto exit_eerd; + + usleep_range(RV3032_EEBUSY_POLL, RV3032_EEBUSY_TIMEOUT); + + ret = regmap_read_poll_timeout(rv3032->regmap, RV3032_TLSB, status, + !(status & RV3032_TLSB_EEBUSY), + RV3032_EEBUSY_POLL, RV3032_EEBUSY_TIMEOUT); + if (ret) + goto exit_eerd; + } + +exit_eerd: + rv3032_exit_eerd(rv3032, eerd); + + return ret; +} + +static int rv3032_eeprom_read(void *priv, unsigned int offset, void *val, size_t bytes) +{ + struct rv3032_data *rv3032 = priv; + u32 status, eerd, data; + int i, ret; + u8 *buf = val; + + ret = rv3032_enter_eerd(rv3032, &eerd); + if (ret) + return ret; + + for (i = 0; i < bytes; i++) { + ret = regmap_write(rv3032->regmap, RV3032_EEPROM_ADDR, + RV3032_EEPROM_USER + offset + i); + if (ret) + goto exit_eerd; + + ret = regmap_write(rv3032->regmap, RV3032_EEPROM_CMD, + RV3032_EEPROM_CMD_READ); + if (ret) + goto exit_eerd; + + ret = regmap_read_poll_timeout(rv3032->regmap, RV3032_TLSB, status, + !(status & RV3032_TLSB_EEBUSY), + RV3032_EEBUSY_POLL, RV3032_EEBUSY_TIMEOUT); + if (ret) + goto exit_eerd; + + ret = regmap_read(rv3032->regmap, RV3032_EEPROM_DATA, &data); + if (ret) + goto exit_eerd; + buf[i] = data; + } + +exit_eerd: + rv3032_exit_eerd(rv3032, eerd); + + return ret; +} + +static int rv3032_trickle_charger_setup(struct device *dev, struct rv3032_data *rv3032) +{ + u32 val, ohms, voltage; + int i; + + val = FIELD_PREP(RV3032_PMU_TCM, 1) | FIELD_PREP(RV3032_PMU_BSM, RV3032_PMU_BSM_DSM); + if (!device_property_read_u32(dev, "trickle-voltage-millivolt", &voltage)) { + for (i = 0; i < ARRAY_SIZE(rv3032_trickle_voltages); i++) + if (voltage == rv3032_trickle_voltages[i]) + break; + if (i < ARRAY_SIZE(rv3032_trickle_voltages)) + val = FIELD_PREP(RV3032_PMU_TCM, i) | + FIELD_PREP(RV3032_PMU_BSM, RV3032_PMU_BSM_LSM); + } + + if (device_property_read_u32(dev, "trickle-resistor-ohms", &ohms)) + return 0; + + for (i = 0; i < ARRAY_SIZE(rv3032_trickle_resistors); i++) + if (ohms == rv3032_trickle_resistors[i]) + break; + + if (i >= ARRAY_SIZE(rv3032_trickle_resistors)) { + dev_warn(dev, "invalid trickle resistor value\n"); + + return 0; + } + + return rv3032_update_cfg(rv3032, RV3032_PMU, + RV3032_PMU_TCR | RV3032_PMU_TCM | RV3032_PMU_BSM, + val | FIELD_PREP(RV3032_PMU_TCR, i)); +} + +#ifdef CONFIG_COMMON_CLK +#define clkout_hw_to_rv3032(hw) container_of(hw, struct rv3032_data, clkout_hw) + +static int clkout_xtal_rates[] = { + 32768, + 1024, + 64, + 1, +}; + +#define RV3032_HFD_STEP 8192 + +static unsigned long rv3032_clkout_recalc_rate(struct clk_hw *hw, + unsigned long parent_rate) +{ + int clkout, ret; + struct rv3032_data *rv3032 = clkout_hw_to_rv3032(hw); + + ret = regmap_read(rv3032->regmap, RV3032_CLKOUT2, &clkout); + if (ret < 0) + return 0; + + if (clkout & RV3032_CLKOUT2_OS) { + unsigned long rate = FIELD_GET(RV3032_CLKOUT2_HFD_MSK, clkout) << 8; + + ret = regmap_read(rv3032->regmap, RV3032_CLKOUT1, &clkout); + if (ret < 0) + return 0; + + rate += clkout + 1; + + return rate * RV3032_HFD_STEP; + } + + return clkout_xtal_rates[FIELD_GET(RV3032_CLKOUT2_FD_MSK, clkout)]; +} + +static long rv3032_clkout_round_rate(struct clk_hw *hw, unsigned long rate, + unsigned long *prate) +{ + int i, hfd; + + if (rate < RV3032_HFD_STEP) + for (i = 0; i < ARRAY_SIZE(clkout_xtal_rates); i++) + if (clkout_xtal_rates[i] <= rate) + return clkout_xtal_rates[i]; + + hfd = DIV_ROUND_CLOSEST(rate, RV3032_HFD_STEP); + + return RV3032_HFD_STEP * clamp(hfd, 0, 8192); +} + +static int rv3032_clkout_set_rate(struct clk_hw *hw, unsigned long rate, + unsigned long parent_rate) +{ + struct rv3032_data *rv3032 = clkout_hw_to_rv3032(hw); + u32 status, eerd; + int i, hfd, ret; + + for (i = 0; i < ARRAY_SIZE(clkout_xtal_rates); i++) { + if (clkout_xtal_rates[i] == rate) { + return rv3032_update_cfg(rv3032, RV3032_CLKOUT2, 0xff, + FIELD_PREP(RV3032_CLKOUT2_FD_MSK, i)); + } + } + + hfd = DIV_ROUND_CLOSEST(rate, RV3032_HFD_STEP); + hfd = clamp(hfd, 1, 8192) - 1; + + ret = rv3032_enter_eerd(rv3032, &eerd); + if (ret) + goto exit_eerd; + + ret = regmap_write(rv3032->regmap, RV3032_CLKOUT1, hfd & 0xff); + if (ret) + return ret; + + ret = regmap_write(rv3032->regmap, RV3032_CLKOUT2, RV3032_CLKOUT2_OS | + FIELD_PREP(RV3032_CLKOUT2_HFD_MSK, hfd >> 8)); + if (ret) + goto exit_eerd; + + ret = regmap_write(rv3032->regmap, RV3032_EEPROM_CMD, RV3032_EEPROM_CMD_UPDATE); + if (ret) + goto exit_eerd; + + usleep_range(46000, RV3032_EEBUSY_TIMEOUT); + + ret = regmap_read_poll_timeout(rv3032->regmap, RV3032_TLSB, status, + !(status & RV3032_TLSB_EEBUSY), + RV3032_EEBUSY_POLL, RV3032_EEBUSY_TIMEOUT); + +exit_eerd: + rv3032_exit_eerd(rv3032, eerd); + + return ret; +} + +static int rv3032_clkout_prepare(struct clk_hw *hw) +{ + struct rv3032_data *rv3032 = clkout_hw_to_rv3032(hw); + + return rv3032_update_cfg(rv3032, RV3032_PMU, RV3032_PMU_NCLKE, 0); +} + +static void rv3032_clkout_unprepare(struct clk_hw *hw) +{ + struct rv3032_data *rv3032 = clkout_hw_to_rv3032(hw); + + rv3032_update_cfg(rv3032, RV3032_PMU, RV3032_PMU_NCLKE, RV3032_PMU_NCLKE); +} + +static int rv3032_clkout_is_prepared(struct clk_hw *hw) +{ + int val, ret; + struct rv3032_data *rv3032 = clkout_hw_to_rv3032(hw); + + ret = regmap_read(rv3032->regmap, RV3032_PMU, &val); + if (ret < 0) + return ret; + + return !(val & RV3032_PMU_NCLKE); +} + +static const struct clk_ops rv3032_clkout_ops = { + .prepare = rv3032_clkout_prepare, + .unprepare = rv3032_clkout_unprepare, + .is_prepared = rv3032_clkout_is_prepared, + .recalc_rate = rv3032_clkout_recalc_rate, + .round_rate = rv3032_clkout_round_rate, + .set_rate = rv3032_clkout_set_rate, +}; + +static int rv3032_clkout_register_clk(struct rv3032_data *rv3032, + struct i2c_client *client) +{ + int ret; + struct clk *clk; + struct clk_init_data init; + struct device_node *node = client->dev.of_node; + + ret = regmap_update_bits(rv3032->regmap, RV3032_TLSB, RV3032_TLSB_CLKF, 0); + if (ret < 0) + return ret; + + ret = regmap_update_bits(rv3032->regmap, RV3032_CTRL2, RV3032_CTRL2_CLKIE, 0); + if (ret < 0) + return ret; + + ret = regmap_write(rv3032->regmap, RV3032_CLK_IRQ, 0); + if (ret < 0) + return ret; + + init.name = "rv3032-clkout"; + init.ops = &rv3032_clkout_ops; + init.flags = 0; + init.parent_names = NULL; + init.num_parents = 0; + rv3032->clkout_hw.init = &init; + + of_property_read_string(node, "clock-output-names", &init.name); + + clk = devm_clk_register(&client->dev, &rv3032->clkout_hw); + if (!IS_ERR(clk)) + of_clk_add_provider(node, of_clk_src_simple_get, clk); + + return 0; +} +#endif + +static int rv3032_hwmon_read_temp(struct device *dev, long *mC) +{ + struct rv3032_data *rv3032 = dev_get_drvdata(dev); + u8 buf[2]; + int temp, prev = 0; + int ret; + + ret = regmap_bulk_read(rv3032->regmap, RV3032_TLSB, buf, sizeof(buf)); + if (ret) + return ret; + + temp = sign_extend32(buf[1], 7) << 4; + temp |= FIELD_GET(RV3032_TLSB_TEMP, buf[0]); + + /* No blocking or shadowing on RV3032_TLSB and RV3032_TMSB */ + do { + prev = temp; + + ret = regmap_bulk_read(rv3032->regmap, RV3032_TLSB, buf, sizeof(buf)); + if (ret) + return ret; + + temp = sign_extend32(buf[1], 7) << 4; + temp |= FIELD_GET(RV3032_TLSB_TEMP, buf[0]); + } while (temp != prev); + + *mC = (temp * 1000) / 16; + + return 0; +} + +static umode_t rv3032_hwmon_is_visible(const void *data, enum hwmon_sensor_types type, + u32 attr, int channel) +{ + if (type != hwmon_temp) + return 0; + + switch (attr) { + case hwmon_temp_input: + return 0444; + default: + return 0; + } +} + +static int rv3032_hwmon_read(struct device *dev, enum hwmon_sensor_types type, + u32 attr, int channel, long *temp) +{ + int err; + + switch (attr) { + case hwmon_temp_input: + err = rv3032_hwmon_read_temp(dev, temp); + break; + default: + err = -EOPNOTSUPP; + break; + } + + return err; +} + +static const struct hwmon_channel_info *rv3032_hwmon_info[] = { + HWMON_CHANNEL_INFO(chip, HWMON_C_REGISTER_TZ), + HWMON_CHANNEL_INFO(temp, HWMON_T_INPUT | HWMON_T_MAX | HWMON_T_MAX_HYST), + NULL +}; + +static const struct hwmon_ops rv3032_hwmon_hwmon_ops = { + .is_visible = rv3032_hwmon_is_visible, + .read = rv3032_hwmon_read, +}; + +static const struct hwmon_chip_info rv3032_hwmon_chip_info = { + .ops = &rv3032_hwmon_hwmon_ops, + .info = rv3032_hwmon_info, +}; + +static void rv3032_hwmon_register(struct device *dev) +{ + struct rv3032_data *rv3032 = dev_get_drvdata(dev); + + if (!IS_REACHABLE(CONFIG_HWMON)) + return; + + devm_hwmon_device_register_with_info(dev, "rv3032", rv3032, &rv3032_hwmon_chip_info, NULL); +} + +static struct rtc_class_ops rv3032_rtc_ops = { + .read_time = rv3032_get_time, + .set_time = rv3032_set_time, + .read_offset = rv3032_read_offset, + .set_offset = rv3032_set_offset, + .ioctl = rv3032_ioctl, +}; + +static const struct regmap_config regmap_config = { + .reg_bits = 8, + .val_bits = 8, + .max_register = 0xCA, +}; + +static int rv3032_probe(struct i2c_client *client) +{ + struct rv3032_data *rv3032; + int ret, status; + struct nvmem_config nvmem_cfg = { + .name = "rv3032_nvram", + .word_size = 1, + .stride = 1, + .size = 16, + .type = NVMEM_TYPE_BATTERY_BACKED, + .reg_read = rv3032_nvram_read, + .reg_write = rv3032_nvram_write, + }; + struct nvmem_config eeprom_cfg = { + .name = "rv3032_eeprom", + .word_size = 1, + .stride = 1, + .size = 32, + .type = NVMEM_TYPE_EEPROM, + .reg_read = rv3032_eeprom_read, + .reg_write = rv3032_eeprom_write, + }; + + rv3032 = devm_kzalloc(&client->dev, sizeof(struct rv3032_data), + GFP_KERNEL); + if (!rv3032) + return -ENOMEM; + + rv3032->regmap = devm_regmap_init_i2c(client, ®map_config); + if (IS_ERR(rv3032->regmap)) + return PTR_ERR(rv3032->regmap); + + i2c_set_clientdata(client, rv3032); + + ret = regmap_read(rv3032->regmap, RV3032_STATUS, &status); + if (ret < 0) + return ret; + + rv3032->rtc = devm_rtc_allocate_device(&client->dev); + if (IS_ERR(rv3032->rtc)) + return PTR_ERR(rv3032->rtc); + + if (client->irq > 0) { + ret = devm_request_threaded_irq(&client->dev, client->irq, + NULL, rv3032_handle_irq, + IRQF_TRIGGER_LOW | IRQF_ONESHOT, + "rv3032", rv3032); + if (ret) { + dev_warn(&client->dev, "unable to request IRQ, alarms disabled\n"); + client->irq = 0; + } else { + rv3032_rtc_ops.read_alarm = rv3032_get_alarm; + rv3032_rtc_ops.set_alarm = rv3032_set_alarm; + rv3032_rtc_ops.alarm_irq_enable = rv3032_alarm_irq_enable; + } + } + + ret = regmap_update_bits(rv3032->regmap, RV3032_CTRL1, + RV3032_CTRL1_WADA, RV3032_CTRL1_WADA); + if (ret) + return ret; + + rv3032_trickle_charger_setup(&client->dev, rv3032); + + rv3032->rtc->range_min = RTC_TIMESTAMP_BEGIN_2000; + rv3032->rtc->range_max = RTC_TIMESTAMP_END_2099; + rv3032->rtc->ops = &rv3032_rtc_ops; + ret = rtc_register_device(rv3032->rtc); + if (ret) + return ret; + + nvmem_cfg.priv = rv3032; + rtc_nvmem_register(rv3032->rtc, &nvmem_cfg); + eeprom_cfg.priv = rv3032; + rtc_nvmem_register(rv3032->rtc, &eeprom_cfg); + + rv3032->rtc->max_user_freq = 1; + +#ifdef CONFIG_COMMON_CLK + rv3032_clkout_register_clk(rv3032, client); +#endif + + rv3032_hwmon_register(&client->dev); + + return 0; +} + +static const struct of_device_id rv3032_of_match[] = { + { .compatible = "microcrystal,rv3032", }, + { } +}; +MODULE_DEVICE_TABLE(of, rv3032_of_match); + +static struct i2c_driver rv3032_driver = { + .driver = { + .name = "rtc-rv3032", + .of_match_table = of_match_ptr(rv3032_of_match), + }, + .probe_new = rv3032_probe, +}; +module_i2c_driver(rv3032_driver); + +MODULE_AUTHOR("Alexandre Belloni "); +MODULE_DESCRIPTION("Micro Crystal RV3032 RTC driver"); +MODULE_LICENSE("GPL v2"); diff --git a/drivers/rtc/rtc-rv8803.c b/drivers/rtc/rtc-rv8803.c index 93c3a6b627bd..c6d8e3425688 100644 --- a/drivers/rtc/rtc-rv8803.c +++ b/drivers/rtc/rtc-rv8803.c @@ -454,13 +454,7 @@ static int rv8803_ioctl(struct device *dev, unsigned int cmd, unsigned long arg) static int rv8803_nvram_write(void *priv, unsigned int offset, void *val, size_t bytes) { - int ret; - - ret = rv8803_write_reg(priv, RV8803_RAM, *(u8 *)val); - if (ret) - return ret; - - return 0; + return rv8803_write_reg(priv, RV8803_RAM, *(u8 *)val); } static int rv8803_nvram_read(void *priv, unsigned int offset, diff --git a/drivers/rtc/rtc-rx8010.c b/drivers/rtc/rtc-rx8010.c index fe010151ec8f..dca41a2a39b2 100644 --- a/drivers/rtc/rtc-rx8010.c +++ b/drivers/rtc/rtc-rx8010.c @@ -11,42 +11,43 @@ #include #include #include +#include #include -#define RX8010_SEC 0x10 -#define RX8010_MIN 0x11 -#define RX8010_HOUR 0x12 -#define RX8010_WDAY 0x13 -#define RX8010_MDAY 0x14 -#define RX8010_MONTH 0x15 -#define RX8010_YEAR 0x16 -#define RX8010_RESV17 0x17 -#define RX8010_ALMIN 0x18 -#define RX8010_ALHOUR 0x19 -#define RX8010_ALWDAY 0x1A -#define RX8010_TCOUNT0 0x1B -#define RX8010_TCOUNT1 0x1C -#define RX8010_EXT 0x1D -#define RX8010_FLAG 0x1E -#define RX8010_CTRL 0x1F +#define RX8010_SEC 0x10 +#define RX8010_MIN 0x11 +#define RX8010_HOUR 0x12 +#define RX8010_WDAY 0x13 +#define RX8010_MDAY 0x14 +#define RX8010_MONTH 0x15 +#define RX8010_YEAR 0x16 +#define RX8010_RESV17 0x17 +#define RX8010_ALMIN 0x18 +#define RX8010_ALHOUR 0x19 +#define RX8010_ALWDAY 0x1A +#define RX8010_TCOUNT0 0x1B +#define RX8010_TCOUNT1 0x1C +#define RX8010_EXT 0x1D +#define RX8010_FLAG 0x1E +#define RX8010_CTRL 0x1F /* 0x20 to 0x2F are user registers */ -#define RX8010_RESV30 0x30 -#define RX8010_RESV31 0x31 -#define RX8010_IRQ 0x32 +#define RX8010_RESV30 0x30 +#define RX8010_RESV31 0x31 +#define RX8010_IRQ 0x32 -#define RX8010_EXT_WADA BIT(3) +#define RX8010_EXT_WADA BIT(3) -#define RX8010_FLAG_VLF BIT(1) -#define RX8010_FLAG_AF BIT(3) -#define RX8010_FLAG_TF BIT(4) -#define RX8010_FLAG_UF BIT(5) +#define RX8010_FLAG_VLF BIT(1) +#define RX8010_FLAG_AF BIT(3) +#define RX8010_FLAG_TF BIT(4) +#define RX8010_FLAG_UF BIT(5) -#define RX8010_CTRL_AIE BIT(3) -#define RX8010_CTRL_UIE BIT(5) -#define RX8010_CTRL_STOP BIT(6) -#define RX8010_CTRL_TEST BIT(7) +#define RX8010_CTRL_AIE BIT(3) +#define RX8010_CTRL_UIE BIT(5) +#define RX8010_CTRL_STOP BIT(6) +#define RX8010_CTRL_TEST BIT(7) -#define RX8010_ALARM_AE BIT(7) +#define RX8010_ALARM_AE BIT(7) static const struct i2c_device_id rx8010_id[] = { { "rx8010", 0 }, @@ -61,7 +62,7 @@ static const struct of_device_id rx8010_of_match[] = { MODULE_DEVICE_TABLE(of, rx8010_of_match); struct rx8010_data { - struct i2c_client *client; + struct regmap *regs; struct rtc_device *rtc; u8 ctrlreg; }; @@ -70,13 +71,12 @@ static irqreturn_t rx8010_irq_1_handler(int irq, void *dev_id) { struct i2c_client *client = dev_id; struct rx8010_data *rx8010 = i2c_get_clientdata(client); - int flagreg; + int flagreg, err; mutex_lock(&rx8010->rtc->ops_lock); - flagreg = i2c_smbus_read_byte_data(client, RX8010_FLAG); - - if (flagreg <= 0) { + err = regmap_read(rx8010->regs, RX8010_FLAG, &flagreg); + if (err) { mutex_unlock(&rx8010->rtc->ops_lock); return IRQ_NONE; } @@ -99,32 +99,29 @@ static irqreturn_t rx8010_irq_1_handler(int irq, void *dev_id) rtc_update_irq(rx8010->rtc, 1, RTC_UF | RTC_IRQF); } - i2c_smbus_write_byte_data(client, RX8010_FLAG, flagreg); - + err = regmap_write(rx8010->regs, RX8010_FLAG, flagreg); mutex_unlock(&rx8010->rtc->ops_lock); - return IRQ_HANDLED; + return err ? IRQ_NONE : IRQ_HANDLED; } static int rx8010_get_time(struct device *dev, struct rtc_time *dt) { struct rx8010_data *rx8010 = dev_get_drvdata(dev); - u8 date[7]; - int flagreg; - int err; + u8 date[RX8010_YEAR - RX8010_SEC + 1]; + int flagreg, err; - flagreg = i2c_smbus_read_byte_data(rx8010->client, RX8010_FLAG); - if (flagreg < 0) - return flagreg; + err = regmap_read(rx8010->regs, RX8010_FLAG, &flagreg); + if (err) + return err; if (flagreg & RX8010_FLAG_VLF) { dev_warn(dev, "Frequency stop detected\n"); return -EINVAL; } - err = i2c_smbus_read_i2c_block_data(rx8010->client, RX8010_SEC, - 7, date); - if (err != 7) - return err < 0 ? err : -EIO; + err = regmap_bulk_read(rx8010->regs, RX8010_SEC, date, sizeof(date)); + if (err) + return err; dt->tm_sec = bcd2bin(date[RX8010_SEC - RX8010_SEC] & 0x7f); dt->tm_min = bcd2bin(date[RX8010_MIN - RX8010_SEC] & 0x7f); @@ -140,22 +137,13 @@ static int rx8010_get_time(struct device *dev, struct rtc_time *dt) static int rx8010_set_time(struct device *dev, struct rtc_time *dt) { struct rx8010_data *rx8010 = dev_get_drvdata(dev); - u8 date[7]; - int ctrl, flagreg; - int ret; - - if ((dt->tm_year < 100) || (dt->tm_year > 199)) - return -EINVAL; + u8 date[RX8010_YEAR - RX8010_SEC + 1]; + int err; /* set STOP bit before changing clock/calendar */ - ctrl = i2c_smbus_read_byte_data(rx8010->client, RX8010_CTRL); - if (ctrl < 0) - return ctrl; - rx8010->ctrlreg = ctrl | RX8010_CTRL_STOP; - ret = i2c_smbus_write_byte_data(rx8010->client, RX8010_CTRL, - rx8010->ctrlreg); - if (ret < 0) - return ret; + err = regmap_set_bits(rx8010->regs, RX8010_CTRL, RX8010_CTRL_STOP); + if (err) + return err; date[RX8010_SEC - RX8010_SEC] = bin2bcd(dt->tm_sec); date[RX8010_MIN - RX8010_SEC] = bin2bcd(dt->tm_min); @@ -165,66 +153,54 @@ static int rx8010_set_time(struct device *dev, struct rtc_time *dt) date[RX8010_YEAR - RX8010_SEC] = bin2bcd(dt->tm_year - 100); date[RX8010_WDAY - RX8010_SEC] = bin2bcd(1 << dt->tm_wday); - ret = i2c_smbus_write_i2c_block_data(rx8010->client, - RX8010_SEC, 7, date); - if (ret < 0) - return ret; + err = regmap_bulk_write(rx8010->regs, RX8010_SEC, date, sizeof(date)); + if (err) + return err; /* clear STOP bit after changing clock/calendar */ - ctrl = i2c_smbus_read_byte_data(rx8010->client, RX8010_CTRL); - if (ctrl < 0) - return ctrl; - rx8010->ctrlreg = ctrl & ~RX8010_CTRL_STOP; - ret = i2c_smbus_write_byte_data(rx8010->client, RX8010_CTRL, - rx8010->ctrlreg); - if (ret < 0) - return ret; + err = regmap_clear_bits(rx8010->regs, RX8010_CTRL, RX8010_CTRL_STOP); + if (err) + return err; - flagreg = i2c_smbus_read_byte_data(rx8010->client, RX8010_FLAG); - if (flagreg < 0) { - return flagreg; - } - - if (flagreg & RX8010_FLAG_VLF) - ret = i2c_smbus_write_byte_data(rx8010->client, RX8010_FLAG, - flagreg & ~RX8010_FLAG_VLF); + err = regmap_clear_bits(rx8010->regs, RX8010_FLAG, RX8010_FLAG_VLF); + if (err) + return err; return 0; } -static int rx8010_init_client(struct i2c_client *client) +static int rx8010_init(struct device *dev) { - struct rx8010_data *rx8010 = i2c_get_clientdata(client); + struct rx8010_data *rx8010 = dev_get_drvdata(dev); u8 ctrl[2]; - int need_clear = 0, err = 0; + int need_clear = 0, err; /* Initialize reserved registers as specified in datasheet */ - err = i2c_smbus_write_byte_data(client, RX8010_RESV17, 0xD8); - if (err < 0) + err = regmap_write(rx8010->regs, RX8010_RESV17, 0xD8); + if (err) return err; - err = i2c_smbus_write_byte_data(client, RX8010_RESV30, 0x00); - if (err < 0) + err = regmap_write(rx8010->regs, RX8010_RESV30, 0x00); + if (err) return err; - err = i2c_smbus_write_byte_data(client, RX8010_RESV31, 0x08); - if (err < 0) + err = regmap_write(rx8010->regs, RX8010_RESV31, 0x08); + if (err) return err; - err = i2c_smbus_write_byte_data(client, RX8010_IRQ, 0x00); - if (err < 0) + err = regmap_write(rx8010->regs, RX8010_IRQ, 0x00); + if (err) return err; - err = i2c_smbus_read_i2c_block_data(rx8010->client, RX8010_FLAG, - 2, ctrl); - if (err != 2) - return err < 0 ? err : -EIO; + err = regmap_bulk_read(rx8010->regs, RX8010_FLAG, ctrl, 2); + if (err) + return err; if (ctrl[0] & RX8010_FLAG_VLF) - dev_warn(&client->dev, "Frequency stop was detected\n"); + dev_warn(dev, "Frequency stop was detected\n"); if (ctrl[0] & RX8010_FLAG_AF) { - dev_warn(&client->dev, "Alarm was detected\n"); + dev_warn(dev, "Alarm was detected\n"); need_clear = 1; } @@ -236,8 +212,8 @@ static int rx8010_init_client(struct i2c_client *client) if (need_clear) { ctrl[0] &= ~(RX8010_FLAG_AF | RX8010_FLAG_TF | RX8010_FLAG_UF); - err = i2c_smbus_write_byte_data(client, RX8010_FLAG, ctrl[0]); - if (err < 0) + err = regmap_write(rx8010->regs, RX8010_FLAG, ctrl[0]); + if (err) return err; } @@ -249,18 +225,16 @@ static int rx8010_init_client(struct i2c_client *client) static int rx8010_read_alarm(struct device *dev, struct rtc_wkalrm *t) { struct rx8010_data *rx8010 = dev_get_drvdata(dev); - struct i2c_client *client = rx8010->client; u8 alarmvals[3]; - int flagreg; - int err; + int flagreg, err; - err = i2c_smbus_read_i2c_block_data(client, RX8010_ALMIN, 3, alarmvals); - if (err != 3) - return err < 0 ? err : -EIO; + err = regmap_bulk_read(rx8010->regs, RX8010_ALMIN, alarmvals, 3); + if (err) + return err; - flagreg = i2c_smbus_read_byte_data(client, RX8010_FLAG); - if (flagreg < 0) - return flagreg; + err = regmap_read(rx8010->regs, RX8010_FLAG, &flagreg); + if (err) + return err; t->time.tm_sec = 0; t->time.tm_min = bcd2bin(alarmvals[0] & 0x7f); @@ -277,55 +251,38 @@ static int rx8010_read_alarm(struct device *dev, struct rtc_wkalrm *t) static int rx8010_set_alarm(struct device *dev, struct rtc_wkalrm *t) { - struct i2c_client *client = to_i2c_client(dev); struct rx8010_data *rx8010 = dev_get_drvdata(dev); u8 alarmvals[3]; - int extreg, flagreg; int err; - flagreg = i2c_smbus_read_byte_data(client, RX8010_FLAG); - if (flagreg < 0) { - return flagreg; - } - if (rx8010->ctrlreg & (RX8010_CTRL_AIE | RX8010_CTRL_UIE)) { rx8010->ctrlreg &= ~(RX8010_CTRL_AIE | RX8010_CTRL_UIE); - err = i2c_smbus_write_byte_data(rx8010->client, RX8010_CTRL, - rx8010->ctrlreg); - if (err < 0) { + err = regmap_write(rx8010->regs, RX8010_CTRL, rx8010->ctrlreg); + if (err) return err; - } } - flagreg &= ~RX8010_FLAG_AF; - err = i2c_smbus_write_byte_data(rx8010->client, RX8010_FLAG, flagreg); - if (err < 0) + err = regmap_clear_bits(rx8010->regs, RX8010_FLAG, RX8010_FLAG_AF); + if (err) return err; alarmvals[0] = bin2bcd(t->time.tm_min); alarmvals[1] = bin2bcd(t->time.tm_hour); alarmvals[2] = bin2bcd(t->time.tm_mday); - err = i2c_smbus_write_i2c_block_data(rx8010->client, RX8010_ALMIN, - 2, alarmvals); - if (err < 0) + err = regmap_bulk_write(rx8010->regs, RX8010_ALMIN, alarmvals, 2); + if (err) return err; - extreg = i2c_smbus_read_byte_data(client, RX8010_EXT); - if (extreg < 0) - return extreg; - - extreg |= RX8010_EXT_WADA; - err = i2c_smbus_write_byte_data(rx8010->client, RX8010_EXT, extreg); - if (err < 0) + err = regmap_clear_bits(rx8010->regs, RX8010_EXT, RX8010_EXT_WADA); + if (err) return err; if (alarmvals[2] == 0) alarmvals[2] |= RX8010_ALARM_AE; - err = i2c_smbus_write_byte_data(rx8010->client, RX8010_ALWDAY, - alarmvals[2]); - if (err < 0) + err = regmap_write(rx8010->regs, RX8010_ALWDAY, alarmvals[2]); + if (err) return err; if (t->enabled) { @@ -335,9 +292,8 @@ static int rx8010_set_alarm(struct device *dev, struct rtc_wkalrm *t) rx8010->ctrlreg |= (RX8010_CTRL_AIE | RX8010_CTRL_UIE); - err = i2c_smbus_write_byte_data(rx8010->client, RX8010_CTRL, - rx8010->ctrlreg); - if (err < 0) + err = regmap_write(rx8010->regs, RX8010_CTRL, rx8010->ctrlreg); + if (err) return err; } @@ -347,11 +303,9 @@ static int rx8010_set_alarm(struct device *dev, struct rtc_wkalrm *t) static int rx8010_alarm_irq_enable(struct device *dev, unsigned int enabled) { - struct i2c_client *client = to_i2c_client(dev); struct rx8010_data *rx8010 = dev_get_drvdata(dev); - int flagreg; - u8 ctrl; int err; + u8 ctrl; ctrl = rx8010->ctrlreg; @@ -367,20 +321,14 @@ static int rx8010_alarm_irq_enable(struct device *dev, ctrl &= ~RX8010_CTRL_AIE; } - flagreg = i2c_smbus_read_byte_data(client, RX8010_FLAG); - if (flagreg < 0) - return flagreg; - - flagreg &= ~RX8010_FLAG_AF; - err = i2c_smbus_write_byte_data(rx8010->client, RX8010_FLAG, flagreg); - if (err < 0) + err = regmap_clear_bits(rx8010->regs, RX8010_FLAG, RX8010_FLAG_AF); + if (err) return err; if (ctrl != rx8010->ctrlreg) { rx8010->ctrlreg = ctrl; - err = i2c_smbus_write_byte_data(rx8010->client, RX8010_CTRL, - rx8010->ctrlreg); - if (err < 0) + err = regmap_write(rx8010->regs, RX8010_CTRL, rx8010->ctrlreg); + if (err) return err; } @@ -390,14 +338,13 @@ static int rx8010_alarm_irq_enable(struct device *dev, static int rx8010_ioctl(struct device *dev, unsigned int cmd, unsigned long arg) { struct rx8010_data *rx8010 = dev_get_drvdata(dev); - int tmp; - int flagreg; + int tmp, flagreg, err; switch (cmd) { case RTC_VL_READ: - flagreg = i2c_smbus_read_byte_data(rx8010->client, RX8010_FLAG); - if (flagreg < 0) - return flagreg; + err = regmap_read(rx8010->regs, RX8010_FLAG, &flagreg); + if (err) + return err; tmp = flagreg & RX8010_FLAG_VLF ? RTC_VL_DATA_INVALID : 0; return put_user(tmp, (unsigned int __user *)arg); @@ -407,65 +354,72 @@ static int rx8010_ioctl(struct device *dev, unsigned int cmd, unsigned long arg) } } -static struct rtc_class_ops rx8010_rtc_ops = { +static const struct rtc_class_ops rx8010_rtc_ops_default = { .read_time = rx8010_get_time, .set_time = rx8010_set_time, .ioctl = rx8010_ioctl, }; -static int rx8010_probe(struct i2c_client *client, - const struct i2c_device_id *id) +static const struct rtc_class_ops rx8010_rtc_ops_alarm = { + .read_time = rx8010_get_time, + .set_time = rx8010_set_time, + .ioctl = rx8010_ioctl, + .read_alarm = rx8010_read_alarm, + .set_alarm = rx8010_set_alarm, + .alarm_irq_enable = rx8010_alarm_irq_enable, +}; + +static const struct regmap_config rx8010_regmap_config = { + .name = "rx8010-rtc", + .reg_bits = 8, + .val_bits = 8, +}; + +static int rx8010_probe(struct i2c_client *client) { - struct i2c_adapter *adapter = client->adapter; + struct device *dev = &client->dev; struct rx8010_data *rx8010; int err = 0; - if (!i2c_check_functionality(adapter, I2C_FUNC_SMBUS_BYTE_DATA - | I2C_FUNC_SMBUS_I2C_BLOCK)) { - dev_err(&adapter->dev, "doesn't support required functionality\n"); - return -EIO; - } - - rx8010 = devm_kzalloc(&client->dev, sizeof(struct rx8010_data), - GFP_KERNEL); + rx8010 = devm_kzalloc(dev, sizeof(*rx8010), GFP_KERNEL); if (!rx8010) return -ENOMEM; - rx8010->client = client; i2c_set_clientdata(client, rx8010); - err = rx8010_init_client(client); + rx8010->regs = devm_regmap_init_i2c(client, &rx8010_regmap_config); + if (IS_ERR(rx8010->regs)) + return PTR_ERR(rx8010->regs); + + err = rx8010_init(dev); if (err) return err; + rx8010->rtc = devm_rtc_allocate_device(dev); + if (IS_ERR(rx8010->rtc)) + return PTR_ERR(rx8010->rtc); + if (client->irq > 0) { - dev_info(&client->dev, "IRQ %d supplied\n", client->irq); - err = devm_request_threaded_irq(&client->dev, client->irq, NULL, + dev_info(dev, "IRQ %d supplied\n", client->irq); + err = devm_request_threaded_irq(dev, client->irq, NULL, rx8010_irq_1_handler, IRQF_TRIGGER_LOW | IRQF_ONESHOT, "rx8010", client); - if (err) { - dev_err(&client->dev, "unable to request IRQ\n"); - client->irq = 0; - } else { - rx8010_rtc_ops.read_alarm = rx8010_read_alarm; - rx8010_rtc_ops.set_alarm = rx8010_set_alarm; - rx8010_rtc_ops.alarm_irq_enable = rx8010_alarm_irq_enable; + dev_err(dev, "unable to request IRQ\n"); + return err; } - } - rx8010->rtc = devm_rtc_device_register(&client->dev, client->name, - &rx8010_rtc_ops, THIS_MODULE); - - if (IS_ERR(rx8010->rtc)) { - dev_err(&client->dev, "unable to register the class device\n"); - return PTR_ERR(rx8010->rtc); + rx8010->rtc->ops = &rx8010_rtc_ops_alarm; + } else { + rx8010->rtc->ops = &rx8010_rtc_ops_default; } rx8010->rtc->max_user_freq = 1; + rx8010->rtc->range_min = RTC_TIMESTAMP_BEGIN_2000; + rx8010->rtc->range_max = RTC_TIMESTAMP_END_2099; - return 0; + return rtc_register_device(rx8010->rtc); } static struct i2c_driver rx8010_driver = { @@ -473,7 +427,7 @@ static struct i2c_driver rx8010_driver = { .name = "rtc-rx8010", .of_match_table = of_match_ptr(rx8010_of_match), }, - .probe = rx8010_probe, + .probe_new = rx8010_probe, .id_table = rx8010_id, }; diff --git a/drivers/rtc/rtc-s3c.c b/drivers/rtc/rtc-s3c.c index e1b50e682fc4..24a41909f049 100644 --- a/drivers/rtc/rtc-s3c.c +++ b/drivers/rtc/rtc-s3c.c @@ -494,13 +494,8 @@ static int s3c_rtc_probe(struct platform_device *pdev) if (info->data->needs_src_clk) { info->rtc_src_clk = devm_clk_get(&pdev->dev, "rtc_src"); if (IS_ERR(info->rtc_src_clk)) { - ret = PTR_ERR(info->rtc_src_clk); - if (ret != -EPROBE_DEFER) - dev_err(&pdev->dev, - "failed to find rtc source clock\n"); - else - dev_dbg(&pdev->dev, - "probe deferred due to missing rtc src clk\n"); + ret = dev_err_probe(&pdev->dev, PTR_ERR(info->rtc_src_clk), + "failed to find rtc source clock\n"); goto err_src_clk; } ret = clk_prepare_enable(info->rtc_src_clk); diff --git a/drivers/rtc/rtc-st-lpc.c b/drivers/rtc/rtc-st-lpc.c index 51041dc08af4..0c65448b85ee 100644 --- a/drivers/rtc/rtc-st-lpc.c +++ b/drivers/rtc/rtc-st-lpc.c @@ -173,7 +173,7 @@ static int st_rtc_set_alarm(struct device *dev, struct rtc_wkalrm *t) return 0; } -static struct rtc_class_ops st_rtc_ops = { +static const struct rtc_class_ops st_rtc_ops = { .read_time = st_rtc_read_time, .set_time = st_rtc_set_time, .read_alarm = st_rtc_read_alarm, diff --git a/fs/ceph/addr.c b/fs/ceph/addr.c index 6ea761c84494..35c83f65475b 100644 --- a/fs/ceph/addr.c +++ b/fs/ceph/addr.c @@ -182,58 +182,15 @@ static int ceph_releasepage(struct page *page, gfp_t g) return !PagePrivate(page); } -/* - * Read some contiguous pages. If we cross a stripe boundary, shorten - * *plen. Return number of bytes read, or error. - */ -static int ceph_sync_readpages(struct ceph_fs_client *fsc, - struct ceph_vino vino, - struct ceph_file_layout *layout, - u64 off, u64 *plen, - u32 truncate_seq, u64 truncate_size, - struct page **pages, int num_pages, - int page_align) -{ - struct ceph_osd_client *osdc = &fsc->client->osdc; - struct ceph_osd_request *req; - int rc = 0; - - dout("readpages on ino %llx.%llx on %llu~%llu\n", vino.ino, - vino.snap, off, *plen); - req = ceph_osdc_new_request(osdc, layout, vino, off, plen, 0, 1, - CEPH_OSD_OP_READ, CEPH_OSD_FLAG_READ, - NULL, truncate_seq, truncate_size, - false); - if (IS_ERR(req)) - return PTR_ERR(req); - - /* it may be a short read due to an object boundary */ - osd_req_op_extent_osd_data_pages(req, 0, - pages, *plen, page_align, false, false); - - dout("readpages final extent is %llu~%llu (%llu bytes align %d)\n", - off, *plen, *plen, page_align); - - rc = ceph_osdc_start_request(osdc, req, false); - if (!rc) - rc = ceph_osdc_wait_request(osdc, req); - - ceph_update_read_latency(&fsc->mdsc->metric, req->r_start_latency, - req->r_end_latency, rc); - - ceph_osdc_put_request(req); - dout("readpages result %d\n", rc); - return rc; -} - -/* - * read a single page, without unlocking it. - */ +/* read a single page, without unlocking it. */ static int ceph_do_readpage(struct file *filp, struct page *page) { struct inode *inode = file_inode(filp); struct ceph_inode_info *ci = ceph_inode(inode); struct ceph_fs_client *fsc = ceph_inode_to_client(inode); + struct ceph_osd_client *osdc = &fsc->client->osdc; + struct ceph_osd_request *req; + struct ceph_vino vino = ceph_vino(inode); int err = 0; u64 off = page_offset(page); u64 len = PAGE_SIZE; @@ -260,19 +217,33 @@ static int ceph_do_readpage(struct file *filp, struct page *page) if (err == 0) return -EINPROGRESS; - dout("readpage inode %p file %p page %p index %lu\n", - inode, filp, page, page->index); - err = ceph_sync_readpages(fsc, ceph_vino(inode), - &ci->i_layout, off, &len, - ci->i_truncate_seq, ci->i_truncate_size, - &page, 1, 0); + dout("readpage ino %llx.%llx file %p off %llu len %llu page %p index %lu\n", + vino.ino, vino.snap, filp, off, len, page, page->index); + req = ceph_osdc_new_request(osdc, &ci->i_layout, vino, off, &len, 0, 1, + CEPH_OSD_OP_READ, CEPH_OSD_FLAG_READ, NULL, + ci->i_truncate_seq, ci->i_truncate_size, + false); + if (IS_ERR(req)) + return PTR_ERR(req); + + osd_req_op_extent_osd_data_pages(req, 0, &page, len, 0, false, false); + + err = ceph_osdc_start_request(osdc, req, false); + if (!err) + err = ceph_osdc_wait_request(osdc, req); + + ceph_update_read_latency(&fsc->mdsc->metric, req->r_start_latency, + req->r_end_latency, err); + + ceph_osdc_put_request(req); + dout("readpage result %d\n", err); + if (err == -ENOENT) err = 0; if (err < 0) { - SetPageError(page); ceph_fscache_readpage_cancel(inode, page); - if (err == -EBLACKLISTED) - fsc->blacklisted = true; + if (err == -EBLOCKLISTED) + fsc->blocklisted = true; goto out; } if (err < PAGE_SIZE) @@ -312,8 +283,8 @@ static void finish_read(struct ceph_osd_request *req) int i; dout("finish_read %p req %p rc %d bytes %d\n", inode, req, rc, bytes); - if (rc == -EBLACKLISTED) - ceph_inode_to_client(inode)->blacklisted = true; + if (rc == -EBLOCKLISTED) + ceph_inode_to_client(inode)->blocklisted = true; /* unlock all pages, zeroing any data we didn't read */ osd_data = osd_req_op_extent_osd_data(req, 0); @@ -619,50 +590,6 @@ static u64 get_writepages_data_length(struct inode *inode, return end > start ? end - start : 0; } -/* - * do a synchronous write on N pages - */ -static int ceph_sync_writepages(struct ceph_fs_client *fsc, - struct ceph_vino vino, - struct ceph_file_layout *layout, - struct ceph_snap_context *snapc, - u64 off, u64 len, - u32 truncate_seq, u64 truncate_size, - struct timespec64 *mtime, - struct page **pages, int num_pages) -{ - struct ceph_osd_client *osdc = &fsc->client->osdc; - struct ceph_osd_request *req; - int rc = 0; - int page_align = off & ~PAGE_MASK; - - req = ceph_osdc_new_request(osdc, layout, vino, off, &len, 0, 1, - CEPH_OSD_OP_WRITE, CEPH_OSD_FLAG_WRITE, - snapc, truncate_seq, truncate_size, - true); - if (IS_ERR(req)) - return PTR_ERR(req); - - /* it may be a short write due to an object boundary */ - osd_req_op_extent_osd_data_pages(req, 0, pages, len, page_align, - false, false); - dout("writepages %llu~%llu (%llu bytes)\n", off, len, len); - - req->r_mtime = *mtime; - rc = ceph_osdc_start_request(osdc, req, true); - if (!rc) - rc = ceph_osdc_wait_request(osdc, req); - - ceph_update_write_latency(&fsc->mdsc->metric, req->r_start_latency, - req->r_end_latency, rc); - - ceph_osdc_put_request(req); - if (rc == 0) - rc = len; - dout("writepages result %d\n", rc); - return rc; -} - /* * Write a single page, but leave the page locked. * @@ -671,20 +598,19 @@ static int ceph_sync_writepages(struct ceph_fs_client *fsc, */ static int writepage_nounlock(struct page *page, struct writeback_control *wbc) { - struct inode *inode; - struct ceph_inode_info *ci; - struct ceph_fs_client *fsc; + struct inode *inode = page->mapping->host; + struct ceph_inode_info *ci = ceph_inode(inode); + struct ceph_fs_client *fsc = ceph_inode_to_client(inode); struct ceph_snap_context *snapc, *oldest; loff_t page_off = page_offset(page); - int err, len = PAGE_SIZE; + int err; + loff_t len = PAGE_SIZE; struct ceph_writeback_ctl ceph_wbc; + struct ceph_osd_client *osdc = &fsc->client->osdc; + struct ceph_osd_request *req; dout("writepage %p idx %lu\n", page, page->index); - inode = page->mapping->host; - ci = ceph_inode(inode); - fsc = ceph_inode_to_client(inode); - /* verify this is a writeable snap context */ snapc = page_snap_context(page); if (!snapc) { @@ -713,7 +639,7 @@ static int writepage_nounlock(struct page *page, struct writeback_control *wbc) if (ceph_wbc.i_size < page_off + len) len = ceph_wbc.i_size - page_off; - dout("writepage %p page %p index %lu on %llu~%u snapc %p seq %lld\n", + dout("writepage %p page %p index %lu on %llu~%llu snapc %p seq %lld\n", inode, page, page->index, page_off, len, snapc, snapc->seq); if (atomic_long_inc_return(&fsc->writeback_count) > @@ -721,11 +647,33 @@ static int writepage_nounlock(struct page *page, struct writeback_control *wbc) set_bdi_congested(inode_to_bdi(inode), BLK_RW_ASYNC); set_page_writeback(page); - err = ceph_sync_writepages(fsc, ceph_vino(inode), - &ci->i_layout, snapc, page_off, len, - ceph_wbc.truncate_seq, - ceph_wbc.truncate_size, - &inode->i_mtime, &page, 1); + req = ceph_osdc_new_request(osdc, &ci->i_layout, ceph_vino(inode), page_off, &len, 0, 1, + CEPH_OSD_OP_WRITE, CEPH_OSD_FLAG_WRITE, snapc, + ceph_wbc.truncate_seq, ceph_wbc.truncate_size, + true); + if (IS_ERR(req)) { + redirty_page_for_writepage(wbc, page); + end_page_writeback(page); + return PTR_ERR(req); + } + + /* it may be a short write due to an object boundary */ + WARN_ON_ONCE(len > PAGE_SIZE); + osd_req_op_extent_osd_data_pages(req, 0, &page, len, 0, false, false); + dout("writepage %llu~%llu (%llu bytes)\n", page_off, len, len); + + req->r_mtime = inode->i_mtime; + err = ceph_osdc_start_request(osdc, req, true); + if (!err) + err = ceph_osdc_wait_request(osdc, req); + + ceph_update_write_latency(&fsc->mdsc->metric, req->r_start_latency, + req->r_end_latency, err); + + ceph_osdc_put_request(req); + if (err == 0) + err = len; + if (err < 0) { struct writeback_control tmp_wbc; if (!wbc) @@ -737,8 +685,8 @@ static int writepage_nounlock(struct page *page, struct writeback_control *wbc) end_page_writeback(page); return err; } - if (err == -EBLACKLISTED) - fsc->blacklisted = true; + if (err == -EBLOCKLISTED) + fsc->blocklisted = true; dout("writepage setting page/mapping error %d %p\n", err, page); mapping_set_error(&inode->i_data, err); @@ -801,8 +749,8 @@ static void writepages_finish(struct ceph_osd_request *req) if (rc < 0) { mapping_set_error(mapping, rc); ceph_set_error_write(ci); - if (rc == -EBLACKLISTED) - fsc->blacklisted = true; + if (rc == -EBLOCKLISTED) + fsc->blocklisted = true; } else { ceph_clear_error_write(ci); } @@ -962,9 +910,8 @@ static int ceph_writepages_start(struct address_space *mapping, max_pages = wsize >> PAGE_SHIFT; get_more_pages: - pvec_pages = pagevec_lookup_range_nr_tag(&pvec, mapping, &index, - end, PAGECACHE_TAG_DIRTY, - max_pages - locked_pages); + pvec_pages = pagevec_lookup_range_tag(&pvec, mapping, &index, + end, PAGECACHE_TAG_DIRTY); dout("pagevec_lookup_range_tag got %d\n", pvec_pages); if (!pvec_pages && !locked_pages) break; @@ -1299,110 +1246,60 @@ static int context_is_writeable_or_written(struct inode *inode, return ret; } -/* - * We are only allowed to write into/dirty the page if the page is - * clean, or already dirty within the same snap context. +/** + * ceph_find_incompatible - find an incompatible context and return it + * @page: page being dirtied * - * called with page locked. - * return success with page locked, - * or any failure (incl -EAGAIN) with page unlocked. + * We are only allowed to write into/dirty a page if the page is + * clean, or already dirty within the same snap context. Returns a + * conflicting context if there is one, NULL if there isn't, or a + * negative error code on other errors. + * + * Must be called with page lock held. */ -static int ceph_update_writeable_page(struct file *file, - loff_t pos, unsigned len, - struct page *page) +static struct ceph_snap_context * +ceph_find_incompatible(struct page *page) { - struct inode *inode = file_inode(file); + struct inode *inode = page->mapping->host; struct ceph_fs_client *fsc = ceph_inode_to_client(inode); struct ceph_inode_info *ci = ceph_inode(inode); - loff_t page_off = pos & PAGE_MASK; - int pos_in_page = pos & ~PAGE_MASK; - int end_in_page = pos_in_page + len; - loff_t i_size; - int r; - struct ceph_snap_context *snapc, *oldest; if (READ_ONCE(fsc->mount_state) == CEPH_MOUNT_SHUTDOWN) { dout(" page %p forced umount\n", page); - unlock_page(page); - return -EIO; + return ERR_PTR(-EIO); } -retry_locked: - /* writepages currently holds page lock, but if we change that later, */ - wait_on_page_writeback(page); + for (;;) { + struct ceph_snap_context *snapc, *oldest; + + wait_on_page_writeback(page); + + snapc = page_snap_context(page); + if (!snapc || snapc == ci->i_head_snapc) + break; - snapc = page_snap_context(page); - if (snapc && snapc != ci->i_head_snapc) { /* * this page is already dirty in another (older) snap * context! is it writeable now? */ oldest = get_oldest_context(inode, NULL, NULL); if (snapc->seq > oldest->seq) { + /* not writeable -- return it for the caller to deal with */ ceph_put_snap_context(oldest); - dout(" page %p snapc %p not current or oldest\n", - page, snapc); - /* - * queue for writeback, and wait for snapc to - * be writeable or written - */ - snapc = ceph_get_snap_context(snapc); - unlock_page(page); - ceph_queue_writeback(inode); - r = wait_event_killable(ci->i_cap_wq, - context_is_writeable_or_written(inode, snapc)); - ceph_put_snap_context(snapc); - if (r == -ERESTARTSYS) - return r; - return -EAGAIN; + dout(" page %p snapc %p not current or oldest\n", page, snapc); + return ceph_get_snap_context(snapc); } ceph_put_snap_context(oldest); /* yay, writeable, do it now (without dropping page lock) */ - dout(" page %p snapc %p not current, but oldest\n", - page, snapc); - if (!clear_page_dirty_for_io(page)) - goto retry_locked; - r = writepage_nounlock(page, NULL); - if (r < 0) - goto fail_unlock; - goto retry_locked; + dout(" page %p snapc %p not current, but oldest\n", page, snapc); + if (clear_page_dirty_for_io(page)) { + int r = writepage_nounlock(page, NULL); + if (r < 0) + return ERR_PTR(r); + } } - - if (PageUptodate(page)) { - dout(" page %p already uptodate\n", page); - return 0; - } - - /* full page? */ - if (pos_in_page == 0 && len == PAGE_SIZE) - return 0; - - /* past end of file? */ - i_size = i_size_read(inode); - - if (page_off >= i_size || - (pos_in_page == 0 && (pos+len) >= i_size && - end_in_page - pos_in_page != PAGE_SIZE)) { - dout(" zeroing %p 0 - %d and %d - %d\n", - page, pos_in_page, end_in_page, (int)PAGE_SIZE); - zero_user_segments(page, - 0, pos_in_page, - end_in_page, PAGE_SIZE); - return 0; - } - - /* we need to read it. */ - r = ceph_do_readpage(file, page); - if (r < 0) { - if (r == -EINPROGRESS) - return -EAGAIN; - goto fail_unlock; - } - goto retry_locked; -fail_unlock: - unlock_page(page); - return r; + return NULL; } /* @@ -1414,26 +1311,78 @@ static int ceph_write_begin(struct file *file, struct address_space *mapping, struct page **pagep, void **fsdata) { struct inode *inode = file_inode(file); - struct page *page; + struct ceph_inode_info *ci = ceph_inode(inode); + struct ceph_snap_context *snapc; + struct page *page = NULL; pgoff_t index = pos >> PAGE_SHIFT; - int r; + int pos_in_page = pos & ~PAGE_MASK; + int r = 0; - do { - /* get a page */ + dout("write_begin file %p inode %p page %p %d~%d\n", file, inode, page, (int)pos, (int)len); + + for (;;) { page = grab_cache_page_write_begin(mapping, index, 0); - if (!page) - return -ENOMEM; + if (!page) { + r = -ENOMEM; + break; + } - dout("write_begin file %p inode %p page %p %d~%d\n", file, - inode, page, (int)pos, (int)len); - - r = ceph_update_writeable_page(file, pos, len, page); - if (r < 0) + snapc = ceph_find_incompatible(page); + if (snapc) { + if (IS_ERR(snapc)) { + r = PTR_ERR(snapc); + break; + } + unlock_page(page); put_page(page); - else - *pagep = page; - } while (r == -EAGAIN); + page = NULL; + ceph_queue_writeback(inode); + r = wait_event_killable(ci->i_cap_wq, + context_is_writeable_or_written(inode, snapc)); + ceph_put_snap_context(snapc); + if (r != 0) + break; + continue; + } + if (PageUptodate(page)) { + dout(" page %p already uptodate\n", page); + break; + } + + /* + * In some cases we don't need to read at all: + * - full page write + * - write that lies completely beyond EOF + * - write that covers the the page from start to EOF or beyond it + */ + if ((pos_in_page == 0 && len == PAGE_SIZE) || + (pos >= i_size_read(inode)) || + (pos_in_page == 0 && (pos + len) >= i_size_read(inode))) { + zero_user_segments(page, 0, pos_in_page, + pos_in_page + len, PAGE_SIZE); + break; + } + + /* + * We need to read it. If we get back -EINPROGRESS, then the page was + * handed off to fscache and it will be unlocked when the read completes. + * Refind the page in that case so we can reacquire the page lock. Otherwise + * we got a hard error or the read was completed synchronously. + */ + r = ceph_do_readpage(file, page); + if (r != -EINPROGRESS) + break; + } + + if (r < 0) { + if (page) { + unlock_page(page); + put_page(page); + } + } else { + *pagep = page; + } return r; } @@ -1522,7 +1471,7 @@ static vm_fault_t ceph_filemap_fault(struct vm_fault *vmf) struct ceph_inode_info *ci = ceph_inode(inode); struct ceph_file_info *fi = vma->vm_file->private_data; struct page *pinned_page = NULL; - loff_t off = vmf->pgoff << PAGE_SHIFT; + loff_t off = (loff_t)vmf->pgoff << PAGE_SHIFT; int want, got, err; sigset_t oldset; vm_fault_t ret = VM_FAULT_SIGBUS; @@ -1668,6 +1617,8 @@ static vm_fault_t ceph_page_mkwrite(struct vm_fault *vmf) inode_inc_iversion_raw(inode); do { + struct ceph_snap_context *snapc; + lock_page(page); if (page_mkwrite_check_truncate(page, inode) < 0) { @@ -1676,13 +1627,26 @@ static vm_fault_t ceph_page_mkwrite(struct vm_fault *vmf) break; } - err = ceph_update_writeable_page(vma->vm_file, off, len, page); - if (err >= 0) { + snapc = ceph_find_incompatible(page); + if (!snapc) { /* success. we'll keep the page locked. */ set_page_dirty(page); ret = VM_FAULT_LOCKED; + break; } - } while (err == -EAGAIN); + + unlock_page(page); + + if (IS_ERR(snapc)) { + ret = VM_FAULT_SIGBUS; + break; + } + + ceph_queue_writeback(inode); + err = wait_event_killable(ci->i_cap_wq, + context_is_writeable_or_written(inode, snapc)); + ceph_put_snap_context(snapc); + } while (err == 0); if (ret == VM_FAULT_LOCKED || ci->i_inline_version != CEPH_INLINE_NONE) { @@ -2039,16 +2003,16 @@ static int __ceph_pool_perm_get(struct ceph_inode_info *ci, if (err >= 0 || err == -ENOENT) have |= POOL_READ; else if (err != -EPERM) { - if (err == -EBLACKLISTED) - fsc->blacklisted = true; + if (err == -EBLOCKLISTED) + fsc->blocklisted = true; goto out_unlock; } if (err2 == 0 || err2 == -EEXIST) have |= POOL_WRITE; else if (err2 != -EPERM) { - if (err2 == -EBLACKLISTED) - fsc->blacklisted = true; + if (err2 == -EBLOCKLISTED) + fsc->blocklisted = true; err = err2; goto out_unlock; } diff --git a/fs/ceph/caps.c b/fs/ceph/caps.c index 034b3f4fdd3a..5027bbdca419 100644 --- a/fs/ceph/caps.c +++ b/fs/ceph/caps.c @@ -1222,36 +1222,27 @@ struct cap_msg_args { }; /* - * Build and send a cap message to the given MDS. - * - * Caller should be holding s_mutex. + * cap struct size + flock buffer size + inline version + inline data size + + * osd_epoch_barrier + oldest_flush_tid */ -static int send_cap_msg(struct cap_msg_args *arg) +#define CAP_MSG_SIZE (sizeof(struct ceph_mds_caps) + \ + 4 + 8 + 4 + 4 + 8 + 4 + 4 + 4 + 8 + 8 + 4) + +/* Marshal up the cap msg to the MDS */ +static void encode_cap_msg(struct ceph_msg *msg, struct cap_msg_args *arg) { struct ceph_mds_caps *fc; - struct ceph_msg *msg; void *p; - size_t extra_len; struct ceph_osd_client *osdc = &arg->session->s_mdsc->fsc->client->osdc; - dout("send_cap_msg %s %llx %llx caps %s wanted %s dirty %s" - " seq %u/%u tid %llu/%llu mseq %u follows %lld size %llu/%llu" - " xattr_ver %llu xattr_len %d\n", ceph_cap_op_name(arg->op), - arg->cid, arg->ino, ceph_cap_string(arg->caps), - ceph_cap_string(arg->wanted), ceph_cap_string(arg->dirty), - arg->seq, arg->issue_seq, arg->flush_tid, arg->oldest_flush_tid, - arg->mseq, arg->follows, arg->size, arg->max_size, - arg->xattr_version, + dout("%s %s %llx %llx caps %s wanted %s dirty %s seq %u/%u tid %llu/%llu mseq %u follows %lld size %llu/%llu xattr_ver %llu xattr_len %d\n", + __func__, ceph_cap_op_name(arg->op), arg->cid, arg->ino, + ceph_cap_string(arg->caps), ceph_cap_string(arg->wanted), + ceph_cap_string(arg->dirty), arg->seq, arg->issue_seq, + arg->flush_tid, arg->oldest_flush_tid, arg->mseq, arg->follows, + arg->size, arg->max_size, arg->xattr_version, arg->xattr_buf ? (int)arg->xattr_buf->vec.iov_len : 0); - /* flock buffer size + inline version + inline data size + - * osd_epoch_barrier + oldest_flush_tid */ - extra_len = 4 + 8 + 4 + 4 + 8 + 4 + 4 + 4 + 8 + 8 + 4; - msg = ceph_msg_new(CEPH_MSG_CLIENT_CAPS, sizeof(*fc) + extra_len, - GFP_NOFS, false); - if (!msg) - return -ENOMEM; - msg->hdr.version = cpu_to_le16(10); msg->hdr.tid = cpu_to_le64(arg->flush_tid); @@ -1323,9 +1314,6 @@ static int send_cap_msg(struct cap_msg_args *arg) /* Advisory flags (version 10) */ ceph_encode_32(&p, arg->flags); - - ceph_con_send(&arg->session->s_con, msg); - return 0; } /* @@ -1454,25 +1442,25 @@ static void __prep_cap(struct cap_msg_args *arg, struct ceph_cap *cap, * * Caller should hold snap_rwsem (read), s_mutex. */ -static void __send_cap(struct ceph_mds_client *mdsc, struct cap_msg_args *arg, - struct ceph_inode_info *ci) +static void __send_cap(struct cap_msg_args *arg, struct ceph_inode_info *ci) { + struct ceph_msg *msg; struct inode *inode = &ci->vfs_inode; - int ret; - ret = send_cap_msg(arg); - if (ret < 0) { - pr_err("error sending cap msg, ino (%llx.%llx) " - "flushing %s tid %llu, requeue\n", + msg = ceph_msg_new(CEPH_MSG_CLIENT_CAPS, CAP_MSG_SIZE, GFP_NOFS, false); + if (!msg) { + pr_err("error allocating cap msg: ino (%llx.%llx) flushing %s tid %llu, requeuing cap.\n", ceph_vinop(inode), ceph_cap_string(arg->dirty), arg->flush_tid); spin_lock(&ci->i_ceph_lock); - __cap_delay_requeue(mdsc, ci); + __cap_delay_requeue(arg->session->s_mdsc, ci); spin_unlock(&ci->i_ceph_lock); + return; } + encode_cap_msg(msg, arg); + ceph_con_send(&arg->session->s_con, msg); ceph_buffer_put(arg->old_xattr_buf); - if (arg->wake) wake_up_all(&ci->i_cap_wq); } @@ -1483,6 +1471,11 @@ static inline int __send_flush_snap(struct inode *inode, u32 mseq, u64 oldest_flush_tid) { struct cap_msg_args arg; + struct ceph_msg *msg; + + msg = ceph_msg_new(CEPH_MSG_CLIENT_CAPS, CAP_MSG_SIZE, GFP_NOFS, false); + if (!msg) + return -ENOMEM; arg.session = session; arg.ino = ceph_vino(inode).ino; @@ -1521,7 +1514,9 @@ static inline int __send_flush_snap(struct inode *inode, arg.flags = 0; arg.wake = false; - return send_cap_msg(&arg); + encode_cap_msg(msg, &arg); + ceph_con_send(&arg.session->s_con, msg); + return 0; } /* @@ -1906,9 +1901,8 @@ bool __ceph_should_report_size(struct ceph_inode_info *ci) void ceph_check_caps(struct ceph_inode_info *ci, int flags, struct ceph_mds_session *session) { - struct ceph_fs_client *fsc = ceph_inode_to_client(&ci->vfs_inode); - struct ceph_mds_client *mdsc = fsc->mdsc; struct inode *inode = &ci->vfs_inode; + struct ceph_mds_client *mdsc = ceph_sb_to_mdsc(inode->i_sb); struct ceph_cap *cap; u64 flush_tid, oldest_flush_tid; int file_wanted, used, cap_used; @@ -1928,12 +1922,24 @@ void ceph_check_caps(struct ceph_inode_info *ci, int flags, retry: spin_lock(&ci->i_ceph_lock); retry_locked: + /* Caps wanted by virtue of active open files. */ file_wanted = __ceph_caps_file_wanted(ci); + + /* Caps which have active references against them */ used = __ceph_caps_used(ci); + + /* + * "issued" represents the current caps that the MDS wants us to have. + * "implemented" is the set that we have been granted, and includes the + * ones that have not yet been returned to the MDS (the "revoking" set, + * usually because they have outstanding references). + */ issued = __ceph_caps_issued(ci, &implemented); revoking = implemented & ~issued; want = file_wanted; + + /* The ones we currently want to retain (may be adjusted below) */ retain = file_wanted | used | CEPH_CAP_PIN; if (!mdsc->stopping && inode->i_nlink > 0) { if (file_wanted) { @@ -2011,6 +2017,10 @@ void ceph_check_caps(struct ceph_inode_info *ci, int flags, /* NOTE: no side-effects allowed, until we take s_mutex */ + /* + * If we have an auth cap, we don't need to consider any + * overlapping caps as used. + */ cap_used = used; if (ci->i_auth_cap && cap != ci->i_auth_cap) cap_used &= ~ci->i_auth_cap->issued; @@ -2148,7 +2158,7 @@ void ceph_check_caps(struct ceph_inode_info *ci, int flags, want, retain, flushing, flush_tid, oldest_flush_tid); spin_unlock(&ci->i_ceph_lock); - __send_cap(mdsc, &arg, ci); + __send_cap(&arg, ci); goto retry; /* retake i_ceph_lock and restart our cap scan. */ } @@ -2222,7 +2232,7 @@ static int try_flush_caps(struct inode *inode, u64 *ptid) flushing, flush_tid, oldest_flush_tid); spin_unlock(&ci->i_ceph_lock); - __send_cap(mdsc, &arg, ci); + __send_cap(&arg, ci); } else { if (!list_empty(&ci->i_cap_flush_list)) { struct ceph_cap_flush *cf = @@ -2436,7 +2446,7 @@ static void __kick_flushing_caps(struct ceph_mds_client *mdsc, (cap->issued | cap->implemented), cf->caps, cf->tid, oldest_flush_tid); spin_unlock(&ci->i_ceph_lock); - __send_cap(mdsc, &arg, ci); + __send_cap(&arg, ci); } else { struct ceph_cap_snap *capsnap = container_of(cf, struct ceph_cap_snap, @@ -4284,13 +4294,30 @@ void __ceph_touch_fmode(struct ceph_inode_info *ci, void ceph_get_fmode(struct ceph_inode_info *ci, int fmode, int count) { - int i; + struct ceph_mds_client *mdsc = ceph_sb_to_mdsc(ci->vfs_inode.i_sb); int bits = (fmode << 1) | 1; + bool is_opened = false; + int i; + + if (count == 1) + atomic64_inc(&mdsc->metric.opened_files); + spin_lock(&ci->i_ceph_lock); for (i = 0; i < CEPH_FILE_MODE_BITS; i++) { if (bits & (1 << i)) ci->i_nr_by_mode[i] += count; + + /* + * If any of the mode ref is larger than 1, + * that means it has been already opened by + * others. Just skip checking the PIN ref. + */ + if (i && ci->i_nr_by_mode[i] > 1) + is_opened = true; } + + if (!is_opened) + percpu_counter_inc(&mdsc->metric.opened_inodes); spin_unlock(&ci->i_ceph_lock); } @@ -4301,15 +4328,32 @@ void ceph_get_fmode(struct ceph_inode_info *ci, int fmode, int count) */ void ceph_put_fmode(struct ceph_inode_info *ci, int fmode, int count) { - int i; + struct ceph_mds_client *mdsc = ceph_sb_to_mdsc(ci->vfs_inode.i_sb); int bits = (fmode << 1) | 1; + bool is_closed = true; + int i; + + if (count == 1) + atomic64_dec(&mdsc->metric.opened_files); + spin_lock(&ci->i_ceph_lock); for (i = 0; i < CEPH_FILE_MODE_BITS; i++) { if (bits & (1 << i)) { BUG_ON(ci->i_nr_by_mode[i] < count); ci->i_nr_by_mode[i] -= count; } + + /* + * If any of the mode ref is not 0 after + * decreased, that means it is still opened + * by others. Just skip checking the PIN ref. + */ + if (i && ci->i_nr_by_mode[i]) + is_closed = false; } + + if (is_closed) + percpu_counter_dec(&mdsc->metric.opened_inodes); spin_unlock(&ci->i_ceph_lock); } diff --git a/fs/ceph/debugfs.c b/fs/ceph/debugfs.c index 3e3fcda9b276..7a8fbe3e4751 100644 --- a/fs/ceph/debugfs.c +++ b/fs/ceph/debugfs.c @@ -148,6 +148,17 @@ static int metric_show(struct seq_file *s, void *p) int nr_caps = 0; s64 total, sum, avg, min, max, sq; + sum = percpu_counter_sum(&m->total_inodes); + seq_printf(s, "item total\n"); + seq_printf(s, "------------------------------------------\n"); + seq_printf(s, "%-35s%lld / %lld\n", "opened files / total inodes", + atomic64_read(&m->opened_files), sum); + seq_printf(s, "%-35s%lld / %lld\n", "pinned i_caps / total inodes", + atomic64_read(&m->total_caps), sum); + seq_printf(s, "%-35s%lld / %lld\n", "opened inodes / total inodes", + percpu_counter_sum(&m->opened_inodes), sum); + + seq_printf(s, "\n"); seq_printf(s, "item total avg_lat(us) min_lat(us) max_lat(us) stdev(us)\n"); seq_printf(s, "-----------------------------------------------------------------------------------\n"); @@ -202,7 +213,8 @@ static int caps_show_cb(struct inode *inode, struct ceph_cap *cap, void *p) { struct seq_file *s = p; - seq_printf(s, "0x%-17llx%-17s%-17s\n", ceph_ino(inode), + seq_printf(s, "0x%-17llx%-3d%-17s%-17s\n", ceph_ino(inode), + cap->session->s_mds, ceph_cap_string(cap->issued), ceph_cap_string(cap->implemented)); return 0; @@ -222,8 +234,8 @@ static int caps_show(struct seq_file *s, void *p) "reserved\t%d\n" "min\t\t%d\n\n", total, avail, used, reserved, min); - seq_printf(s, "ino issued implemented\n"); - seq_printf(s, "-----------------------------------------------\n"); + seq_printf(s, "ino mds issued implemented\n"); + seq_printf(s, "--------------------------------------------------\n"); mutex_lock(&mdsc->mutex); for (i = 0; i < mdsc->max_sessions; i++) { diff --git a/fs/ceph/dir.c b/fs/ceph/dir.c index d72e4a12bb69..a4d48370b2b3 100644 --- a/fs/ceph/dir.c +++ b/fs/ceph/dir.c @@ -38,8 +38,7 @@ static int __dir_lease_try_check(const struct dentry *dentry); static int ceph_d_init(struct dentry *dentry) { struct ceph_dentry_info *di; - struct ceph_fs_client *fsc = ceph_sb_to_client(dentry->d_sb); - struct ceph_mds_client *mdsc = fsc->mdsc; + struct ceph_mds_client *mdsc = ceph_sb_to_mdsc(dentry->d_sb); di = kmem_cache_zalloc(ceph_dentry_cachep, GFP_KERNEL); if (!di) @@ -738,7 +737,7 @@ static struct dentry *ceph_lookup(struct inode *dir, struct dentry *dentry, unsigned int flags) { struct ceph_fs_client *fsc = ceph_sb_to_client(dir->i_sb); - struct ceph_mds_client *mdsc = fsc->mdsc; + struct ceph_mds_client *mdsc = ceph_sb_to_mdsc(dir->i_sb); struct ceph_mds_request *req; int op; int mask; @@ -827,8 +826,7 @@ int ceph_handle_notrace_create(struct inode *dir, struct dentry *dentry) static int ceph_mknod(struct inode *dir, struct dentry *dentry, umode_t mode, dev_t rdev) { - struct ceph_fs_client *fsc = ceph_sb_to_client(dir->i_sb); - struct ceph_mds_client *mdsc = fsc->mdsc; + struct ceph_mds_client *mdsc = ceph_sb_to_mdsc(dir->i_sb); struct ceph_mds_request *req; struct ceph_acl_sec_ctx as_ctx = {}; int err; @@ -889,8 +887,7 @@ static int ceph_create(struct inode *dir, struct dentry *dentry, umode_t mode, static int ceph_symlink(struct inode *dir, struct dentry *dentry, const char *dest) { - struct ceph_fs_client *fsc = ceph_sb_to_client(dir->i_sb); - struct ceph_mds_client *mdsc = fsc->mdsc; + struct ceph_mds_client *mdsc = ceph_sb_to_mdsc(dir->i_sb); struct ceph_mds_request *req; struct ceph_acl_sec_ctx as_ctx = {}; int err; @@ -942,8 +939,7 @@ static int ceph_symlink(struct inode *dir, struct dentry *dentry, static int ceph_mkdir(struct inode *dir, struct dentry *dentry, umode_t mode) { - struct ceph_fs_client *fsc = ceph_sb_to_client(dir->i_sb); - struct ceph_mds_client *mdsc = fsc->mdsc; + struct ceph_mds_client *mdsc = ceph_sb_to_mdsc(dir->i_sb); struct ceph_mds_request *req; struct ceph_acl_sec_ctx as_ctx = {}; int err = -EROFS; @@ -1010,8 +1006,7 @@ static int ceph_mkdir(struct inode *dir, struct dentry *dentry, umode_t mode) static int ceph_link(struct dentry *old_dentry, struct inode *dir, struct dentry *dentry) { - struct ceph_fs_client *fsc = ceph_sb_to_client(dir->i_sb); - struct ceph_mds_client *mdsc = fsc->mdsc; + struct ceph_mds_client *mdsc = ceph_sb_to_mdsc(dir->i_sb); struct ceph_mds_request *req; int err; @@ -1192,8 +1187,7 @@ static int ceph_rename(struct inode *old_dir, struct dentry *old_dentry, struct inode *new_dir, struct dentry *new_dentry, unsigned int flags) { - struct ceph_fs_client *fsc = ceph_sb_to_client(old_dir->i_sb); - struct ceph_mds_client *mdsc = fsc->mdsc; + struct ceph_mds_client *mdsc = ceph_sb_to_mdsc(old_dir->i_sb); struct ceph_mds_request *req; int op = CEPH_MDS_OP_RENAME; int err; diff --git a/fs/ceph/file.c b/fs/ceph/file.c index 3f4c993dfc6f..209535d5b8d3 100644 --- a/fs/ceph/file.c +++ b/fs/ceph/file.c @@ -182,8 +182,7 @@ static void put_bvecs(struct bio_vec *bvecs, int num_bvecs, bool should_dirty) static struct ceph_mds_request * prepare_open_request(struct super_block *sb, int flags, int create_mode) { - struct ceph_fs_client *fsc = ceph_sb_to_client(sb); - struct ceph_mds_client *mdsc = fsc->mdsc; + struct ceph_mds_client *mdsc = ceph_sb_to_mdsc(sb); struct ceph_mds_request *req; int want_auth = USE_ANY_MDS; int op = (flags & O_CREAT) ? CEPH_MDS_OP_CREATE : CEPH_MDS_OP_OPEN; @@ -256,8 +255,6 @@ static int ceph_init_file(struct inode *inode, struct file *file, int fmode) case S_IFDIR: ret = ceph_init_file_info(inode, file, fmode, S_ISDIR(inode->i_mode)); - if (ret) - return ret; break; case S_IFLNK: @@ -285,7 +282,7 @@ static int ceph_init_file(struct inode *inode, struct file *file, int fmode) */ int ceph_renew_caps(struct inode *inode, int fmode) { - struct ceph_mds_client *mdsc = ceph_inode_to_client(inode)->mdsc; + struct ceph_mds_client *mdsc = ceph_sb_to_mdsc(inode->i_sb); struct ceph_inode_info *ci = ceph_inode(inode); struct ceph_mds_request *req; int err, flags, wanted; @@ -865,6 +862,8 @@ static ssize_t ceph_sync_read(struct kiocb *iocb, struct iov_iter *to, size_t page_off; u64 i_size; bool more; + int idx; + size_t left; req = ceph_osdc_new_request(osdc, &ci->i_layout, ci->i_vino, off, &len, 0, 1, @@ -878,29 +877,13 @@ static ssize_t ceph_sync_read(struct kiocb *iocb, struct iov_iter *to, more = len < iov_iter_count(to); - if (unlikely(iov_iter_is_pipe(to))) { - ret = iov_iter_get_pages_alloc(to, &pages, len, - &page_off); - if (ret <= 0) { - ceph_osdc_put_request(req); - ret = -ENOMEM; - break; - } - num_pages = DIV_ROUND_UP(ret + page_off, PAGE_SIZE); - if (ret < len) { - len = ret; - osd_req_op_extent_update(req, 0, len); - more = false; - } - } else { - num_pages = calc_pages_for(off, len); - page_off = off & ~PAGE_MASK; - pages = ceph_alloc_page_vector(num_pages, GFP_KERNEL); - if (IS_ERR(pages)) { - ceph_osdc_put_request(req); - ret = PTR_ERR(pages); - break; - } + num_pages = calc_pages_for(off, len); + page_off = off & ~PAGE_MASK; + pages = ceph_alloc_page_vector(num_pages, GFP_KERNEL); + if (IS_ERR(pages)) { + ceph_osdc_put_request(req); + ret = PTR_ERR(pages); + break; } osd_req_op_extent_osd_data_pages(req, 0, pages, len, page_off, @@ -931,36 +914,27 @@ static ssize_t ceph_sync_read(struct kiocb *iocb, struct iov_iter *to, ret += zlen; } - if (unlikely(iov_iter_is_pipe(to))) { - if (ret > 0) { - iov_iter_advance(to, ret); - off += ret; - } else { - iov_iter_advance(to, 0); + idx = 0; + left = ret > 0 ? ret : 0; + while (left > 0) { + size_t len, copied; + page_off = off & ~PAGE_MASK; + len = min_t(size_t, left, PAGE_SIZE - page_off); + SetPageUptodate(pages[idx]); + copied = copy_page_to_iter(pages[idx++], + page_off, len, to); + off += copied; + left -= copied; + if (copied < len) { + ret = -EFAULT; + break; } - ceph_put_page_vector(pages, num_pages, false); - } else { - int idx = 0; - size_t left = ret > 0 ? ret : 0; - while (left > 0) { - size_t len, copied; - page_off = off & ~PAGE_MASK; - len = min_t(size_t, left, PAGE_SIZE - page_off); - copied = copy_page_to_iter(pages[idx++], - page_off, len, to); - off += copied; - left -= copied; - if (copied < len) { - ret = -EFAULT; - break; - } - } - ceph_release_page_vector(pages, num_pages); } + ceph_release_page_vector(pages, num_pages); if (ret < 0) { - if (ret == -EBLACKLISTED) - fsc->blacklisted = true; + if (ret == -EBLOCKLISTED) + fsc->blocklisted = true; break; } @@ -1052,8 +1026,7 @@ static void ceph_aio_complete_req(struct ceph_osd_request *req) struct inode *inode = req->r_inode; struct ceph_aio_request *aio_req = req->r_priv; struct ceph_osd_data *osd_data = osd_req_op_extent_osd_data(req, 0); - struct ceph_fs_client *fsc = ceph_inode_to_client(inode); - struct ceph_client_metric *metric = &fsc->mdsc->metric; + struct ceph_client_metric *metric = &ceph_sb_to_mdsc(inode->i_sb)->metric; BUG_ON(osd_data->type != CEPH_OSD_DATA_TYPE_BVECS); BUG_ON(!osd_data->num_bvecs); diff --git a/fs/ceph/inode.c b/fs/ceph/inode.c index d163fa96cb40..526faf4778ce 100644 --- a/fs/ceph/inode.c +++ b/fs/ceph/inode.c @@ -42,10 +42,13 @@ static void ceph_inode_work(struct work_struct *work); static int ceph_set_ino_cb(struct inode *inode, void *data) { struct ceph_inode_info *ci = ceph_inode(inode); + struct ceph_mds_client *mdsc = ceph_sb_to_mdsc(inode->i_sb); ci->i_vino = *(struct ceph_vino *)data; inode->i_ino = ceph_vino_to_ino_t(ci->i_vino); inode_set_iversion_raw(inode, 0); + percpu_counter_inc(&mdsc->metric.total_inodes); + return 0; } @@ -538,11 +541,14 @@ void ceph_free_inode(struct inode *inode) void ceph_evict_inode(struct inode *inode) { struct ceph_inode_info *ci = ceph_inode(inode); + struct ceph_mds_client *mdsc = ceph_sb_to_mdsc(inode->i_sb); struct ceph_inode_frag *frag; struct rb_node *n; dout("evict_inode %p ino %llx.%llx\n", inode, ceph_vinop(inode)); + percpu_counter_dec(&mdsc->metric.total_inodes); + truncate_inode_pages_final(&inode->i_data); clear_inode(inode); @@ -558,8 +564,6 @@ void ceph_evict_inode(struct inode *inode) * caps in i_snap_caps. */ if (ci->i_snap_realm) { - struct ceph_mds_client *mdsc = - ceph_inode_to_client(inode)->mdsc; if (ceph_snap(inode) == CEPH_NOSNAP) { struct ceph_snap_realm *realm = ci->i_snap_realm; dout(" dropping residual ref to snap realm %p\n", @@ -739,7 +743,7 @@ int ceph_fill_inode(struct inode *inode, struct page *locked_page, struct ceph_mds_session *session, int cap_fmode, struct ceph_cap_reservation *caps_reservation) { - struct ceph_mds_client *mdsc = ceph_inode_to_client(inode)->mdsc; + struct ceph_mds_client *mdsc = ceph_sb_to_mdsc(inode->i_sb); struct ceph_mds_reply_inode *info = iinfo->in; struct ceph_inode_info *ci = ceph_inode(inode); int issued, new_issued, info_caps; diff --git a/fs/ceph/locks.c b/fs/ceph/locks.c index d6b9166e71e4..048a435a29be 100644 --- a/fs/ceph/locks.c +++ b/fs/ceph/locks.c @@ -63,7 +63,7 @@ static const struct file_lock_operations ceph_fl_lock_ops = { static int ceph_lock_message(u8 lock_type, u16 operation, struct inode *inode, int cmd, u8 wait, struct file_lock *fl) { - struct ceph_mds_client *mdsc = ceph_sb_to_client(inode->i_sb)->mdsc; + struct ceph_mds_client *mdsc = ceph_sb_to_mdsc(inode->i_sb); struct ceph_mds_request *req; int err; u64 length = 0; diff --git a/fs/ceph/mds_client.c b/fs/ceph/mds_client.c index 4a26862d7667..08f1c0c31dc2 100644 --- a/fs/ceph/mds_client.c +++ b/fs/ceph/mds_client.c @@ -3303,7 +3303,7 @@ static void handle_forward(struct ceph_mds_client *mdsc, } static int __decode_session_metadata(void **p, void *end, - bool *blacklisted) + bool *blocklisted) { /* map */ u32 n; @@ -3317,8 +3317,12 @@ static int __decode_session_metadata(void **p, void *end, *p += len; ceph_decode_32_safe(p, end, len, bad); ceph_decode_need(p, end, len, bad); + /* + * Match "blocklisted (blacklisted)" from newer MDSes, + * or "blacklisted" from older MDSes. + */ if (err_str && strnstr(*p, "blacklisted", len)) - *blacklisted = true; + *blocklisted = true; *p += len; } return 0; @@ -3341,7 +3345,7 @@ static void handle_session(struct ceph_mds_session *session, u32 op; u64 seq, features = 0; int wake = 0; - bool blacklisted = false; + bool blocklisted = false; /* decode */ ceph_decode_need(&p, end, sizeof(*h), bad); @@ -3354,7 +3358,7 @@ static void handle_session(struct ceph_mds_session *session, if (msg_version >= 3) { u32 len; /* version >= 2, metadata */ - if (__decode_session_metadata(&p, end, &blacklisted) < 0) + if (__decode_session_metadata(&p, end, &blocklisted) < 0) goto bad; /* version >= 3, feature bits */ ceph_decode_32_safe(&p, end, len, bad); @@ -3445,8 +3449,8 @@ static void handle_session(struct ceph_mds_session *session, session->s_state = CEPH_MDS_SESSION_REJECTED; cleanup_session_requests(mdsc, session); remove_session_caps(session); - if (blacklisted) - mdsc->fsc->blacklisted = true; + if (blocklisted) + mdsc->fsc->blocklisted = true; wake = 2; /* for good measure */ break; @@ -3612,6 +3616,39 @@ static int send_reconnect_partial(struct ceph_reconnect_state *recon_state) return err; } +static struct dentry* d_find_primary(struct inode *inode) +{ + struct dentry *alias, *dn = NULL; + + if (hlist_empty(&inode->i_dentry)) + return NULL; + + spin_lock(&inode->i_lock); + if (hlist_empty(&inode->i_dentry)) + goto out_unlock; + + if (S_ISDIR(inode->i_mode)) { + alias = hlist_entry(inode->i_dentry.first, struct dentry, d_u.d_alias); + if (!IS_ROOT(alias)) + dn = dget(alias); + goto out_unlock; + } + + hlist_for_each_entry(alias, &inode->i_dentry, d_u.d_alias) { + spin_lock(&alias->d_lock); + if (!d_unhashed(alias) && + (ceph_dentry(alias)->flags & CEPH_DENTRY_PRIMARY_LINK)) { + dn = dget_dlock(alias); + } + spin_unlock(&alias->d_lock); + if (dn) + break; + } +out_unlock: + spin_unlock(&inode->i_lock); + return dn; +} + /* * Encode information about a cap for a reconnect with the MDS. */ @@ -3625,13 +3662,32 @@ static int reconnect_caps_cb(struct inode *inode, struct ceph_cap *cap, struct ceph_inode_info *ci = cap->ci; struct ceph_reconnect_state *recon_state = arg; struct ceph_pagelist *pagelist = recon_state->pagelist; - int err; + struct dentry *dentry; + char *path; + int pathlen, err; + u64 pathbase; u64 snap_follows; dout(" adding %p ino %llx.%llx cap %p %lld %s\n", inode, ceph_vinop(inode), cap, cap->cap_id, ceph_cap_string(cap->issued)); + dentry = d_find_primary(inode); + if (dentry) { + /* set pathbase to parent dir when msg_version >= 2 */ + path = ceph_mdsc_build_path(dentry, &pathlen, &pathbase, + recon_state->msg_version >= 2); + dput(dentry); + if (IS_ERR(path)) { + err = PTR_ERR(path); + goto out_err; + } + } else { + path = NULL; + pathlen = 0; + pathbase = 0; + } + spin_lock(&ci->i_ceph_lock); cap->seq = 0; /* reset cap seq */ cap->issue_seq = 0; /* and issue_seq */ @@ -3652,7 +3708,7 @@ static int reconnect_caps_cb(struct inode *inode, struct ceph_cap *cap, rec.v2.wanted = cpu_to_le32(__ceph_caps_wanted(ci)); rec.v2.issued = cpu_to_le32(cap->issued); rec.v2.snaprealm = cpu_to_le64(ci->i_snap_realm->ino); - rec.v2.pathbase = 0; + rec.v2.pathbase = cpu_to_le64(pathbase); rec.v2.flock_len = (__force __le32) ((ci->i_ceph_flags & CEPH_I_ERROR_FILELOCK) ? 0 : 1); } else { @@ -3663,7 +3719,7 @@ static int reconnect_caps_cb(struct inode *inode, struct ceph_cap *cap, ceph_encode_timespec64(&rec.v1.mtime, &inode->i_mtime); ceph_encode_timespec64(&rec.v1.atime, &inode->i_atime); rec.v1.snaprealm = cpu_to_le64(ci->i_snap_realm->ino); - rec.v1.pathbase = 0; + rec.v1.pathbase = cpu_to_le64(pathbase); } if (list_empty(&ci->i_cap_snaps)) { @@ -3725,7 +3781,7 @@ static int reconnect_caps_cb(struct inode *inode, struct ceph_cap *cap, sizeof(struct ceph_filelock); rec.v2.flock_len = cpu_to_le32(struct_len); - struct_len += sizeof(u32) + sizeof(rec.v2); + struct_len += sizeof(u32) + pathlen + sizeof(rec.v2); if (struct_v >= 2) struct_len += sizeof(u64); /* snap_follows */ @@ -3749,7 +3805,7 @@ static int reconnect_caps_cb(struct inode *inode, struct ceph_cap *cap, ceph_pagelist_encode_8(pagelist, 1); ceph_pagelist_encode_32(pagelist, struct_len); } - ceph_pagelist_encode_string(pagelist, NULL, 0); + ceph_pagelist_encode_string(pagelist, path, pathlen); ceph_pagelist_append(pagelist, &rec, sizeof(rec.v2)); ceph_locks_to_pagelist(flocks, pagelist, num_fcntl_locks, num_flock_locks); @@ -3758,39 +3814,20 @@ static int reconnect_caps_cb(struct inode *inode, struct ceph_cap *cap, out_freeflocks: kfree(flocks); } else { - u64 pathbase = 0; - int pathlen = 0; - char *path = NULL; - struct dentry *dentry; - - dentry = d_find_alias(inode); - if (dentry) { - path = ceph_mdsc_build_path(dentry, - &pathlen, &pathbase, 0); - dput(dentry); - if (IS_ERR(path)) { - err = PTR_ERR(path); - goto out_err; - } - rec.v1.pathbase = cpu_to_le64(pathbase); - } - err = ceph_pagelist_reserve(pagelist, sizeof(u64) + sizeof(u32) + pathlen + sizeof(rec.v1)); - if (err) { - goto out_freepath; - } + if (err) + goto out_err; ceph_pagelist_encode_64(pagelist, ceph_ino(inode)); ceph_pagelist_encode_string(pagelist, path, pathlen); ceph_pagelist_append(pagelist, &rec, sizeof(rec.v1)); -out_freepath: - ceph_mdsc_free_path(path, pathlen); } out_err: - if (err >= 0) + ceph_mdsc_free_path(path, pathlen); + if (!err) recon_state->nr_caps++; return err; } @@ -4334,14 +4371,14 @@ static void maybe_recover_session(struct ceph_mds_client *mdsc) if (READ_ONCE(fsc->mount_state) != CEPH_MOUNT_MOUNTED) return; - if (!READ_ONCE(fsc->blacklisted)) + if (!READ_ONCE(fsc->blocklisted)) return; if (fsc->last_auto_reconnect && time_before(jiffies, fsc->last_auto_reconnect + HZ * 60 * 30)) return; - pr_info("auto reconnect after blacklisted\n"); + pr_info("auto reconnect after blocklisted\n"); fsc->last_auto_reconnect = jiffies; ceph_force_reconnect(fsc->sb); } diff --git a/fs/ceph/mds_client.h b/fs/ceph/mds_client.h index 658800605bfb..cbf8af437140 100644 --- a/fs/ceph/mds_client.h +++ b/fs/ceph/mds_client.h @@ -393,7 +393,7 @@ struct ceph_mds_client { struct ceph_mds_session **sessions; /* NULL for mds if no session */ atomic_t num_sessions; - int max_sessions; /* len of s_mds_sessions */ + int max_sessions; /* len of sessions array */ int stopping; /* true if shutting down */ atomic64_t quotarealms_count; /* # realms with quota */ diff --git a/fs/ceph/metric.c b/fs/ceph/metric.c index 2466b261fba2..fee4c4778313 100644 --- a/fs/ceph/metric.c +++ b/fs/ceph/metric.c @@ -192,11 +192,23 @@ int ceph_metric_init(struct ceph_client_metric *m) m->total_metadatas = 0; m->metadata_latency_sum = 0; + atomic64_set(&m->opened_files, 0); + ret = percpu_counter_init(&m->opened_inodes, 0, GFP_KERNEL); + if (ret) + goto err_opened_inodes; + ret = percpu_counter_init(&m->total_inodes, 0, GFP_KERNEL); + if (ret) + goto err_total_inodes; + m->session = NULL; INIT_DELAYED_WORK(&m->delayed_work, metric_delayed_work); return 0; +err_total_inodes: + percpu_counter_destroy(&m->opened_inodes); +err_opened_inodes: + percpu_counter_destroy(&m->i_caps_mis); err_i_caps_mis: percpu_counter_destroy(&m->i_caps_hit); err_i_caps_hit: @@ -212,6 +224,8 @@ void ceph_metric_destroy(struct ceph_client_metric *m) if (!m) return; + percpu_counter_destroy(&m->total_inodes); + percpu_counter_destroy(&m->opened_inodes); percpu_counter_destroy(&m->i_caps_mis); percpu_counter_destroy(&m->i_caps_hit); percpu_counter_destroy(&m->d_lease_mis); diff --git a/fs/ceph/metric.h b/fs/ceph/metric.h index 1d0959d669d7..710f3f1dceab 100644 --- a/fs/ceph/metric.h +++ b/fs/ceph/metric.h @@ -115,6 +115,13 @@ struct ceph_client_metric { ktime_t metadata_latency_min; ktime_t metadata_latency_max; + /* The total number of directories and files that are opened */ + atomic64_t opened_files; + + /* The total number of inodes that have opened files or directories */ + struct percpu_counter opened_inodes; + struct percpu_counter total_inodes; + struct ceph_mds_session *session; struct delayed_work delayed_work; /* delayed work */ }; diff --git a/fs/ceph/quota.c b/fs/ceph/quota.c index cc2c4d40b022..83cb4f26b689 100644 --- a/fs/ceph/quota.c +++ b/fs/ceph/quota.c @@ -12,7 +12,7 @@ void ceph_adjust_quota_realms_count(struct inode *inode, bool inc) { - struct ceph_mds_client *mdsc = ceph_inode_to_client(inode)->mdsc; + struct ceph_mds_client *mdsc = ceph_sb_to_mdsc(inode->i_sb); if (inc) atomic64_inc(&mdsc->quotarealms_count); else @@ -21,8 +21,8 @@ void ceph_adjust_quota_realms_count(struct inode *inode, bool inc) static inline bool ceph_has_realms_with_quotas(struct inode *inode) { - struct ceph_mds_client *mdsc = ceph_inode_to_client(inode)->mdsc; - struct super_block *sb = mdsc->fsc->sb; + struct super_block *sb = inode->i_sb; + struct ceph_mds_client *mdsc = ceph_sb_to_mdsc(sb); struct inode *root = d_inode(sb->s_root); if (atomic64_read(&mdsc->quotarealms_count) > 0) @@ -266,7 +266,7 @@ static struct ceph_snap_realm *get_quota_realm(struct ceph_mds_client *mdsc, static bool ceph_quota_is_same_realm(struct inode *old, struct inode *new) { - struct ceph_mds_client *mdsc = ceph_inode_to_client(old)->mdsc; + struct ceph_mds_client *mdsc = ceph_sb_to_mdsc(old->i_sb); struct ceph_snap_realm *old_realm, *new_realm; bool is_same; @@ -313,7 +313,7 @@ enum quota_check_op { static bool check_quota_exceeded(struct inode *inode, enum quota_check_op op, loff_t delta) { - struct ceph_mds_client *mdsc = ceph_inode_to_client(inode)->mdsc; + struct ceph_mds_client *mdsc = ceph_sb_to_mdsc(inode->i_sb); struct ceph_inode_info *ci; struct ceph_snap_realm *realm, *next; struct inode *in; diff --git a/fs/ceph/snap.c b/fs/ceph/snap.c index 923be9399b21..0da39c16dab4 100644 --- a/fs/ceph/snap.c +++ b/fs/ceph/snap.c @@ -602,7 +602,7 @@ int __ceph_finish_cap_snap(struct ceph_inode_info *ci, struct ceph_cap_snap *capsnap) { struct inode *inode = &ci->vfs_inode; - struct ceph_mds_client *mdsc = ceph_sb_to_client(inode->i_sb)->mdsc; + struct ceph_mds_client *mdsc = ceph_sb_to_mdsc(inode->i_sb); BUG_ON(capsnap->writing); capsnap->size = inode->i_size; diff --git a/fs/ceph/super.c b/fs/ceph/super.c index 7ec0e6d03d10..2516304379d3 100644 --- a/fs/ceph/super.c +++ b/fs/ceph/super.c @@ -1205,14 +1205,13 @@ static int ceph_init_fs_context(struct fs_context *fc) static void ceph_kill_sb(struct super_block *s) { struct ceph_fs_client *fsc = ceph_sb_to_client(s); - dev_t dev = s->s_dev; dout("kill_sb %p\n", s); ceph_mdsc_pre_umount(fsc->mdsc); flush_fs_workqueues(fsc); - generic_shutdown_super(s); + kill_anon_super(s); fsc->client->extra_mon_dispatch = NULL; ceph_fs_debugfs_cleanup(fsc); @@ -1220,7 +1219,6 @@ static void ceph_kill_sb(struct super_block *s) ceph_fscache_unregister_fs(fsc); destroy_fs_client(fsc); - free_anon_bdev(dev); } static struct file_system_type ceph_fs_type = { @@ -1243,13 +1241,13 @@ int ceph_force_reconnect(struct super_block *sb) * see remove_session_caps_cb() */ flush_workqueue(fsc->inode_wq); - /* In case that we were blacklisted. This also reset + /* In case that we were blocklisted. This also reset * all mon/osd connections */ ceph_reset_client_addr(fsc->client); ceph_osdc_clear_abort_err(&fsc->client->osdc); - fsc->blacklisted = false; + fsc->blocklisted = false; fsc->mount_state = CEPH_MOUNT_MOUNTED; if (sb->s_root) { diff --git a/fs/ceph/super.h b/fs/ceph/super.h index a3995ebe0623..482473e4cce1 100644 --- a/fs/ceph/super.h +++ b/fs/ceph/super.h @@ -32,7 +32,7 @@ #define CEPH_BLOCK_SHIFT 22 /* 4 MB */ #define CEPH_BLOCK (1 << CEPH_BLOCK_SHIFT) -#define CEPH_MOUNT_OPT_CLEANRECOVER (1<<1) /* auto reonnect (clean mode) after blacklisted */ +#define CEPH_MOUNT_OPT_CLEANRECOVER (1<<1) /* auto reonnect (clean mode) after blocklisted */ #define CEPH_MOUNT_OPT_DIRSTAT (1<<4) /* `cat dirname` for stats */ #define CEPH_MOUNT_OPT_RBYTES (1<<5) /* dir st_bytes = rbytes */ #define CEPH_MOUNT_OPT_NOASYNCREADDIR (1<<7) /* no dcache readdir */ @@ -109,7 +109,7 @@ struct ceph_fs_client { unsigned long mount_state; unsigned long last_auto_reconnect; - bool blacklisted; + bool blocklisted; bool have_copy_from2; @@ -160,7 +160,8 @@ struct ceph_cap { int issued; /* latest, from the mds */ int implemented; /* implemented superset of issued (for revocation) */ - int mds, mds_wanted; + int mds; /* mds index for this cap */ + int mds_wanted; /* caps wanted from this mds */ }; /* caps to release */ struct { @@ -451,6 +452,12 @@ ceph_sb_to_client(const struct super_block *sb) return (struct ceph_fs_client *)sb->s_fs_info; } +static inline struct ceph_mds_client * +ceph_sb_to_mdsc(const struct super_block *sb) +{ + return (struct ceph_mds_client *)ceph_sb_to_client(sb)->mdsc; +} + static inline struct ceph_vino ceph_vino(const struct inode *inode) { diff --git a/fs/ceph/xattr.c b/fs/ceph/xattr.c index ffc370538781..2a3981337077 100644 --- a/fs/ceph/xattr.c +++ b/fs/ceph/xattr.c @@ -116,7 +116,8 @@ static ssize_t ceph_vxattrcb_layout(struct ceph_inode_info *ci, char *val, * NULL terminates however, so call it on a temporary buffer and then memcpy * the result into place. */ -static int ceph_fmt_xattr(char *val, size_t size, const char *fmt, ...) +static __printf(3, 4) +int ceph_fmt_xattr(char *val, size_t size, const char *fmt, ...) { int ret; va_list args; diff --git a/fs/io-wq.c b/fs/io-wq.c index 0a182f1333e8..7cb3b4cb9b11 100644 --- a/fs/io-wq.c +++ b/fs/io-wq.c @@ -18,6 +18,7 @@ #include #include #include +#include #include "io-wq.h" @@ -429,14 +430,10 @@ static void io_wq_switch_mm(struct io_worker *worker, struct io_wq_work *work) mmput(worker->mm); worker->mm = NULL; } - if (!work->mm) - return; - if (mmget_not_zero(work->mm)) { - kthread_use_mm(work->mm); - worker->mm = work->mm; - /* hang on to this mm */ - work->mm = NULL; + if (mmget_not_zero(work->identity->mm)) { + kthread_use_mm(work->identity->mm); + worker->mm = work->identity->mm; return; } @@ -448,9 +445,11 @@ static inline void io_wq_switch_blkcg(struct io_worker *worker, struct io_wq_work *work) { #ifdef CONFIG_BLK_CGROUP - if (work->blkcg_css != worker->blkcg_css) { - kthread_associate_blkcg(work->blkcg_css); - worker->blkcg_css = work->blkcg_css; + if (!(work->flags & IO_WQ_WORK_BLKCG)) + return; + if (work->identity->blkcg_css != worker->blkcg_css) { + kthread_associate_blkcg(work->identity->blkcg_css); + worker->blkcg_css = work->identity->blkcg_css; } #endif } @@ -458,9 +457,9 @@ static inline void io_wq_switch_blkcg(struct io_worker *worker, static void io_wq_switch_creds(struct io_worker *worker, struct io_wq_work *work) { - const struct cred *old_creds = override_creds(work->creds); + const struct cred *old_creds = override_creds(work->identity->creds); - worker->cur_creds = work->creds; + worker->cur_creds = work->identity->creds; if (worker->saved_creds) put_cred(old_creds); /* creds set by previous switch */ else @@ -470,20 +469,26 @@ static void io_wq_switch_creds(struct io_worker *worker, static void io_impersonate_work(struct io_worker *worker, struct io_wq_work *work) { - if (work->files && current->files != work->files) { + if ((work->flags & IO_WQ_WORK_FILES) && + current->files != work->identity->files) { task_lock(current); - current->files = work->files; - current->nsproxy = work->nsproxy; + current->files = work->identity->files; + current->nsproxy = work->identity->nsproxy; task_unlock(current); } - if (work->fs && current->fs != work->fs) - current->fs = work->fs; - if (work->mm != worker->mm) + if ((work->flags & IO_WQ_WORK_FS) && current->fs != work->identity->fs) + current->fs = work->identity->fs; + if ((work->flags & IO_WQ_WORK_MM) && work->identity->mm != worker->mm) io_wq_switch_mm(worker, work); - if (worker->cur_creds != work->creds) + if ((work->flags & IO_WQ_WORK_CREDS) && + worker->cur_creds != work->identity->creds) io_wq_switch_creds(worker, work); - current->signal->rlim[RLIMIT_FSIZE].rlim_cur = work->fsize; + current->signal->rlim[RLIMIT_FSIZE].rlim_cur = work->identity->fsize; io_wq_switch_blkcg(worker, work); +#ifdef CONFIG_AUDIT + current->loginuid = work->identity->loginuid; + current->sessionid = work->identity->sessionid; +#endif } static void io_assign_current_work(struct io_worker *worker, @@ -496,6 +501,11 @@ static void io_assign_current_work(struct io_worker *worker, cond_resched(); } +#ifdef CONFIG_AUDIT + current->loginuid = KUIDT_INIT(AUDIT_UID_UNSET); + current->sessionid = AUDIT_SID_UNSET; +#endif + spin_lock_irq(&worker->lock); worker->cur_work = work; spin_unlock_irq(&worker->lock); @@ -676,6 +686,7 @@ static bool create_io_worker(struct io_wq *wq, struct io_wqe *wqe, int index) kfree(worker); return false; } + kthread_bind_mask(worker->task, cpumask_of_node(wqe->node)); raw_spin_lock_irq(&wqe->lock); hlist_nulls_add_head_rcu(&worker->nulls_node, &wqe->free_list); diff --git a/fs/io-wq.h b/fs/io-wq.h index 84bcf6a85523..be21c500c925 100644 --- a/fs/io-wq.h +++ b/fs/io-wq.h @@ -1,6 +1,8 @@ #ifndef INTERNAL_IO_WQ_H #define INTERNAL_IO_WQ_H +#include + struct io_wq; enum { @@ -10,6 +12,12 @@ enum { IO_WQ_WORK_NO_CANCEL = 8, IO_WQ_WORK_CONCURRENT = 16, + IO_WQ_WORK_FILES = 32, + IO_WQ_WORK_FS = 64, + IO_WQ_WORK_MM = 128, + IO_WQ_WORK_CREDS = 256, + IO_WQ_WORK_BLKCG = 512, + IO_WQ_HASH_SHIFT = 24, /* upper 8 bits are used for hash key */ }; @@ -85,15 +93,7 @@ static inline void wq_list_del(struct io_wq_work_list *list, struct io_wq_work { struct io_wq_work_node list; - struct files_struct *files; - struct mm_struct *mm; -#ifdef CONFIG_BLK_CGROUP - struct cgroup_subsys_state *blkcg_css; -#endif - const struct cred *creds; - struct nsproxy *nsproxy; - struct fs_struct *fs; - unsigned long fsize; + struct io_identity *identity; unsigned flags; }; diff --git a/fs/io_uring.c b/fs/io_uring.c index a8f039a04037..6232d85c63ee 100644 --- a/fs/io_uring.c +++ b/fs/io_uring.c @@ -81,6 +81,7 @@ #include #include #include +#include #define CREATE_TRACE_POINTS #include @@ -327,6 +328,11 @@ struct io_ring_ctx { const struct cred *creds; +#ifdef CONFIG_AUDIT + kuid_t loginuid; + unsigned int sessionid; +#endif + struct completion ref_comp; struct completion sq_thread_comp; @@ -574,7 +580,6 @@ enum { REQ_F_NOWAIT_BIT, REQ_F_LINK_TIMEOUT_BIT, REQ_F_ISREG_BIT, - REQ_F_COMP_LOCKED_BIT, REQ_F_NEED_CLEANUP_BIT, REQ_F_POLLED_BIT, REQ_F_BUFFER_SELECTED_BIT, @@ -613,8 +618,6 @@ enum { REQ_F_LINK_TIMEOUT = BIT(REQ_F_LINK_TIMEOUT_BIT), /* regular file */ REQ_F_ISREG = BIT(REQ_F_ISREG_BIT), - /* completion under lock */ - REQ_F_COMP_LOCKED = BIT(REQ_F_COMP_LOCKED_BIT), /* needs cleanup */ REQ_F_NEED_CLEANUP = BIT(REQ_F_NEED_CLEANUP_BIT), /* already went through poll handler */ @@ -732,8 +735,6 @@ struct io_submit_state { }; struct io_op_def { - /* needs current->mm setup, does mm access */ - unsigned needs_mm : 1; /* needs req->file assigned */ unsigned needs_file : 1; /* don't fail if file grab fails */ @@ -744,10 +745,6 @@ struct io_op_def { unsigned unbound_nonreg_file : 1; /* opcode is not supported by this kernel */ unsigned not_supported : 1; - /* needs file table */ - unsigned file_table : 1; - /* needs ->fs */ - unsigned needs_fs : 1; /* set if opcode supports polled "wait" */ unsigned pollin : 1; unsigned pollout : 1; @@ -757,45 +754,42 @@ struct io_op_def { unsigned needs_fsize : 1; /* must always have async data allocated */ unsigned needs_async_data : 1; - /* needs blkcg context, issues async io potentially */ - unsigned needs_blkcg : 1; /* size of async data needed, if any */ unsigned short async_size; + unsigned work_flags; }; -static const struct io_op_def io_op_defs[] __read_mostly = { +static const struct io_op_def io_op_defs[] = { [IORING_OP_NOP] = {}, [IORING_OP_READV] = { - .needs_mm = 1, .needs_file = 1, .unbound_nonreg_file = 1, .pollin = 1, .buffer_select = 1, .needs_async_data = 1, - .needs_blkcg = 1, .async_size = sizeof(struct io_async_rw), + .work_flags = IO_WQ_WORK_MM | IO_WQ_WORK_BLKCG, }, [IORING_OP_WRITEV] = { - .needs_mm = 1, .needs_file = 1, .hash_reg_file = 1, .unbound_nonreg_file = 1, .pollout = 1, .needs_fsize = 1, .needs_async_data = 1, - .needs_blkcg = 1, .async_size = sizeof(struct io_async_rw), + .work_flags = IO_WQ_WORK_MM | IO_WQ_WORK_BLKCG, }, [IORING_OP_FSYNC] = { .needs_file = 1, - .needs_blkcg = 1, + .work_flags = IO_WQ_WORK_BLKCG, }, [IORING_OP_READ_FIXED] = { .needs_file = 1, .unbound_nonreg_file = 1, .pollin = 1, - .needs_blkcg = 1, .async_size = sizeof(struct io_async_rw), + .work_flags = IO_WQ_WORK_BLKCG, }, [IORING_OP_WRITE_FIXED] = { .needs_file = 1, @@ -803,8 +797,8 @@ static const struct io_op_def io_op_defs[] __read_mostly = { .unbound_nonreg_file = 1, .pollout = 1, .needs_fsize = 1, - .needs_blkcg = 1, .async_size = sizeof(struct io_async_rw), + .work_flags = IO_WQ_WORK_BLKCG, }, [IORING_OP_POLL_ADD] = { .needs_file = 1, @@ -813,137 +807,123 @@ static const struct io_op_def io_op_defs[] __read_mostly = { [IORING_OP_POLL_REMOVE] = {}, [IORING_OP_SYNC_FILE_RANGE] = { .needs_file = 1, - .needs_blkcg = 1, + .work_flags = IO_WQ_WORK_BLKCG, }, [IORING_OP_SENDMSG] = { - .needs_mm = 1, .needs_file = 1, .unbound_nonreg_file = 1, - .needs_fs = 1, .pollout = 1, .needs_async_data = 1, - .needs_blkcg = 1, .async_size = sizeof(struct io_async_msghdr), + .work_flags = IO_WQ_WORK_MM | IO_WQ_WORK_BLKCG | + IO_WQ_WORK_FS, }, [IORING_OP_RECVMSG] = { - .needs_mm = 1, .needs_file = 1, .unbound_nonreg_file = 1, - .needs_fs = 1, .pollin = 1, .buffer_select = 1, .needs_async_data = 1, - .needs_blkcg = 1, .async_size = sizeof(struct io_async_msghdr), + .work_flags = IO_WQ_WORK_MM | IO_WQ_WORK_BLKCG | + IO_WQ_WORK_FS, }, [IORING_OP_TIMEOUT] = { - .needs_mm = 1, .needs_async_data = 1, .async_size = sizeof(struct io_timeout_data), + .work_flags = IO_WQ_WORK_MM, }, [IORING_OP_TIMEOUT_REMOVE] = {}, [IORING_OP_ACCEPT] = { - .needs_mm = 1, .needs_file = 1, .unbound_nonreg_file = 1, - .file_table = 1, .pollin = 1, + .work_flags = IO_WQ_WORK_MM | IO_WQ_WORK_FILES, }, [IORING_OP_ASYNC_CANCEL] = {}, [IORING_OP_LINK_TIMEOUT] = { - .needs_mm = 1, .needs_async_data = 1, .async_size = sizeof(struct io_timeout_data), + .work_flags = IO_WQ_WORK_MM, }, [IORING_OP_CONNECT] = { - .needs_mm = 1, .needs_file = 1, .unbound_nonreg_file = 1, .pollout = 1, .needs_async_data = 1, .async_size = sizeof(struct io_async_connect), + .work_flags = IO_WQ_WORK_MM, }, [IORING_OP_FALLOCATE] = { .needs_file = 1, .needs_fsize = 1, - .needs_blkcg = 1, + .work_flags = IO_WQ_WORK_BLKCG, }, [IORING_OP_OPENAT] = { - .file_table = 1, - .needs_fs = 1, - .needs_blkcg = 1, + .work_flags = IO_WQ_WORK_FILES | IO_WQ_WORK_BLKCG | + IO_WQ_WORK_FS, }, [IORING_OP_CLOSE] = { .needs_file = 1, .needs_file_no_error = 1, - .file_table = 1, - .needs_blkcg = 1, + .work_flags = IO_WQ_WORK_FILES | IO_WQ_WORK_BLKCG, }, [IORING_OP_FILES_UPDATE] = { - .needs_mm = 1, - .file_table = 1, + .work_flags = IO_WQ_WORK_FILES | IO_WQ_WORK_MM, }, [IORING_OP_STATX] = { - .needs_mm = 1, - .needs_fs = 1, - .file_table = 1, - .needs_blkcg = 1, + .work_flags = IO_WQ_WORK_FILES | IO_WQ_WORK_MM | + IO_WQ_WORK_FS | IO_WQ_WORK_BLKCG, }, [IORING_OP_READ] = { - .needs_mm = 1, .needs_file = 1, .unbound_nonreg_file = 1, .pollin = 1, .buffer_select = 1, - .needs_blkcg = 1, .async_size = sizeof(struct io_async_rw), + .work_flags = IO_WQ_WORK_MM | IO_WQ_WORK_BLKCG, }, [IORING_OP_WRITE] = { - .needs_mm = 1, .needs_file = 1, .unbound_nonreg_file = 1, .pollout = 1, .needs_fsize = 1, - .needs_blkcg = 1, .async_size = sizeof(struct io_async_rw), + .work_flags = IO_WQ_WORK_MM | IO_WQ_WORK_BLKCG, }, [IORING_OP_FADVISE] = { .needs_file = 1, - .needs_blkcg = 1, + .work_flags = IO_WQ_WORK_BLKCG, }, [IORING_OP_MADVISE] = { - .needs_mm = 1, - .needs_blkcg = 1, + .work_flags = IO_WQ_WORK_MM | IO_WQ_WORK_BLKCG, }, [IORING_OP_SEND] = { - .needs_mm = 1, .needs_file = 1, .unbound_nonreg_file = 1, .pollout = 1, - .needs_blkcg = 1, + .work_flags = IO_WQ_WORK_MM | IO_WQ_WORK_BLKCG, }, [IORING_OP_RECV] = { - .needs_mm = 1, .needs_file = 1, .unbound_nonreg_file = 1, .pollin = 1, .buffer_select = 1, - .needs_blkcg = 1, + .work_flags = IO_WQ_WORK_MM | IO_WQ_WORK_BLKCG, }, [IORING_OP_OPENAT2] = { - .file_table = 1, - .needs_fs = 1, - .needs_blkcg = 1, + .work_flags = IO_WQ_WORK_FILES | IO_WQ_WORK_FS | + IO_WQ_WORK_BLKCG, }, [IORING_OP_EPOLL_CTL] = { .unbound_nonreg_file = 1, - .file_table = 1, + .work_flags = IO_WQ_WORK_FILES, }, [IORING_OP_SPLICE] = { .needs_file = 1, .hash_reg_file = 1, .unbound_nonreg_file = 1, - .needs_blkcg = 1, + .work_flags = IO_WQ_WORK_BLKCG, }, [IORING_OP_PROVIDE_BUFFERS] = {}, [IORING_OP_REMOVE_BUFFERS] = {}, @@ -963,8 +943,8 @@ static void __io_complete_rw(struct io_kiocb *req, long res, long res2, struct io_comp_state *cs); static void io_cqring_fill_event(struct io_kiocb *req, long res); static void io_put_req(struct io_kiocb *req); +static void io_put_req_deferred(struct io_kiocb *req, int nr); static void io_double_put_req(struct io_kiocb *req); -static void __io_double_put_req(struct io_kiocb *req); static struct io_kiocb *io_prep_linked_timeout(struct io_kiocb *req); static void __io_queue_linked_timeout(struct io_kiocb *req); static void io_queue_linked_timeout(struct io_kiocb *req); @@ -986,7 +966,7 @@ static int io_setup_async_rw(struct io_kiocb *req, const struct iovec *iovec, static struct kmem_cache *req_cachep; -static const struct file_operations io_uring_fops __read_mostly; +static const struct file_operations io_uring_fops; struct sock *io_uring_get_socket(struct file *file) { @@ -1034,7 +1014,7 @@ static int __io_sq_thread_acquire_mm(struct io_ring_ctx *ctx) static int io_sq_thread_acquire_mm(struct io_ring_ctx *ctx, struct io_kiocb *req) { - if (!io_op_defs[req->opcode].needs_mm) + if (!(io_op_defs[req->opcode].work_flags & IO_WQ_WORK_MM)) return 0; return __io_sq_thread_acquire_mm(ctx); } @@ -1065,17 +1045,49 @@ static inline void req_set_fail_links(struct io_kiocb *req) req->flags |= REQ_F_FAIL_LINK; } +/* + * None of these are dereferenced, they are simply used to check if any of + * them have changed. If we're under current and check they are still the + * same, we're fine to grab references to them for actual out-of-line use. + */ +static void io_init_identity(struct io_identity *id) +{ + id->files = current->files; + id->mm = current->mm; +#ifdef CONFIG_BLK_CGROUP + rcu_read_lock(); + id->blkcg_css = blkcg_css(); + rcu_read_unlock(); +#endif + id->creds = current_cred(); + id->nsproxy = current->nsproxy; + id->fs = current->fs; + id->fsize = rlimit(RLIMIT_FSIZE); +#ifdef CONFIG_AUDIT + id->loginuid = current->loginuid; + id->sessionid = current->sessionid; +#endif + refcount_set(&id->count, 1); +} + /* * Note: must call io_req_init_async() for the first time you * touch any members of io_wq_work. */ static inline void io_req_init_async(struct io_kiocb *req) { + struct io_uring_task *tctx = current->io_uring; + if (req->flags & REQ_F_WORK_INITIALIZED) return; memset(&req->work, 0, sizeof(req->work)); req->flags |= REQ_F_WORK_INITIALIZED; + + /* Grab a ref if this isn't our static identity */ + req->work.identity = tctx->identity; + if (tctx->identity != &tctx->__identity) + refcount_inc(&req->work.identity->count); } static inline bool io_async_submit(struct io_ring_ctx *ctx) @@ -1181,53 +1193,169 @@ static void __io_commit_cqring(struct io_ring_ctx *ctx) } } -/* - * Returns true if we need to defer file table putting. This can only happen - * from the error path with REQ_F_COMP_LOCKED set. - */ -static bool io_req_clean_work(struct io_kiocb *req) +static void io_put_identity(struct io_uring_task *tctx, struct io_kiocb *req) +{ + if (req->work.identity == &tctx->__identity) + return; + if (refcount_dec_and_test(&req->work.identity->count)) + kfree(req->work.identity); +} + +static void io_req_clean_work(struct io_kiocb *req) { if (!(req->flags & REQ_F_WORK_INITIALIZED)) - return false; + return; req->flags &= ~REQ_F_WORK_INITIALIZED; - if (req->work.mm) { - mmdrop(req->work.mm); - req->work.mm = NULL; + if (req->work.flags & IO_WQ_WORK_MM) { + mmdrop(req->work.identity->mm); + req->work.flags &= ~IO_WQ_WORK_MM; } #ifdef CONFIG_BLK_CGROUP - if (req->work.blkcg_css) - css_put(req->work.blkcg_css); -#endif - if (req->work.creds) { - put_cred(req->work.creds); - req->work.creds = NULL; + if (req->work.flags & IO_WQ_WORK_BLKCG) { + css_put(req->work.identity->blkcg_css); + req->work.flags &= ~IO_WQ_WORK_BLKCG; } - if (req->work.fs) { - struct fs_struct *fs = req->work.fs; +#endif + if (req->work.flags & IO_WQ_WORK_CREDS) { + put_cred(req->work.identity->creds); + req->work.flags &= ~IO_WQ_WORK_CREDS; + } + if (req->work.flags & IO_WQ_WORK_FS) { + struct fs_struct *fs = req->work.identity->fs; - if (req->flags & REQ_F_COMP_LOCKED) - return true; - - spin_lock(&req->work.fs->lock); + spin_lock(&req->work.identity->fs->lock); if (--fs->users) fs = NULL; - spin_unlock(&req->work.fs->lock); + spin_unlock(&req->work.identity->fs->lock); if (fs) free_fs_struct(fs); - req->work.fs = NULL; + req->work.flags &= ~IO_WQ_WORK_FS; } - return false; + io_put_identity(req->task->io_uring, req); +} + +/* + * Create a private copy of io_identity, since some fields don't match + * the current context. + */ +static bool io_identity_cow(struct io_kiocb *req) +{ + struct io_uring_task *tctx = current->io_uring; + const struct cred *creds = NULL; + struct io_identity *id; + + if (req->work.flags & IO_WQ_WORK_CREDS) + creds = req->work.identity->creds; + + id = kmemdup(req->work.identity, sizeof(*id), GFP_KERNEL); + if (unlikely(!id)) { + req->work.flags |= IO_WQ_WORK_CANCEL; + return false; + } + + /* + * We can safely just re-init the creds we copied Either the field + * matches the current one, or we haven't grabbed it yet. The only + * exception is ->creds, through registered personalities, so handle + * that one separately. + */ + io_init_identity(id); + if (creds) + req->work.identity->creds = creds; + + /* add one for this request */ + refcount_inc(&id->count); + + /* drop old identity, assign new one. one ref for req, one for tctx */ + if (req->work.identity != tctx->identity && + refcount_sub_and_test(2, &req->work.identity->count)) + kfree(req->work.identity); + + req->work.identity = id; + tctx->identity = id; + return true; +} + +static bool io_grab_identity(struct io_kiocb *req) +{ + const struct io_op_def *def = &io_op_defs[req->opcode]; + struct io_identity *id = req->work.identity; + struct io_ring_ctx *ctx = req->ctx; + + if (def->needs_fsize && id->fsize != rlimit(RLIMIT_FSIZE)) + return false; + + if (!(req->work.flags & IO_WQ_WORK_FILES) && + (def->work_flags & IO_WQ_WORK_FILES) && + !(req->flags & REQ_F_NO_FILE_TABLE)) { + if (id->files != current->files || + id->nsproxy != current->nsproxy) + return false; + atomic_inc(&id->files->count); + get_nsproxy(id->nsproxy); + req->flags |= REQ_F_INFLIGHT; + + spin_lock_irq(&ctx->inflight_lock); + list_add(&req->inflight_entry, &ctx->inflight_list); + spin_unlock_irq(&ctx->inflight_lock); + req->work.flags |= IO_WQ_WORK_FILES; + } +#ifdef CONFIG_BLK_CGROUP + if (!(req->work.flags & IO_WQ_WORK_BLKCG) && + (def->work_flags & IO_WQ_WORK_BLKCG)) { + rcu_read_lock(); + if (id->blkcg_css != blkcg_css()) { + rcu_read_unlock(); + return false; + } + /* + * This should be rare, either the cgroup is dying or the task + * is moving cgroups. Just punt to root for the handful of ios. + */ + if (css_tryget_online(id->blkcg_css)) + req->work.flags |= IO_WQ_WORK_BLKCG; + rcu_read_unlock(); + } +#endif + if (!(req->work.flags & IO_WQ_WORK_CREDS)) { + if (id->creds != current_cred()) + return false; + get_cred(id->creds); + req->work.flags |= IO_WQ_WORK_CREDS; + } +#ifdef CONFIG_AUDIT + if (!uid_eq(current->loginuid, id->loginuid) || + current->sessionid != id->sessionid) + return false; +#endif + if (!(req->work.flags & IO_WQ_WORK_FS) && + (def->work_flags & IO_WQ_WORK_FS)) { + if (current->fs != id->fs) + return false; + spin_lock(&id->fs->lock); + if (!id->fs->in_exec) { + id->fs->users++; + req->work.flags |= IO_WQ_WORK_FS; + } else { + req->work.flags |= IO_WQ_WORK_CANCEL; + } + spin_unlock(¤t->fs->lock); + } + + return true; } static void io_prep_async_work(struct io_kiocb *req) { const struct io_op_def *def = &io_op_defs[req->opcode]; struct io_ring_ctx *ctx = req->ctx; + struct io_identity *id; io_req_init_async(req); + id = req->work.identity; if (req->flags & REQ_F_ISREG) { if (def->hash_reg_file || (ctx->flags & IORING_SETUP_IOPOLL)) @@ -1236,50 +1364,24 @@ static void io_prep_async_work(struct io_kiocb *req) if (def->unbound_nonreg_file) req->work.flags |= IO_WQ_WORK_UNBOUND; } - if (!req->work.files && io_op_defs[req->opcode].file_table && - !(req->flags & REQ_F_NO_FILE_TABLE)) { - req->work.files = get_files_struct(current); - get_nsproxy(current->nsproxy); - req->work.nsproxy = current->nsproxy; - req->flags |= REQ_F_INFLIGHT; - spin_lock_irq(&ctx->inflight_lock); - list_add(&req->inflight_entry, &ctx->inflight_list); - spin_unlock_irq(&ctx->inflight_lock); + /* ->mm can never change on us */ + if (!(req->work.flags & IO_WQ_WORK_MM) && + (def->work_flags & IO_WQ_WORK_MM)) { + mmgrab(id->mm); + req->work.flags |= IO_WQ_WORK_MM; } - if (!req->work.mm && def->needs_mm) { - mmgrab(current->mm); - req->work.mm = current->mm; - } -#ifdef CONFIG_BLK_CGROUP - if (!req->work.blkcg_css && def->needs_blkcg) { - rcu_read_lock(); - req->work.blkcg_css = blkcg_css(); - /* - * This should be rare, either the cgroup is dying or the task - * is moving cgroups. Just punt to root for the handful of ios. - */ - if (!css_tryget_online(req->work.blkcg_css)) - req->work.blkcg_css = NULL; - rcu_read_unlock(); - } -#endif - if (!req->work.creds) - req->work.creds = get_current_cred(); - if (!req->work.fs && def->needs_fs) { - spin_lock(¤t->fs->lock); - if (!current->fs->in_exec) { - req->work.fs = current->fs; - req->work.fs->users++; - } else { - req->work.flags |= IO_WQ_WORK_CANCEL; - } - spin_unlock(¤t->fs->lock); - } - if (def->needs_fsize) - req->work.fsize = rlimit(RLIMIT_FSIZE); - else - req->work.fsize = RLIM_INFINITY; + + /* if we fail grabbing identity, we must COW, regrab, and retry */ + if (io_grab_identity(req)) + return; + + if (!io_identity_cow(req)) + return; + + /* can't fail at this point */ + if (!io_grab_identity(req)) + WARN_ON(1); } static void io_prep_async_link(struct io_kiocb *req) @@ -1325,9 +1427,8 @@ static void io_kill_timeout(struct io_kiocb *req) atomic_set(&req->ctx->cq_timeouts, atomic_read(&req->ctx->cq_timeouts) + 1); list_del_init(&req->timeout.list); - req->flags |= REQ_F_COMP_LOCKED; io_cqring_fill_event(req, 0); - io_put_req(req); + io_put_req_deferred(req, 1); } } @@ -1378,8 +1479,7 @@ static void __io_queue_deferred(struct io_ring_ctx *ctx) if (link) { __io_queue_linked_timeout(link); /* drop submission reference */ - link->flags |= REQ_F_COMP_LOCKED; - io_put_req(link); + io_put_req_deferred(link, 1); } kfree(de); } while (!list_empty(&ctx->defer_list)); @@ -1471,8 +1571,9 @@ static inline bool io_match_files(struct io_kiocb *req, { if (!files) return true; - if (req->flags & REQ_F_WORK_INITIALIZED) - return req->work.files == files; + if ((req->flags & REQ_F_WORK_INITIALIZED) && + (req->work.flags & IO_WQ_WORK_FILES)) + return req->work.identity->files == files; return false; } @@ -1606,13 +1707,19 @@ static void io_submit_flush_completions(struct io_comp_state *cs) req = list_first_entry(&cs->list, struct io_kiocb, compl.list); list_del(&req->compl.list); __io_cqring_fill_event(req, req->result, req->compl.cflags); - if (!(req->flags & REQ_F_LINK_HEAD)) { - req->flags |= REQ_F_COMP_LOCKED; - io_put_req(req); - } else { + + /* + * io_free_req() doesn't care about completion_lock unless one + * of these flags is set. REQ_F_WORK_INITIALIZED is in the list + * because of a potential deadlock with req->work.fs->lock + */ + if (req->flags & (REQ_F_FAIL_LINK|REQ_F_LINK_TIMEOUT + |REQ_F_WORK_INITIALIZED)) { spin_unlock_irq(&ctx->completion_lock); io_put_req(req); spin_lock_irq(&ctx->completion_lock); + } else { + io_put_req(req); } } io_commit_cqring(ctx); @@ -1699,7 +1806,7 @@ static inline void io_put_file(struct io_kiocb *req, struct file *file, fput(file); } -static bool io_dismantle_req(struct io_kiocb *req) +static void io_dismantle_req(struct io_kiocb *req) { io_clean_op(req); @@ -1708,15 +1815,17 @@ static bool io_dismantle_req(struct io_kiocb *req) if (req->file) io_put_file(req, req->file, (req->flags & REQ_F_FIXED_FILE)); - return io_req_clean_work(req); + io_req_clean_work(req); } -static void __io_free_req_finish(struct io_kiocb *req) +static void __io_free_req(struct io_kiocb *req) { struct io_uring_task *tctx = req->task->io_uring; struct io_ring_ctx *ctx = req->ctx; - atomic_long_inc(&tctx->req_complete); + io_dismantle_req(req); + + percpu_counter_dec(&tctx->inflight); if (tctx->in_idle) wake_up(&tctx->wait); put_task_struct(req->task); @@ -1728,39 +1837,6 @@ static void __io_free_req_finish(struct io_kiocb *req) percpu_ref_put(&ctx->refs); } -static void io_req_task_file_table_put(struct callback_head *cb) -{ - struct io_kiocb *req = container_of(cb, struct io_kiocb, task_work); - struct fs_struct *fs = req->work.fs; - - spin_lock(&req->work.fs->lock); - if (--fs->users) - fs = NULL; - spin_unlock(&req->work.fs->lock); - if (fs) - free_fs_struct(fs); - req->work.fs = NULL; - __io_free_req_finish(req); -} - -static void __io_free_req(struct io_kiocb *req) -{ - if (!io_dismantle_req(req)) { - __io_free_req_finish(req); - } else { - int ret; - - init_task_work(&req->task_work, io_req_task_file_table_put); - ret = task_work_add(req->task, &req->task_work, TWA_RESUME); - if (unlikely(ret)) { - struct task_struct *tsk; - - tsk = io_wq_get_task(req->ctx->io_wq); - task_work_add(tsk, &req->task_work, 0); - } - } -} - static bool io_link_cancel_timeout(struct io_kiocb *req) { struct io_timeout_data *io = req->async_data; @@ -1772,7 +1848,7 @@ static bool io_link_cancel_timeout(struct io_kiocb *req) io_cqring_fill_event(req, -ECANCELED); io_commit_cqring(ctx); req->flags &= ~REQ_F_LINK_HEAD; - io_put_req(req); + io_put_req_deferred(req, 1); return true; } @@ -1791,7 +1867,6 @@ static bool __io_kill_linked_timeout(struct io_kiocb *req) return false; list_del_init(&link->link_list); - link->flags |= REQ_F_COMP_LOCKED; wake_ev = io_link_cancel_timeout(link); req->flags &= ~REQ_F_LINK_TIMEOUT; return wake_ev; @@ -1800,17 +1875,12 @@ static bool __io_kill_linked_timeout(struct io_kiocb *req) static void io_kill_linked_timeout(struct io_kiocb *req) { struct io_ring_ctx *ctx = req->ctx; + unsigned long flags; bool wake_ev; - if (!(req->flags & REQ_F_COMP_LOCKED)) { - unsigned long flags; - - spin_lock_irqsave(&ctx->completion_lock, flags); - wake_ev = __io_kill_linked_timeout(req); - spin_unlock_irqrestore(&ctx->completion_lock, flags); - } else { - wake_ev = __io_kill_linked_timeout(req); - } + spin_lock_irqsave(&ctx->completion_lock, flags); + wake_ev = __io_kill_linked_timeout(req); + spin_unlock_irqrestore(&ctx->completion_lock, flags); if (wake_ev) io_cqring_ev_posted(ctx); @@ -1850,28 +1920,29 @@ static void __io_fail_links(struct io_kiocb *req) trace_io_uring_fail_link(req, link); io_cqring_fill_event(link, -ECANCELED); - link->flags |= REQ_F_COMP_LOCKED; - __io_double_put_req(link); - req->flags &= ~REQ_F_LINK_TIMEOUT; + + /* + * It's ok to free under spinlock as they're not linked anymore, + * but avoid REQ_F_WORK_INITIALIZED because it may deadlock on + * work.fs->lock. + */ + if (link->flags & REQ_F_WORK_INITIALIZED) + io_put_req_deferred(link, 2); + else + io_double_put_req(link); } io_commit_cqring(ctx); - io_cqring_ev_posted(ctx); } static void io_fail_links(struct io_kiocb *req) { struct io_ring_ctx *ctx = req->ctx; + unsigned long flags; - if (!(req->flags & REQ_F_COMP_LOCKED)) { - unsigned long flags; - - spin_lock_irqsave(&ctx->completion_lock, flags); - __io_fail_links(req); - spin_unlock_irqrestore(&ctx->completion_lock, flags); - } else { - __io_fail_links(req); - } + spin_lock_irqsave(&ctx->completion_lock, flags); + __io_fail_links(req); + spin_unlock_irqrestore(&ctx->completion_lock, flags); io_cqring_ev_posted(ctx); } @@ -2033,7 +2104,9 @@ static void io_req_free_batch_finish(struct io_ring_ctx *ctx, if (rb->to_free) __io_req_free_batch_flush(ctx, rb); if (rb->task) { - atomic_long_add(rb->task_refs, &rb->task->io_uring->req_complete); + struct io_uring_task *tctx = rb->task->io_uring; + + percpu_counter_sub(&tctx->inflight, rb->task_refs); put_task_struct_many(rb->task, rb->task_refs); rb->task = NULL; } @@ -2050,7 +2123,9 @@ static void io_req_free_batch(struct req_batch *rb, struct io_kiocb *req) if (req->task != rb->task) { if (rb->task) { - atomic_long_add(rb->task_refs, &rb->task->io_uring->req_complete); + struct io_uring_task *tctx = rb->task->io_uring; + + percpu_counter_sub(&tctx->inflight, rb->task_refs); put_task_struct_many(rb->task, rb->task_refs); } rb->task = req->task; @@ -2058,7 +2133,7 @@ static void io_req_free_batch(struct req_batch *rb, struct io_kiocb *req) } rb->task_refs++; - WARN_ON_ONCE(io_dismantle_req(req)); + io_dismantle_req(req); rb->reqs[rb->to_free++] = req; if (unlikely(rb->to_free == ARRAY_SIZE(rb->reqs))) __io_req_free_batch_flush(req->ctx, rb); @@ -2085,6 +2160,34 @@ static void io_put_req(struct io_kiocb *req) io_free_req(req); } +static void io_put_req_deferred_cb(struct callback_head *cb) +{ + struct io_kiocb *req = container_of(cb, struct io_kiocb, task_work); + + io_free_req(req); +} + +static void io_free_req_deferred(struct io_kiocb *req) +{ + int ret; + + init_task_work(&req->task_work, io_put_req_deferred_cb); + ret = io_req_task_work_add(req, true); + if (unlikely(ret)) { + struct task_struct *tsk; + + tsk = io_wq_get_task(req->ctx->io_wq); + task_work_add(tsk, &req->task_work, 0); + wake_up_process(tsk); + } +} + +static inline void io_put_req_deferred(struct io_kiocb *req, int refs) +{ + if (refcount_sub_and_test(refs, &req->refs)) + io_free_req_deferred(req); +} + static struct io_wq_work *io_steal_work(struct io_kiocb *req) { struct io_kiocb *nxt; @@ -2101,17 +2204,6 @@ static struct io_wq_work *io_steal_work(struct io_kiocb *req) return nxt ? &nxt->work : NULL; } -/* - * Must only be used if we don't need to care about links, usually from - * within the completion handling itself. - */ -static void __io_double_put_req(struct io_kiocb *req) -{ - /* drop both submit and complete references */ - if (refcount_sub_and_test(2, &req->refs)) - __io_free_req(req); -} - static void io_double_put_req(struct io_kiocb *req) { /* drop both submit and complete references */ @@ -2601,7 +2693,7 @@ static struct file *__io_file_get(struct io_submit_state *state, int fd) static bool io_bdev_nowait(struct block_device *bdev) { #ifdef CONFIG_BLOCK - return !bdev || queue_is_mq(bdev_get_queue(bdev)); + return !bdev || blk_queue_nowait(bdev_get_queue(bdev)); #else return true; #endif @@ -4128,7 +4220,7 @@ static int io_close(struct io_kiocb *req, bool force_nonblock, } /* No ->flush() or already async, safely close from here */ - ret = filp_close(close->put_file, req->work.files); + ret = filp_close(close->put_file, req->work.identity->files); if (ret < 0) req_set_fail_links(req); fput(close->put_file); @@ -4849,10 +4941,9 @@ static void io_poll_task_handler(struct io_kiocb *req, struct io_kiocb **nxt) hash_del(&req->hash_node); io_poll_complete(req, req->result, 0); - req->flags |= REQ_F_COMP_LOCKED; - *nxt = io_put_req_find_next(req); spin_unlock_irq(&ctx->completion_lock); + *nxt = io_put_req_find_next(req); io_cqring_ev_posted(ctx); } @@ -4921,6 +5012,8 @@ static void __io_queue_proc(struct io_poll_iocb *poll, struct io_poll_table *pt, * for write). Setup a separate io_poll_iocb if this happens. */ if (unlikely(poll->head)) { + struct io_poll_iocb *poll_one = poll; + /* already have a 2nd entry, fail a third attempt */ if (*poll_ptr) { pt->error = -EINVAL; @@ -4931,7 +5024,7 @@ static void __io_queue_proc(struct io_poll_iocb *poll, struct io_poll_table *pt, pt->error = -ENOMEM; return; } - io_init_poll_iocb(poll, req->poll.events, io_poll_double_wake); + io_init_poll_iocb(poll, poll_one->events, io_poll_double_wake); refcount_inc(&req->refs); poll->wait.private = req; *poll_ptr = poll; @@ -5148,9 +5241,8 @@ static bool io_poll_remove_one(struct io_kiocb *req) if (do_complete) { io_cqring_fill_event(req, -ECANCELED); io_commit_cqring(req->ctx); - req->flags |= REQ_F_COMP_LOCKED; req_set_fail_links(req); - io_put_req(req); + io_put_req_deferred(req, 1); } return do_complete; @@ -5332,9 +5424,8 @@ static int __io_timeout_cancel(struct io_kiocb *req) list_del_init(&req->timeout.list); req_set_fail_links(req); - req->flags |= REQ_F_COMP_LOCKED; io_cqring_fill_event(req, -ECANCELED); - io_put_req(req); + io_put_req_deferred(req, 1); return 0; } @@ -5744,9 +5835,9 @@ static void io_req_drop_files(struct io_kiocb *req) wake_up(&ctx->inflight_wait); spin_unlock_irqrestore(&ctx->inflight_lock, flags); req->flags &= ~REQ_F_INFLIGHT; - put_files_struct(req->work.files); - put_nsproxy(req->work.nsproxy); - req->work.files = NULL; + put_files_struct(req->work.identity->files); + put_nsproxy(req->work.identity->nsproxy); + req->work.flags &= ~IO_WQ_WORK_FILES; } static void __io_clean_op(struct io_kiocb *req) @@ -6104,14 +6195,15 @@ static void __io_queue_sqe(struct io_kiocb *req, struct io_comp_state *cs) again: linked_timeout = io_prep_linked_timeout(req); - if ((req->flags & REQ_F_WORK_INITIALIZED) && req->work.creds && - req->work.creds != current_cred()) { + if ((req->flags & REQ_F_WORK_INITIALIZED) && req->work.identity->creds && + req->work.identity->creds != current_cred()) { if (old_creds) revert_creds(old_creds); - if (old_creds == req->work.creds) + if (old_creds == req->work.identity->creds) old_creds = NULL; /* restored original creds */ else - old_creds = override_creds(req->work.creds); + old_creds = override_creds(req->work.identity->creds); + req->work.flags |= IO_WQ_WORK_CREDS; } ret = io_issue_sqe(req, true, cs); @@ -6414,11 +6506,17 @@ static int io_init_req(struct io_ring_ctx *ctx, struct io_kiocb *req, id = READ_ONCE(sqe->personality); if (id) { + struct io_identity *iod; + io_req_init_async(req); - req->work.creds = idr_find(&ctx->personality_idr, id); - if (unlikely(!req->work.creds)) + iod = idr_find(&ctx->personality_idr, id); + if (unlikely(!iod)) return -EINVAL; - get_cred(req->work.creds); + refcount_inc(&iod->count); + io_put_identity(current->io_uring, req); + get_cred(iod->creds); + req->work.identity = iod; + req->work.flags |= IO_WQ_WORK_CREDS; } /* same numerical values with corresponding REQ_F_*, safe to copy */ @@ -6451,7 +6549,7 @@ static int io_submit_sqes(struct io_ring_ctx *ctx, unsigned int nr) if (!percpu_ref_tryget_many(&ctx->refs, nr)) return -EAGAIN; - atomic_long_add(nr, ¤t->io_uring->req_issue); + percpu_counter_add(¤t->io_uring->inflight, nr); refcount_add(nr, ¤t->usage); io_submit_state_start(&state, ctx, nr); @@ -6493,10 +6591,12 @@ static int io_submit_sqes(struct io_ring_ctx *ctx, unsigned int nr) if (unlikely(submitted != nr)) { int ref_used = (submitted == -EAGAIN) ? 0 : submitted; + struct io_uring_task *tctx = current->io_uring; + int unused = nr - ref_used; - percpu_ref_put_many(&ctx->refs, nr - ref_used); - atomic_long_sub(nr - ref_used, ¤t->io_uring->req_issue); - put_task_struct_many(current, nr - ref_used); + percpu_ref_put_many(&ctx->refs, unused); + percpu_counter_sub(&tctx->inflight, unused); + put_task_struct_many(current, unused); } if (link) io_queue_link_head(link, &state.comp); @@ -6676,6 +6776,10 @@ static int io_sq_thread(void *data) old_cred = override_creds(ctx->creds); } io_sq_thread_associate_blkcg(ctx, &cur_css); +#ifdef CONFIG_AUDIT + current->loginuid = ctx->loginuid; + current->sessionid = ctx->sessionid; +#endif ret |= __io_sq_thread(ctx, start_jiffies, cap_entries); @@ -7310,7 +7414,7 @@ static int io_sqe_files_register(struct io_ring_ctx *ctx, void __user *arg, spin_lock_init(&file_data->lock); nr_tables = DIV_ROUND_UP(nr_args, IORING_MAX_FILES_TABLE); - file_data->table = kcalloc(nr_tables, sizeof(file_data->table), + file_data->table = kcalloc(nr_tables, sizeof(*file_data->table), GFP_KERNEL); if (!file_data->table) goto out_free; @@ -7321,6 +7425,7 @@ static int io_sqe_files_register(struct io_ring_ctx *ctx, void __user *arg, if (io_sqe_alloc_file_tables(file_data, nr_tables, nr_args)) goto out_ref; + ctx->file_data = file_data; for (i = 0; i < nr_args; i++, ctx->nr_user_files++) { struct fixed_file_table *table; @@ -7355,7 +7460,6 @@ static int io_sqe_files_register(struct io_ring_ctx *ctx, void __user *arg, table->files[index] = file; } - ctx->file_data = file_data; ret = io_sqe_files_scm(ctx); if (ret) { io_sqe_files_unregister(ctx); @@ -7388,6 +7492,7 @@ static int io_sqe_files_register(struct io_ring_ctx *ctx, void __user *arg, out_free: kfree(file_data->table); kfree(file_data); + ctx->file_data = NULL; return ret; } @@ -7613,17 +7718,24 @@ static int io_init_wq_offload(struct io_ring_ctx *ctx, static int io_uring_alloc_task_context(struct task_struct *task) { struct io_uring_task *tctx; + int ret; tctx = kmalloc(sizeof(*tctx), GFP_KERNEL); if (unlikely(!tctx)) return -ENOMEM; + ret = percpu_counter_init(&tctx->inflight, 0, GFP_KERNEL); + if (unlikely(ret)) { + kfree(tctx); + return ret; + } + xa_init(&tctx->xa); init_waitqueue_head(&tctx->wait); tctx->last = NULL; tctx->in_idle = 0; - atomic_long_set(&tctx->req_issue, 0); - atomic_long_set(&tctx->req_complete, 0); + io_init_identity(&tctx->__identity); + tctx->identity = &tctx->__identity; task->io_uring = tctx; return 0; } @@ -7633,6 +7745,10 @@ void __io_uring_free(struct task_struct *tsk) struct io_uring_task *tctx = tsk->io_uring; WARN_ON_ONCE(!xa_empty(&tctx->xa)); + WARN_ON_ONCE(refcount_read(&tctx->identity->count) != 1); + if (tctx->identity != &tctx->__identity) + kfree(tctx->identity); + percpu_counter_destroy(&tctx->inflight); kfree(tctx); tsk->io_uring = NULL; } @@ -8209,11 +8325,14 @@ static int io_uring_fasync(int fd, struct file *file, int on) static int io_remove_personalities(int id, void *p, void *data) { struct io_ring_ctx *ctx = data; - const struct cred *cred; + struct io_identity *iod; - cred = idr_remove(&ctx->personality_idr, id); - if (cred) - put_cred(cred); + iod = idr_remove(&ctx->personality_idr, id); + if (iod) { + put_cred(iod->creds); + if (refcount_dec_and_test(&iod->count)) + kfree(iod); + } return 0; } @@ -8285,7 +8404,8 @@ static bool io_wq_files_match(struct io_wq_work *work, void *data) { struct files_struct *files = data; - return !files || work->files == files; + return !files || ((work->flags & IO_WQ_WORK_FILES) && + work->identity->files == files); } /* @@ -8440,7 +8560,8 @@ static bool io_uring_cancel_files(struct io_ring_ctx *ctx, spin_lock_irq(&ctx->inflight_lock); list_for_each_entry(req, &ctx->inflight_list, inflight_entry) { - if (files && req->work.files != files) + if (files && (req->work.flags & IO_WQ_WORK_FILES) && + req->work.identity->files != files) continue; /* req is being completed, ignore */ if (!refcount_inc_not_zero(&req->refs)) @@ -8612,12 +8733,6 @@ void __io_uring_files_cancel(struct files_struct *files) } } -static inline bool io_uring_task_idle(struct io_uring_task *tctx) -{ - return atomic_long_read(&tctx->req_issue) == - atomic_long_read(&tctx->req_complete); -} - /* * Find any io_uring fd that this task has registered or done IO on, and cancel * requests. @@ -8626,14 +8741,16 @@ void __io_uring_task_cancel(void) { struct io_uring_task *tctx = current->io_uring; DEFINE_WAIT(wait); - long completions; + s64 inflight; /* make sure overflow events are dropped */ tctx->in_idle = true; - while (!io_uring_task_idle(tctx)) { + do { /* read completions before cancelations */ - completions = atomic_long_read(&tctx->req_complete); + inflight = percpu_counter_sum(&tctx->inflight); + if (!inflight) + break; __io_uring_files_cancel(NULL); prepare_to_wait(&tctx->wait, &wait, TASK_UNINTERRUPTIBLE); @@ -8642,12 +8759,10 @@ void __io_uring_task_cancel(void) * If we've seen completions, retry. This avoids a race where * a completion comes in before we did prepare_to_wait(). */ - if (completions != atomic_long_read(&tctx->req_complete)) + if (inflight != percpu_counter_sum(&tctx->inflight)) continue; - if (io_uring_task_idle(tctx)) - break; schedule(); - } + } while (1); finish_wait(&tctx->wait, &wait); tctx->in_idle = false; @@ -9113,7 +9228,10 @@ static int io_uring_create(unsigned entries, struct io_uring_params *p, ctx->compat = in_compat_syscall(); ctx->user = user; ctx->creds = get_current_cred(); - +#ifdef CONFIG_AUDIT + ctx->loginuid = current->loginuid; + ctx->sessionid = current->sessionid; +#endif ctx->sqo_task = get_task_struct(current); /* @@ -9281,23 +9399,33 @@ static int io_probe(struct io_ring_ctx *ctx, void __user *arg, unsigned nr_args) static int io_register_personality(struct io_ring_ctx *ctx) { - const struct cred *creds = get_current_cred(); - int id; + struct io_identity *id; + int ret; - id = idr_alloc_cyclic(&ctx->personality_idr, (void *) creds, 1, - USHRT_MAX, GFP_KERNEL); - if (id < 0) - put_cred(creds); - return id; + id = kmalloc(sizeof(*id), GFP_KERNEL); + if (unlikely(!id)) + return -ENOMEM; + + io_init_identity(id); + id->creds = get_current_cred(); + + ret = idr_alloc_cyclic(&ctx->personality_idr, id, 1, USHRT_MAX, GFP_KERNEL); + if (ret < 0) { + put_cred(id->creds); + kfree(id); + } + return ret; } static int io_unregister_personality(struct io_ring_ctx *ctx, unsigned id) { - const struct cred *old_creds; + struct io_identity *iod; - old_creds = idr_remove(&ctx->personality_idr, id); - if (old_creds) { - put_cred(old_creds); + iod = idr_remove(&ctx->personality_idr, id); + if (iod) { + put_cred(iod->creds); + if (refcount_dec_and_test(&iod->count)) + kfree(iod); return 0; } diff --git a/fs/lockd/mon.c b/fs/lockd/mon.c index 1eabd91870e6..1d9488cf0534 100644 --- a/fs/lockd/mon.c +++ b/fs/lockd/mon.c @@ -417,7 +417,7 @@ void nsm_release(struct nsm_handle *nsm) /* * XDR functions for NSM. * - * See http://www.opengroup.org/ for details on the Network + * See https://www.opengroup.org/ for details on the Network * Status Monitor wire protocol. */ diff --git a/fs/nfs/fs_context.c b/fs/nfs/fs_context.c index 222afba70bc0..29ec8b09a52d 100644 --- a/fs/nfs/fs_context.c +++ b/fs/nfs/fs_context.c @@ -94,6 +94,7 @@ enum { static const struct constant_table nfs_param_enums_local_lock[] = { { "all", Opt_local_lock_all }, { "flock", Opt_local_lock_flock }, + { "posix", Opt_local_lock_posix }, { "none", Opt_local_lock_none }, {} }; diff --git a/fs/nfs/namespace.c b/fs/nfs/namespace.c index 6b063227e34e..2bcbe38afe2e 100644 --- a/fs/nfs/namespace.c +++ b/fs/nfs/namespace.c @@ -32,9 +32,9 @@ int nfs_mountpoint_expiry_timeout = 500 * HZ; /* * nfs_path - reconstruct the path given an arbitrary dentry * @base - used to return pointer to the end of devname part of path - * @dentry - pointer to dentry + * @dentry_in - pointer to dentry * @buffer - result buffer - * @buflen - length of buffer + * @buflen_in - length of buffer * @flags - options (see below) * * Helper function for constructing the server pathname @@ -49,15 +49,19 @@ int nfs_mountpoint_expiry_timeout = 500 * HZ; * the original device (export) name * (if unset, the original name is returned verbatim) */ -char *nfs_path(char **p, struct dentry *dentry, char *buffer, ssize_t buflen, - unsigned flags) +char *nfs_path(char **p, struct dentry *dentry_in, char *buffer, + ssize_t buflen_in, unsigned flags) { char *end; int namelen; unsigned seq; const char *base; + struct dentry *dentry; + ssize_t buflen; rename_retry: + buflen = buflen_in; + dentry = dentry_in; end = buffer+buflen; *--end = '\0'; buflen--; diff --git a/fs/nfs/nfs42xattr.c b/fs/nfs/nfs42xattr.c index 86777996cfec..b51424ff8159 100644 --- a/fs/nfs/nfs42xattr.c +++ b/fs/nfs/nfs42xattr.c @@ -67,7 +67,6 @@ struct nfs4_xattr_bucket { struct nfs4_xattr_cache { struct kref ref; - spinlock_t hash_lock; /* protects hashtable and lru */ struct nfs4_xattr_bucket buckets[NFS4_XATTR_HASH_SIZE]; struct list_head lru; struct list_head dispose; @@ -882,7 +881,7 @@ nfs4_xattr_cache_count(struct shrinker *shrink, struct shrink_control *sc) { unsigned long count; - count = list_lru_count(&nfs4_xattr_cache_lru); + count = list_lru_shrink_count(&nfs4_xattr_cache_lru, sc); return vfs_pressure_ratio(count); } @@ -976,7 +975,7 @@ nfs4_xattr_entry_count(struct shrinker *shrink, struct shrink_control *sc) lru = (shrink == &nfs4_xattr_large_entry_shrinker) ? &nfs4_xattr_large_entry_lru : &nfs4_xattr_entry_lru; - count = list_lru_count(lru); + count = list_lru_shrink_count(lru, sc); return vfs_pressure_ratio(count); } diff --git a/fs/nfs/nfs42xdr.c b/fs/nfs/nfs42xdr.c index cc50085e151c..0dc31ad2362e 100644 --- a/fs/nfs/nfs42xdr.c +++ b/fs/nfs/nfs42xdr.c @@ -45,6 +45,15 @@ #define encode_deallocate_maxsz (op_encode_hdr_maxsz + \ encode_fallocate_maxsz) #define decode_deallocate_maxsz (op_decode_hdr_maxsz) +#define encode_read_plus_maxsz (op_encode_hdr_maxsz + \ + encode_stateid_maxsz + 3) +#define NFS42_READ_PLUS_SEGMENT_SIZE (1 /* data_content4 */ + \ + 2 /* data_info4.di_offset */ + \ + 2 /* data_info4.di_length */) +#define decode_read_plus_maxsz (op_decode_hdr_maxsz + \ + 1 /* rpr_eof */ + \ + 1 /* rpr_contents count */ + \ + 2 * NFS42_READ_PLUS_SEGMENT_SIZE) #define encode_seek_maxsz (op_encode_hdr_maxsz + \ encode_stateid_maxsz + \ 2 /* offset */ + \ @@ -128,6 +137,14 @@ decode_putfh_maxsz + \ decode_deallocate_maxsz + \ decode_getattr_maxsz) +#define NFS4_enc_read_plus_sz (compound_encode_hdr_maxsz + \ + encode_sequence_maxsz + \ + encode_putfh_maxsz + \ + encode_read_plus_maxsz) +#define NFS4_dec_read_plus_sz (compound_decode_hdr_maxsz + \ + decode_sequence_maxsz + \ + decode_putfh_maxsz + \ + decode_read_plus_maxsz) #define NFS4_enc_seek_sz (compound_encode_hdr_maxsz + \ encode_sequence_maxsz + \ encode_putfh_maxsz + \ @@ -324,6 +341,16 @@ static void encode_deallocate(struct xdr_stream *xdr, encode_fallocate(xdr, args); } +static void encode_read_plus(struct xdr_stream *xdr, + const struct nfs_pgio_args *args, + struct compound_hdr *hdr) +{ + encode_op_hdr(xdr, OP_READ_PLUS, decode_read_plus_maxsz, hdr); + encode_nfs4_stateid(xdr, &args->stateid); + encode_uint64(xdr, args->offset); + encode_uint32(xdr, args->count); +} + static void encode_seek(struct xdr_stream *xdr, const struct nfs42_seek_args *args, struct compound_hdr *hdr) @@ -722,6 +749,28 @@ static void nfs4_xdr_enc_deallocate(struct rpc_rqst *req, encode_nops(&hdr); } +/* + * Encode READ_PLUS request + */ +static void nfs4_xdr_enc_read_plus(struct rpc_rqst *req, + struct xdr_stream *xdr, + const void *data) +{ + const struct nfs_pgio_args *args = data; + struct compound_hdr hdr = { + .minorversion = nfs4_xdr_minorversion(&args->seq_args), + }; + + encode_compound_hdr(xdr, req, &hdr); + encode_sequence(xdr, &args->seq_args, &hdr); + encode_putfh(xdr, args->fh, &hdr); + encode_read_plus(xdr, args, &hdr); + + rpc_prepare_reply_pages(req, args->pages, args->pgbase, + args->count, hdr.replen); + encode_nops(&hdr); +} + /* * Encode SEEK request */ @@ -970,6 +1019,97 @@ static int decode_deallocate(struct xdr_stream *xdr, struct nfs42_falloc_res *re return decode_op_hdr(xdr, OP_DEALLOCATE); } +static int decode_read_plus_data(struct xdr_stream *xdr, struct nfs_pgio_res *res, + uint32_t *eof) +{ + uint32_t count, recvd; + uint64_t offset; + __be32 *p; + + p = xdr_inline_decode(xdr, 8 + 4); + if (unlikely(!p)) + return -EIO; + + p = xdr_decode_hyper(p, &offset); + count = be32_to_cpup(p); + recvd = xdr_align_data(xdr, res->count, count); + res->count += recvd; + + if (count > recvd) { + dprintk("NFS: server cheating in read reply: " + "count %u > recvd %u\n", count, recvd); + *eof = 0; + return 1; + } + + return 0; +} + +static int decode_read_plus_hole(struct xdr_stream *xdr, struct nfs_pgio_res *res, + uint32_t *eof) +{ + uint64_t offset, length, recvd; + __be32 *p; + + p = xdr_inline_decode(xdr, 8 + 8); + if (unlikely(!p)) + return -EIO; + + p = xdr_decode_hyper(p, &offset); + p = xdr_decode_hyper(p, &length); + recvd = xdr_expand_hole(xdr, res->count, length); + res->count += recvd; + + if (recvd < length) { + *eof = 0; + return 1; + } + return 0; +} + +static int decode_read_plus(struct xdr_stream *xdr, struct nfs_pgio_res *res) +{ + uint32_t eof, segments, type; + int status, i; + __be32 *p; + + status = decode_op_hdr(xdr, OP_READ_PLUS); + if (status) + return status; + + p = xdr_inline_decode(xdr, 4 + 4); + if (unlikely(!p)) + return -EIO; + + eof = be32_to_cpup(p++); + segments = be32_to_cpup(p++); + if (segments == 0) + goto out; + + for (i = 0; i < segments; i++) { + p = xdr_inline_decode(xdr, 4); + if (unlikely(!p)) + return -EIO; + + type = be32_to_cpup(p++); + if (type == NFS4_CONTENT_DATA) + status = decode_read_plus_data(xdr, res, &eof); + else if (type == NFS4_CONTENT_HOLE) + status = decode_read_plus_hole(xdr, res, &eof); + else + return -EINVAL; + + if (status < 0) + return status; + if (status > 0) + break; + } + +out: + res->eof = eof; + return 0; +} + static int decode_seek(struct xdr_stream *xdr, struct nfs42_seek_res *res) { int status; @@ -1146,6 +1286,33 @@ static int nfs4_xdr_dec_deallocate(struct rpc_rqst *rqstp, return status; } +/* + * Decode READ_PLUS request + */ +static int nfs4_xdr_dec_read_plus(struct rpc_rqst *rqstp, + struct xdr_stream *xdr, + void *data) +{ + struct nfs_pgio_res *res = data; + struct compound_hdr hdr; + int status; + + status = decode_compound_hdr(xdr, &hdr); + if (status) + goto out; + status = decode_sequence(xdr, &res->seq_res, rqstp); + if (status) + goto out; + status = decode_putfh(xdr); + if (status) + goto out; + status = decode_read_plus(xdr, res); + if (!status) + status = res->count; +out: + return status; +} + /* * Decode SEEK request */ diff --git a/fs/nfs/nfs4_fs.h b/fs/nfs/nfs4_fs.h index 0c9505dc852c..065cb04222a1 100644 --- a/fs/nfs/nfs4_fs.h +++ b/fs/nfs/nfs4_fs.h @@ -599,6 +599,14 @@ static inline bool nfs4_stateid_is_newer(const nfs4_stateid *s1, const nfs4_stat return (s32)(be32_to_cpu(s1->seqid) - be32_to_cpu(s2->seqid)) > 0; } +static inline bool nfs4_stateid_is_next(const nfs4_stateid *s1, const nfs4_stateid *s2) +{ + u32 seq1 = be32_to_cpu(s1->seqid); + u32 seq2 = be32_to_cpu(s2->seqid); + + return seq2 == seq1 + 1U || (seq2 == 1U && seq1 == 0xffffffffU); +} + static inline bool nfs4_stateid_match_or_older(const nfs4_stateid *dst, const nfs4_stateid *src) { return nfs4_stateid_match_other(dst, src) && diff --git a/fs/nfs/nfs4client.c b/fs/nfs/nfs4client.c index daacc78a3d48..be7915c861ce 100644 --- a/fs/nfs/nfs4client.c +++ b/fs/nfs/nfs4client.c @@ -1045,6 +1045,8 @@ static int nfs4_server_common_setup(struct nfs_server *server, server->caps |= server->nfs_client->cl_mvops->init_caps; if (server->flags & NFS_MOUNT_NORDIRPLUS) server->caps &= ~NFS_CAP_READDIRPLUS; + if (server->nfs_client->cl_proto == XPRT_TRANSPORT_RDMA) + server->caps &= ~NFS_CAP_READ_PLUS; /* * Don't use NFS uid/gid mapping if we're using AUTH_SYS or lower * authentication. diff --git a/fs/nfs/nfs4file.c b/fs/nfs/nfs4file.c index fdfc77486ace..91be7f628e4a 100644 --- a/fs/nfs/nfs4file.c +++ b/fs/nfs/nfs4file.c @@ -145,7 +145,8 @@ static ssize_t __nfs4_copy_file_range(struct file *file_in, loff_t pos_in, /* Only offload copy if superblock is the same */ if (file_in->f_op != &nfs4_file_operations) return -EXDEV; - if (!nfs_server_capable(file_inode(file_out), NFS_CAP_COPY)) + if (!nfs_server_capable(file_inode(file_out), NFS_CAP_COPY) || + !nfs_server_capable(file_inode(file_in), NFS_CAP_COPY)) return -EOPNOTSUPP; if (file_inode(file_in) == file_inode(file_out)) return -EOPNOTSUPP; diff --git a/fs/nfs/nfs4idmap.c b/fs/nfs/nfs4idmap.c index 62e6eea5c516..8d8aba305ecc 100644 --- a/fs/nfs/nfs4idmap.c +++ b/fs/nfs/nfs4idmap.c @@ -46,6 +46,7 @@ #include #include #include +#include #include "internal.h" #include "netns.h" @@ -69,13 +70,13 @@ struct idmap { struct rpc_pipe *idmap_pipe; struct idmap_legacy_upcalldata *idmap_upcall_data; struct mutex idmap_mutex; - const struct cred *cred; + struct user_namespace *user_ns; }; static struct user_namespace *idmap_userns(const struct idmap *idmap) { - if (idmap && idmap->cred) - return idmap->cred->user_ns; + if (idmap && idmap->user_ns) + return idmap->user_ns; return &init_user_ns; } @@ -286,7 +287,7 @@ static struct key *nfs_idmap_request_key(const char *name, size_t namelen, if (ret < 0) return ERR_PTR(ret); - if (!idmap->cred || idmap->cred->user_ns == &init_user_ns) + if (!idmap->user_ns || idmap->user_ns == &init_user_ns) rkey = request_key(&key_type_id_resolver, desc, ""); if (IS_ERR(rkey)) { mutex_lock(&idmap->idmap_mutex); @@ -462,7 +463,7 @@ nfs_idmap_new(struct nfs_client *clp) return -ENOMEM; mutex_init(&idmap->idmap_mutex); - idmap->cred = get_cred(clp->cl_rpcclient->cl_cred); + idmap->user_ns = get_user_ns(clp->cl_rpcclient->cl_cred->user_ns); rpc_init_pipe_dir_object(&idmap->idmap_pdo, &nfs_idmap_pipe_dir_object_ops, @@ -486,7 +487,7 @@ nfs_idmap_new(struct nfs_client *clp) err_destroy_pipe: rpc_destroy_pipe_data(idmap->idmap_pipe); err: - put_cred(idmap->cred); + get_user_ns(idmap->user_ns); kfree(idmap); return error; } @@ -503,7 +504,7 @@ nfs_idmap_delete(struct nfs_client *clp) &clp->cl_rpcclient->cl_pipedir_objects, &idmap->idmap_pdo); rpc_destroy_pipe_data(idmap->idmap_pipe); - put_cred(idmap->cred); + put_user_ns(idmap->user_ns); kfree(idmap); } diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c index 33efea1da3bb..9648203cc22e 100644 --- a/fs/nfs/nfs4proc.c +++ b/fs/nfs/nfs4proc.c @@ -63,6 +63,7 @@ #include "callback.h" #include "pnfs.h" #include "netns.h" +#include "sysfs.h" #include "nfs4idmap.h" #include "nfs4session.h" #include "fscache.h" @@ -70,6 +71,10 @@ #include "nfs4trace.h" +#ifdef CONFIG_NFS_V4_2 +#include "nfs42.h" +#endif /* CONFIG_NFS_V4_2 */ + #define NFSDBG_FACILITY NFSDBG_PROC #define NFS4_BITMASK_SZ 3 @@ -107,6 +112,9 @@ static int nfs41_test_stateid(struct nfs_server *, nfs4_stateid *, static int nfs41_free_stateid(struct nfs_server *, const nfs4_stateid *, const struct cred *, bool); #endif +static void nfs4_bitmask_adjust(__u32 *bitmask, struct inode *inode, + struct nfs_server *server, + struct nfs4_label *label); #ifdef CONFIG_NFS_V4_SECURITY_LABEL static inline struct nfs4_label * @@ -1547,19 +1555,6 @@ static void nfs_state_log_update_open_stateid(struct nfs4_state *state) wake_up_all(&state->waitq); } -static void nfs_state_log_out_of_order_open_stateid(struct nfs4_state *state, - const nfs4_stateid *stateid) -{ - u32 state_seqid = be32_to_cpu(state->open_stateid.seqid); - u32 stateid_seqid = be32_to_cpu(stateid->seqid); - - if (stateid_seqid == state_seqid + 1U || - (stateid_seqid == 1U && state_seqid == 0xffffffffU)) - nfs_state_log_update_open_stateid(state); - else - set_bit(NFS_STATE_CHANGE_WAIT, &state->flags); -} - static void nfs_test_and_clear_all_open_stateid(struct nfs4_state *state) { struct nfs_client *clp = state->owner->so_server->nfs_client; @@ -1585,21 +1580,19 @@ static void nfs_test_and_clear_all_open_stateid(struct nfs4_state *state) * i.e. The stateid seqids have to be initialised to 1, and * are then incremented on every state transition. */ -static bool nfs_need_update_open_stateid(struct nfs4_state *state, +static bool nfs_stateid_is_sequential(struct nfs4_state *state, const nfs4_stateid *stateid) { - if (test_bit(NFS_OPEN_STATE, &state->flags) == 0 || - !nfs4_stateid_match_other(stateid, &state->open_stateid)) { + if (test_bit(NFS_OPEN_STATE, &state->flags)) { + /* The common case - we're updating to a new sequence number */ + if (nfs4_stateid_match_other(stateid, &state->open_stateid) && + nfs4_stateid_is_next(&state->open_stateid, stateid)) { + return true; + } + } else { + /* This is the first OPEN in this generation */ if (stateid->seqid == cpu_to_be32(1)) - nfs_state_log_update_open_stateid(state); - else - set_bit(NFS_STATE_CHANGE_WAIT, &state->flags); - return true; - } - - if (nfs4_stateid_is_newer(stateid, &state->open_stateid)) { - nfs_state_log_out_of_order_open_stateid(state, stateid); - return true; + return true; } return false; } @@ -1673,16 +1666,16 @@ static void nfs_set_open_stateid_locked(struct nfs4_state *state, int status = 0; for (;;) { - if (!nfs_need_update_open_stateid(state, stateid)) - return; - if (!test_bit(NFS_STATE_CHANGE_WAIT, &state->flags)) + if (nfs_stateid_is_sequential(state, stateid)) break; + if (status) break; /* Rely on seqids for serialisation with NFSv4.0 */ if (!nfs4_has_session(NFS_SERVER(state->inode)->nfs_client)) break; + set_bit(NFS_STATE_CHANGE_WAIT, &state->flags); prepare_to_wait(&state->waitq, &wait, TASK_KILLABLE); /* * Ensure we process the state changes in the same order @@ -1693,6 +1686,7 @@ static void nfs_set_open_stateid_locked(struct nfs4_state *state, spin_unlock(&state->owner->so_lock); rcu_read_unlock(); trace_nfs4_open_stateid_update_wait(state->inode, stateid, 0); + if (!signal_pending(current)) { if (schedule_timeout(5*HZ) == 0) status = -EAGAIN; @@ -3435,7 +3429,8 @@ static bool nfs4_refresh_open_old_stateid(nfs4_stateid *dst, __be32 seqid_open; u32 dst_seqid; bool ret; - int seq; + int seq, status = -EAGAIN; + DEFINE_WAIT(wait); for (;;) { ret = false; @@ -3447,15 +3442,41 @@ static bool nfs4_refresh_open_old_stateid(nfs4_stateid *dst, continue; break; } + + write_seqlock(&state->seqlock); seqid_open = state->open_stateid.seqid; - if (read_seqretry(&state->seqlock, seq)) - continue; dst_seqid = be32_to_cpu(dst->seqid); - if ((s32)(dst_seqid - be32_to_cpu(seqid_open)) >= 0) - dst->seqid = cpu_to_be32(dst_seqid + 1); - else + + /* Did another OPEN bump the state's seqid? try again: */ + if ((s32)(be32_to_cpu(seqid_open) - dst_seqid) > 0) { dst->seqid = seqid_open; + write_sequnlock(&state->seqlock); + ret = true; + break; + } + + /* server says we're behind but we haven't seen the update yet */ + set_bit(NFS_STATE_CHANGE_WAIT, &state->flags); + prepare_to_wait(&state->waitq, &wait, TASK_KILLABLE); + write_sequnlock(&state->seqlock); + trace_nfs4_close_stateid_update_wait(state->inode, dst, 0); + + if (signal_pending(current)) + status = -EINTR; + else + if (schedule_timeout(5*HZ) != 0) + status = 0; + + finish_wait(&state->waitq, &wait); + + if (!status) + continue; + if (status == -EINTR) + break; + + /* we slept the whole 5 seconds, we must have lost a seqid */ + dst->seqid = cpu_to_be32(dst_seqid + 1); ret = true; break; } @@ -3632,9 +3653,10 @@ static void nfs4_close_prepare(struct rpc_task *task, void *data) if (calldata->arg.fmode == 0 || calldata->arg.fmode == FMODE_READ) { /* Close-to-open cache consistency revalidation */ - if (!nfs4_have_delegation(inode, FMODE_READ)) + if (!nfs4_have_delegation(inode, FMODE_READ)) { calldata->arg.bitmask = NFS_SERVER(inode)->cache_consistency_bitmask; - else + nfs4_bitmask_adjust(calldata->arg.bitmask, inode, NFS_SERVER(inode), NULL); + } else calldata->arg.bitmask = NULL; } @@ -5255,28 +5277,60 @@ static bool nfs4_read_stateid_changed(struct rpc_task *task, return true; } +static bool nfs4_read_plus_not_supported(struct rpc_task *task, + struct nfs_pgio_header *hdr) +{ + struct nfs_server *server = NFS_SERVER(hdr->inode); + struct rpc_message *msg = &task->tk_msg; + + if (msg->rpc_proc == &nfs4_procedures[NFSPROC4_CLNT_READ_PLUS] && + server->caps & NFS_CAP_READ_PLUS && task->tk_status == -ENOTSUPP) { + server->caps &= ~NFS_CAP_READ_PLUS; + msg->rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_READ]; + rpc_restart_call_prepare(task); + return true; + } + return false; +} + static int nfs4_read_done(struct rpc_task *task, struct nfs_pgio_header *hdr) { - dprintk("--> %s\n", __func__); if (!nfs4_sequence_done(task, &hdr->res.seq_res)) return -EAGAIN; if (nfs4_read_stateid_changed(task, &hdr->args)) return -EAGAIN; + if (nfs4_read_plus_not_supported(task, hdr)) + return -EAGAIN; if (task->tk_status > 0) nfs_invalidate_atime(hdr->inode); return hdr->pgio_done_cb ? hdr->pgio_done_cb(task, hdr) : nfs4_read_done_cb(task, hdr); } +#ifdef CONFIG_NFS_V4_2 +static void nfs42_read_plus_support(struct nfs_server *server, struct rpc_message *msg) +{ + if (server->caps & NFS_CAP_READ_PLUS) + msg->rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_READ_PLUS]; + else + msg->rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_READ]; +} +#else +static void nfs42_read_plus_support(struct nfs_server *server, struct rpc_message *msg) +{ + msg->rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_READ]; +} +#endif /* CONFIG_NFS_V4_2 */ + static void nfs4_proc_read_setup(struct nfs_pgio_header *hdr, struct rpc_message *msg) { hdr->timestamp = jiffies; if (!hdr->pgio_done_cb) hdr->pgio_done_cb = nfs4_read_done_cb; - msg->rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_READ]; + nfs42_read_plus_support(NFS_SERVER(hdr->inode), msg); nfs4_init_sequence(&hdr->args.seq_args, &hdr->res.seq_res, 0, 0); } @@ -5360,6 +5414,38 @@ bool nfs4_write_need_cache_consistency_data(struct nfs_pgio_header *hdr) return nfs4_have_delegation(hdr->inode, FMODE_READ) == 0; } +static void nfs4_bitmask_adjust(__u32 *bitmask, struct inode *inode, + struct nfs_server *server, + struct nfs4_label *label) +{ + + unsigned long cache_validity = READ_ONCE(NFS_I(inode)->cache_validity); + + if ((cache_validity & NFS_INO_INVALID_DATA) || + (cache_validity & NFS_INO_REVAL_PAGECACHE) || + (cache_validity & NFS_INO_REVAL_FORCED) || + (cache_validity & NFS_INO_INVALID_OTHER)) + nfs4_bitmap_copy_adjust(bitmask, nfs4_bitmask(server, label), inode); + + if (cache_validity & NFS_INO_INVALID_ATIME) + bitmask[1] |= FATTR4_WORD1_TIME_ACCESS; + if (cache_validity & NFS_INO_INVALID_ACCESS) + bitmask[0] |= FATTR4_WORD1_MODE | FATTR4_WORD1_OWNER | + FATTR4_WORD1_OWNER_GROUP; + if (cache_validity & NFS_INO_INVALID_ACL) + bitmask[0] |= FATTR4_WORD0_ACL; + if (cache_validity & NFS_INO_INVALID_LABEL) + bitmask[2] |= FATTR4_WORD2_SECURITY_LABEL; + if (cache_validity & NFS_INO_INVALID_CTIME) + bitmask[0] |= FATTR4_WORD0_CHANGE; + if (cache_validity & NFS_INO_INVALID_MTIME) + bitmask[1] |= FATTR4_WORD1_TIME_MODIFY; + if (cache_validity & NFS_INO_INVALID_SIZE) + bitmask[0] |= FATTR4_WORD0_SIZE; + if (cache_validity & NFS_INO_INVALID_BLOCKS) + bitmask[1] |= FATTR4_WORD1_SPACE_USED; +} + static void nfs4_proc_write_setup(struct nfs_pgio_header *hdr, struct rpc_message *msg, struct rpc_clnt **clnt) @@ -5369,8 +5455,10 @@ static void nfs4_proc_write_setup(struct nfs_pgio_header *hdr, if (!nfs4_write_need_cache_consistency_data(hdr)) { hdr->args.bitmask = NULL; hdr->res.fattr = NULL; - } else + } else { hdr->args.bitmask = server->cache_consistency_bitmask; + nfs4_bitmask_adjust(hdr->args.bitmask, hdr->inode, server, NULL); + } if (!hdr->pgio_done_cb) hdr->pgio_done_cb = nfs4_write_done_cb; @@ -6006,9 +6094,34 @@ static void nfs4_init_boot_verifier(const struct nfs_client *clp, memcpy(bootverf->data, verf, sizeof(bootverf->data)); } +static size_t +nfs4_get_uniquifier(struct nfs_client *clp, char *buf, size_t buflen) +{ + struct nfs_net *nn = net_generic(clp->cl_net, nfs_net_id); + struct nfs_netns_client *nn_clp = nn->nfs_client; + const char *id; + + buf[0] = '\0'; + + if (nn_clp) { + rcu_read_lock(); + id = rcu_dereference(nn_clp->identifier); + if (id) + strscpy(buf, id, buflen); + rcu_read_unlock(); + } + + if (nfs4_client_id_uniquifier[0] != '\0' && buf[0] == '\0') + strscpy(buf, nfs4_client_id_uniquifier, buflen); + + return strlen(buf); +} + static int nfs4_init_nonuniform_client_string(struct nfs_client *clp) { + char buf[NFS4_CLIENT_ID_UNIQ_LEN]; + size_t buflen; size_t len; char *str; @@ -6022,8 +6135,11 @@ nfs4_init_nonuniform_client_string(struct nfs_client *clp) strlen(rpc_peeraddr2str(clp->cl_rpcclient, RPC_DISPLAY_ADDR)) + 1; rcu_read_unlock(); - if (nfs4_client_id_uniquifier[0] != '\0') - len += strlen(nfs4_client_id_uniquifier) + 1; + + buflen = nfs4_get_uniquifier(clp, buf, sizeof(buf)); + if (buflen) + len += buflen + 1; + if (len > NFS4_OPAQUE_LIMIT + 1) return -EINVAL; @@ -6037,10 +6153,9 @@ nfs4_init_nonuniform_client_string(struct nfs_client *clp) return -ENOMEM; rcu_read_lock(); - if (nfs4_client_id_uniquifier[0] != '\0') + if (buflen) scnprintf(str, len, "Linux NFSv4.0 %s/%s/%s", - clp->cl_rpcclient->cl_nodename, - nfs4_client_id_uniquifier, + clp->cl_rpcclient->cl_nodename, buf, rpc_peeraddr2str(clp->cl_rpcclient, RPC_DISPLAY_ADDR)); else @@ -6054,51 +6169,24 @@ nfs4_init_nonuniform_client_string(struct nfs_client *clp) return 0; } -static int -nfs4_init_uniquifier_client_string(struct nfs_client *clp) -{ - size_t len; - char *str; - - len = 10 + 10 + 1 + 10 + 1 + - strlen(nfs4_client_id_uniquifier) + 1 + - strlen(clp->cl_rpcclient->cl_nodename) + 1; - - if (len > NFS4_OPAQUE_LIMIT + 1) - return -EINVAL; - - /* - * Since this string is allocated at mount time, and held until the - * nfs_client is destroyed, we can use GFP_KERNEL here w/o worrying - * about a memory-reclaim deadlock. - */ - str = kmalloc(len, GFP_KERNEL); - if (!str) - return -ENOMEM; - - scnprintf(str, len, "Linux NFSv%u.%u %s/%s", - clp->rpc_ops->version, clp->cl_minorversion, - nfs4_client_id_uniquifier, - clp->cl_rpcclient->cl_nodename); - clp->cl_owner_id = str; - return 0; -} - static int nfs4_init_uniform_client_string(struct nfs_client *clp) { + char buf[NFS4_CLIENT_ID_UNIQ_LEN]; + size_t buflen; size_t len; char *str; if (clp->cl_owner_id != NULL) return 0; - if (nfs4_client_id_uniquifier[0] != '\0') - return nfs4_init_uniquifier_client_string(clp); - len = 10 + 10 + 1 + 10 + 1 + strlen(clp->cl_rpcclient->cl_nodename) + 1; + buflen = nfs4_get_uniquifier(clp, buf, sizeof(buf)); + if (buflen) + len += buflen + 1; + if (len > NFS4_OPAQUE_LIMIT + 1) return -EINVAL; @@ -6111,9 +6199,14 @@ nfs4_init_uniform_client_string(struct nfs_client *clp) if (!str) return -ENOMEM; - scnprintf(str, len, "Linux NFSv%u.%u %s", - clp->rpc_ops->version, clp->cl_minorversion, - clp->cl_rpcclient->cl_nodename); + if (buflen) + scnprintf(str, len, "Linux NFSv%u.%u %s/%s", + clp->rpc_ops->version, clp->cl_minorversion, + buf, clp->cl_rpcclient->cl_nodename); + else + scnprintf(str, len, "Linux NFSv%u.%u %s", + clp->rpc_ops->version, clp->cl_minorversion, + clp->cl_rpcclient->cl_nodename); clp->cl_owner_id = str; return 0; } @@ -6406,6 +6499,7 @@ static int _nfs4_proc_delegreturn(struct inode *inode, const struct cred *cred, data->args.fhandle = &data->fh; data->args.stateid = &data->stateid; data->args.bitmask = server->cache_consistency_bitmask; + nfs4_bitmask_adjust(data->args.bitmask, inode, server, NULL); nfs_copy_fh(&data->fh, NFS_FH(inode)); nfs4_stateid_copy(&data->stateid, stateid); data->res.fattr = &data->fattr; @@ -7442,7 +7536,7 @@ nfs4_listxattr_nfs4_label(struct inode *inode, char *list, size_t list_len) if (nfs_server_capable(inode, NFS_CAP_SECURITY_LABEL)) { len = security_inode_listsecurity(inode, list, list_len); - if (list_len && len > list_len) + if (len >= 0 && list_len && len > list_len) return -ERANGE; } return len; @@ -8042,9 +8136,11 @@ int nfs4_proc_secinfo(struct inode *dir, const struct qstr *name, * both PNFS and NON_PNFS flags set, and not having one of NON_PNFS, PNFS, or * DS flags set. */ -static int nfs4_check_cl_exchange_flags(u32 flags) +static int nfs4_check_cl_exchange_flags(u32 flags, u32 version) { - if (flags & ~EXCHGID4_FLAG_MASK_R) + if (version >= 2 && (flags & ~EXCHGID4_2_FLAG_MASK_R)) + goto out_inval; + else if (version < 2 && (flags & ~EXCHGID4_FLAG_MASK_R)) goto out_inval; if ((flags & EXCHGID4_FLAG_USE_PNFS_MDS) && (flags & EXCHGID4_FLAG_USE_NON_PNFS)) @@ -8457,7 +8553,8 @@ static int _nfs4_proc_exchange_id(struct nfs_client *clp, const struct cred *cre if (status != 0) goto out; - status = nfs4_check_cl_exchange_flags(resp->flags); + status = nfs4_check_cl_exchange_flags(resp->flags, + clp->cl_mvops->minor_version); if (status != 0) goto out; @@ -9696,7 +9793,6 @@ _nfs41_proc_secinfo_no_name(struct nfs_server *server, struct nfs_fh *fhandle, .rpc_argp = &args, .rpc_resp = &res, }; - struct rpc_clnt *clnt = server->client; struct nfs4_call_sync_data data = { .seq_server = server, .seq_args = &args.seq_args, @@ -9713,8 +9809,7 @@ _nfs41_proc_secinfo_no_name(struct nfs_server *server, struct nfs_fh *fhandle, int status; if (use_integrity) { - clnt = server->nfs_client->cl_rpcclient; - task_setup.rpc_client = clnt; + task_setup.rpc_client = server->nfs_client->cl_rpcclient; cred = nfs4_get_clid_cred(server->nfs_client); msg.rpc_cred = cred; @@ -10168,7 +10263,8 @@ static const struct nfs4_minor_version_ops nfs_v4_2_minor_ops = { | NFS_CAP_SEEK | NFS_CAP_LAYOUTSTATS | NFS_CAP_CLONE - | NFS_CAP_LAYOUTERROR, + | NFS_CAP_LAYOUTERROR + | NFS_CAP_READ_PLUS, .init_client = nfs41_init_client, .shutdown_client = nfs41_shutdown_client, .match_stateid = nfs41_match_stateid, diff --git a/fs/nfs/nfs4trace.h b/fs/nfs/nfs4trace.h index b4f852d4d099..484c1da96dea 100644 --- a/fs/nfs/nfs4trace.h +++ b/fs/nfs/nfs4trace.h @@ -1511,6 +1511,7 @@ DEFINE_NFS4_INODE_STATEID_EVENT(nfs4_setattr); DEFINE_NFS4_INODE_STATEID_EVENT(nfs4_delegreturn); DEFINE_NFS4_INODE_STATEID_EVENT(nfs4_open_stateid_update); DEFINE_NFS4_INODE_STATEID_EVENT(nfs4_open_stateid_update_wait); +DEFINE_NFS4_INODE_STATEID_EVENT(nfs4_close_stateid_update_wait); DECLARE_EVENT_CLASS(nfs4_getattr_event, TP_PROTO( diff --git a/fs/nfs/nfs4xdr.c b/fs/nfs/nfs4xdr.c index 0b3510f62623..c6dbfcae7517 100644 --- a/fs/nfs/nfs4xdr.c +++ b/fs/nfs/nfs4xdr.c @@ -5308,7 +5308,6 @@ static int decode_getacl(struct xdr_stream *xdr, struct rpc_rqst *req, uint32_t attrlen, bitmap[3] = {0}; int status; - unsigned int pg_offset; res->acl_len = 0; if ((status = decode_op_hdr(xdr, OP_GETATTR)) != 0) @@ -5316,9 +5315,6 @@ static int decode_getacl(struct xdr_stream *xdr, struct rpc_rqst *req, xdr_enter_page(xdr, xdr->buf->page_len); - /* Calculate the offset of the page data */ - pg_offset = xdr->buf->head[0].iov_len; - if ((status = decode_attr_bitmap(xdr, bitmap)) != 0) goto out; if ((status = decode_attr_length(xdr, &attrlen, &savep)) != 0) @@ -5331,7 +5327,7 @@ static int decode_getacl(struct xdr_stream *xdr, struct rpc_rqst *req, /* The bitmap (xdr len + bitmaps) and the attr xdr len words * are stored with the acl data to handle the problem of * variable length bitmaps.*/ - res->acl_data_offset = xdr_stream_pos(xdr) - pg_offset; + res->acl_data_offset = xdr_page_pos(xdr); res->acl_len = attrlen; /* Check for receive buffer overflow */ @@ -7619,6 +7615,7 @@ const struct rpc_procinfo nfs4_procedures[] = { PROC42(SETXATTR, enc_setxattr, dec_setxattr), PROC42(LISTXATTRS, enc_listxattrs, dec_listxattrs), PROC42(REMOVEXATTR, enc_removexattr, dec_removexattr), + PROC42(READ_PLUS, enc_read_plus, dec_read_plus), }; static unsigned int nfs_version4_counts[ARRAY_SIZE(nfs4_procedures)]; diff --git a/fs/nfs/pnfs.c b/fs/nfs/pnfs.c index 71f7741126b6..0e50b9d45c32 100644 --- a/fs/nfs/pnfs.c +++ b/fs/nfs/pnfs.c @@ -902,7 +902,7 @@ pnfs_destroy_layouts_byclid(struct nfs_client *clp, } /* - * Called by the state manger to remove all layouts established under an + * Called by the state manager to remove all layouts established under an * expired lease. */ void diff --git a/fs/nfs/super.c b/fs/nfs/super.c index f943e37853fa..78c46a517fcf 100644 --- a/fs/nfs/super.c +++ b/fs/nfs/super.c @@ -889,7 +889,7 @@ static struct nfs_server *nfs_try_mount_request(struct fs_context *fc) default: if (rpcauth_get_gssinfo(flavor, &info) != 0) continue; - /* Fallthrough */ + break; } dfprintk(MOUNT, "NFS: attempting to use auth flavor %u\n", flavor); ctx->selected_flavor = flavor; diff --git a/fs/nfs/sysfs.c b/fs/nfs/sysfs.c index c489496b5659..8cb70755e3c9 100644 --- a/fs/nfs/sysfs.c +++ b/fs/nfs/sysfs.c @@ -79,7 +79,12 @@ static ssize_t nfs_netns_identifier_show(struct kobject *kobj, struct nfs_netns_client *c = container_of(kobj, struct nfs_netns_client, kobject); - return scnprintf(buf, PAGE_SIZE, "%s\n", c->identifier); + ssize_t ret; + + rcu_read_lock(); + ret = scnprintf(buf, PAGE_SIZE, "%s\n", rcu_dereference(c->identifier)); + rcu_read_unlock(); + return ret; } /* Strip trailing '\n' */ @@ -107,7 +112,7 @@ static ssize_t nfs_netns_identifier_store(struct kobject *kobj, p = kmemdup_nul(buf, len, GFP_KERNEL); if (!p) return -ENOMEM; - old = xchg(&c->identifier, p); + old = rcu_dereference_protected(xchg(&c->identifier, (char __rcu *)p), 1); if (old) { synchronize_rcu(); kfree(old); @@ -121,7 +126,7 @@ static void nfs_netns_client_release(struct kobject *kobj) struct nfs_netns_client, kobject); - kfree(c->identifier); + kfree(rcu_dereference_raw(c->identifier)); kfree(c); } diff --git a/fs/nfs/sysfs.h b/fs/nfs/sysfs.h index ebcbdc40483b..5501ef573c32 100644 --- a/fs/nfs/sysfs.h +++ b/fs/nfs/sysfs.h @@ -11,7 +11,7 @@ struct nfs_netns_client { struct kobject kobject; struct net *net; - const char *identifier; + const char __rcu *identifier; }; extern struct kobject *nfs_client_kobj; diff --git a/fs/proc/base.c b/fs/proc/base.c index 991730608907..a068e9198915 100644 --- a/fs/proc/base.c +++ b/fs/proc/base.c @@ -1269,6 +1269,10 @@ static ssize_t proc_loginuid_write(struct file * file, const char __user * buf, kuid_t kloginuid; int rv; + /* Don't let kthreads write their own loginuid */ + if (current->flags & PF_KTHREAD) + return -EPERM; + rcu_read_lock(); if (current != pid_task(proc_pid(inode), PIDTYPE_PID)) { rcu_read_unlock(); diff --git a/include/linux/ceph/messenger.h b/include/linux/ceph/messenger.h index 76371aaae2d1..60b324efd1c4 100644 --- a/include/linux/ceph/messenger.h +++ b/include/linux/ceph/messenger.h @@ -54,7 +54,7 @@ struct ceph_connection_operations { int (*check_message_signature) (struct ceph_msg *msg); }; -/* use format string %s%d */ +/* use format string %s%lld */ #define ENTITY_NAME(n) ceph_entity_type_name((n).type), le64_to_cpu((n).num) struct ceph_messenger { diff --git a/include/linux/ceph/mon_client.h b/include/linux/ceph/mon_client.h index ce4ffeb384d7..b658961156a0 100644 --- a/include/linux/ceph/mon_client.h +++ b/include/linux/ceph/mon_client.h @@ -142,7 +142,7 @@ int ceph_monc_get_version(struct ceph_mon_client *monc, const char *what, int ceph_monc_get_version_async(struct ceph_mon_client *monc, const char *what, ceph_monc_callback_t cb, u64 private_data); -int ceph_monc_blacklist_add(struct ceph_mon_client *monc, +int ceph_monc_blocklist_add(struct ceph_mon_client *monc, struct ceph_entity_addr *client_addr); extern int ceph_monc_open_session(struct ceph_mon_client *monc); diff --git a/include/linux/ceph/osdmap.h b/include/linux/ceph/osdmap.h index 3f4498fef6ad..cad9acfbc320 100644 --- a/include/linux/ceph/osdmap.h +++ b/include/linux/ceph/osdmap.h @@ -137,6 +137,17 @@ int ceph_oid_aprintf(struct ceph_object_id *oid, gfp_t gfp, const char *fmt, ...); void ceph_oid_destroy(struct ceph_object_id *oid); +struct workspace_manager { + struct list_head idle_ws; + spinlock_t ws_lock; + /* Number of free workspaces */ + int free_ws; + /* Total number of allocated workspaces */ + atomic_t total_ws; + /* Waiters for a free workspace */ + wait_queue_head_t ws_wait; +}; + struct ceph_pg_mapping { struct rb_node node; struct ceph_pg pgid; @@ -184,8 +195,7 @@ struct ceph_osdmap { * the list of osds that store+replicate them. */ struct crush_map *crush; - struct mutex crush_workspace_mutex; - void *crush_workspace; + struct workspace_manager crush_wsm; }; static inline bool ceph_osd_exists(struct ceph_osdmap *map, int osd) diff --git a/include/linux/ceph/rados.h b/include/linux/ceph/rados.h index 3a518fd0eaad..43a7a1573b51 100644 --- a/include/linux/ceph/rados.h +++ b/include/linux/ceph/rados.h @@ -424,7 +424,7 @@ enum { }; #define EOLDSNAPC ERESTART /* ORDERSNAP flag set; writer has old snapc*/ -#define EBLACKLISTED ESHUTDOWN /* blacklisted */ +#define EBLOCKLISTED ESHUTDOWN /* blocklisted */ /* xattr comparison */ enum { diff --git a/include/linux/crush/crush.h b/include/linux/crush/crush.h index 2f811baf78d2..30dba392b730 100644 --- a/include/linux/crush/crush.h +++ b/include/linux/crush/crush.h @@ -346,6 +346,9 @@ struct crush_work_bucket { struct crush_work { struct crush_work_bucket **work; /* Per-bucket working store */ +#ifdef __KERNEL__ + struct list_head item; +#endif }; #ifdef __KERNEL__ diff --git a/include/linux/i2c-smbus.h b/include/linux/i2c-smbus.h index 1e4e0de4ef8b..1ef421818d3a 100644 --- a/include/linux/i2c-smbus.h +++ b/include/linux/i2c-smbus.h @@ -38,6 +38,18 @@ static inline int of_i2c_setup_smbus_alert(struct i2c_adapter *adap) return 0; } #endif +#if IS_ENABLED(CONFIG_I2C_SMBUS) && IS_ENABLED(CONFIG_I2C_SLAVE) +struct i2c_client *i2c_new_slave_host_notify_device(struct i2c_adapter *adapter); +void i2c_free_slave_host_notify_device(struct i2c_client *client); +#else +static inline struct i2c_client *i2c_new_slave_host_notify_device(struct i2c_adapter *adapter) +{ + return ERR_PTR(-ENOSYS); +} +static inline void i2c_free_slave_host_notify_device(struct i2c_client *client) +{ +} +#endif #if IS_ENABLED(CONFIG_I2C_SMBUS) && IS_ENABLED(CONFIG_DMI) void i2c_register_spd(struct i2c_adapter *adap); diff --git a/include/linux/i2c.h b/include/linux/i2c.h index fc55ea41d323..56622658b215 100644 --- a/include/linux/i2c.h +++ b/include/linux/i2c.h @@ -344,7 +344,7 @@ const struct i2c_device_id *i2c_match_id(const struct i2c_device_id *id, static inline struct i2c_client *kobj_to_i2c_client(struct kobject *kobj) { - struct device * const dev = container_of(kobj, struct device, kobj); + struct device * const dev = kobj_to_dev(kobj); return to_i2c_client(dev); } diff --git a/include/linux/io_uring.h b/include/linux/io_uring.h index 96315cfaf6d1..868364cea3b7 100644 --- a/include/linux/io_uring.h +++ b/include/linux/io_uring.h @@ -4,18 +4,33 @@ #include #include -#include + +struct io_identity { + struct files_struct *files; + struct mm_struct *mm; +#ifdef CONFIG_BLK_CGROUP + struct cgroup_subsys_state *blkcg_css; +#endif + const struct cred *creds; + struct nsproxy *nsproxy; + struct fs_struct *fs; + unsigned long fsize; +#ifdef CONFIG_AUDIT + kuid_t loginuid; + unsigned int sessionid; +#endif + refcount_t count; +}; struct io_uring_task { /* submission side */ struct xarray xa; struct wait_queue_head wait; struct file *last; - atomic_long_t req_issue; - - /* completion side */ - bool in_idle ____cacheline_aligned_in_smp; - atomic_long_t req_complete; + struct percpu_counter inflight; + struct io_identity __identity; + struct io_identity *identity; + bool in_idle; }; #if defined(CONFIG_IO_URING) diff --git a/include/linux/mfd/mt6397/rtc.h b/include/linux/mfd/mt6397/rtc.h index 66989a16221a..c3748b53bf7d 100644 --- a/include/linux/mfd/mt6397/rtc.h +++ b/include/linux/mfd/mt6397/rtc.h @@ -72,7 +72,6 @@ struct mtk_rtc_data { }; struct mt6397_rtc { - struct device *dev; struct rtc_device *rtc_dev; /* Protect register access from multiple tasks */ diff --git a/include/linux/nfs4.h b/include/linux/nfs4.h index b8360be141da..9dc7eeac924f 100644 --- a/include/linux/nfs4.h +++ b/include/linux/nfs4.h @@ -551,13 +551,13 @@ enum { NFSPROC4_CLNT_LOOKUPP, NFSPROC4_CLNT_LAYOUTERROR, - NFSPROC4_CLNT_COPY_NOTIFY, NFSPROC4_CLNT_GETXATTR, NFSPROC4_CLNT_SETXATTR, NFSPROC4_CLNT_LISTXATTRS, NFSPROC4_CLNT_REMOVEXATTR, + NFSPROC4_CLNT_READ_PLUS, }; /* nfs41 types */ diff --git a/include/linux/nfs_fs_sb.h b/include/linux/nfs_fs_sb.h index 7eae72a8762e..38e60ec742df 100644 --- a/include/linux/nfs_fs_sb.h +++ b/include/linux/nfs_fs_sb.h @@ -287,5 +287,6 @@ struct nfs_server { #define NFS_CAP_LAYOUTERROR (1U << 26) #define NFS_CAP_COPY_NOTIFY (1U << 27) #define NFS_CAP_XATTR (1U << 28) +#define NFS_CAP_READ_PLUS (1U << 29) #endif diff --git a/include/linux/nfs_xdr.h b/include/linux/nfs_xdr.h index 69cb46f7b8d2..d63cb862d58e 100644 --- a/include/linux/nfs_xdr.h +++ b/include/linux/nfs_xdr.h @@ -525,7 +525,7 @@ struct nfs_closeargs { struct nfs_seqid * seqid; fmode_t fmode; u32 share_access; - const u32 * bitmask; + u32 * bitmask; struct nfs4_layoutreturn_args *lr_args; }; @@ -608,7 +608,7 @@ struct nfs4_delegreturnargs { struct nfs4_sequence_args seq_args; const struct nfs_fh *fhandle; const nfs4_stateid *stateid; - const u32 * bitmask; + u32 * bitmask; struct nfs4_layoutreturn_args *lr_args; }; @@ -648,7 +648,7 @@ struct nfs_pgio_args { union { unsigned int replen; /* used by read */ struct { - const u32 * bitmask; /* used by write */ + u32 * bitmask; /* used by write */ enum nfs3_stable_how stable; /* used by write */ }; }; @@ -657,7 +657,7 @@ struct nfs_pgio_args { struct nfs_pgio_res { struct nfs4_sequence_res seq_res; struct nfs_fattr * fattr; - __u32 count; + __u64 count; __u32 op_status; union { struct { diff --git a/include/linux/radix-tree.h b/include/linux/radix-tree.h index c2a9f7c90727..64ad900ac742 100644 --- a/include/linux/radix-tree.h +++ b/include/linux/radix-tree.h @@ -11,6 +11,7 @@ #include #include #include +#include #include #include #include @@ -376,7 +377,7 @@ radix_tree_chunk_size(struct radix_tree_iter *iter) * radix_tree_next_slot - find next slot in chunk * * @slot: pointer to current slot - * @iter: pointer to interator state + * @iter: pointer to iterator state * @flags: RADIX_TREE_ITER_*, should be constant * Returns: pointer to next slot, or NULL if there no more left * diff --git a/include/linux/sunrpc/bc_xprt.h b/include/linux/sunrpc/bc_xprt.h index d796058cdff2..f07c334c599f 100644 --- a/include/linux/sunrpc/bc_xprt.h +++ b/include/linux/sunrpc/bc_xprt.h @@ -4,7 +4,7 @@ NetApp provides this source code under the GPL v2 License. The GPL v2 license is available at -http://opensource.org/licenses/gpl-license.php. +https://opensource.org/licenses/gpl-license.php. THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT diff --git a/include/linux/sunrpc/cache.h b/include/linux/sunrpc/cache.h index 10891b70fc7b..d0965e2997b0 100644 --- a/include/linux/sunrpc/cache.h +++ b/include/linux/sunrpc/cache.h @@ -45,7 +45,8 @@ */ struct cache_head { struct hlist_node cache_list; - time64_t expiry_time; /* After time time, don't use the data */ + time64_t expiry_time; /* After time expiry_time, don't use + * the data */ time64_t last_refresh; /* If CACHE_PENDING, this is when upcall was * sent, else this is when update was * received, though it is alway set to diff --git a/include/linux/sunrpc/msg_prot.h b/include/linux/sunrpc/msg_prot.h index bea40d9f03a1..43f854487539 100644 --- a/include/linux/sunrpc/msg_prot.h +++ b/include/linux/sunrpc/msg_prot.h @@ -143,7 +143,7 @@ typedef __be32 rpc_fraghdr; /* * Well-known netids. See: * - * http://www.iana.org/assignments/rpc-netids/rpc-netids.xhtml + * https://www.iana.org/assignments/rpc-netids/rpc-netids.xhtml */ #define RPCBIND_NETID_UDP "udp" #define RPCBIND_NETID_TCP "tcp" diff --git a/include/linux/sunrpc/xdr.h b/include/linux/sunrpc/xdr.h index 5a6a81b7cd9f..fe7ff7f5b584 100644 --- a/include/linux/sunrpc/xdr.h +++ b/include/linux/sunrpc/xdr.h @@ -240,6 +240,7 @@ extern int xdr_restrict_buflen(struct xdr_stream *xdr, int newbuflen); extern void xdr_write_pages(struct xdr_stream *xdr, struct page **pages, unsigned int base, unsigned int len); extern unsigned int xdr_stream_pos(const struct xdr_stream *xdr); +extern unsigned int xdr_page_pos(const struct xdr_stream *xdr); extern void xdr_init_decode(struct xdr_stream *xdr, struct xdr_buf *buf, __be32 *p, struct rpc_rqst *rqst); extern void xdr_init_decode_pages(struct xdr_stream *xdr, struct xdr_buf *buf, @@ -249,6 +250,8 @@ extern __be32 *xdr_inline_decode(struct xdr_stream *xdr, size_t nbytes); extern unsigned int xdr_read_pages(struct xdr_stream *xdr, unsigned int len); extern void xdr_enter_page(struct xdr_stream *xdr, unsigned int len); extern int xdr_process_buf(struct xdr_buf *buf, unsigned int offset, unsigned int len, int (*actor)(struct scatterlist *, void *), void *data); +extern uint64_t xdr_align_data(struct xdr_stream *, uint64_t, uint32_t); +extern uint64_t xdr_expand_hole(struct xdr_stream *, uint64_t, uint64_t); /** * xdr_stream_remaining - Return the number of bytes remaining in the stream diff --git a/include/linux/xarray.h b/include/linux/xarray.h index 5cdf441f6377..92c0160b3352 100644 --- a/include/linux/xarray.h +++ b/include/linux/xarray.h @@ -1286,6 +1286,8 @@ static inline bool xa_is_advanced(const void *entry) */ typedef void (*xa_update_node_t)(struct xa_node *node); +void xa_delete_node(struct xa_node *, xa_update_node_t); + /* * The xa_state is opaque to its users. It contains various different pieces * of state involved in the current operation on the XArray. It should be @@ -1544,10 +1546,21 @@ static inline void xas_split_alloc(struct xa_state *xas, void *entry, static inline void *xas_reload(struct xa_state *xas) { struct xa_node *node = xas->xa_node; + void *entry; + char offset; - if (node) - return xa_entry(xas->xa, node, xas->xa_offset); - return xa_head(xas->xa); + if (!node) + return xa_head(xas->xa); + if (IS_ENABLED(CONFIG_XARRAY_MULTI)) { + offset = (xas->xa_index >> node->shift) & XA_CHUNK_MASK; + entry = xa_entry(xas->xa, node, offset); + if (!xa_is_sibling(entry)) + return entry; + offset = xa_to_sibling(entry); + } else { + offset = xas->xa_offset; + } + return xa_entry(xas->xa, node, offset); } /** @@ -1736,13 +1749,12 @@ enum { * @xas: XArray operation state. * @entry: Entry retrieved from the array. * - * The loop body will be executed for each entry in the XArray that lies - * within the range specified by @xas. If the loop completes successfully, - * any entries that lie in this range will be replaced by @entry. The caller - * may break out of the loop; if they do so, the contents of the XArray will - * be unchanged. The operation may fail due to an out of memory condition. - * The caller may also call xa_set_err() to exit the loop while setting an - * error to record the reason. + * The loop body will be executed for each entry in the XArray that + * lies within the range specified by @xas. If the loop terminates + * normally, @entry will be %NULL. The user may break out of the loop, + * which will leave @entry set to the conflicting entry. The caller + * may also call xa_set_err() to exit the loop while setting an error + * to record the reason. */ #define xas_for_each_conflict(xas, entry) \ while ((entry = xas_find_conflict(xas))) diff --git a/include/trace/events/rpcrdma.h b/include/trace/events/rpcrdma.h index b6aad52beb62..bf1065772228 100644 --- a/include/trace/events/rpcrdma.h +++ b/include/trace/events/rpcrdma.h @@ -424,7 +424,6 @@ DEFINE_CONN_EVENT(connect); DEFINE_CONN_EVENT(disconnect); DEFINE_RXPRT_EVENT(xprtrdma_op_inject_dsc); -DEFINE_RXPRT_EVENT(xprtrdma_op_setport); TRACE_EVENT(xprtrdma_op_connect, TP_PROTO( @@ -1188,68 +1187,6 @@ TRACE_EVENT(xprtrdma_decode_seg, ) ); -/** - ** Allocation/release of rpcrdma_reqs and rpcrdma_reps - **/ - -TRACE_EVENT(xprtrdma_op_allocate, - TP_PROTO( - const struct rpc_task *task, - const struct rpcrdma_req *req - ), - - TP_ARGS(task, req), - - TP_STRUCT__entry( - __field(unsigned int, task_id) - __field(unsigned int, client_id) - __field(const void *, req) - __field(size_t, callsize) - __field(size_t, rcvsize) - ), - - TP_fast_assign( - __entry->task_id = task->tk_pid; - __entry->client_id = task->tk_client->cl_clid; - __entry->req = req; - __entry->callsize = task->tk_rqstp->rq_callsize; - __entry->rcvsize = task->tk_rqstp->rq_rcvsize; - ), - - TP_printk("task:%u@%u req=%p (%zu, %zu)", - __entry->task_id, __entry->client_id, - __entry->req, __entry->callsize, __entry->rcvsize - ) -); - -TRACE_EVENT(xprtrdma_op_free, - TP_PROTO( - const struct rpc_task *task, - const struct rpcrdma_req *req - ), - - TP_ARGS(task, req), - - TP_STRUCT__entry( - __field(unsigned int, task_id) - __field(unsigned int, client_id) - __field(const void *, req) - __field(const void *, rep) - ), - - TP_fast_assign( - __entry->task_id = task->tk_pid; - __entry->client_id = task->tk_client->cl_clid; - __entry->req = req; - __entry->rep = req->rl_reply; - ), - - TP_printk("task:%u@%u req=%p rep=%p", - __entry->task_id, __entry->client_id, - __entry->req, __entry->rep - ) -); - /** ** Callback events **/ diff --git a/include/trace/events/sunrpc.h b/include/trace/events/sunrpc.h index 65d7dfbbc9cd..f45b3c01370c 100644 --- a/include/trace/events/sunrpc.h +++ b/include/trace/events/sunrpc.h @@ -259,8 +259,10 @@ DECLARE_EVENT_CLASS(rpc_task_status, TP_ARGS(task)) DEFINE_RPC_STATUS_EVENT(call); -DEFINE_RPC_STATUS_EVENT(bind); DEFINE_RPC_STATUS_EVENT(connect); +DEFINE_RPC_STATUS_EVENT(timeout); +DEFINE_RPC_STATUS_EVENT(retry_refresh); +DEFINE_RPC_STATUS_EVENT(refresh); TRACE_EVENT(rpc_request, TP_PROTO(const struct rpc_task *task), @@ -385,7 +387,10 @@ DECLARE_EVENT_CLASS(rpc_task_running, DEFINE_RPC_RUNNING_EVENT(begin); DEFINE_RPC_RUNNING_EVENT(run_action); +DEFINE_RPC_RUNNING_EVENT(sync_sleep); +DEFINE_RPC_RUNNING_EVENT(sync_wake); DEFINE_RPC_RUNNING_EVENT(complete); +DEFINE_RPC_RUNNING_EVENT(timeout); DEFINE_RPC_RUNNING_EVENT(signalled); DEFINE_RPC_RUNNING_EVENT(end); @@ -517,6 +522,49 @@ DEFINE_RPC_REPLY_EVENT(stale_creds); DEFINE_RPC_REPLY_EVENT(bad_creds); DEFINE_RPC_REPLY_EVENT(auth_tooweak); +#define DEFINE_RPCB_ERROR_EVENT(name) \ + DEFINE_EVENT(rpc_reply_event, rpcb_##name##_err, \ + TP_PROTO( \ + const struct rpc_task *task \ + ), \ + TP_ARGS(task)) + +DEFINE_RPCB_ERROR_EVENT(prog_unavail); +DEFINE_RPCB_ERROR_EVENT(timeout); +DEFINE_RPCB_ERROR_EVENT(bind_version); +DEFINE_RPCB_ERROR_EVENT(unreachable); +DEFINE_RPCB_ERROR_EVENT(unrecognized); + +TRACE_EVENT(rpc_buf_alloc, + TP_PROTO( + const struct rpc_task *task, + int status + ), + + TP_ARGS(task, status), + + TP_STRUCT__entry( + __field(unsigned int, task_id) + __field(unsigned int, client_id) + __field(size_t, callsize) + __field(size_t, recvsize) + __field(int, status) + ), + + TP_fast_assign( + __entry->task_id = task->tk_pid; + __entry->client_id = task->tk_client->cl_clid; + __entry->callsize = task->tk_rqstp->rq_callsize; + __entry->recvsize = task->tk_rqstp->rq_rcvsize; + __entry->status = status; + ), + + TP_printk("task:%u@%u callsize=%zu recvsize=%zu status=%d", + __entry->task_id, __entry->client_id, + __entry->callsize, __entry->recvsize, __entry->status + ) +); + TRACE_EVENT(rpc_call_rpcerror, TP_PROTO( const struct rpc_task *task, @@ -868,6 +916,34 @@ DEFINE_RPC_SOCKET_EVENT_DONE(rpc_socket_reset_connection); DEFINE_RPC_SOCKET_EVENT(rpc_socket_close); DEFINE_RPC_SOCKET_EVENT(rpc_socket_shutdown); +TRACE_EVENT(rpc_socket_nospace, + TP_PROTO( + const struct rpc_rqst *rqst, + const struct sock_xprt *transport + ), + + TP_ARGS(rqst, transport), + + TP_STRUCT__entry( + __field(unsigned int, task_id) + __field(unsigned int, client_id) + __field(unsigned int, total) + __field(unsigned int, remaining) + ), + + TP_fast_assign( + __entry->task_id = rqst->rq_task->tk_pid; + __entry->client_id = rqst->rq_task->tk_client->cl_clid; + __entry->total = rqst->rq_slen; + __entry->remaining = rqst->rq_slen - transport->xmit.offset; + ), + + TP_printk("task:%u@%u total=%u remaining=%u", + __entry->task_id, __entry->client_id, + __entry->total, __entry->remaining + ) +); + TRACE_DEFINE_ENUM(XPRT_LOCKED); TRACE_DEFINE_ENUM(XPRT_CONNECTED); TRACE_DEFINE_ENUM(XPRT_CONNECTING); @@ -925,6 +1001,7 @@ DECLARE_EVENT_CLASS(rpc_xprt_lifetime_class, TP_ARGS(xprt)) DEFINE_RPC_XPRT_LIFETIME_EVENT(create); +DEFINE_RPC_XPRT_LIFETIME_EVENT(connect); DEFINE_RPC_XPRT_LIFETIME_EVENT(disconnect_auto); DEFINE_RPC_XPRT_LIFETIME_EVENT(disconnect_done); DEFINE_RPC_XPRT_LIFETIME_EVENT(disconnect_force); @@ -969,7 +1046,6 @@ DECLARE_EVENT_CLASS(rpc_xprt_event, DEFINE_RPC_XPRT_EVENT(timer); DEFINE_RPC_XPRT_EVENT(lookup_rqst); -DEFINE_RPC_XPRT_EVENT(complete_rqst); TRACE_EVENT(xprt_transmit, TP_PROTO( @@ -1002,37 +1078,6 @@ TRACE_EVENT(xprt_transmit, __entry->seqno, __entry->status) ); -TRACE_EVENT(xprt_enq_xmit, - TP_PROTO( - const struct rpc_task *task, - int stage - ), - - TP_ARGS(task, stage), - - TP_STRUCT__entry( - __field(unsigned int, task_id) - __field(unsigned int, client_id) - __field(u32, xid) - __field(u32, seqno) - __field(int, stage) - ), - - TP_fast_assign( - __entry->task_id = task->tk_pid; - __entry->client_id = task->tk_client ? - task->tk_client->cl_clid : -1; - __entry->xid = be32_to_cpu(task->tk_rqstp->rq_xid); - __entry->seqno = task->tk_rqstp->rq_seqno; - __entry->stage = stage; - ), - - TP_printk( - "task:%u@%u xid=0x%08x seqno=%u stage=%d", - __entry->task_id, __entry->client_id, __entry->xid, - __entry->seqno, __entry->stage) -); - TRACE_EVENT(xprt_ping, TP_PROTO(const struct rpc_xprt *xprt, int status), @@ -1095,6 +1140,7 @@ DECLARE_EVENT_CLASS(xprt_writelock_event, DEFINE_WRITELOCK_EVENT(reserve_xprt); DEFINE_WRITELOCK_EVENT(release_xprt); +DEFINE_WRITELOCK_EVENT(transmit_queued); DECLARE_EVENT_CLASS(xprt_cong_event, TP_PROTO( @@ -1147,6 +1193,30 @@ DEFINE_CONG_EVENT(release_cong); DEFINE_CONG_EVENT(get_cong); DEFINE_CONG_EVENT(put_cong); +TRACE_EVENT(xprt_reserve, + TP_PROTO( + const struct rpc_rqst *rqst + ), + + TP_ARGS(rqst), + + TP_STRUCT__entry( + __field(unsigned int, task_id) + __field(unsigned int, client_id) + __field(u32, xid) + ), + + TP_fast_assign( + __entry->task_id = rqst->rq_task->tk_pid; + __entry->client_id = rqst->rq_task->tk_client->cl_clid; + __entry->xid = be32_to_cpu(rqst->rq_xid); + ), + + TP_printk("task:%u@%u xid=0x%08x", + __entry->task_id, __entry->client_id, __entry->xid + ) +); + TRACE_EVENT(xs_stream_read_data, TP_PROTO(struct rpc_xprt *xprt, ssize_t err, size_t total), @@ -1202,6 +1272,156 @@ TRACE_EVENT(xs_stream_read_request, __entry->copied, __entry->reclen, __entry->offset) ); +TRACE_EVENT(rpcb_getport, + TP_PROTO( + const struct rpc_clnt *clnt, + const struct rpc_task *task, + unsigned int bind_version + ), + + TP_ARGS(clnt, task, bind_version), + + TP_STRUCT__entry( + __field(unsigned int, task_id) + __field(unsigned int, client_id) + __field(unsigned int, program) + __field(unsigned int, version) + __field(int, protocol) + __field(unsigned int, bind_version) + __string(servername, task->tk_xprt->servername) + ), + + TP_fast_assign( + __entry->task_id = task->tk_pid; + __entry->client_id = clnt->cl_clid; + __entry->program = clnt->cl_prog; + __entry->version = clnt->cl_vers; + __entry->protocol = task->tk_xprt->prot; + __entry->bind_version = bind_version; + __assign_str(servername, task->tk_xprt->servername); + ), + + TP_printk("task:%u@%u server=%s program=%u version=%u protocol=%d bind_version=%u", + __entry->task_id, __entry->client_id, __get_str(servername), + __entry->program, __entry->version, __entry->protocol, + __entry->bind_version + ) +); + +TRACE_EVENT(rpcb_setport, + TP_PROTO( + const struct rpc_task *task, + int status, + unsigned short port + ), + + TP_ARGS(task, status, port), + + TP_STRUCT__entry( + __field(unsigned int, task_id) + __field(unsigned int, client_id) + __field(int, status) + __field(unsigned short, port) + ), + + TP_fast_assign( + __entry->task_id = task->tk_pid; + __entry->client_id = task->tk_client->cl_clid; + __entry->status = status; + __entry->port = port; + ), + + TP_printk("task:%u@%u status=%d port=%u", + __entry->task_id, __entry->client_id, + __entry->status, __entry->port + ) +); + +TRACE_EVENT(pmap_register, + TP_PROTO( + u32 program, + u32 version, + int protocol, + unsigned short port + ), + + TP_ARGS(program, version, protocol, port), + + TP_STRUCT__entry( + __field(unsigned int, program) + __field(unsigned int, version) + __field(int, protocol) + __field(unsigned int, port) + ), + + TP_fast_assign( + __entry->program = program; + __entry->version = version; + __entry->protocol = protocol; + __entry->port = port; + ), + + TP_printk("program=%u version=%u protocol=%d port=%u", + __entry->program, __entry->version, + __entry->protocol, __entry->port + ) +); + +TRACE_EVENT(rpcb_register, + TP_PROTO( + u32 program, + u32 version, + const char *addr, + const char *netid + ), + + TP_ARGS(program, version, addr, netid), + + TP_STRUCT__entry( + __field(unsigned int, program) + __field(unsigned int, version) + __string(addr, addr) + __string(netid, netid) + ), + + TP_fast_assign( + __entry->program = program; + __entry->version = version; + __assign_str(addr, addr); + __assign_str(netid, netid); + ), + + TP_printk("program=%u version=%u addr=%s netid=%s", + __entry->program, __entry->version, + __get_str(addr), __get_str(netid) + ) +); + +TRACE_EVENT(rpcb_unregister, + TP_PROTO( + u32 program, + u32 version, + const char *netid + ), + + TP_ARGS(program, version, netid), + + TP_STRUCT__entry( + __field(unsigned int, program) + __field(unsigned int, version) + __string(netid, netid) + ), + + TP_fast_assign( + __entry->program = program; + __entry->version = version; + __assign_str(netid, netid); + ), + + TP_printk("program=%u version=%u netid=%s", + __entry->program, __entry->version, __get_str(netid) + ) +); DECLARE_EVENT_CLASS(svc_xdr_buf_class, TP_PROTO( diff --git a/include/uapi/linux/nfs4.h b/include/uapi/linux/nfs4.h index bf197e99b98f..ed5415e0f1c1 100644 --- a/include/uapi/linux/nfs4.h +++ b/include/uapi/linux/nfs4.h @@ -139,6 +139,8 @@ #define EXCHGID4_FLAG_UPD_CONFIRMED_REC_A 0x40000000 #define EXCHGID4_FLAG_CONFIRMED_R 0x80000000 + +#define EXCHGID4_FLAG_SUPP_FENCE_OPS 0x00000004 /* * Since the validity of these bits depends on whether * they're set in the argument or response, have separate @@ -146,6 +148,7 @@ */ #define EXCHGID4_FLAG_MASK_A 0x40070103 #define EXCHGID4_FLAG_MASK_R 0x80070103 +#define EXCHGID4_2_FLAG_MASK_R 0x80070107 #define SEQ4_STATUS_CB_PATH_DOWN 0x00000001 #define SEQ4_STATUS_CB_GSS_CONTEXTS_EXPIRING 0x00000002 diff --git a/lib/idr.c b/lib/idr.c index 3fa8be43696f..f4ab4f4aa3c7 100644 --- a/lib/idr.c +++ b/lib/idr.c @@ -471,6 +471,7 @@ int ida_alloc_range(struct ida *ida, unsigned int min, unsigned int max, goto retry; nospc: xas_unlock_irqrestore(&xas, flags); + kfree(alloc); return -ENOSPC; } EXPORT_SYMBOL(ida_alloc_range); diff --git a/lib/radix-tree.c b/lib/radix-tree.c index 005ced92a4a9..3a4da11b804d 100644 --- a/lib/radix-tree.c +++ b/lib/radix-tree.c @@ -20,7 +20,6 @@ #include #include #include -#include #include /* in_interrupt() */ #include #include diff --git a/lib/test_xarray.c b/lib/test_xarray.c index 8262c3f05a5d..8294f43f4981 100644 --- a/lib/test_xarray.c +++ b/lib/test_xarray.c @@ -289,6 +289,27 @@ static noinline void check_xa_mark_2(struct xarray *xa) xa_destroy(xa); } +static noinline void check_xa_mark_3(struct xarray *xa) +{ +#ifdef CONFIG_XARRAY_MULTI + XA_STATE(xas, xa, 0x41); + void *entry; + int count = 0; + + xa_store_order(xa, 0x40, 2, xa_mk_index(0x40), GFP_KERNEL); + xa_set_mark(xa, 0x41, XA_MARK_0); + + rcu_read_lock(); + xas_for_each_marked(&xas, entry, ULONG_MAX, XA_MARK_0) { + count++; + XA_BUG_ON(xa, entry != xa_mk_index(0x40)); + } + XA_BUG_ON(xa, count != 1); + rcu_read_unlock(); + xa_destroy(xa); +#endif +} + static noinline void check_xa_mark(struct xarray *xa) { unsigned long index; @@ -297,6 +318,7 @@ static noinline void check_xa_mark(struct xarray *xa) check_xa_mark_1(xa, index); check_xa_mark_2(xa); + check_xa_mark_3(xa); } static noinline void check_xa_shrink(struct xarray *xa) @@ -393,6 +415,9 @@ static noinline void check_cmpxchg(struct xarray *xa) XA_BUG_ON(xa, xa_cmpxchg(xa, 12345678, FIVE, LOTS, GFP_KERNEL) != FIVE); XA_BUG_ON(xa, xa_cmpxchg(xa, 5, FIVE, NULL, GFP_KERNEL) != NULL); XA_BUG_ON(xa, xa_cmpxchg(xa, 5, NULL, FIVE, GFP_KERNEL) != NULL); + XA_BUG_ON(xa, xa_insert(xa, 5, FIVE, GFP_KERNEL) != -EBUSY); + XA_BUG_ON(xa, xa_cmpxchg(xa, 5, FIVE, NULL, GFP_KERNEL) != FIVE); + XA_BUG_ON(xa, xa_insert(xa, 5, FIVE, GFP_KERNEL) == -EBUSY); xa_erase_index(xa, 12345678); xa_erase_index(xa, 5); XA_BUG_ON(xa, !xa_empty(xa)); @@ -1618,14 +1643,9 @@ static noinline void shadow_remove(struct xarray *xa) xa_lock(xa); while ((node = list_first_entry_or_null(&shadow_nodes, struct xa_node, private_list))) { - XA_STATE(xas, node->array, 0); XA_BUG_ON(xa, node->array != xa); list_del_init(&node->private_list); - xas.xa_node = xa_parent_locked(node->array, node); - xas.xa_offset = node->offset; - xas.xa_shift = node->shift + XA_CHUNK_SHIFT; - xas_set_update(&xas, test_update_node); - xas_store(&xas, NULL); + xa_delete_node(node, test_update_node); } xa_unlock(xa); } diff --git a/lib/xarray.c b/lib/xarray.c index b76eea7b314c..5fa51614802a 100644 --- a/lib/xarray.c +++ b/lib/xarray.c @@ -706,7 +706,7 @@ void xas_create_range(struct xa_state *xas) unsigned char shift = xas->xa_shift; unsigned char sibs = xas->xa_sibs; - xas->xa_index |= ((sibs + 1) << shift) - 1; + xas->xa_index |= ((sibs + 1UL) << shift) - 1; if (xas_is_node(xas) && xas->xa_node->shift == xas->xa_shift) xas->xa_offset |= sibs; xas->xa_shift = 0; @@ -2163,6 +2163,29 @@ unsigned int xa_extract(struct xarray *xa, void **dst, unsigned long start, } EXPORT_SYMBOL(xa_extract); +/** + * xa_delete_node() - Private interface for workingset code. + * @node: Node to be removed from the tree. + * @update: Function to call to update ancestor nodes. + * + * Context: xa_lock must be held on entry and will not be released. + */ +void xa_delete_node(struct xa_node *node, xa_update_node_t update) +{ + struct xa_state xas = { + .xa = node->array, + .xa_index = (unsigned long)node->offset << + (node->shift + XA_CHUNK_SHIFT), + .xa_shift = node->shift + XA_CHUNK_SHIFT, + .xa_offset = node->offset, + .xa_node = xa_parent_locked(node->array, node), + .xa_update = update, + }; + + xas_store(&xas, NULL); +} +EXPORT_SYMBOL_GPL(xa_delete_node); /* For the benefit of the test suite */ + /** * xa_destroy() - Free all internal data structures. * @xa: XArray. diff --git a/mm/filemap.c b/mm/filemap.c index 1a6beaf69f49..e4101b5bfa82 100644 --- a/mm/filemap.c +++ b/mm/filemap.c @@ -2199,6 +2199,14 @@ ssize_t generic_file_buffered_read(struct kiocb *iocb, last_index = (*ppos + iter->count + PAGE_SIZE-1) >> PAGE_SHIFT; offset = *ppos & ~PAGE_MASK; + /* + * If we've already successfully copied some data, then we + * can no longer safely return -EIOCBQUEUED. Hence mark + * an async read NOWAIT at that point. + */ + if (written && (iocb->ki_flags & IOCB_WAITQ)) + iocb->ki_flags |= IOCB_NOWAIT; + for (;;) { struct page *page; pgoff_t end_index; diff --git a/mm/readahead.c b/mm/readahead.c index c6ffb76827da..c5b0457415be 100644 --- a/mm/readahead.c +++ b/mm/readahead.c @@ -552,15 +552,23 @@ static void ondemand_readahead(struct readahead_control *ractl, void page_cache_sync_ra(struct readahead_control *ractl, struct file_ra_state *ra, unsigned long req_count) { - /* no read-ahead */ - if (!ra->ra_pages) - return; + bool do_forced_ra = ractl->file && (ractl->file->f_mode & FMODE_RANDOM); - if (blk_cgroup_congested()) - return; + /* + * Even if read-ahead is disabled, issue this request as read-ahead + * as we'll need it to satisfy the requested range. The forced + * read-ahead will do the right thing and limit the read to just the + * requested range, which we'll set to 1 page for this case. + */ + if (!ra->ra_pages || blk_cgroup_congested()) { + if (!ractl->file) + return; + req_count = 1; + do_forced_ra = true; + } /* be dumb */ - if (ractl->file && (ractl->file->f_mode & FMODE_RANDOM)) { + if (do_forced_ra) { force_page_cache_ra(ractl, ra, req_count); return; } diff --git a/mm/workingset.c b/mm/workingset.c index 8ed8e6296d8c..975a4d2dd02e 100644 --- a/mm/workingset.c +++ b/mm/workingset.c @@ -519,12 +519,11 @@ static enum lru_status shadow_lru_isolate(struct list_head *item, void *arg) __must_hold(lru_lock) { struct xa_node *node = container_of(item, struct xa_node, private_list); - XA_STATE(xas, node->array, 0); struct address_space *mapping; int ret; /* - * Page cache insertions and deletions synchroneously maintain + * Page cache insertions and deletions synchronously maintain * the shadow node LRU under the i_pages lock and the * lru_lock. Because the page cache tree is emptied before * the inode can be destroyed, holding the lru_lock pins any @@ -559,15 +558,7 @@ static enum lru_status shadow_lru_isolate(struct list_head *item, if (WARN_ON_ONCE(node->count != node->nr_values)) goto out_invalid; mapping->nrexceptional -= node->nr_values; - xas.xa_node = xa_parent_locked(&mapping->i_pages, node); - xas.xa_offset = node->offset; - xas.xa_shift = node->shift + XA_CHUNK_SHIFT; - xas_set_update(&xas, workingset_update_node); - /* - * We could store a shadow entry here which was the minimum of the - * shadow entries we were tracking ... - */ - xas_store(&xas, NULL); + xa_delete_node(node, workingset_update_node); __inc_lruvec_slab_state(node, WORKINGSET_NODERECLAIM); out_invalid: diff --git a/net/ceph/messenger.c b/net/ceph/messenger.c index d4d7a0e52491..af0f1fa24937 100644 --- a/net/ceph/messenger.c +++ b/net/ceph/messenger.c @@ -2016,11 +2016,11 @@ static int process_banner(struct ceph_connection *con) sizeof(con->peer_addr)) != 0 && !(addr_is_blank(&con->actual_peer_addr) && con->actual_peer_addr.nonce == con->peer_addr.nonce)) { - pr_warn("wrong peer, want %s/%d, got %s/%d\n", + pr_warn("wrong peer, want %s/%u, got %s/%u\n", ceph_pr_addr(&con->peer_addr), - (int)le32_to_cpu(con->peer_addr.nonce), + le32_to_cpu(con->peer_addr.nonce), ceph_pr_addr(&con->actual_peer_addr), - (int)le32_to_cpu(con->actual_peer_addr.nonce)); + le32_to_cpu(con->actual_peer_addr.nonce)); con->error_msg = "wrong peer at address"; return -1; } @@ -2811,13 +2811,13 @@ static int queue_con_delay(struct ceph_connection *con, unsigned long delay) return -ENOENT; } + dout("%s %p %lu\n", __func__, con, delay); if (!queue_delayed_work(ceph_msgr_wq, &con->work, delay)) { dout("%s %p - already queued\n", __func__, con); con->ops->put(con); return -EBUSY; } - dout("%s %p %lu\n", __func__, con, delay); return 0; } @@ -2998,6 +2998,11 @@ static void con_fault(struct ceph_connection *con) ceph_msg_put(con->in_msg); con->in_msg = NULL; } + if (con->out_msg) { + BUG_ON(con->out_msg->con != con); + ceph_msg_put(con->out_msg); + con->out_msg = NULL; + } /* Requeue anything that hasn't been acked */ list_splice_init(&con->out_sent, &con->out_queue); diff --git a/net/ceph/mon_client.c b/net/ceph/mon_client.c index d633a0aeaa55..c4cf2529d08b 100644 --- a/net/ceph/mon_client.c +++ b/net/ceph/mon_client.c @@ -896,8 +896,9 @@ static void handle_command_ack(struct ceph_mon_client *monc, ceph_msg_dump(msg); } -int ceph_monc_blacklist_add(struct ceph_mon_client *monc, - struct ceph_entity_addr *client_addr) +static __printf(2, 0) +int do_mon_command_vargs(struct ceph_mon_client *monc, const char *fmt, + va_list ap) { struct ceph_mon_generic_request *req; struct ceph_mon_command *h; @@ -925,29 +926,65 @@ int ceph_monc_blacklist_add(struct ceph_mon_client *monc, h->monhdr.session_mon_tid = 0; h->fsid = monc->monmap->fsid; h->num_strs = cpu_to_le32(1); - len = sprintf(h->str, "{ \"prefix\": \"osd blacklist\", \ - \"blacklistop\": \"add\", \ - \"addr\": \"%pISpc/%u\" }", - &client_addr->in_addr, le32_to_cpu(client_addr->nonce)); + len = vsprintf(h->str, fmt, ap); h->str_len = cpu_to_le32(len); send_generic_request(monc, req); mutex_unlock(&monc->mutex); ret = wait_generic_request(req); - if (!ret) - /* - * Make sure we have the osdmap that includes the blacklist - * entry. This is needed to ensure that the OSDs pick up the - * new blacklist before processing any future requests from - * this client. - */ - ret = ceph_wait_for_latest_osdmap(monc->client, 0); - out: put_generic_request(req); return ret; } -EXPORT_SYMBOL(ceph_monc_blacklist_add); + +static __printf(2, 3) +int do_mon_command(struct ceph_mon_client *monc, const char *fmt, ...) +{ + va_list ap; + int ret; + + va_start(ap, fmt); + ret = do_mon_command_vargs(monc, fmt, ap); + va_end(ap); + return ret; +} + +int ceph_monc_blocklist_add(struct ceph_mon_client *monc, + struct ceph_entity_addr *client_addr) +{ + int ret; + + ret = do_mon_command(monc, + "{ \"prefix\": \"osd blocklist\", \ + \"blocklistop\": \"add\", \ + \"addr\": \"%pISpc/%u\" }", + &client_addr->in_addr, + le32_to_cpu(client_addr->nonce)); + if (ret == -EINVAL) { + /* + * The monitor returns EINVAL on an unrecognized command. + * Try the legacy command -- it is exactly the same except + * for the name. + */ + ret = do_mon_command(monc, + "{ \"prefix\": \"osd blacklist\", \ + \"blacklistop\": \"add\", \ + \"addr\": \"%pISpc/%u\" }", + &client_addr->in_addr, + le32_to_cpu(client_addr->nonce)); + } + if (ret) + return ret; + + /* + * Make sure we have the osdmap that includes the blocklist + * entry. This is needed to ensure that the OSDs pick up the + * new blocklist before processing any future requests from + * this client. + */ + return ceph_wait_for_latest_osdmap(monc->client, 0); +} +EXPORT_SYMBOL(ceph_monc_blocklist_add); /* * Resend pending generic requests. diff --git a/net/ceph/osdmap.c b/net/ceph/osdmap.c index 96c25f5e064a..fa08c15be0c0 100644 --- a/net/ceph/osdmap.c +++ b/net/ceph/osdmap.c @@ -964,6 +964,143 @@ static int decode_pool_names(void **p, void *end, struct ceph_osdmap *map) return -EINVAL; } +/* + * CRUSH workspaces + * + * workspace_manager framework borrowed from fs/btrfs/compression.c. + * Two simplifications: there is only one type of workspace and there + * is always at least one workspace. + */ +static struct crush_work *alloc_workspace(const struct crush_map *c) +{ + struct crush_work *work; + size_t work_size; + + WARN_ON(!c->working_size); + work_size = crush_work_size(c, CEPH_PG_MAX_SIZE); + dout("%s work_size %zu bytes\n", __func__, work_size); + + work = ceph_kvmalloc(work_size, GFP_NOIO); + if (!work) + return NULL; + + INIT_LIST_HEAD(&work->item); + crush_init_workspace(c, work); + return work; +} + +static void free_workspace(struct crush_work *work) +{ + WARN_ON(!list_empty(&work->item)); + kvfree(work); +} + +static void init_workspace_manager(struct workspace_manager *wsm) +{ + INIT_LIST_HEAD(&wsm->idle_ws); + spin_lock_init(&wsm->ws_lock); + atomic_set(&wsm->total_ws, 0); + wsm->free_ws = 0; + init_waitqueue_head(&wsm->ws_wait); +} + +static void add_initial_workspace(struct workspace_manager *wsm, + struct crush_work *work) +{ + WARN_ON(!list_empty(&wsm->idle_ws)); + + list_add(&work->item, &wsm->idle_ws); + atomic_set(&wsm->total_ws, 1); + wsm->free_ws = 1; +} + +static void cleanup_workspace_manager(struct workspace_manager *wsm) +{ + struct crush_work *work; + + while (!list_empty(&wsm->idle_ws)) { + work = list_first_entry(&wsm->idle_ws, struct crush_work, + item); + list_del_init(&work->item); + free_workspace(work); + } + atomic_set(&wsm->total_ws, 0); + wsm->free_ws = 0; +} + +/* + * Finds an available workspace or allocates a new one. If it's not + * possible to allocate a new one, waits until there is one. + */ +static struct crush_work *get_workspace(struct workspace_manager *wsm, + const struct crush_map *c) +{ + struct crush_work *work; + int cpus = num_online_cpus(); + +again: + spin_lock(&wsm->ws_lock); + if (!list_empty(&wsm->idle_ws)) { + work = list_first_entry(&wsm->idle_ws, struct crush_work, + item); + list_del_init(&work->item); + wsm->free_ws--; + spin_unlock(&wsm->ws_lock); + return work; + + } + if (atomic_read(&wsm->total_ws) > cpus) { + DEFINE_WAIT(wait); + + spin_unlock(&wsm->ws_lock); + prepare_to_wait(&wsm->ws_wait, &wait, TASK_UNINTERRUPTIBLE); + if (atomic_read(&wsm->total_ws) > cpus && !wsm->free_ws) + schedule(); + finish_wait(&wsm->ws_wait, &wait); + goto again; + } + atomic_inc(&wsm->total_ws); + spin_unlock(&wsm->ws_lock); + + work = alloc_workspace(c); + if (!work) { + atomic_dec(&wsm->total_ws); + wake_up(&wsm->ws_wait); + + /* + * Do not return the error but go back to waiting. We + * have the inital workspace and the CRUSH computation + * time is bounded so we will get it eventually. + */ + WARN_ON(atomic_read(&wsm->total_ws) < 1); + goto again; + } + return work; +} + +/* + * Puts a workspace back on the list or frees it if we have enough + * idle ones sitting around. + */ +static void put_workspace(struct workspace_manager *wsm, + struct crush_work *work) +{ + spin_lock(&wsm->ws_lock); + if (wsm->free_ws <= num_online_cpus()) { + list_add(&work->item, &wsm->idle_ws); + wsm->free_ws++; + spin_unlock(&wsm->ws_lock); + goto wake; + } + spin_unlock(&wsm->ws_lock); + + free_workspace(work); + atomic_dec(&wsm->total_ws); +wake: + if (wq_has_sleeper(&wsm->ws_wait)) + wake_up(&wsm->ws_wait); +} + /* * osd map */ @@ -981,7 +1118,8 @@ struct ceph_osdmap *ceph_osdmap_alloc(void) map->primary_temp = RB_ROOT; map->pg_upmap = RB_ROOT; map->pg_upmap_items = RB_ROOT; - mutex_init(&map->crush_workspace_mutex); + + init_workspace_manager(&map->crush_wsm); return map; } @@ -989,8 +1127,11 @@ struct ceph_osdmap *ceph_osdmap_alloc(void) void ceph_osdmap_destroy(struct ceph_osdmap *map) { dout("osdmap_destroy %p\n", map); + if (map->crush) crush_destroy(map->crush); + cleanup_workspace_manager(&map->crush_wsm); + while (!RB_EMPTY_ROOT(&map->pg_temp)) { struct ceph_pg_mapping *pg = rb_entry(rb_first(&map->pg_temp), @@ -1029,7 +1170,6 @@ void ceph_osdmap_destroy(struct ceph_osdmap *map) kvfree(map->osd_weight); kvfree(map->osd_addr); kvfree(map->osd_primary_affinity); - kvfree(map->crush_workspace); kfree(map); } @@ -1104,26 +1244,22 @@ static int osdmap_set_max_osd(struct ceph_osdmap *map, u32 max) static int osdmap_set_crush(struct ceph_osdmap *map, struct crush_map *crush) { - void *workspace; - size_t work_size; + struct crush_work *work; if (IS_ERR(crush)) return PTR_ERR(crush); - work_size = crush_work_size(crush, CEPH_PG_MAX_SIZE); - dout("%s work_size %zu bytes\n", __func__, work_size); - workspace = ceph_kvmalloc(work_size, GFP_NOIO); - if (!workspace) { + work = alloc_workspace(crush); + if (!work) { crush_destroy(crush); return -ENOMEM; } - crush_init_workspace(crush, workspace); if (map->crush) crush_destroy(map->crush); - kvfree(map->crush_workspace); + cleanup_workspace_manager(&map->crush_wsm); map->crush = crush; - map->crush_workspace = workspace; + add_initial_workspace(&map->crush_wsm, work); return 0; } @@ -2322,6 +2458,7 @@ static int do_crush(struct ceph_osdmap *map, int ruleno, int x, s64 choose_args_index) { struct crush_choose_arg_map *arg_map; + struct crush_work *work; int r; BUG_ON(result_max > CEPH_PG_MAX_SIZE); @@ -2332,12 +2469,11 @@ static int do_crush(struct ceph_osdmap *map, int ruleno, int x, arg_map = lookup_choose_arg_map(&map->crush->choose_args, CEPH_DEFAULT_CHOOSE_ARGS); - mutex_lock(&map->crush_workspace_mutex); + work = get_workspace(&map->crush_wsm, map->crush); r = crush_do_rule(map->crush, ruleno, x, result, result_max, - weight, weight_max, map->crush_workspace, + weight, weight_max, work, arg_map ? arg_map->args : NULL); - mutex_unlock(&map->crush_workspace_mutex); - + put_workspace(&map->crush_wsm, work); return r; } diff --git a/net/sunrpc/backchannel_rqst.c b/net/sunrpc/backchannel_rqst.c index 195b40c5dae4..3fecad369592 100644 --- a/net/sunrpc/backchannel_rqst.c +++ b/net/sunrpc/backchannel_rqst.c @@ -5,7 +5,7 @@ NetApp provides this source code under the GPL v2 License. The GPL v2 license is available at -http://opensource.org/licenses/gpl-license.php. +https://opensource.org/licenses/gpl-license.php. THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT diff --git a/net/sunrpc/clnt.c b/net/sunrpc/clnt.c index 62e0b6c1e8cf..3259120462ed 100644 --- a/net/sunrpc/clnt.c +++ b/net/sunrpc/clnt.c @@ -47,10 +47,6 @@ # define RPCDBG_FACILITY RPCDBG_CALL #endif -#define dprint_status(t) \ - dprintk("RPC: %5u %s (status %d)\n", t->tk_pid, \ - __func__, t->tk_status) - /* * All RPC clients are linked into this list */ @@ -1639,10 +1635,6 @@ call_start(struct rpc_task *task) int idx = task->tk_msg.rpc_proc->p_statidx; trace_rpc_request(task); - dprintk("RPC: %5u call_start %s%d proc %s (%s)\n", task->tk_pid, - clnt->cl_program->name, clnt->cl_vers, - rpc_proc_name(task), - (RPC_IS_ASYNC(task) ? "async" : "sync")); /* Increment call count (version might not be valid for ping) */ if (clnt->cl_program->version[clnt->cl_vers]) @@ -1658,8 +1650,6 @@ call_start(struct rpc_task *task) static void call_reserve(struct rpc_task *task) { - dprint_status(task); - task->tk_status = 0; task->tk_action = call_reserveresult; xprt_reserve(task); @@ -1675,8 +1665,6 @@ call_reserveresult(struct rpc_task *task) { int status = task->tk_status; - dprint_status(task); - /* * After a call to xprt_reserve(), we must have either * a request slot or else an error status. @@ -1717,8 +1705,6 @@ call_reserveresult(struct rpc_task *task) static void call_retry_reserve(struct rpc_task *task) { - dprint_status(task); - task->tk_status = 0; task->tk_action = call_reserveresult; xprt_retry_reserve(task); @@ -1730,8 +1716,6 @@ call_retry_reserve(struct rpc_task *task) static void call_refresh(struct rpc_task *task) { - dprint_status(task); - task->tk_action = call_refreshresult; task->tk_status = 0; task->tk_client->cl_stats->rpcauthrefresh++; @@ -1746,8 +1730,6 @@ call_refreshresult(struct rpc_task *task) { int status = task->tk_status; - dprint_status(task); - task->tk_status = 0; task->tk_action = call_refresh; switch (status) { @@ -1770,12 +1752,10 @@ call_refreshresult(struct rpc_task *task) if (!task->tk_cred_retry) break; task->tk_cred_retry--; - dprintk("RPC: %5u %s: retry refresh creds\n", - task->tk_pid, __func__); + trace_rpc_retry_refresh_status(task); return; } - dprintk("RPC: %5u %s: refresh creds failed with error %d\n", - task->tk_pid, __func__, status); + trace_rpc_refresh_status(task); rpc_call_rpcerror(task, status); } @@ -1792,8 +1772,6 @@ call_allocate(struct rpc_task *task) const struct rpc_procinfo *proc = task->tk_msg.rpc_proc; int status; - dprint_status(task); - task->tk_status = 0; task->tk_action = call_encode; @@ -1823,6 +1801,7 @@ call_allocate(struct rpc_task *task) req->rq_rcvsize <<= 2; status = xprt->ops->buf_alloc(task); + trace_rpc_buf_alloc(task, status); xprt_inject_disconnect(xprt); if (status == 0) return; @@ -1831,8 +1810,6 @@ call_allocate(struct rpc_task *task) return; } - dprintk("RPC: %5u rpc_buffer allocation failed\n", task->tk_pid); - if (RPC_IS_ASYNC(task) || !fatal_signal_pending(current)) { task->tk_action = call_allocate; rpc_delay(task, HZ>>4); @@ -1883,7 +1860,7 @@ call_encode(struct rpc_task *task) { if (!rpc_task_need_encode(task)) goto out; - dprint_status(task); + /* Dequeue task from the receive queue while we're encoding */ xprt_request_dequeue_xprt(task); /* Encode here so that rpcsec_gss can use correct sequence number. */ @@ -1902,8 +1879,7 @@ call_encode(struct rpc_task *task) } else { task->tk_action = call_refresh; task->tk_cred_retry--; - dprintk("RPC: %5u %s: retry refresh creds\n", - task->tk_pid, __func__); + trace_rpc_retry_refresh_status(task); } break; default: @@ -1960,8 +1936,6 @@ call_bind(struct rpc_task *task) return; } - dprint_status(task); - task->tk_action = call_bind_status; if (!xprt_prepare_transmit(task)) return; @@ -1983,8 +1957,6 @@ call_bind_status(struct rpc_task *task) return; } - dprint_status(task); - trace_rpc_bind_status(task); if (task->tk_status >= 0) goto out_next; if (xprt_bound(xprt)) { @@ -1994,12 +1966,10 @@ call_bind_status(struct rpc_task *task) switch (task->tk_status) { case -ENOMEM: - dprintk("RPC: %5u rpcbind out of memory\n", task->tk_pid); rpc_delay(task, HZ >> 2); goto retry_timeout; case -EACCES: - dprintk("RPC: %5u remote rpcbind: RPC program/version " - "unavailable\n", task->tk_pid); + trace_rpcb_prog_unavail_err(task); /* fail immediately if this is an RPC ping */ if (task->tk_msg.rpc_proc->p_proc == 0) { status = -EOPNOTSUPP; @@ -2016,17 +1986,14 @@ call_bind_status(struct rpc_task *task) case -EAGAIN: goto retry_timeout; case -ETIMEDOUT: - dprintk("RPC: %5u rpcbind request timed out\n", - task->tk_pid); + trace_rpcb_timeout_err(task); goto retry_timeout; case -EPFNOSUPPORT: /* server doesn't support any rpcbind version we know of */ - dprintk("RPC: %5u unrecognized remote rpcbind service\n", - task->tk_pid); + trace_rpcb_bind_version_err(task); break; case -EPROTONOSUPPORT: - dprintk("RPC: %5u remote rpcbind version unavailable, retrying\n", - task->tk_pid); + trace_rpcb_bind_version_err(task); goto retry_timeout; case -ECONNREFUSED: /* connection problems */ case -ECONNRESET: @@ -2037,8 +2004,7 @@ call_bind_status(struct rpc_task *task) case -EHOSTUNREACH: case -ENETUNREACH: case -EPIPE: - dprintk("RPC: %5u remote rpcbind unreachable: %d\n", - task->tk_pid, task->tk_status); + trace_rpcb_unreachable_err(task); if (!RPC_IS_SOFTCONN(task)) { rpc_delay(task, 5*HZ); goto retry_timeout; @@ -2046,8 +2012,7 @@ call_bind_status(struct rpc_task *task) status = task->tk_status; break; default: - dprintk("RPC: %5u unrecognized rpcbind error (%d)\n", - task->tk_pid, -task->tk_status); + trace_rpcb_unrecognized_err(task); } rpc_call_rpcerror(task, status); @@ -2079,10 +2044,6 @@ call_connect(struct rpc_task *task) return; } - dprintk("RPC: %5u call_connect xprt %p %s connected\n", - task->tk_pid, xprt, - (xprt_connected(xprt) ? "is" : "is not")); - task->tk_action = call_connect_status; if (task->tk_status < 0) return; @@ -2110,7 +2071,6 @@ call_connect_status(struct rpc_task *task) return; } - dprint_status(task); trace_rpc_connect_status(task); if (task->tk_status == 0) { @@ -2178,8 +2138,6 @@ call_transmit(struct rpc_task *task) return; } - dprint_status(task); - task->tk_action = call_transmit_status; if (!xprt_prepare_transmit(task)) return; @@ -2214,7 +2172,6 @@ call_transmit_status(struct rpc_task *task) switch (task->tk_status) { default: - dprint_status(task); break; case -EBADMSG: task->tk_status = 0; @@ -2296,8 +2253,6 @@ call_bc_transmit_status(struct rpc_task *task) if (rpc_task_transmitted(task)) task->tk_status = 0; - dprint_status(task); - switch (task->tk_status) { case 0: /* Success */ @@ -2357,8 +2312,6 @@ call_status(struct rpc_task *task) if (!task->tk_msg.rpc_proc->p_proc) trace_xprt_ping(task->tk_xprt, task->tk_status); - dprint_status(task); - status = task->tk_status; if (status >= 0) { task->tk_action = call_decode; @@ -2405,7 +2358,8 @@ call_status(struct rpc_task *task) goto out_exit; } task->tk_action = call_encode; - rpc_check_timeout(task); + if (status != -ECONNRESET && status != -ECONNABORTED) + rpc_check_timeout(task); return; out_exit: rpc_call_rpcerror(task, status); @@ -2433,7 +2387,7 @@ rpc_check_timeout(struct rpc_task *task) if (xprt_adjust_timeout(task->tk_rqstp) == 0) return; - dprintk("RPC: %5u call_timeout (major)\n", task->tk_pid); + trace_rpc_timeout_status(task); task->tk_timeouts++; if (RPC_IS_SOFTCONN(task) && !rpc_check_connected(task->tk_rqstp)) { @@ -2492,8 +2446,6 @@ call_decode(struct rpc_task *task) struct xdr_stream xdr; int err; - dprint_status(task); - if (!task->tk_msg.rpc_proc->p_decode) { task->tk_action = rpc_exit_task; return; @@ -2537,8 +2489,6 @@ call_decode(struct rpc_task *task) case 0: task->tk_action = rpc_exit_task; task->tk_status = rpcauth_unwrap_resp(task, &xdr); - dprintk("RPC: %5u %s result %d\n", - task->tk_pid, __func__, task->tk_status); return; case -EAGAIN: task->tk_status = 0; diff --git a/net/sunrpc/rpcb_clnt.c b/net/sunrpc/rpcb_clnt.c index 4a67685c83eb..38fe2ce8a5aa 100644 --- a/net/sunrpc/rpcb_clnt.c +++ b/net/sunrpc/rpcb_clnt.c @@ -31,11 +31,9 @@ #include #include -#include "netns.h" +#include -#if IS_ENABLED(CONFIG_SUNRPC_DEBUG) -# define RPCDBG_FACILITY RPCDBG_BIND -#endif +#include "netns.h" #define RPCBIND_SOCK_PATHNAME "/var/run/rpcbind.sock" @@ -216,10 +214,6 @@ static void rpcb_set_local(struct net *net, struct rpc_clnt *clnt, sn->rpcb_is_af_local = is_af_local ? 1 : 0; smp_wmb(); sn->rpcb_users = 1; - dprintk("RPC: created new rpcb local clients (rpcb_local_clnt: " - "%p, rpcb_local_clnt4: %p) for net %x%s\n", - sn->rpcb_local_clnt, sn->rpcb_local_clnt4, - net->ns.inum, (net == &init_net) ? " (init_net)" : ""); } /* @@ -261,19 +255,13 @@ static int rpcb_create_local_unix(struct net *net) */ clnt = rpc_create(&args); if (IS_ERR(clnt)) { - dprintk("RPC: failed to create AF_LOCAL rpcbind " - "client (errno %ld).\n", PTR_ERR(clnt)); result = PTR_ERR(clnt); goto out; } clnt4 = rpc_bind_new_program(clnt, &rpcb_program, RPCBVERS_4); - if (IS_ERR(clnt4)) { - dprintk("RPC: failed to bind second program to " - "rpcbind v4 client (errno %ld).\n", - PTR_ERR(clnt4)); + if (IS_ERR(clnt4)) clnt4 = NULL; - } rpcb_set_local(net, clnt, clnt4, true); @@ -309,8 +297,6 @@ static int rpcb_create_local_net(struct net *net) clnt = rpc_create(&args); if (IS_ERR(clnt)) { - dprintk("RPC: failed to create local rpcbind " - "client (errno %ld).\n", PTR_ERR(clnt)); result = PTR_ERR(clnt); goto out; } @@ -321,12 +307,8 @@ static int rpcb_create_local_net(struct net *net) * v4 upcalls. */ clnt4 = rpc_bind_new_program(clnt, &rpcb_program, RPCBVERS_4); - if (IS_ERR(clnt4)) { - dprintk("RPC: failed to bind second program to " - "rpcbind v4 client (errno %ld).\n", - PTR_ERR(clnt4)); + if (IS_ERR(clnt4)) clnt4 = NULL; - } rpcb_set_local(net, clnt, clnt4, false); @@ -403,11 +385,8 @@ static int rpcb_register_call(struct sunrpc_net *sn, struct rpc_clnt *clnt, stru msg->rpc_resp = &result; error = rpc_call_sync(clnt, msg, flags); - if (error < 0) { - dprintk("RPC: failed to contact local rpcbind " - "server (errno %d).\n", -error); + if (error < 0) return error; - } if (!result) return -EACCES; @@ -461,9 +440,7 @@ int rpcb_register(struct net *net, u32 prog, u32 vers, int prot, unsigned short struct sunrpc_net *sn = net_generic(net, sunrpc_net_id); bool is_set = false; - dprintk("RPC: %sregistering (%u, %u, %d, %u) with local " - "rpcbind\n", (port ? "" : "un"), - prog, vers, prot, port); + trace_pmap_register(prog, vers, prot, port); msg.rpc_proc = &rpcb_procedures2[RPCBPROC_UNSET]; if (port != 0) { @@ -489,11 +466,6 @@ static int rpcb_register_inet4(struct sunrpc_net *sn, map->r_addr = rpc_sockaddr2uaddr(sap, GFP_KERNEL); - dprintk("RPC: %sregistering [%u, %u, %s, '%s'] with " - "local rpcbind\n", (port ? "" : "un"), - map->r_prog, map->r_vers, - map->r_addr, map->r_netid); - msg->rpc_proc = &rpcb_procedures4[RPCBPROC_UNSET]; if (port != 0) { msg->rpc_proc = &rpcb_procedures4[RPCBPROC_SET]; @@ -520,11 +492,6 @@ static int rpcb_register_inet6(struct sunrpc_net *sn, map->r_addr = rpc_sockaddr2uaddr(sap, GFP_KERNEL); - dprintk("RPC: %sregistering [%u, %u, %s, '%s'] with " - "local rpcbind\n", (port ? "" : "un"), - map->r_prog, map->r_vers, - map->r_addr, map->r_netid); - msg->rpc_proc = &rpcb_procedures4[RPCBPROC_UNSET]; if (port != 0) { msg->rpc_proc = &rpcb_procedures4[RPCBPROC_SET]; @@ -541,9 +508,7 @@ static int rpcb_unregister_all_protofamilies(struct sunrpc_net *sn, { struct rpcbind_args *map = msg->rpc_argp; - dprintk("RPC: unregistering [%u, %u, '%s'] with " - "local rpcbind\n", - map->r_prog, map->r_vers, map->r_netid); + trace_rpcb_unregister(map->r_prog, map->r_vers, map->r_netid); map->r_addr = ""; msg->rpc_proc = &rpcb_procedures4[RPCBPROC_UNSET]; @@ -615,6 +580,8 @@ int rpcb_v4_register(struct net *net, const u32 program, const u32 version, if (address == NULL) return rpcb_unregister_all_protofamilies(sn, &msg); + trace_rpcb_register(map.r_prog, map.r_vers, map.r_addr, map.r_netid); + switch (address->sa_family) { case AF_INET: return rpcb_register_inet4(sn, address, &msg); @@ -693,18 +660,12 @@ void rpcb_getport_async(struct rpc_task *task) rcu_read_unlock(); xprt = xprt_get(task->tk_xprt); - dprintk("RPC: %5u %s(%s, %u, %u, %d)\n", - task->tk_pid, __func__, - xprt->servername, clnt->cl_prog, clnt->cl_vers, xprt->prot); - /* Put self on the wait queue to ensure we get notified if * some other task is already attempting to bind the port */ rpc_sleep_on_timeout(&xprt->binding, task, NULL, jiffies + xprt->bind_timeout); if (xprt_test_and_set_binding(xprt)) { - dprintk("RPC: %5u %s: waiting for another binder\n", - task->tk_pid, __func__); xprt_put(xprt); return; } @@ -712,8 +673,6 @@ void rpcb_getport_async(struct rpc_task *task) /* Someone else may have bound if we slept */ if (xprt_bound(xprt)) { status = 0; - dprintk("RPC: %5u %s: already bound\n", - task->tk_pid, __func__); goto bailout_nofree; } @@ -732,20 +691,15 @@ void rpcb_getport_async(struct rpc_task *task) break; default: status = -EAFNOSUPPORT; - dprintk("RPC: %5u %s: bad address family\n", - task->tk_pid, __func__); goto bailout_nofree; } if (proc == NULL) { xprt->bind_index = 0; status = -EPFNOSUPPORT; - dprintk("RPC: %5u %s: no more getport versions available\n", - task->tk_pid, __func__); goto bailout_nofree; } - dprintk("RPC: %5u %s: trying rpcbind version %u\n", - task->tk_pid, __func__, bind_version); + trace_rpcb_getport(clnt, task, bind_version); rpcb_clnt = rpcb_create(xprt->xprt_net, clnt->cl_nodename, @@ -754,16 +708,12 @@ void rpcb_getport_async(struct rpc_task *task) clnt->cl_cred); if (IS_ERR(rpcb_clnt)) { status = PTR_ERR(rpcb_clnt); - dprintk("RPC: %5u %s: rpcb_create failed, error %ld\n", - task->tk_pid, __func__, PTR_ERR(rpcb_clnt)); goto bailout_nofree; } map = kzalloc(sizeof(struct rpcbind_args), GFP_NOFS); if (!map) { status = -ENOMEM; - dprintk("RPC: %5u %s: no memory available\n", - task->tk_pid, __func__); goto bailout_release_client; } map->r_prog = clnt->cl_prog; @@ -780,8 +730,6 @@ void rpcb_getport_async(struct rpc_task *task) map->r_addr = rpc_sockaddr2uaddr(sap, GFP_NOFS); if (!map->r_addr) { status = -ENOMEM; - dprintk("RPC: %5u %s: no memory available\n", - task->tk_pid, __func__); goto bailout_free_args; } map->r_owner = ""; @@ -818,34 +766,33 @@ static void rpcb_getport_done(struct rpc_task *child, void *data) { struct rpcbind_args *map = data; struct rpc_xprt *xprt = map->r_xprt; - int status = child->tk_status; + + map->r_status = child->tk_status; /* Garbage reply: retry with a lesser rpcbind version */ - if (status == -EIO) - status = -EPROTONOSUPPORT; + if (map->r_status == -EIO) + map->r_status = -EPROTONOSUPPORT; /* rpcbind server doesn't support this rpcbind protocol version */ - if (status == -EPROTONOSUPPORT) + if (map->r_status == -EPROTONOSUPPORT) xprt->bind_index++; - if (status < 0) { + if (map->r_status < 0) { /* rpcbind server not available on remote host? */ - xprt->ops->set_port(xprt, 0); + map->r_port = 0; + } else if (map->r_port == 0) { /* Requested RPC service wasn't registered on remote host */ - xprt->ops->set_port(xprt, 0); - status = -EACCES; + map->r_status = -EACCES; } else { /* Succeeded */ - xprt->ops->set_port(xprt, map->r_port); - xprt_set_bound(xprt); - status = 0; + map->r_status = 0; } - dprintk("RPC: %5u rpcb_getport_done(status %d, port %u)\n", - child->tk_pid, status, map->r_port); - - map->r_status = status; + trace_rpcb_setport(child, map->r_status, map->r_port); + xprt->ops->set_port(xprt, map->r_port); + if (map->r_port) + xprt_set_bound(xprt); } /* @@ -858,11 +805,6 @@ static void rpcb_enc_mapping(struct rpc_rqst *req, struct xdr_stream *xdr, const struct rpcbind_args *rpcb = data; __be32 *p; - dprintk("RPC: %5u encoding PMAP_%s call (%u, %u, %d, %u)\n", - req->rq_task->tk_pid, - req->rq_task->tk_msg.rpc_proc->p_name, - rpcb->r_prog, rpcb->r_vers, rpcb->r_prot, rpcb->r_port); - p = xdr_reserve_space(xdr, RPCB_mappingargs_sz << 2); *p++ = cpu_to_be32(rpcb->r_prog); *p++ = cpu_to_be32(rpcb->r_vers); @@ -884,8 +826,6 @@ static int rpcb_dec_getport(struct rpc_rqst *req, struct xdr_stream *xdr, return -EIO; port = be32_to_cpup(p); - dprintk("RPC: %5u PMAP_%s result: %lu\n", req->rq_task->tk_pid, - req->rq_task->tk_msg.rpc_proc->p_name, port); if (unlikely(port > USHRT_MAX)) return -EIO; @@ -906,11 +846,6 @@ static int rpcb_dec_set(struct rpc_rqst *req, struct xdr_stream *xdr, *boolp = 0; if (*p != xdr_zero) *boolp = 1; - - dprintk("RPC: %5u RPCB_%s call %s\n", - req->rq_task->tk_pid, - req->rq_task->tk_msg.rpc_proc->p_name, - (*boolp ? "succeeded" : "failed")); return 0; } @@ -935,12 +870,6 @@ static void rpcb_enc_getaddr(struct rpc_rqst *req, struct xdr_stream *xdr, const struct rpcbind_args *rpcb = data; __be32 *p; - dprintk("RPC: %5u encoding RPCB_%s call (%u, %u, '%s', '%s')\n", - req->rq_task->tk_pid, - req->rq_task->tk_msg.rpc_proc->p_name, - rpcb->r_prog, rpcb->r_vers, - rpcb->r_netid, rpcb->r_addr); - p = xdr_reserve_space(xdr, (RPCB_program_sz + RPCB_version_sz) << 2); *p++ = cpu_to_be32(rpcb->r_prog); *p = cpu_to_be32(rpcb->r_vers); @@ -970,11 +899,8 @@ static int rpcb_dec_getaddr(struct rpc_rqst *req, struct xdr_stream *xdr, * If the returned universal address is a null string, * the requested RPC service was not registered. */ - if (len == 0) { - dprintk("RPC: %5u RPCB reply: program not registered\n", - req->rq_task->tk_pid); + if (len == 0) return 0; - } if (unlikely(len > RPCBIND_MAXUADDRLEN)) goto out_fail; @@ -982,8 +908,6 @@ static int rpcb_dec_getaddr(struct rpc_rqst *req, struct xdr_stream *xdr, p = xdr_inline_decode(xdr, len); if (unlikely(p == NULL)) goto out_fail; - dprintk("RPC: %5u RPCB_%s reply: %*pE\n", req->rq_task->tk_pid, - req->rq_task->tk_msg.rpc_proc->p_name, len, (char *)p); if (rpc_uaddr2sockaddr(req->rq_xprt->xprt_net, (char *)p, len, sap, sizeof(address)) == 0) @@ -993,9 +917,6 @@ static int rpcb_dec_getaddr(struct rpc_rqst *req, struct xdr_stream *xdr, return 0; out_fail: - dprintk("RPC: %5u malformed RPCB_%s reply\n", - req->rq_task->tk_pid, - req->rq_task->tk_msg.rpc_proc->p_name); return -EIO; } diff --git a/net/sunrpc/sched.c b/net/sunrpc/sched.c index 7eba20a88438..f06d7c315017 100644 --- a/net/sunrpc/sched.c +++ b/net/sunrpc/sched.c @@ -27,10 +27,6 @@ #include "sunrpc.h" -#if IS_ENABLED(CONFIG_SUNRPC_DEBUG) -#define RPCDBG_FACILITY RPCDBG_SCHED -#endif - #define CREATE_TRACE_POINTS #include @@ -85,7 +81,6 @@ __rpc_disable_timer(struct rpc_wait_queue *queue, struct rpc_task *task) { if (list_empty(&task->u.tk_wait.timer_list)) return; - dprintk("RPC: %5u disabling timer\n", task->tk_pid); task->tk_timeout = 0; list_del(&task->u.tk_wait.timer_list); if (list_empty(&queue->timer_list.list)) @@ -111,9 +106,6 @@ static void __rpc_add_timer(struct rpc_wait_queue *queue, struct rpc_task *task, unsigned long timeout) { - dprintk("RPC: %5u setting alarm for %u ms\n", - task->tk_pid, jiffies_to_msecs(timeout - jiffies)); - task->tk_timeout = timeout; if (list_empty(&queue->timer_list.list) || time_before(timeout, queue->timer_list.expires)) rpc_set_queue_timer(queue, timeout); @@ -216,9 +208,6 @@ static void __rpc_add_wait_queue(struct rpc_wait_queue *queue, /* barrier matches the read in rpc_wake_up_task_queue_locked() */ smp_wmb(); rpc_set_queued(task); - - dprintk("RPC: %5u added to queue %p \"%s\"\n", - task->tk_pid, queue, rpc_qname(queue)); } /* @@ -241,8 +230,6 @@ static void __rpc_remove_wait_queue(struct rpc_wait_queue *queue, struct rpc_tas else list_del(&task->u.tk_wait.list); queue->qlen--; - dprintk("RPC: %5u removed from queue %p \"%s\"\n", - task->tk_pid, queue, rpc_qname(queue)); } static void __rpc_init_priority_wait_queue(struct rpc_wait_queue *queue, const char *qname, unsigned char nr_queues) @@ -382,13 +369,9 @@ static void __rpc_do_sleep_on_priority(struct rpc_wait_queue *q, struct rpc_task *task, unsigned char queue_priority) { - dprintk("RPC: %5u sleep_on(queue \"%s\" time %lu)\n", - task->tk_pid, rpc_qname(q), jiffies); - trace_rpc_task_sleep(task, q); __rpc_add_wait_queue(q, task, queue_priority); - } static void __rpc_sleep_on_priority(struct rpc_wait_queue *q, @@ -510,9 +493,6 @@ static void __rpc_do_wake_up_task_on_wq(struct workqueue_struct *wq, struct rpc_wait_queue *queue, struct rpc_task *task) { - dprintk("RPC: %5u __rpc_wake_up_task (now %lu)\n", - task->tk_pid, jiffies); - /* Has the task been executed yet? If not, we cannot wake it up! */ if (!RPC_IS_ACTIVATED(task)) { printk(KERN_ERR "RPC: Inactive task (%p) being woken up!\n", task); @@ -524,8 +504,6 @@ static void __rpc_do_wake_up_task_on_wq(struct workqueue_struct *wq, __rpc_remove_wait_queue(queue, task); rpc_make_runnable(wq, task); - - dprintk("RPC: __rpc_wake_up_task done\n"); } /* @@ -663,8 +641,6 @@ struct rpc_task *rpc_wake_up_first_on_wq(struct workqueue_struct *wq, { struct rpc_task *task = NULL; - dprintk("RPC: wake_up_first(%p \"%s\")\n", - queue, rpc_qname(queue)); spin_lock(&queue->lock); task = __rpc_find_next_queued(queue); if (task != NULL) @@ -770,7 +746,7 @@ static void __rpc_queue_timer_fn(struct work_struct *work) list_for_each_entry_safe(task, n, &queue->timer_list.list, u.tk_wait.timer_list) { timeo = task->tk_timeout; if (time_after_eq(now, timeo)) { - dprintk("RPC: %5u timeout\n", task->tk_pid); + trace_rpc_task_timeout(task, task->tk_action); task->tk_status = -ETIMEDOUT; rpc_wake_up_task_queue_locked(queue, task); continue; @@ -885,9 +861,6 @@ static void __rpc_execute(struct rpc_task *task) int task_is_async = RPC_IS_ASYNC(task); int status = 0; - dprintk("RPC: %5u __rpc_execute flags=0x%x\n", - task->tk_pid, task->tk_flags); - WARN_ON_ONCE(RPC_IS_QUEUED(task)); if (RPC_IS_QUEUED(task)) return; @@ -947,7 +920,7 @@ static void __rpc_execute(struct rpc_task *task) return; /* sync task: sleep here */ - dprintk("RPC: %5u sync task going to sleep\n", task->tk_pid); + trace_rpc_task_sync_sleep(task, task->tk_action); status = out_of_line_wait_on_bit(&task->tk_runstate, RPC_TASK_QUEUED, rpc_wait_bit_killable, TASK_KILLABLE); @@ -963,11 +936,9 @@ static void __rpc_execute(struct rpc_task *task) task->tk_rpc_status = -ERESTARTSYS; rpc_exit(task, -ERESTARTSYS); } - dprintk("RPC: %5u sync task resuming\n", task->tk_pid); + trace_rpc_task_sync_wake(task, task->tk_action); } - dprintk("RPC: %5u return %d, status %d\n", task->tk_pid, status, - task->tk_status); /* Release all resources associated with the task */ rpc_release_task(task); } @@ -1036,8 +1007,6 @@ int rpc_malloc(struct rpc_task *task) return -ENOMEM; buf->len = size; - dprintk("RPC: %5u allocated buffer of size %zu at %p\n", - task->tk_pid, size, buf); rqst->rq_buffer = buf->data; rqst->rq_rbuffer = (char *)rqst->rq_buffer + rqst->rq_callsize; return 0; @@ -1058,9 +1027,6 @@ void rpc_free(struct rpc_task *task) buf = container_of(buffer, struct rpc_buffer, data); size = buf->len; - dprintk("RPC: freeing buffer of size %zu at %p\n", - size, buf); - if (size <= RPC_BUFFER_MAXSIZE) mempool_free(buf, rpc_buffer_mempool); else @@ -1095,9 +1061,6 @@ static void rpc_init_task(struct rpc_task *task, const struct rpc_task_setup *ta task->tk_action = rpc_prepare_task; rpc_init_task_statistics(task); - - dprintk("RPC: new task initialized, procpid %u\n", - task_pid_nr(current)); } static struct rpc_task * @@ -1121,7 +1084,6 @@ struct rpc_task *rpc_new_task(const struct rpc_task_setup *setup_data) rpc_init_task(task, setup_data); task->tk_flags |= flags; - dprintk("RPC: allocated task %p\n", task); return task; } @@ -1151,10 +1113,8 @@ static void rpc_free_task(struct rpc_task *task) put_rpccred(task->tk_op_cred); rpc_release_calldata(task->tk_ops, task->tk_calldata); - if (tk_flags & RPC_TASK_DYNAMIC) { - dprintk("RPC: %5u freeing task\n", task->tk_pid); + if (tk_flags & RPC_TASK_DYNAMIC) mempool_free(task, rpc_task_mempool); - } } static void rpc_async_release(struct work_struct *work) @@ -1208,8 +1168,6 @@ EXPORT_SYMBOL_GPL(rpc_put_task_async); static void rpc_release_task(struct rpc_task *task) { - dprintk("RPC: %5u release task\n", task->tk_pid); - WARN_ON_ONCE(RPC_IS_QUEUED(task)); rpc_release_resources_task(task); @@ -1250,7 +1208,6 @@ static int rpciod_start(void) /* * Create the rpciod thread and wait for it to start. */ - dprintk("RPC: creating workqueue rpciod\n"); wq = alloc_workqueue("rpciod", WQ_MEM_RECLAIM | WQ_UNBOUND, 0); if (!wq) goto out_failed; @@ -1275,7 +1232,6 @@ static void rpciod_stop(void) if (rpciod_workqueue == NULL) return; - dprintk("RPC: destroying workqueue rpciod\n"); wq = rpciod_workqueue; rpciod_workqueue = NULL; diff --git a/net/sunrpc/sunrpc.h b/net/sunrpc/sunrpc.h index f6fe2e6cd65a..2f59464e6524 100644 --- a/net/sunrpc/sunrpc.h +++ b/net/sunrpc/sunrpc.h @@ -4,7 +4,7 @@ NetApp provides this source code under the GPL v2 License. The GPL v2 license is available at -http://opensource.org/licenses/gpl-license.php. +https://opensource.org/licenses/gpl-license.php. THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT diff --git a/net/sunrpc/xdr.c b/net/sunrpc/xdr.c index be11d672b5b9..3feff529a764 100644 --- a/net/sunrpc/xdr.c +++ b/net/sunrpc/xdr.c @@ -19,6 +19,9 @@ #include #include +static void _copy_to_pages(struct page **, size_t, const char *, size_t); + + /* * XDR functions for basic NFS types */ @@ -201,6 +204,88 @@ EXPORT_SYMBOL_GPL(xdr_inline_pages); * Helper routines for doing 'memmove' like operations on a struct xdr_buf */ +/** + * _shift_data_left_pages + * @pages: vector of pages containing both the source and dest memory area. + * @pgto_base: page vector address of destination + * @pgfrom_base: page vector address of source + * @len: number of bytes to copy + * + * Note: the addresses pgto_base and pgfrom_base are both calculated in + * the same way: + * if a memory area starts at byte 'base' in page 'pages[i]', + * then its address is given as (i << PAGE_CACHE_SHIFT) + base + * Alse note: pgto_base must be < pgfrom_base, but the memory areas + * they point to may overlap. + */ +static void +_shift_data_left_pages(struct page **pages, size_t pgto_base, + size_t pgfrom_base, size_t len) +{ + struct page **pgfrom, **pgto; + char *vfrom, *vto; + size_t copy; + + BUG_ON(pgfrom_base <= pgto_base); + + pgto = pages + (pgto_base >> PAGE_SHIFT); + pgfrom = pages + (pgfrom_base >> PAGE_SHIFT); + + pgto_base &= ~PAGE_MASK; + pgfrom_base &= ~PAGE_MASK; + + do { + if (pgto_base >= PAGE_SIZE) { + pgto_base = 0; + pgto++; + } + if (pgfrom_base >= PAGE_SIZE){ + pgfrom_base = 0; + pgfrom++; + } + + copy = len; + if (copy > (PAGE_SIZE - pgto_base)) + copy = PAGE_SIZE - pgto_base; + if (copy > (PAGE_SIZE - pgfrom_base)) + copy = PAGE_SIZE - pgfrom_base; + + vto = kmap_atomic(*pgto); + if (*pgto != *pgfrom) { + vfrom = kmap_atomic(*pgfrom); + memcpy(vto + pgto_base, vfrom + pgfrom_base, copy); + kunmap_atomic(vfrom); + } else + memmove(vto + pgto_base, vto + pgfrom_base, copy); + flush_dcache_page(*pgto); + kunmap_atomic(vto); + + pgto_base += copy; + pgfrom_base += copy; + + } while ((len -= copy) != 0); +} + +static void +_shift_data_left_tail(struct xdr_buf *buf, unsigned int pgto, size_t len) +{ + struct kvec *tail = buf->tail; + + if (len > tail->iov_len) + len = tail->iov_len; + + _copy_to_pages(buf->pages, + buf->page_base + pgto, + (char *)tail->iov_base, + len); + tail->iov_len -= len; + + if (tail->iov_len > 0) + memmove((char *)tail->iov_base, + tail->iov_base + len, + tail->iov_len); +} + /** * _shift_data_right_pages * @pages: vector of pages containing both the source and dest memory area. @@ -266,6 +351,46 @@ _shift_data_right_pages(struct page **pages, size_t pgto_base, } while ((len -= copy) != 0); } +static unsigned int +_shift_data_right_tail(struct xdr_buf *buf, unsigned int pgfrom, size_t len) +{ + struct kvec *tail = buf->tail; + unsigned int tailbuf_len; + unsigned int result = 0; + size_t copy; + + tailbuf_len = buf->buflen - buf->head->iov_len - buf->page_len; + + /* Shift the tail first */ + if (tailbuf_len != 0) { + unsigned int free_space = tailbuf_len - tail->iov_len; + + if (len < free_space) + free_space = len; + if (len > free_space) + len = free_space; + + tail->iov_len += free_space; + copy = len; + + if (tail->iov_len > len) { + char *p = (char *)tail->iov_base + len; + memmove(p, tail->iov_base, tail->iov_len - free_space); + result += tail->iov_len - free_space; + } else + copy = tail->iov_len; + + /* Copy from the inlined pages into the tail */ + _copy_from_pages((char *)tail->iov_base, + buf->pages, + buf->page_base + pgfrom, + copy); + result += copy; + } + + return result; +} + /** * _copy_to_pages * @pages: array of pages @@ -350,6 +475,38 @@ _copy_from_pages(char *p, struct page **pages, size_t pgbase, size_t len) } EXPORT_SYMBOL_GPL(_copy_from_pages); +/** + * _zero_pages + * @pages: array of pages + * @pgbase: beginning page vector address + * @len: length + */ +static void +_zero_pages(struct page **pages, size_t pgbase, size_t len) +{ + struct page **page; + char *vpage; + size_t zero; + + page = pages + (pgbase >> PAGE_SHIFT); + pgbase &= ~PAGE_MASK; + + do { + zero = PAGE_SIZE - pgbase; + if (zero > len) + zero = len; + + vpage = kmap_atomic(*page); + memset(vpage + pgbase, 0, zero); + kunmap_atomic(vpage); + + flush_dcache_page(*page); + pgbase = 0; + page++; + + } while ((len -= zero) != 0); +} + /** * xdr_shrink_bufhead * @buf: xdr_buf @@ -446,39 +603,13 @@ xdr_shrink_bufhead(struct xdr_buf *buf, size_t len) static unsigned int xdr_shrink_pagelen(struct xdr_buf *buf, size_t len) { - struct kvec *tail; - size_t copy; unsigned int pglen = buf->page_len; - unsigned int tailbuf_len; unsigned int result; - result = 0; - tail = buf->tail; if (len > buf->page_len) len = buf-> page_len; - tailbuf_len = buf->buflen - buf->head->iov_len - buf->page_len; - /* Shift the tail first */ - if (tailbuf_len != 0) { - unsigned int free_space = tailbuf_len - tail->iov_len; - - if (len < free_space) - free_space = len; - tail->iov_len += free_space; - - copy = len; - if (tail->iov_len > len) { - char *p = (char *)tail->iov_base + len; - memmove(p, tail->iov_base, tail->iov_len - len); - result += tail->iov_len - len; - } else - copy = tail->iov_len; - /* Copy from the inlined pages into the tail */ - _copy_from_pages((char *)tail->iov_base, - buf->pages, buf->page_base + pglen - len, - copy); - result += copy; - } + result = _shift_data_right_tail(buf, pglen - len, len); buf->page_len -= len; buf->buflen -= len; /* Have we truncated the message? */ @@ -505,6 +636,19 @@ unsigned int xdr_stream_pos(const struct xdr_stream *xdr) } EXPORT_SYMBOL_GPL(xdr_stream_pos); +/** + * xdr_page_pos - Return the current offset from the start of the xdr pages + * @xdr: pointer to struct xdr_stream + */ +unsigned int xdr_page_pos(const struct xdr_stream *xdr) +{ + unsigned int pos = xdr_stream_pos(xdr); + + WARN_ON(pos < xdr->buf->head[0].iov_len); + return pos - xdr->buf->head[0].iov_len; +} +EXPORT_SYMBOL_GPL(xdr_page_pos); + /** * xdr_init_encode - Initialize a struct xdr_stream for sending data. * @xdr: pointer to xdr_stream struct @@ -825,6 +969,13 @@ static int xdr_set_page_base(struct xdr_stream *xdr, return 0; } +static void xdr_set_page(struct xdr_stream *xdr, unsigned int base, + unsigned int len) +{ + if (xdr_set_page_base(xdr, base, len) < 0) + xdr_set_iov(xdr, xdr->buf->tail, xdr->nwords << 2); +} + static void xdr_set_next_page(struct xdr_stream *xdr) { unsigned int newbase; @@ -832,8 +983,7 @@ static void xdr_set_next_page(struct xdr_stream *xdr) newbase = (1 + xdr->page_ptr - xdr->buf->pages) << PAGE_SHIFT; newbase -= xdr->buf->page_base; - if (xdr_set_page_base(xdr, newbase, PAGE_SIZE) < 0) - xdr_set_iov(xdr, xdr->buf->tail, xdr->nwords << 2); + xdr_set_page(xdr, newbase, PAGE_SIZE); } static bool xdr_set_next_buffer(struct xdr_stream *xdr) @@ -841,8 +991,7 @@ static bool xdr_set_next_buffer(struct xdr_stream *xdr) if (xdr->page_ptr != NULL) xdr_set_next_page(xdr); else if (xdr->iov == xdr->buf->head) { - if (xdr_set_page_base(xdr, 0, PAGE_SIZE) < 0) - xdr_set_iov(xdr, xdr->buf->tail, xdr->nwords << 2); + xdr_set_page(xdr, 0, PAGE_SIZE); } return xdr->p != xdr->end; } @@ -979,10 +1128,25 @@ __be32 * xdr_inline_decode(struct xdr_stream *xdr, size_t nbytes) } EXPORT_SYMBOL_GPL(xdr_inline_decode); +static void xdr_realign_pages(struct xdr_stream *xdr) +{ + struct xdr_buf *buf = xdr->buf; + struct kvec *iov = buf->head; + unsigned int cur = xdr_stream_pos(xdr); + unsigned int copied, offset; + + /* Realign pages to current pointer position */ + if (iov->iov_len > cur) { + offset = iov->iov_len - cur; + copied = xdr_shrink_bufhead(buf, offset); + trace_rpc_xdr_alignment(xdr, offset, copied); + xdr->nwords = XDR_QUADLEN(buf->len - cur); + } +} + static unsigned int xdr_align_pages(struct xdr_stream *xdr, unsigned int len) { struct xdr_buf *buf = xdr->buf; - struct kvec *iov; unsigned int nwords = XDR_QUADLEN(len); unsigned int cur = xdr_stream_pos(xdr); unsigned int copied, offset; @@ -990,15 +1154,7 @@ static unsigned int xdr_align_pages(struct xdr_stream *xdr, unsigned int len) if (xdr->nwords == 0) return 0; - /* Realign pages to current pointer position */ - iov = buf->head; - if (iov->iov_len > cur) { - offset = iov->iov_len - cur; - copied = xdr_shrink_bufhead(buf, offset); - trace_rpc_xdr_alignment(xdr, offset, copied); - xdr->nwords = XDR_QUADLEN(buf->len - cur); - } - + xdr_realign_pages(xdr); if (nwords > xdr->nwords) { nwords = xdr->nwords; len = nwords << 2; @@ -1057,6 +1213,79 @@ unsigned int xdr_read_pages(struct xdr_stream *xdr, unsigned int len) } EXPORT_SYMBOL_GPL(xdr_read_pages); +uint64_t xdr_align_data(struct xdr_stream *xdr, uint64_t offset, uint32_t length) +{ + struct xdr_buf *buf = xdr->buf; + unsigned int from, bytes; + unsigned int shift = 0; + + if ((offset + length) < offset || + (offset + length) > buf->page_len) + length = buf->page_len - offset; + + xdr_realign_pages(xdr); + from = xdr_page_pos(xdr); + bytes = xdr->nwords << 2; + if (length < bytes) + bytes = length; + + /* Move page data to the left */ + if (from > offset) { + shift = min_t(unsigned int, bytes, buf->page_len - from); + _shift_data_left_pages(buf->pages, + buf->page_base + offset, + buf->page_base + from, + shift); + bytes -= shift; + + /* Move tail data into the pages, if necessary */ + if (bytes > 0) + _shift_data_left_tail(buf, offset + shift, bytes); + } + + xdr->nwords -= XDR_QUADLEN(length); + xdr_set_page(xdr, from + length, PAGE_SIZE); + return length; +} +EXPORT_SYMBOL_GPL(xdr_align_data); + +uint64_t xdr_expand_hole(struct xdr_stream *xdr, uint64_t offset, uint64_t length) +{ + struct xdr_buf *buf = xdr->buf; + unsigned int bytes; + unsigned int from; + unsigned int truncated = 0; + + if ((offset + length) < offset || + (offset + length) > buf->page_len) + length = buf->page_len - offset; + + xdr_realign_pages(xdr); + from = xdr_page_pos(xdr); + bytes = xdr->nwords << 2; + + if (offset + length + bytes > buf->page_len) { + unsigned int shift = (offset + length + bytes) - buf->page_len; + unsigned int res = _shift_data_right_tail(buf, from + bytes - shift, shift); + truncated = shift - res; + xdr->nwords -= XDR_QUADLEN(truncated); + bytes -= shift; + } + + /* Now move the page data over and zero pages */ + if (bytes > 0) + _shift_data_right_pages(buf->pages, + buf->page_base + offset + length, + buf->page_base + from, + bytes); + _zero_pages(buf->pages, buf->page_base + offset, length); + + buf->len += length - (from - offset) - truncated; + xdr_set_page(xdr, offset + length, PAGE_SIZE); + return length; +} +EXPORT_SYMBOL_GPL(xdr_expand_hole); + /** * xdr_enter_page - decode data from the XDR page * @xdr: pointer to xdr_stream struct diff --git a/net/sunrpc/xprt.c b/net/sunrpc/xprt.c index 5a8e47bbfb9f..f6c17e75f20e 100644 --- a/net/sunrpc/xprt.c +++ b/net/sunrpc/xprt.c @@ -834,8 +834,7 @@ void xprt_connect(struct rpc_task *task) { struct rpc_xprt *xprt = task->tk_rqstp->rq_xprt; - dprintk("RPC: %5u xprt_connect xprt %p %s connected\n", task->tk_pid, - xprt, (xprt_connected(xprt) ? "is" : "is not")); + trace_xprt_connect(xprt); if (!xprt_bound(xprt)) { task->tk_status = -EAGAIN; @@ -1131,8 +1130,6 @@ void xprt_complete_rqst(struct rpc_task *task, int copied) struct rpc_rqst *req = task->tk_rqstp; struct rpc_xprt *xprt = req->rq_xprt; - trace_xprt_complete_rqst(xprt, req->rq_xid, copied); - xprt->stat.recvs++; req->rq_private_buf.len = copied; @@ -1269,7 +1266,6 @@ xprt_request_enqueue_transmit(struct rpc_task *task) /* Note: req is added _before_ pos */ list_add_tail(&req->rq_xmit, &pos->rq_xmit); INIT_LIST_HEAD(&req->rq_xmit2); - trace_xprt_enq_xmit(task, 1); goto out; } } else if (RPC_IS_SWAPPER(task)) { @@ -1281,7 +1277,6 @@ xprt_request_enqueue_transmit(struct rpc_task *task) /* Note: req is added _before_ pos */ list_add_tail(&req->rq_xmit, &pos->rq_xmit); INIT_LIST_HEAD(&req->rq_xmit2); - trace_xprt_enq_xmit(task, 2); goto out; } } else if (!req->rq_seqno) { @@ -1290,13 +1285,11 @@ xprt_request_enqueue_transmit(struct rpc_task *task) continue; list_add_tail(&req->rq_xmit2, &pos->rq_xmit2); INIT_LIST_HEAD(&req->rq_xmit); - trace_xprt_enq_xmit(task, 3); goto out; } } list_add_tail(&req->rq_xmit, &xprt->xmit_queue); INIT_LIST_HEAD(&req->rq_xmit2); - trace_xprt_enq_xmit(task, 4); out: set_bit(RPC_TASK_NEED_XMIT, &task->tk_runstate); spin_unlock(&xprt->queue_lock); @@ -1414,9 +1407,9 @@ bool xprt_prepare_transmit(struct rpc_task *task) struct rpc_rqst *req = task->tk_rqstp; struct rpc_xprt *xprt = req->rq_xprt; - dprintk("RPC: %5u xprt_prepare_transmit\n", task->tk_pid); - if (!xprt_lock_write(xprt, task)) { + trace_xprt_transmit_queued(xprt, task); + /* Race breaker: someone may have transmitted us */ if (!test_bit(RPC_TASK_NEED_XMIT, &task->tk_runstate)) rpc_wake_up_queued_task_set_status(&xprt->sending, @@ -1520,10 +1513,13 @@ xprt_transmit(struct rpc_task *task) { struct rpc_rqst *next, *req = task->tk_rqstp; struct rpc_xprt *xprt = req->rq_xprt; - int status; + int counter, status; spin_lock(&xprt->queue_lock); + counter = 0; while (!list_empty(&xprt->xmit_queue)) { + if (++counter == 20) + break; next = list_first_entry(&xprt->xmit_queue, struct rpc_rqst, rq_xmit); xprt_pin_rqst(next); @@ -1531,7 +1527,6 @@ xprt_transmit(struct rpc_task *task) status = xprt_request_transmit(next, task); if (status == -EBADMSG && next != req) status = 0; - cond_resched(); spin_lock(&xprt->queue_lock); xprt_unpin_rqst(next); if (status == 0) { @@ -1747,8 +1742,8 @@ xprt_request_init(struct rpc_task *task) req->rq_rcv_buf.bvec = NULL; req->rq_release_snd_buf = NULL; xprt_init_majortimeo(task, req); - dprintk("RPC: %5u reserved req %p xid %08x\n", task->tk_pid, - req, ntohl(req->rq_xid)); + + trace_xprt_reserve(req); } static void @@ -1838,7 +1833,6 @@ void xprt_release(struct rpc_task *task) if (req->rq_release_snd_buf) req->rq_release_snd_buf(req); - dprintk("RPC: %5u release request %p\n", task->tk_pid, req); if (likely(!bc_prealloc(req))) xprt->ops->free_slot(xprt, req); else diff --git a/net/sunrpc/xprtrdma/frwr_ops.c b/net/sunrpc/xprtrdma/frwr_ops.c index 7f94c9a19fd3..44888f5badef 100644 --- a/net/sunrpc/xprtrdma/frwr_ops.c +++ b/net/sunrpc/xprtrdma/frwr_ops.c @@ -124,7 +124,7 @@ int frwr_mr_init(struct rpcrdma_xprt *r_xprt, struct rpcrdma_mr *mr) if (IS_ERR(frmr)) goto out_mr_err; - sg = kcalloc(depth, sizeof(*sg), GFP_NOFS); + sg = kmalloc_array(depth, sizeof(*sg), GFP_NOFS); if (!sg) goto out_list_err; diff --git a/net/sunrpc/xprtrdma/transport.c b/net/sunrpc/xprtrdma/transport.c index 053c8ab1265a..8915e42240d3 100644 --- a/net/sunrpc/xprtrdma/transport.c +++ b/net/sunrpc/xprtrdma/transport.c @@ -413,9 +413,6 @@ xprt_rdma_set_port(struct rpc_xprt *xprt, u16 port) kfree(xprt->address_strings[RPC_DISPLAY_HEX_PORT]); snprintf(buf, sizeof(buf), "%4hx", port); xprt->address_strings[RPC_DISPLAY_HEX_PORT] = kstrdup(buf, GFP_KERNEL); - - trace_xprtrdma_op_setport(container_of(xprt, struct rpcrdma_xprt, - rx_xprt)); } /** @@ -586,11 +583,9 @@ xprt_rdma_allocate(struct rpc_task *task) rqst->rq_buffer = rdmab_data(req->rl_sendbuf); rqst->rq_rbuffer = rdmab_data(req->rl_recvbuf); - trace_xprtrdma_op_allocate(task, req); return 0; out_fail: - trace_xprtrdma_op_allocate(task, NULL); return -ENOMEM; } @@ -607,8 +602,6 @@ xprt_rdma_free(struct rpc_task *task) struct rpcrdma_xprt *r_xprt = rpcx_to_rdmax(rqst->rq_xprt); struct rpcrdma_req *req = rpcr_to_rdmar(rqst); - trace_xprtrdma_op_free(task, req); - if (!list_empty(&req->rl_registered)) frwr_unmap_sync(r_xprt, req); diff --git a/net/sunrpc/xprtsock.c b/net/sunrpc/xprtsock.c index 554e1bb4c1c7..7090bbee0ec5 100644 --- a/net/sunrpc/xprtsock.c +++ b/net/sunrpc/xprtsock.c @@ -762,10 +762,7 @@ static int xs_nospace(struct rpc_rqst *req) struct sock *sk = transport->inet; int ret = -EAGAIN; - dprintk("RPC: %5u xmit incomplete (%u left of %u)\n", - req->rq_task->tk_pid, - req->rq_slen - transport->xmit.offset, - req->rq_slen); + trace_rpc_socket_nospace(req, transport); /* Protect against races with write_space */ spin_lock(&xprt->transport_lock); diff --git a/tools/testing/radix-tree/idr-test.c b/tools/testing/radix-tree/idr-test.c index 8995092d541e..3b796dd5e577 100644 --- a/tools/testing/radix-tree/idr-test.c +++ b/tools/testing/radix-tree/idr-test.c @@ -523,8 +523,27 @@ static void *ida_random_fn(void *arg) return NULL; } +static void *ida_leak_fn(void *arg) +{ + struct ida *ida = arg; + time_t s = time(NULL); + int i, ret; + + rcu_register_thread(); + + do for (i = 0; i < 1000; i++) { + ret = ida_alloc_range(ida, 128, 128, GFP_KERNEL); + if (ret >= 0) + ida_free(ida, 128); + } while (time(NULL) < s + 2); + + rcu_unregister_thread(); + return NULL; +} + void ida_thread_tests(void) { + DEFINE_IDA(ida); pthread_t threads[20]; int i; @@ -536,6 +555,16 @@ void ida_thread_tests(void) while (i--) pthread_join(threads[i], NULL); + + for (i = 0; i < ARRAY_SIZE(threads); i++) + if (pthread_create(&threads[i], NULL, ida_leak_fn, &ida)) { + perror("creating ida thread"); + exit(1); + } + + while (i--) + pthread_join(threads[i], NULL); + assert(ida_is_empty(&ida)); } void ida_tests(void) diff --git a/tools/testing/radix-tree/linux/kernel.h b/tools/testing/radix-tree/linux/kernel.h index 4568248222ae..39867fd80c8f 100644 --- a/tools/testing/radix-tree/linux/kernel.h +++ b/tools/testing/radix-tree/linux/kernel.h @@ -22,4 +22,5 @@ #define __releases(x) #define __must_hold(x) +#define EXPORT_PER_CPU_SYMBOL_GPL(x) #endif /* _KERNEL_H */ diff --git a/tools/testing/radix-tree/linux/local_lock.h b/tools/testing/radix-tree/linux/local_lock.h new file mode 100644 index 000000000000..b3cf8b233ca4 --- /dev/null +++ b/tools/testing/radix-tree/linux/local_lock.h @@ -0,0 +1,8 @@ +#ifndef _LINUX_LOCAL_LOCK +#define _LINUX_LOCAL_LOCK +typedef struct { } local_lock_t; + +static inline void local_lock(local_lock_t *lock) { } +static inline void local_unlock(local_lock_t *lock) { } +#define INIT_LOCAL_LOCK(x) { } +#endif diff --git a/tools/testing/radix-tree/test.h b/tools/testing/radix-tree/test.h index 34dab4d18744..7ef7067e942c 100644 --- a/tools/testing/radix-tree/test.h +++ b/tools/testing/radix-tree/test.h @@ -56,8 +56,4 @@ int root_tag_get(struct radix_tree_root *root, unsigned int tag); unsigned long node_maxindex(struct radix_tree_node *); unsigned long shift_maxindex(unsigned int shift); int radix_tree_cpu_dead(unsigned int cpu); -struct radix_tree_preload { - unsigned nr; - struct radix_tree_node *nodes; -}; extern struct radix_tree_preload radix_tree_preloads;