From c955a0cc8a286e5da1ebb88c19201e9bab8c2422 Mon Sep 17 00:00:00 2001 From: Patrice Chotard Date: Tue, 18 May 2021 18:27:52 +0200 Subject: [PATCH 1/3] spi: spi-mem: add automatic poll status functions With STM32 QSPI, it is possible to poll the status register of the device. This could be done to offload the CPU during an operation (erase or program a SPI NAND for example). spi_mem_poll_status API has been added to handle this feature. This new function take care of the offload/non-offload cases. For the non-offload case, use read_poll_timeout() to poll the status in order to release CPU during this phase. For example, previously, when erasing large area, in non-offload case, CPU load can reach ~50%, now it decrease to ~35%. Signed-off-by: Patrice Chotard Signed-off-by: Christophe Kerello Reviewed-by: Boris Brezillon Link: https://lore.kernel.org/r/20210518162754.15940-2-patrice.chotard@foss.st.com Signed-off-by: Mark Brown --- drivers/spi/spi-mem.c | 86 +++++++++++++++++++++++++++++++++++++ include/linux/spi/spi-mem.h | 16 +++++++ 2 files changed, 102 insertions(+) diff --git a/drivers/spi/spi-mem.c b/drivers/spi/spi-mem.c index 1513553e4080..177b3e21febf 100644 --- a/drivers/spi/spi-mem.c +++ b/drivers/spi/spi-mem.c @@ -6,6 +6,7 @@ * Author: Boris Brezillon */ #include +#include #include #include #include @@ -743,6 +744,91 @@ static inline struct spi_mem_driver *to_spi_mem_drv(struct device_driver *drv) return container_of(drv, struct spi_mem_driver, spidrv.driver); } +static int spi_mem_read_status(struct spi_mem *mem, + const struct spi_mem_op *op, + u16 *status) +{ + const u8 *bytes = (u8 *)op->data.buf.in; + int ret; + + ret = spi_mem_exec_op(mem, op); + if (ret) + return ret; + + if (op->data.nbytes > 1) + *status = ((u16)bytes[0] << 8) | bytes[1]; + else + *status = bytes[0]; + + return 0; +} + +/** + * spi_mem_poll_status() - Poll memory device status + * @mem: SPI memory device + * @op: the memory operation to execute + * @mask: status bitmask to ckeck + * @match: (status & mask) expected value + * @initial_delay_us: delay in us before starting to poll + * @polling_delay_us: time to sleep between reads in us + * @timeout_ms: timeout in milliseconds + * + * This function polls a status register and returns when + * (status & mask) == match or when the timeout has expired. + * + * Return: 0 in case of success, -ETIMEDOUT in case of error, + * -EOPNOTSUPP if not supported. + */ +int spi_mem_poll_status(struct spi_mem *mem, + const struct spi_mem_op *op, + u16 mask, u16 match, + unsigned long initial_delay_us, + unsigned long polling_delay_us, + u16 timeout_ms) +{ + struct spi_controller *ctlr = mem->spi->controller; + int ret = -EOPNOTSUPP; + int read_status_ret; + u16 status; + + if (op->data.nbytes < 1 || op->data.nbytes > 2 || + op->data.dir != SPI_MEM_DATA_IN) + return -EINVAL; + + if (ctlr->mem_ops && ctlr->mem_ops->poll_status) { + ret = spi_mem_access_start(mem); + if (ret) + return ret; + + ret = ctlr->mem_ops->poll_status(mem, op, mask, match, + initial_delay_us, polling_delay_us, + timeout_ms); + + spi_mem_access_end(mem); + } + + if (ret == -EOPNOTSUPP) { + if (!spi_mem_supports_op(mem, op)) + return ret; + + if (initial_delay_us < 10) + udelay(initial_delay_us); + else + usleep_range((initial_delay_us >> 2) + 1, + initial_delay_us); + + ret = read_poll_timeout(spi_mem_read_status, read_status_ret, + (read_status_ret || ((status) & mask) == match), + polling_delay_us, timeout_ms * 1000, false, mem, + op, &status); + if (read_status_ret) + return read_status_ret; + } + + return ret; +} +EXPORT_SYMBOL_GPL(spi_mem_poll_status); + static int spi_mem_probe(struct spi_device *spi) { struct spi_mem_driver *memdrv = to_spi_mem_drv(spi->dev.driver); diff --git a/include/linux/spi/spi-mem.h b/include/linux/spi/spi-mem.h index 2b65c9edc34e..85e2ff7b840d 100644 --- a/include/linux/spi/spi-mem.h +++ b/include/linux/spi/spi-mem.h @@ -250,6 +250,9 @@ static inline void *spi_mem_get_drvdata(struct spi_mem *mem) * the currently mapped area), and the caller of * spi_mem_dirmap_write() is responsible for calling it again in * this case. + * @poll_status: poll memory device status until (status & mask) == match or + * when the timeout has expired. It fills the data buffer with + * the last status value. * * This interface should be implemented by SPI controllers providing an * high-level interface to execute SPI memory operation, which is usually the @@ -274,6 +277,12 @@ struct spi_controller_mem_ops { u64 offs, size_t len, void *buf); ssize_t (*dirmap_write)(struct spi_mem_dirmap_desc *desc, u64 offs, size_t len, const void *buf); + int (*poll_status)(struct spi_mem *mem, + const struct spi_mem_op *op, + u16 mask, u16 match, + unsigned long initial_delay_us, + unsigned long polling_rate_us, + unsigned long timeout_ms); }; /** @@ -369,6 +378,13 @@ devm_spi_mem_dirmap_create(struct device *dev, struct spi_mem *mem, void devm_spi_mem_dirmap_destroy(struct device *dev, struct spi_mem_dirmap_desc *desc); +int spi_mem_poll_status(struct spi_mem *mem, + const struct spi_mem_op *op, + u16 mask, u16 match, + unsigned long initial_delay_us, + unsigned long polling_delay_us, + u16 timeout_ms); + int spi_mem_driver_register_with_owner(struct spi_mem_driver *drv, struct module *owner); From 8941cd8d295e40f8ea1c0a5045d6d068b8e33eec Mon Sep 17 00:00:00 2001 From: Patrice Chotard Date: Tue, 18 May 2021 18:27:53 +0200 Subject: [PATCH 2/3] mtd: spinand: use the spi-mem poll status APIs Make use of spi-mem poll status APIs to let advanced controllers optimize wait operations. This should also fix the high CPU usage for system that don't have a dedicated STATUS poll block logic. Signed-off-by: Patrice Chotard Signed-off-by: Christophe Kerello Reviewed-by: Boris Brezillon Acked-by: Miquel Raynal Link: https://lore.kernel.org/r/20210518162754.15940-3-patrice.chotard@foss.st.com Signed-off-by: Mark Brown --- drivers/mtd/nand/spi/core.c | 45 ++++++++++++++++++++++++++----------- include/linux/mtd/spinand.h | 22 ++++++++++++++++++ 2 files changed, 54 insertions(+), 13 deletions(-) diff --git a/drivers/mtd/nand/spi/core.c b/drivers/mtd/nand/spi/core.c index 17f63f95f4a2..3131fae0c715 100644 --- a/drivers/mtd/nand/spi/core.c +++ b/drivers/mtd/nand/spi/core.c @@ -473,20 +473,26 @@ static int spinand_erase_op(struct spinand_device *spinand, return spi_mem_exec_op(spinand->spimem, &op); } -static int spinand_wait(struct spinand_device *spinand, u8 *s) +static int spinand_wait(struct spinand_device *spinand, + unsigned long initial_delay_us, + unsigned long poll_delay_us, + u8 *s) { - unsigned long timeo = jiffies + msecs_to_jiffies(400); + struct spi_mem_op op = SPINAND_GET_FEATURE_OP(REG_STATUS, + spinand->scratchbuf); u8 status; int ret; - do { - ret = spinand_read_status(spinand, &status); - if (ret) - return ret; + ret = spi_mem_poll_status(spinand->spimem, &op, STATUS_BUSY, 0, + initial_delay_us, + poll_delay_us, + SPINAND_WAITRDY_TIMEOUT_MS); + if (ret) + return ret; - if (!(status & STATUS_BUSY)) - goto out; - } while (time_before(jiffies, timeo)); + status = *spinand->scratchbuf; + if (!(status & STATUS_BUSY)) + goto out; /* * Extra read, just in case the STATUS_READY bit has changed @@ -526,7 +532,10 @@ static int spinand_reset_op(struct spinand_device *spinand) if (ret) return ret; - return spinand_wait(spinand, NULL); + return spinand_wait(spinand, + SPINAND_RESET_INITIAL_DELAY_US, + SPINAND_RESET_POLL_DELAY_US, + NULL); } static int spinand_lock_block(struct spinand_device *spinand, u8 lock) @@ -549,7 +558,10 @@ static int spinand_read_page(struct spinand_device *spinand, if (ret) return ret; - ret = spinand_wait(spinand, &status); + ret = spinand_wait(spinand, + SPINAND_READ_INITIAL_DELAY_US, + SPINAND_READ_POLL_DELAY_US, + &status); if (ret < 0) return ret; @@ -585,7 +597,10 @@ static int spinand_write_page(struct spinand_device *spinand, if (ret) return ret; - ret = spinand_wait(spinand, &status); + ret = spinand_wait(spinand, + SPINAND_WRITE_INITIAL_DELAY_US, + SPINAND_WRITE_POLL_DELAY_US, + &status); if (!ret && (status & STATUS_PROG_FAILED)) return -EIO; @@ -768,7 +783,11 @@ static int spinand_erase(struct nand_device *nand, const struct nand_pos *pos) if (ret) return ret; - ret = spinand_wait(spinand, &status); + ret = spinand_wait(spinand, + SPINAND_ERASE_INITIAL_DELAY_US, + SPINAND_ERASE_POLL_DELAY_US, + &status); + if (!ret && (status & STATUS_ERASE_FAILED)) ret = -EIO; diff --git a/include/linux/mtd/spinand.h b/include/linux/mtd/spinand.h index 6bb92f26833e..6988956b8492 100644 --- a/include/linux/mtd/spinand.h +++ b/include/linux/mtd/spinand.h @@ -170,6 +170,28 @@ struct spinand_op; struct spinand_device; #define SPINAND_MAX_ID_LEN 4 +/* + * For erase, write and read operation, we got the following timings : + * tBERS (erase) 1ms to 4ms + * tPROG 300us to 400us + * tREAD 25us to 100us + * In order to minimize latency, the min value is divided by 4 for the + * initial delay, and dividing by 20 for the poll delay. + * For reset, 5us/10us/500us if the device is respectively + * reading/programming/erasing when the RESET occurs. Since we always + * issue a RESET when the device is IDLE, 5us is selected for both initial + * and poll delay. + */ +#define SPINAND_READ_INITIAL_DELAY_US 6 +#define SPINAND_READ_POLL_DELAY_US 5 +#define SPINAND_RESET_INITIAL_DELAY_US 5 +#define SPINAND_RESET_POLL_DELAY_US 5 +#define SPINAND_WRITE_INITIAL_DELAY_US 75 +#define SPINAND_WRITE_POLL_DELAY_US 15 +#define SPINAND_ERASE_INITIAL_DELAY_US 250 +#define SPINAND_ERASE_POLL_DELAY_US 50 + +#define SPINAND_WAITRDY_TIMEOUT_MS 400 /** * struct spinand_id - SPI NAND id structure From 86d1c6bbae32122c5f703b2d8acccf5d4258f2bb Mon Sep 17 00:00:00 2001 From: Patrice Chotard Date: Tue, 18 May 2021 18:27:54 +0200 Subject: [PATCH 3/3] spi: stm32-qspi: add automatic poll status feature STM32 QSPI is able to automatically poll a specified register inside the memory and relieve the CPU from this task. As example, when erasing a large memory area, we got cpu load equal to 50%. This patch allows to perform the same operation with a cpu load around 2%. Signed-off-by: Christophe Kerello Signed-off-by: Patrice Chotard Link: https://lore.kernel.org/r/20210518162754.15940-4-patrice.chotard@foss.st.com Signed-off-by: Mark Brown --- drivers/spi/spi-stm32-qspi.c | 86 ++++++++++++++++++++++++++++++++---- 1 file changed, 78 insertions(+), 8 deletions(-) diff --git a/drivers/spi/spi-stm32-qspi.c b/drivers/spi/spi-stm32-qspi.c index 7e640ccc7e77..69fd220a2894 100644 --- a/drivers/spi/spi-stm32-qspi.c +++ b/drivers/spi/spi-stm32-qspi.c @@ -36,6 +36,7 @@ #define CR_FTIE BIT(18) #define CR_SMIE BIT(19) #define CR_TOIE BIT(20) +#define CR_APMS BIT(22) #define CR_PRESC_MASK GENMASK(31, 24) #define QSPI_DCR 0x04 @@ -53,6 +54,7 @@ #define QSPI_FCR 0x0c #define FCR_CTEF BIT(0) #define FCR_CTCF BIT(1) +#define FCR_CSMF BIT(3) #define QSPI_DLR 0x10 @@ -107,6 +109,7 @@ struct stm32_qspi { u32 clk_rate; struct stm32_qspi_flash flash[STM32_QSPI_MAX_NORCHIP]; struct completion data_completion; + struct completion match_completion; u32 fmode; struct dma_chan *dma_chtx; @@ -115,6 +118,7 @@ struct stm32_qspi { u32 cr_reg; u32 dcr_reg; + unsigned long status_timeout; /* * to protect device configuration, could be different between @@ -128,11 +132,20 @@ static irqreturn_t stm32_qspi_irq(int irq, void *dev_id) struct stm32_qspi *qspi = (struct stm32_qspi *)dev_id; u32 cr, sr; + cr = readl_relaxed(qspi->io_base + QSPI_CR); sr = readl_relaxed(qspi->io_base + QSPI_SR); + if (cr & CR_SMIE && sr & SR_SMF) { + /* disable irq */ + cr &= ~CR_SMIE; + writel_relaxed(cr, qspi->io_base + QSPI_CR); + complete(&qspi->match_completion); + + return IRQ_HANDLED; + } + if (sr & (SR_TEF | SR_TCF)) { /* disable irq */ - cr = readl_relaxed(qspi->io_base + QSPI_CR); cr &= ~CR_TCIE & ~CR_TEIE; writel_relaxed(cr, qspi->io_base + QSPI_CR); complete(&qspi->data_completion); @@ -319,6 +332,24 @@ static int stm32_qspi_wait_cmd(struct stm32_qspi *qspi, return err; } +static int stm32_qspi_wait_poll_status(struct stm32_qspi *qspi, + const struct spi_mem_op *op) +{ + u32 cr; + + reinit_completion(&qspi->match_completion); + cr = readl_relaxed(qspi->io_base + QSPI_CR); + writel_relaxed(cr | CR_SMIE, qspi->io_base + QSPI_CR); + + if (!wait_for_completion_timeout(&qspi->match_completion, + msecs_to_jiffies(qspi->status_timeout))) + return -ETIMEDOUT; + + writel_relaxed(FCR_CSMF, qspi->io_base + QSPI_FCR); + + return 0; +} + static int stm32_qspi_get_mode(struct stm32_qspi *qspi, u8 buswidth) { if (buswidth == 4) @@ -332,7 +363,7 @@ static int stm32_qspi_send(struct spi_mem *mem, const struct spi_mem_op *op) struct stm32_qspi *qspi = spi_controller_get_devdata(mem->spi->master); struct stm32_qspi_flash *flash = &qspi->flash[mem->spi->chip_select]; u32 ccr, cr; - int timeout, err = 0; + int timeout, err = 0, err_poll_status = 0; dev_dbg(qspi->dev, "cmd:%#x mode:%d.%d.%d.%d addr:%#llx len:%#x\n", op->cmd.opcode, op->cmd.buswidth, op->addr.buswidth, @@ -378,6 +409,9 @@ static int stm32_qspi_send(struct spi_mem *mem, const struct spi_mem_op *op) if (op->addr.nbytes && qspi->fmode != CCR_FMODE_MM) writel_relaxed(op->addr.val, qspi->io_base + QSPI_AR); + if (qspi->fmode == CCR_FMODE_APM) + err_poll_status = stm32_qspi_wait_poll_status(qspi, op); + err = stm32_qspi_tx(qspi, op); /* @@ -387,7 +421,7 @@ static int stm32_qspi_send(struct spi_mem *mem, const struct spi_mem_op *op) * byte of device (device size - fifo size). like device size is not * knows, the prefetching is always stop. */ - if (err || qspi->fmode == CCR_FMODE_MM) + if (err || err_poll_status || qspi->fmode == CCR_FMODE_MM) goto abort; /* wait end of tx in indirect mode */ @@ -406,15 +440,49 @@ static int stm32_qspi_send(struct spi_mem *mem, const struct spi_mem_op *op) cr, !(cr & CR_ABORT), 1, STM32_ABT_TIMEOUT_US); - writel_relaxed(FCR_CTCF, qspi->io_base + QSPI_FCR); + writel_relaxed(FCR_CTCF | FCR_CSMF, qspi->io_base + QSPI_FCR); - if (err || timeout) - dev_err(qspi->dev, "%s err:%d abort timeout:%d\n", - __func__, err, timeout); + if (err || err_poll_status || timeout) + dev_err(qspi->dev, "%s err:%d err_poll_status:%d abort timeout:%d\n", + __func__, err, err_poll_status, timeout); return err; } +static int stm32_qspi_poll_status(struct spi_mem *mem, const struct spi_mem_op *op, + u16 mask, u16 match, + unsigned long initial_delay_us, + unsigned long polling_rate_us, + unsigned long timeout_ms) +{ + struct stm32_qspi *qspi = spi_controller_get_devdata(mem->spi->master); + int ret; + + if (!spi_mem_supports_op(mem, op)) + return -EOPNOTSUPP; + + ret = pm_runtime_get_sync(qspi->dev); + if (ret < 0) { + pm_runtime_put_noidle(qspi->dev); + return ret; + } + + mutex_lock(&qspi->lock); + + writel_relaxed(mask, qspi->io_base + QSPI_PSMKR); + writel_relaxed(match, qspi->io_base + QSPI_PSMAR); + qspi->fmode = CCR_FMODE_APM; + qspi->status_timeout = timeout_ms; + + ret = stm32_qspi_send(mem, op); + mutex_unlock(&qspi->lock); + + pm_runtime_mark_last_busy(qspi->dev); + pm_runtime_put_autosuspend(qspi->dev); + + return ret; +} + static int stm32_qspi_exec_op(struct spi_mem *mem, const struct spi_mem_op *op) { struct stm32_qspi *qspi = spi_controller_get_devdata(mem->spi->master); @@ -527,7 +595,7 @@ static int stm32_qspi_setup(struct spi_device *spi) flash->presc = presc; mutex_lock(&qspi->lock); - qspi->cr_reg = 3 << CR_FTHRES_SHIFT | CR_SSHIFT | CR_EN; + qspi->cr_reg = CR_APMS | 3 << CR_FTHRES_SHIFT | CR_SSHIFT | CR_EN; writel_relaxed(qspi->cr_reg, qspi->io_base + QSPI_CR); /* set dcr fsize to max address */ @@ -607,6 +675,7 @@ static const struct spi_controller_mem_ops stm32_qspi_mem_ops = { .exec_op = stm32_qspi_exec_op, .dirmap_create = stm32_qspi_dirmap_create, .dirmap_read = stm32_qspi_dirmap_read, + .poll_status = stm32_qspi_poll_status, }; static int stm32_qspi_probe(struct platform_device *pdev) @@ -661,6 +730,7 @@ static int stm32_qspi_probe(struct platform_device *pdev) } init_completion(&qspi->data_completion); + init_completion(&qspi->match_completion); qspi->clk = devm_clk_get(dev, NULL); if (IS_ERR(qspi->clk)) {