From 98060484e2328a4d29b62eb42ef31219a4629587 Mon Sep 17 00:00:00 2001 From: afzal mohammed Date: Wed, 4 Mar 2020 06:11:11 +0530 Subject: [PATCH 001/280] xtensa: replace setup_irq() by request_irq() request_irq() is preferred over setup_irq(). Invocations of setup_irq() occur after memory allocators are ready. Per tglx[1], setup_irq() existed in olden days when allocators were not ready by the time early interrupts were initialized. Hence replace setup_irq() by request_irq(). [1] https://lkml.kernel.org/r/alpine.DEB.2.20.1710191609480.1971@nanos Signed-off-by: afzal mohammed Message-Id: <20200304004112.3848-1-afzal.mohd.ma@gmail.com> Signed-off-by: Max Filippov --- arch/xtensa/kernel/smp.c | 8 ++------ arch/xtensa/kernel/time.c | 12 +++++------- 2 files changed, 7 insertions(+), 13 deletions(-) diff --git a/arch/xtensa/kernel/smp.c b/arch/xtensa/kernel/smp.c index 83b244ce61ee..cd85a7a2722b 100644 --- a/arch/xtensa/kernel/smp.c +++ b/arch/xtensa/kernel/smp.c @@ -53,16 +53,12 @@ static void system_flush_invalidate_dcache_range(unsigned long start, #define IPI_IRQ 0 static irqreturn_t ipi_interrupt(int irq, void *dev_id); -static struct irqaction ipi_irqaction = { - .handler = ipi_interrupt, - .flags = IRQF_PERCPU, - .name = "ipi", -}; void ipi_init(void) { unsigned irq = irq_create_mapping(NULL, IPI_IRQ); - setup_irq(irq, &ipi_irqaction); + if (request_irq(irq, ipi_interrupt, IRQF_PERCPU, "ipi", NULL)) + pr_err("Failed to request irq %u (ipi)\n", irq); } static inline unsigned int get_core_count(void) diff --git a/arch/xtensa/kernel/time.c b/arch/xtensa/kernel/time.c index 69db8c93c1f9..77971fe4cc95 100644 --- a/arch/xtensa/kernel/time.c +++ b/arch/xtensa/kernel/time.c @@ -128,12 +128,6 @@ static irqreturn_t timer_interrupt(int irq, void *dev_id) return IRQ_HANDLED; } -static struct irqaction timer_irqaction = { - .handler = timer_interrupt, - .flags = IRQF_TIMER, - .name = "timer", -}; - void local_timer_setup(unsigned cpu) { struct ccount_timer *timer = 
&per_cpu(ccount_timer, cpu); @@ -184,6 +178,8 @@ static inline void calibrate_ccount(void) void __init time_init(void) { + int irq; + of_clk_init(NULL); #ifdef CONFIG_XTENSA_CALIBRATE_CCOUNT pr_info("Calibrating CPU frequency "); @@ -199,7 +195,9 @@ void __init time_init(void) __func__); clocksource_register_hz(&ccount_clocksource, ccount_freq); local_timer_setup(0); - setup_irq(this_cpu_ptr(&ccount_timer)->evt.irq, &timer_irqaction); + irq = this_cpu_ptr(&ccount_timer)->evt.irq; + if (request_irq(irq, timer_interrupt, IRQF_TIMER, "timer", NULL)) + pr_err("Failed to request irq %d (timer)\n", irq); sched_clock_register(ccount_sched_clock_read, 32, ccount_freq); timer_probe(); } From 44d92b2c40da1a50ce1d6a116732b6724d5c4b74 Mon Sep 17 00:00:00 2001 From: Masahiro Yamada Date: Tue, 10 Mar 2020 13:59:25 +0900 Subject: [PATCH 002/280] xtensa: remove meaningless export ccflags-y arch/xtensa/boot/Makefile does not define ccflags-y at all. Please do not export ccflags-y because it is meant to be effective only in the current Makefile. Signed-off-by: Masahiro Yamada Message-Id: <20200310045925.25396-1-masahiroy@kernel.org> Signed-off-by: Max Filippov --- arch/xtensa/boot/Makefile | 1 - 1 file changed, 1 deletion(-) diff --git a/arch/xtensa/boot/Makefile b/arch/xtensa/boot/Makefile index efb91bfda2b4..1a14d38d9b33 100644 --- a/arch/xtensa/boot/Makefile +++ b/arch/xtensa/boot/Makefile @@ -14,7 +14,6 @@ HOSTFLAGS += -Iarch/$(ARCH)/boot/include BIG_ENDIAN := $(shell echo __XTENSA_EB__ | $(CC) -E - | grep -v "\#") -export ccflags-y export BIG_ENDIAN subdir-y := lib From 4f1885a7b347a905cd9ed7deb6472a9688637432 Mon Sep 17 00:00:00 2001 From: Amol Grover Date: Fri, 17 Jan 2020 18:55:22 +0530 Subject: [PATCH 003/280] drivers: char: ipmi: ipmi_msghandler: Pass lockdep expression to RCU lists intf->cmd_rcvrs is traversed with list_for_each_entry_rcu outside an RCU read-side critical section but under the protection of intf->cmd_rcvrs_mutex. 
ipmi_interfaces is traversed using list_for_each_entry_rcu outside an RCU read-side critical section but under the protection of ipmi_interfaces_mutex. Hence, add the corresponding lockdep expression to the list traversal primitive to silence false-positive lockdep warnings, and harden RCU lists. Add macro for the corresponding lockdep expression to make the code clean and concise. Signed-off-by: Amol Grover Message-Id: <20200117132521.31020-1-frextrite@gmail.com> Acked-by: Paul E. McKenney Tested-by: John Garry Signed-off-by: Corey Minyard --- drivers/char/ipmi/ipmi_msghandler.c | 14 ++++++++++---- 1 file changed, 10 insertions(+), 4 deletions(-) diff --git a/drivers/char/ipmi/ipmi_msghandler.c b/drivers/char/ipmi/ipmi_msghandler.c index cad9563f8f48..64ba16dcb681 100644 --- a/drivers/char/ipmi/ipmi_msghandler.c +++ b/drivers/char/ipmi/ipmi_msghandler.c @@ -618,6 +618,8 @@ static DEFINE_MUTEX(ipmidriver_mutex); static LIST_HEAD(ipmi_interfaces); static DEFINE_MUTEX(ipmi_interfaces_mutex); +#define ipmi_interfaces_mutex_held() \ + lockdep_is_held(&ipmi_interfaces_mutex) static struct srcu_struct ipmi_interfaces_srcu; /* @@ -1321,7 +1323,8 @@ static void _ipmi_destroy_user(struct ipmi_user *user) * synchronize_srcu()) then free everything in that list. 
*/ mutex_lock(&intf->cmd_rcvrs_mutex); - list_for_each_entry_rcu(rcvr, &intf->cmd_rcvrs, link) { + list_for_each_entry_rcu(rcvr, &intf->cmd_rcvrs, link, + lockdep_is_held(&intf->cmd_rcvrs_mutex)) { if (rcvr->user == user) { list_del_rcu(&rcvr->link); rcvr->next = rcvrs; @@ -1599,7 +1602,8 @@ static struct cmd_rcvr *find_cmd_rcvr(struct ipmi_smi *intf, { struct cmd_rcvr *rcvr; - list_for_each_entry_rcu(rcvr, &intf->cmd_rcvrs, link) { + list_for_each_entry_rcu(rcvr, &intf->cmd_rcvrs, link, + lockdep_is_held(&intf->cmd_rcvrs_mutex)) { if ((rcvr->netfn == netfn) && (rcvr->cmd == cmd) && (rcvr->chans & (1 << chan))) return rcvr; @@ -1614,7 +1618,8 @@ static int is_cmd_rcvr_exclusive(struct ipmi_smi *intf, { struct cmd_rcvr *rcvr; - list_for_each_entry_rcu(rcvr, &intf->cmd_rcvrs, link) { + list_for_each_entry_rcu(rcvr, &intf->cmd_rcvrs, link, + lockdep_is_held(&intf->cmd_rcvrs_mutex)) { if ((rcvr->netfn == netfn) && (rcvr->cmd == cmd) && (rcvr->chans & chans)) return 0; @@ -3450,7 +3455,8 @@ int ipmi_add_smi(struct module *owner, /* Look for a hole in the numbers. */ i = 0; link = &ipmi_interfaces; - list_for_each_entry_rcu(tintf, &ipmi_interfaces, link) { + list_for_each_entry_rcu(tintf, &ipmi_interfaces, link, + ipmi_interfaces_mutex_held()) { if (tintf->intf_num != i) { link = &tintf->link; break; From 38503943c89f0bafd9e3742f63f872301d44cbea Mon Sep 17 00:00:00 2001 From: James Smart Date: Sun, 22 Mar 2020 11:12:53 -0700 Subject: [PATCH 004/280] scsi: lpfc: Fix kasan slab-out-of-bounds error in lpfc_unreg_login The following kasan bug was called out: BUG: KASAN: slab-out-of-bounds in lpfc_unreg_login+0x7c/0xc0 [lpfc] Read of size 2 at addr ffff889fc7c50a22 by task lpfc_worker_3/6676 ... Call Trace: dump_stack+0x96/0xe0 ? lpfc_unreg_login+0x7c/0xc0 [lpfc] print_address_description.constprop.6+0x1b/0x220 ? lpfc_unreg_login+0x7c/0xc0 [lpfc] ? lpfc_unreg_login+0x7c/0xc0 [lpfc] __kasan_report.cold.9+0x37/0x7c ? 
lpfc_unreg_login+0x7c/0xc0 [lpfc] kasan_report+0xe/0x20 lpfc_unreg_login+0x7c/0xc0 [lpfc] lpfc_sli_def_mbox_cmpl+0x334/0x430 [lpfc] ... When processing the completion of a "Reg Rpi" login mailbox command in lpfc_sli_def_mbox_cmpl, a call may be made to lpfc_unreg_login. The vpi is extracted from the completing mailbox context and passed as an input for the next. However, the vpi stored in the mailbox command context is an absolute vpi, which for SLI4 represents both base + offset. When used with a non-zero base component, (function id > 0) this results in an out-of-range access beyond the allocated phba->vpi_ids array. Fix by subtracting the function's base value to get an accurate vpi number. Link: https://lore.kernel.org/r/20200322181304.37655-2-jsmart2021@gmail.com Signed-off-by: James Smart Signed-off-by: Dick Kennedy Signed-off-by: Martin K. Petersen --- drivers/scsi/lpfc/lpfc_sli.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/scsi/lpfc/lpfc_sli.c b/drivers/scsi/lpfc/lpfc_sli.c index 0b26b5c0527e..4fc14bebb76e 100644 --- a/drivers/scsi/lpfc/lpfc_sli.c +++ b/drivers/scsi/lpfc/lpfc_sli.c @@ -2511,6 +2511,8 @@ lpfc_sli_def_mbox_cmpl(struct lpfc_hba *phba, LPFC_MBOXQ_t *pmb) !pmb->u.mb.mbxStatus) { rpi = pmb->u.mb.un.varWords[0]; vpi = pmb->u.mb.un.varRegLogin.vpi; + if (phba->sli_rev == LPFC_SLI_REV4) + vpi -= phba->sli4_hba.max_cfg_param.vpi_base; lpfc_unreg_login(phba, vpi, rpi, pmb); pmb->vport = vport; pmb->mbox_cmpl = lpfc_sli_def_mbox_cmpl; From f861f596714bed06069f1109b89e51f3855c4ddf Mon Sep 17 00:00:00 2001 From: James Smart Date: Sun, 22 Mar 2020 11:12:54 -0700 Subject: [PATCH 005/280] scsi: lpfc: Fix lockdep error - register non-static key The following lockdep error was reported when unloading the lpfc driver: INFO: trying to register non-static key. the code is fine but needs lockdep annotation. turning off the locking correctness validator. ... Call Trace: dump_stack+0x96/0xe0 register_lock_class+0x8b8/0x8c0 ? 
lockdep_hardirqs_on+0x190/0x280 ? is_dynamic_key+0x150/0x150 ? wait_for_completion_interruptible+0x2a0/0x2a0 ? wake_up_q+0xd0/0xd0 __lock_acquire+0xda/0x21a0 ? register_lock_class+0x8c0/0x8c0 ? synchronize_rcu_expedited+0x500/0x500 ? __call_rcu+0x850/0x850 lock_acquire+0xf3/0x1f0 ? del_timer_sync+0x5/0xb0 del_timer_sync+0x3c/0xb0 ? del_timer_sync+0x5/0xb0 lpfc_pci_remove_one.cold.102+0x8b7/0x935 [lpfc] ... Unloading the driver resulted in a call to del_timer_sync for the cpuhp_poll_timer. However the call to setup the timer had never been made, so the timer structures used by lockdep checking were not initialized. Unconditionally call setup_timer for the cpuhp_poll_timer during driver initialization. Calls to start the timer remain "as needed". Link: https://lore.kernel.org/r/20200322181304.37655-3-jsmart2021@gmail.com Signed-off-by: James Smart Signed-off-by: Dick Kennedy Signed-off-by: Martin K. Petersen --- drivers/scsi/lpfc/lpfc_init.c | 5 ++--- drivers/scsi/lpfc/lpfc_sli.c | 6 ++---- 2 files changed, 4 insertions(+), 7 deletions(-) diff --git a/drivers/scsi/lpfc/lpfc_init.c b/drivers/scsi/lpfc/lpfc_init.c index 9d03e9b71efb..6eb3112a45a2 100644 --- a/drivers/scsi/lpfc/lpfc_init.c +++ b/drivers/scsi/lpfc/lpfc_init.c @@ -11173,11 +11173,9 @@ static void lpfc_cpuhp_add(struct lpfc_hba *phba) rcu_read_lock(); - if (!list_empty(&phba->poll_list)) { - timer_setup(&phba->cpuhp_poll_timer, lpfc_sli4_poll_hbtimer, 0); + if (!list_empty(&phba->poll_list)) mod_timer(&phba->cpuhp_poll_timer, jiffies + msecs_to_jiffies(LPFC_POLL_HB)); - } rcu_read_unlock(); @@ -13145,6 +13143,7 @@ lpfc_pci_probe_one_s4(struct pci_dev *pdev, const struct pci_device_id *pid) lpfc_sli4_ras_setup(phba); INIT_LIST_HEAD(&phba->poll_list); + timer_setup(&phba->cpuhp_poll_timer, lpfc_sli4_poll_hbtimer, 0); cpuhp_state_add_instance_nocalls(lpfc_cpuhp_state, &phba->cpuhp); return 0; diff --git a/drivers/scsi/lpfc/lpfc_sli.c b/drivers/scsi/lpfc/lpfc_sli.c index 4fc14bebb76e..08bf2f0a1065 100644 --- 
a/drivers/scsi/lpfc/lpfc_sli.c +++ b/drivers/scsi/lpfc/lpfc_sli.c @@ -14444,12 +14444,10 @@ static inline void lpfc_sli4_add_to_poll_list(struct lpfc_queue *eq) { struct lpfc_hba *phba = eq->phba; - if (list_empty(&phba->poll_list)) { - timer_setup(&phba->cpuhp_poll_timer, lpfc_sli4_poll_hbtimer, 0); - /* kickstart slowpath processing for this eq */ + /* kickstart slowpath processing if needed */ + if (list_empty(&phba->poll_list)) mod_timer(&phba->cpuhp_poll_timer, jiffies + msecs_to_jiffies(LPFC_POLL_HB)); - } list_add_rcu(&eq->_poll_list, &phba->poll_list); synchronize_rcu(); From e7f4034912d08a5fdfd7bdbcf9e86e885de1de21 Mon Sep 17 00:00:00 2001 From: James Smart Date: Sun, 22 Mar 2020 11:12:55 -0700 Subject: [PATCH 006/280] scsi: lpfc: Fix lpfc overwrite of sg_cnt field in nvmefc_tgt_fcp_req In lpfc_nvmet_prep_fcp_wqe() the line "rsp->sg_cnt = 0" is modifying the transport's data structure. This may result in the transport believing the s/g list was already freed, thus may not unmap/free it properly. Lpfc driver should not modify the transport data structure. The zeroing of the sg_cnt is to avoid use of the transport's sgl in a subsequent loop where the driver builds the necessary requests for the adapter firmware to complete the IO. Change LLDD to use a local copy of the transport sg_cnt when building requests to be passed to the adapter fw. Link: https://lore.kernel.org/r/20200322181304.37655-4-jsmart2021@gmail.com Signed-off-by: James Smart Signed-off-by: Dick Kennedy Signed-off-by: Martin K. 
Petersen --- drivers/scsi/lpfc/lpfc_nvmet.c | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/drivers/scsi/lpfc/lpfc_nvmet.c b/drivers/scsi/lpfc/lpfc_nvmet.c index 9dc9afe1c255..ae89d1450912 100644 --- a/drivers/scsi/lpfc/lpfc_nvmet.c +++ b/drivers/scsi/lpfc/lpfc_nvmet.c @@ -2598,7 +2598,7 @@ lpfc_nvmet_prep_fcp_wqe(struct lpfc_hba *phba, union lpfc_wqe128 *wqe; struct ulp_bde64 *bde; dma_addr_t physaddr; - int i, cnt; + int i, cnt, nsegs; int do_pbde; int xc = 1; @@ -2629,6 +2629,7 @@ lpfc_nvmet_prep_fcp_wqe(struct lpfc_hba *phba, phba->cfg_nvme_seg_cnt); return NULL; } + nsegs = rsp->sg_cnt; tgtp = (struct lpfc_nvmet_tgtport *)phba->targetport->private; nvmewqe = ctxp->wqeq; @@ -2868,7 +2869,7 @@ lpfc_nvmet_prep_fcp_wqe(struct lpfc_hba *phba, wqe->fcp_trsp.rsvd_12_15[0] = 0; /* Use rspbuf, NOT sg list */ - rsp->sg_cnt = 0; + nsegs = 0; sgl->word2 = 0; atomic_inc(&tgtp->xmt_fcp_rsp); break; @@ -2885,7 +2886,7 @@ lpfc_nvmet_prep_fcp_wqe(struct lpfc_hba *phba, nvmewqe->drvrTimeout = (phba->fc_ratov * 3) + LPFC_DRVR_TIMEOUT; nvmewqe->context1 = ndlp; - for_each_sg(rsp->sg, sgel, rsp->sg_cnt, i) { + for_each_sg(rsp->sg, sgel, nsegs, i) { physaddr = sg_dma_address(sgel); cnt = sg_dma_len(sgel); sgl->addr_hi = putPaddrHigh(physaddr); From c90b448023582200c54595232e7c004c93d52ae1 Mon Sep 17 00:00:00 2001 From: James Smart Date: Sun, 22 Mar 2020 11:12:56 -0700 Subject: [PATCH 007/280] scsi: lpfc: Fix scsi host template for SLI3 vports SCSI layer sends driver IOs with more s/g segments than driver can handle. This results in "Too many sg segments from dma_map_sg. Config 64, seg_cnt 219" error messages from the lpfc_scsi_prep_dma_buf_s3() routine. This was due to the driver using individual templates for pport and vport, host reset enabled or not, nvme vs scsi, etc. In the end, there was a combination for a vport that didn't match the pport. 
Rather than enumerating more templates and more discretionary assignments, revert to a base template that is copied to a template specific to the pport/vport. Then, based on role, attributes and sli type, modify the fields that are different for that port. Added a log message to lpfc_create_port to validate values. Link: https://lore.kernel.org/r/20200322181304.37655-5-jsmart2021@gmail.com Signed-off-by: James Smart Signed-off-by: Dick Kennedy Signed-off-by: Martin K. Petersen --- drivers/scsi/lpfc/lpfc.h | 5 +++ drivers/scsi/lpfc/lpfc_crtn.h | 2 - drivers/scsi/lpfc/lpfc_init.c | 73 +++++++++++++++++++++++------------ drivers/scsi/lpfc/lpfc_scsi.c | 48 ----------------------- 4 files changed, 54 insertions(+), 74 deletions(-) diff --git a/drivers/scsi/lpfc/lpfc.h b/drivers/scsi/lpfc/lpfc.h index 357fdec06bae..e940a49f9f02 100644 --- a/drivers/scsi/lpfc/lpfc.h +++ b/drivers/scsi/lpfc/lpfc.h @@ -1225,6 +1225,11 @@ struct lpfc_hba { #define LPFC_POLL_SLOWPATH 1 /* called from slowpath */ char os_host_name[MAXHOSTNAMELEN]; + + /* SCSI host template information - for physical port */ + struct scsi_host_template port_template; + /* SCSI host template information - for all vports */ + struct scsi_host_template vport_template; }; static inline struct Scsi_Host * diff --git a/drivers/scsi/lpfc/lpfc_crtn.h b/drivers/scsi/lpfc/lpfc_crtn.h index a450477a7e00..a0ef3bac0612 100644 --- a/drivers/scsi/lpfc/lpfc_crtn.h +++ b/drivers/scsi/lpfc/lpfc_crtn.h @@ -404,9 +404,7 @@ void lpfc_free_sysfs_attr(struct lpfc_vport *); extern struct device_attribute *lpfc_hba_attrs[]; extern struct device_attribute *lpfc_vport_attrs[]; extern struct scsi_host_template lpfc_template; -extern struct scsi_host_template lpfc_template_no_hr; extern struct scsi_host_template lpfc_template_nvme; -extern struct scsi_host_template lpfc_vport_template; extern struct fc_function_template lpfc_transport_functions; extern struct fc_function_template lpfc_vport_transport_functions; diff --git 
a/drivers/scsi/lpfc/lpfc_init.c b/drivers/scsi/lpfc/lpfc_init.c index 6eb3112a45a2..1dadf247a0aa 100644 --- a/drivers/scsi/lpfc/lpfc_init.c +++ b/drivers/scsi/lpfc/lpfc_init.c @@ -4231,6 +4231,7 @@ lpfc_create_port(struct lpfc_hba *phba, int instance, struct device *dev) { struct lpfc_vport *vport; struct Scsi_Host *shost = NULL; + struct scsi_host_template *template; int error = 0; int i; uint64_t wwn; @@ -4259,22 +4260,50 @@ lpfc_create_port(struct lpfc_hba *phba, int instance, struct device *dev) } } - if (phba->cfg_enable_fc4_type & LPFC_ENABLE_FCP) { - if (dev != &phba->pcidev->dev) { - shost = scsi_host_alloc(&lpfc_vport_template, - sizeof(struct lpfc_vport)); + /* Seed template for SCSI host registration */ + if (dev == &phba->pcidev->dev) { + template = &phba->port_template; + + if (phba->cfg_enable_fc4_type & LPFC_ENABLE_FCP) { + /* Seed physical port template */ + memcpy(template, &lpfc_template, sizeof(*template)); + + if (use_no_reset_hba) { + /* template is for a no reset SCSI Host */ + template->max_sectors = 0xffff; + template->eh_host_reset_handler = NULL; + } + + /* Template for all vports this physical port creates */ + memcpy(&phba->vport_template, &lpfc_template, + sizeof(*template)); + phba->vport_template.max_sectors = 0xffff; + phba->vport_template.shost_attrs = lpfc_vport_attrs; + phba->vport_template.eh_bus_reset_handler = NULL; + phba->vport_template.eh_host_reset_handler = NULL; + phba->vport_template.vendor_id = 0; + + /* Initialize the host templates with updated value */ + if (phba->sli_rev == LPFC_SLI_REV4) { + template->sg_tablesize = phba->cfg_scsi_seg_cnt; + phba->vport_template.sg_tablesize = + phba->cfg_scsi_seg_cnt; + } else { + template->sg_tablesize = phba->cfg_sg_seg_cnt; + phba->vport_template.sg_tablesize = + phba->cfg_sg_seg_cnt; + } + } else { - if (!use_no_reset_hba) - shost = scsi_host_alloc(&lpfc_template, - sizeof(struct lpfc_vport)); - else - shost = scsi_host_alloc(&lpfc_template_no_hr, - sizeof(struct lpfc_vport)); 
+ /* NVMET is for physical port only */ + memcpy(template, &lpfc_template_nvme, + sizeof(*template)); } - } else if (phba->cfg_enable_fc4_type & LPFC_ENABLE_NVME) { - shost = scsi_host_alloc(&lpfc_template_nvme, - sizeof(struct lpfc_vport)); + } else { + template = &phba->vport_template; } + + shost = scsi_host_alloc(template, sizeof(struct lpfc_vport)); if (!shost) goto out; @@ -4329,6 +4358,12 @@ lpfc_create_port(struct lpfc_hba *phba, int instance, struct device *dev) vport->port_type = LPFC_PHYSICAL_PORT; } + lpfc_printf_log(phba, KERN_INFO, LOG_INIT | LOG_FCP, + "9081 CreatePort TMPLATE type %x TBLsize %d " + "SEGcnt %d/%d\n", + vport->port_type, shost->sg_tablesize, + phba->cfg_scsi_seg_cnt, phba->cfg_sg_seg_cnt); + /* Initialize all internally managed lists. */ INIT_LIST_HEAD(&vport->fc_nodes); INIT_LIST_HEAD(&vport->rcv_buffer_list); @@ -6301,11 +6336,6 @@ lpfc_sli_driver_resource_setup(struct lpfc_hba *phba) * used to create the sg_dma_buf_pool must be dynamically calculated. */ - /* Initialize the host templates the configured values. */ - lpfc_vport_template.sg_tablesize = phba->cfg_sg_seg_cnt; - lpfc_template_no_hr.sg_tablesize = phba->cfg_sg_seg_cnt; - lpfc_template.sg_tablesize = phba->cfg_sg_seg_cnt; - if (phba->sli_rev == LPFC_SLI_REV4) entry_sz = sizeof(struct sli4_sge); else @@ -6346,7 +6376,7 @@ lpfc_sli_driver_resource_setup(struct lpfc_hba *phba) } lpfc_printf_log(phba, KERN_INFO, LOG_INIT | LOG_FCP, - "9088 sg_tablesize:%d dmabuf_size:%d total_bde:%d\n", + "9088 INIT sg_tablesize:%d dmabuf_size:%d total_bde:%d\n", phba->cfg_sg_seg_cnt, phba->cfg_sg_dma_buf_size, phba->cfg_total_seg_cnt); @@ -6816,11 +6846,6 @@ lpfc_sli4_driver_resource_setup(struct lpfc_hba *phba) phba->cfg_nvme_seg_cnt = phba->cfg_sg_seg_cnt; } - /* Initialize the host templates with the updated values. 
*/ - lpfc_vport_template.sg_tablesize = phba->cfg_scsi_seg_cnt; - lpfc_template.sg_tablesize = phba->cfg_scsi_seg_cnt; - lpfc_template_no_hr.sg_tablesize = phba->cfg_scsi_seg_cnt; - lpfc_printf_log(phba, KERN_INFO, LOG_INIT | LOG_FCP, "9087 sg_seg_cnt:%d dmabuf_size:%d " "total:%d scsi:%d nvme:%d\n", diff --git a/drivers/scsi/lpfc/lpfc_scsi.c b/drivers/scsi/lpfc/lpfc_scsi.c index 0fc9a242bc65..be62795715f7 100644 --- a/drivers/scsi/lpfc/lpfc_scsi.c +++ b/drivers/scsi/lpfc/lpfc_scsi.c @@ -6023,31 +6023,6 @@ struct scsi_host_template lpfc_template_nvme = { .track_queue_depth = 0, }; -struct scsi_host_template lpfc_template_no_hr = { - .module = THIS_MODULE, - .name = LPFC_DRIVER_NAME, - .proc_name = LPFC_DRIVER_NAME, - .info = lpfc_info, - .queuecommand = lpfc_queuecommand, - .eh_timed_out = fc_eh_timed_out, - .eh_abort_handler = lpfc_abort_handler, - .eh_device_reset_handler = lpfc_device_reset_handler, - .eh_target_reset_handler = lpfc_target_reset_handler, - .eh_bus_reset_handler = lpfc_bus_reset_handler, - .slave_alloc = lpfc_slave_alloc, - .slave_configure = lpfc_slave_configure, - .slave_destroy = lpfc_slave_destroy, - .scan_finished = lpfc_scan_finished, - .this_id = -1, - .sg_tablesize = LPFC_DEFAULT_SG_SEG_CNT, - .cmd_per_lun = LPFC_CMD_PER_LUN, - .shost_attrs = lpfc_hba_attrs, - .max_sectors = 0xFFFFFFFF, - .vendor_id = LPFC_NL_VENDOR_ID, - .change_queue_depth = scsi_change_queue_depth, - .track_queue_depth = 1, -}; - struct scsi_host_template lpfc_template = { .module = THIS_MODULE, .name = LPFC_DRIVER_NAME, @@ -6073,26 +6048,3 @@ struct scsi_host_template lpfc_template = { .change_queue_depth = scsi_change_queue_depth, .track_queue_depth = 1, }; - -struct scsi_host_template lpfc_vport_template = { - .module = THIS_MODULE, - .name = LPFC_DRIVER_NAME, - .proc_name = LPFC_DRIVER_NAME, - .info = lpfc_info, - .queuecommand = lpfc_queuecommand, - .eh_timed_out = fc_eh_timed_out, - .eh_abort_handler = lpfc_abort_handler, - .eh_device_reset_handler = 
lpfc_device_reset_handler, - .eh_target_reset_handler = lpfc_target_reset_handler, - .slave_alloc = lpfc_slave_alloc, - .slave_configure = lpfc_slave_configure, - .slave_destroy = lpfc_slave_destroy, - .scan_finished = lpfc_scan_finished, - .this_id = -1, - .sg_tablesize = LPFC_DEFAULT_SG_SEG_CNT, - .cmd_per_lun = LPFC_CMD_PER_LUN, - .shost_attrs = lpfc_vport_attrs, - .max_sectors = 0xFFFF, - .change_queue_depth = scsi_change_queue_depth, - .track_queue_depth = 1, -}; From 4cd70891308dfb875ef31060c4a4aa8872630a2e Mon Sep 17 00:00:00 2001 From: James Smart Date: Sun, 22 Mar 2020 11:12:57 -0700 Subject: [PATCH 008/280] scsi: lpfc: Fix crash after handling a pci error Injecting EEH on a 32GB card is causing kernel oops. The pci error handler is doing an IO flush and the offline code is also doing an IO flush. When the 1st flush is complete the hdwq is destroyed (freed), yet the second flush accesses the hdwq and crashes. Added a check in lpfc_sli_flush_io_rings to check both the HBA_IOQ_FLUSH flag and the hdwq pointer to see if it is already set and not already freed. Link: https://lore.kernel.org/r/20200322181304.37655-6-jsmart2021@gmail.com Signed-off-by: James Smart Signed-off-by: Dick Kennedy Signed-off-by: Martin K. 
Petersen --- drivers/scsi/lpfc/lpfc_sli.c | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/drivers/scsi/lpfc/lpfc_sli.c b/drivers/scsi/lpfc/lpfc_sli.c index 08bf2f0a1065..780ff187e9a3 100644 --- a/drivers/scsi/lpfc/lpfc_sli.c +++ b/drivers/scsi/lpfc/lpfc_sli.c @@ -4046,6 +4046,11 @@ lpfc_sli_flush_io_rings(struct lpfc_hba *phba) struct lpfc_iocbq *piocb, *next_iocb; spin_lock_irq(&phba->hbalock); + if (phba->hba_flag & HBA_IOQ_FLUSH || + !phba->sli4_hba.hdwq) { + spin_unlock_irq(&phba->hbalock); + return; + } /* Indicate the I/O queues are flushed */ phba->hba_flag |= HBA_IOQ_FLUSH; spin_unlock_irq(&phba->hbalock); From 1543af381e7b784fbc896047a037b6d9ee6e9b6f Mon Sep 17 00:00:00 2001 From: James Smart Date: Sun, 22 Mar 2020 11:12:58 -0700 Subject: [PATCH 009/280] scsi: lpfc: Fix update of wq consumer index in lpfc_sli4_wq_release The lpfc_sli4_wq_release() routine iterates for each interim value when updating the wq consumer index. This wastes cycles and possibly confuses things as the value iterates (and the modulo logic is being applied). There's no reason for this. Just set it to the value from the hw. Link: https://lore.kernel.org/r/20200322181304.37655-7-jsmart2021@gmail.com Signed-off-by: James Smart Signed-off-by: Dick Kennedy Signed-off-by: Martin K. Petersen --- drivers/scsi/lpfc/lpfc_sli.c | 17 ++++------------- 1 file changed, 4 insertions(+), 13 deletions(-) diff --git a/drivers/scsi/lpfc/lpfc_sli.c b/drivers/scsi/lpfc/lpfc_sli.c index 780ff187e9a3..52ccaebd6f2c 100644 --- a/drivers/scsi/lpfc/lpfc_sli.c +++ b/drivers/scsi/lpfc/lpfc_sli.c @@ -230,25 +230,16 @@ lpfc_sli4_wq_put(struct lpfc_queue *q, union lpfc_wqe128 *wqe) * This routine will update the HBA index of a queue to reflect consumption of * Work Queue Entries by the HBA. When the HBA indicates that it has consumed * an entry the host calls this function to update the queue's internal - * pointers. This routine returns the number of entries that were consumed by - * the HBA. + * pointers. 
**/ -static uint32_t +static void lpfc_sli4_wq_release(struct lpfc_queue *q, uint32_t index) { - uint32_t released = 0; - /* sanity check on queue memory */ if (unlikely(!q)) - return 0; + return; - if (q->hba_index == index) - return 0; - do { - q->hba_index = ((q->hba_index + 1) % q->entry_count); - released++; - } while (q->hba_index != index); - return released; + q->hba_index = index; } /** From 807e7353d8a7105ce884d22b0dbc034993c6679c Mon Sep 17 00:00:00 2001 From: James Smart Date: Sun, 22 Mar 2020 11:12:59 -0700 Subject: [PATCH 010/280] scsi: lpfc: Fix crash in target side cable pulls hitting WAIT_FOR_UNREG Kernel is crashing with the following stacktrace: BUG: unable to handle kernel NULL pointer dereference at 00000000000005bc IP: lpfc_nvme_register_port+0x1a8/0x3a0 [lpfc] ... Call Trace: lpfc_nlp_state_cleanup+0x2b2/0x500 [lpfc] lpfc_nlp_set_state+0xd7/0x1a0 [lpfc] lpfc_cmpl_prli_prli_issue+0x1f7/0x450 [lpfc] lpfc_disc_state_machine+0x7a/0x1e0 [lpfc] lpfc_cmpl_els_prli+0x16f/0x1e0 [lpfc] lpfc_sli_sp_handle_rspiocb+0x5b2/0x690 [lpfc] lpfc_sli_handle_slow_ring_event_s4+0x182/0x230 [lpfc] lpfc_do_work+0x87f/0x1570 [lpfc] kthread+0x10d/0x130 ret_from_fork+0x35/0x40 During target side fault injections, it is possible to hit the NLP_WAIT_FOR_UNREG case in lpfc_nvme_remoteport_delete. A prior commit fixed a rebind and delete race condition, but called lpfc_nlp_put unconditionally. This triggered a deletion and the crash. Fix by moving nlp_put to inside the NLP_WAIT_FOR_UNREG case, where the nlp is being unregistered/removed. Leave the reference if the flag isn't set. Link: https://lore.kernel.org/r/20200322181304.37655-8-jsmart2021@gmail.com Fixes: b15bd3e6212e ("scsi: lpfc: Fix nvme remoteport registration race conditions") Signed-off-by: James Smart Signed-off-by: Dick Kennedy Signed-off-by: Martin K. 
Petersen --- drivers/scsi/lpfc/lpfc_nvme.c | 14 ++++++++------ 1 file changed, 8 insertions(+), 6 deletions(-) diff --git a/drivers/scsi/lpfc/lpfc_nvme.c b/drivers/scsi/lpfc/lpfc_nvme.c index f6c8963c915d..32b28651039e 100644 --- a/drivers/scsi/lpfc/lpfc_nvme.c +++ b/drivers/scsi/lpfc/lpfc_nvme.c @@ -382,13 +382,15 @@ lpfc_nvme_remoteport_delete(struct nvme_fc_remote_port *remoteport) if (ndlp->upcall_flags & NLP_WAIT_FOR_UNREG) { ndlp->nrport = NULL; ndlp->upcall_flags &= ~NLP_WAIT_FOR_UNREG; - } - spin_unlock_irq(&vport->phba->hbalock); + spin_unlock_irq(&vport->phba->hbalock); - /* Remove original register reference. The host transport - * won't reference this rport/remoteport any further. - */ - lpfc_nlp_put(ndlp); + /* Remove original register reference. The host transport + * won't reference this rport/remoteport any further. + */ + lpfc_nlp_put(ndlp); + } else { + spin_unlock_irq(&vport->phba->hbalock); + } rport_err: return; From f8e566c0f5e1fd8de33ccec6eb1ff815cd4b0dc3 Mon Sep 17 00:00:00 2001 From: "Darrick J. Wong" Date: Tue, 24 Mar 2020 23:03:24 -0700 Subject: [PATCH 011/280] xfs: validate the realtime geometry in xfs_validate_sb_common Validate the realtime geometry when we mount the filesystem, so that we don't abruptly shut down the filesystem later on. Signed-off-by: Darrick J. 
Wong Reviewed-by: Dave Chinner --- fs/xfs/libxfs/xfs_sb.c | 32 ++++++++++++++++++++++++++++++++ 1 file changed, 32 insertions(+) diff --git a/fs/xfs/libxfs/xfs_sb.c b/fs/xfs/libxfs/xfs_sb.c index 00266de58954..c526c5e5ab76 100644 --- a/fs/xfs/libxfs/xfs_sb.c +++ b/fs/xfs/libxfs/xfs_sb.c @@ -328,6 +328,38 @@ xfs_validate_sb_common( return -EFSCORRUPTED; } + /* Validate the realtime geometry; stolen from xfs_repair */ + if (sbp->sb_rextsize * sbp->sb_blocksize > XFS_MAX_RTEXTSIZE || + sbp->sb_rextsize * sbp->sb_blocksize < XFS_MIN_RTEXTSIZE) { + xfs_notice(mp, + "realtime extent sanity check failed"); + return -EFSCORRUPTED; + } + + if (sbp->sb_rblocks == 0) { + if (sbp->sb_rextents != 0 || sbp->sb_rbmblocks != 0 || + sbp->sb_rextslog != 0 || sbp->sb_frextents != 0) { + xfs_notice(mp, + "realtime zeroed geometry check failed"); + return -EFSCORRUPTED; + } + } else { + uint64_t rexts; + uint64_t rbmblocks; + + rexts = div_u64(sbp->sb_rblocks, sbp->sb_rextsize); + rbmblocks = howmany_64(sbp->sb_rextents, + NBBY * sbp->sb_blocksize); + + if (sbp->sb_rextents != rexts || + sbp->sb_rextslog != xfs_highbit32(sbp->sb_rextents) || + sbp->sb_rbmblocks != rbmblocks) { + xfs_notice(mp, + "realtime geometry sanity check failed"); + return -EFSCORRUPTED; + } + } + if (sbp->sb_unit) { if (!xfs_sb_version_hasdalign(sbp) || sbp->sb_unit > sbp->sb_width || From 7ec949212dba350f1dbc339d2db844db68b39725 Mon Sep 17 00:00:00 2001 From: Dave Chinner Date: Wed, 25 Mar 2020 18:18:20 -0700 Subject: [PATCH 012/280] xfs: don't try to write a start record into every iclog The xlog_write() function iterates over iclogs until it completes writing all the log vectors passed in. The ticket tracks whether a start record has been written or not, so only the first iclog gets a start record. We only ever pass single use tickets to xlog_write() so we only ever need to write a start record once per xlog_write() call. 
Hence we don't need to store whether we should write a start record in the ticket as the callers provide all the information we need to determine if a start record should be written. For the moment, we have to ensure that we clear the XLOG_TIC_INITED appropriately so the code in xfs_log_done() still works correctly for committing transactions. (darrick: Note the slight behavior change that we always deduct the size of the op header from the ticket, even for unmount records) Signed-off-by: Dave Chinner [hch: pass an explicit need_start_rec argument] Signed-off-by: Christoph Hellwig Reviewed-by: Brian Foster Reviewed-by: Darrick J. Wong Signed-off-by: Darrick J. Wong --- fs/xfs/xfs_log.c | 77 +++++++++++++++++++++---------------------- fs/xfs/xfs_log_cil.c | 2 +- fs/xfs/xfs_log_priv.h | 12 +++---- 3 files changed, 42 insertions(+), 49 deletions(-) diff --git a/fs/xfs/xfs_log.c b/fs/xfs/xfs_log.c index 4a53768c5397..33f089aa2a25 100644 --- a/fs/xfs/xfs_log.c +++ b/fs/xfs/xfs_log.c @@ -921,7 +921,7 @@ xfs_log_write_unmount_record( /* remove inited flag, and account for space used */ tic->t_flags = 0; tic->t_curr_res -= sizeof(magic); - error = xlog_write(log, &vec, tic, &lsn, NULL, flags); + error = xlog_write(log, &vec, tic, &lsn, NULL, flags, false); /* * At this point, we're umounting anyway, so there's no point in * transitioning log state to IOERROR. Just continue... @@ -1541,7 +1541,7 @@ xlog_commit_record( ASSERT_ALWAYS(iclog); error = xlog_write(log, &vec, ticket, commitlsnp, iclog, - XLOG_COMMIT_TRANS); + XLOG_COMMIT_TRANS, false); if (error) xfs_force_shutdown(mp, SHUTDOWN_LOG_IO_ERROR); return error; @@ -2118,23 +2118,21 @@ xlog_print_trans( } /* - * Calculate the potential space needed by the log vector. Each region gets - * its own xlog_op_header_t and may need to be double word aligned. + * Calculate the potential space needed by the log vector. 
We may need a start + * record, and each region gets its own struct xlog_op_header and may need to be + * double word aligned. */ static int xlog_write_calc_vec_length( struct xlog_ticket *ticket, - struct xfs_log_vec *log_vector) + struct xfs_log_vec *log_vector, + bool need_start_rec) { struct xfs_log_vec *lv; - int headers = 0; + int headers = need_start_rec ? 1 : 0; int len = 0; int i; - /* acct for start rec of xact */ - if (ticket->t_flags & XLOG_TIC_INITED) - headers++; - for (lv = log_vector; lv; lv = lv->lv_next) { /* we don't write ordered log vectors */ if (lv->lv_buf_len == XFS_LOG_VEC_ORDERED) @@ -2156,27 +2154,16 @@ xlog_write_calc_vec_length( return len; } -/* - * If first write for transaction, insert start record We can't be trying to - * commit if we are inited. We can't have any "partial_copy" if we are inited. - */ -static int +static void xlog_write_start_rec( struct xlog_op_header *ophdr, struct xlog_ticket *ticket) { - if (!(ticket->t_flags & XLOG_TIC_INITED)) - return 0; - ophdr->oh_tid = cpu_to_be32(ticket->t_tid); ophdr->oh_clientid = ticket->t_clientid; ophdr->oh_len = 0; ophdr->oh_flags = XLOG_START_TRANS; ophdr->oh_res2 = 0; - - ticket->t_flags &= ~XLOG_TIC_INITED; - - return sizeof(struct xlog_op_header); } static xlog_op_header_t * @@ -2365,7 +2352,8 @@ xlog_write( struct xlog_ticket *ticket, xfs_lsn_t *start_lsn, struct xlog_in_core **commit_iclog, - uint flags) + uint flags, + bool need_start_rec) { struct xlog_in_core *iclog = NULL; struct xfs_log_iovec *vecp; @@ -2381,23 +2369,22 @@ xlog_write( *start_lsn = 0; - len = xlog_write_calc_vec_length(ticket, log_vector); /* - * Region headers and bytes are already accounted for. - * We only need to take into account start records and - * split regions in this function. + * Region headers and bytes are already accounted for. We only need to + * take into account start records and split regions in this function. 
*/ - if (ticket->t_flags & XLOG_TIC_INITED) - ticket->t_curr_res -= sizeof(xlog_op_header_t); + if (ticket->t_flags & XLOG_TIC_INITED) { + ticket->t_curr_res -= sizeof(struct xlog_op_header); + ticket->t_flags &= ~XLOG_TIC_INITED; + } /* - * Commit record headers need to be accounted for. These - * come in as separate writes so are easy to detect. + * Commit record headers and unmount records need to be accounted for. + * These come in as separate writes so are easy to detect. */ - if (flags & (XLOG_COMMIT_TRANS | XLOG_UNMOUNT_TRANS)) - ticket->t_curr_res -= sizeof(xlog_op_header_t); - + if (!need_start_rec) + ticket->t_curr_res -= sizeof(struct xlog_op_header); if (ticket->t_curr_res < 0) { xfs_alert_tag(log->l_mp, XFS_PTAG_LOGRES, "ctx ticket reservation ran out. Need to up reservation"); @@ -2405,6 +2392,8 @@ xlog_write( xfs_force_shutdown(log->l_mp, SHUTDOWN_LOG_IO_ERROR); } + len = xlog_write_calc_vec_length(ticket, log_vector, need_start_rec); + index = 0; lv = log_vector; vecp = lv->lv_iovecp; @@ -2431,7 +2420,6 @@ xlog_write( while (lv && (!lv->lv_niovecs || index < lv->lv_niovecs)) { struct xfs_log_iovec *reg; struct xlog_op_header *ophdr; - int start_rec_copy; int copy_len; int copy_off; bool ordered = false; @@ -2447,11 +2435,15 @@ xlog_write( ASSERT(reg->i_len % sizeof(int32_t) == 0); ASSERT((unsigned long)ptr % sizeof(int32_t) == 0); - start_rec_copy = xlog_write_start_rec(ptr, ticket); - if (start_rec_copy) { - record_cnt++; + /* + * Before we start formatting log vectors, we need to + * write a start record. Only do this for the first + * iclog we write to. 
+ */ + if (need_start_rec) { + xlog_write_start_rec(ptr, ticket); xlog_write_adv_cnt(&ptr, &len, &log_offset, - start_rec_copy); + sizeof(struct xlog_op_header)); } ophdr = xlog_write_setup_ophdr(log, ptr, ticket, flags); @@ -2483,8 +2475,13 @@ xlog_write( xlog_write_adv_cnt(&ptr, &len, &log_offset, copy_len); } - copy_len += start_rec_copy + sizeof(xlog_op_header_t); + copy_len += sizeof(struct xlog_op_header); record_cnt++; + if (need_start_rec) { + copy_len += sizeof(struct xlog_op_header); + record_cnt++; + need_start_rec = false; + } data_cnt += contwr ? copy_len : 0; error = xlog_write_copy_finish(log, iclog, flags, diff --git a/fs/xfs/xfs_log_cil.c b/fs/xfs/xfs_log_cil.c index 64cc0bf2ab3b..e0aeb316ce6c 100644 --- a/fs/xfs/xfs_log_cil.c +++ b/fs/xfs/xfs_log_cil.c @@ -801,7 +801,7 @@ xlog_cil_push_work( lvhdr.lv_iovecp = &lhdr; lvhdr.lv_next = ctx->lv_chain; - error = xlog_write(log, &lvhdr, tic, &ctx->start_lsn, NULL, 0); + error = xlog_write(log, &lvhdr, tic, &ctx->start_lsn, NULL, 0, true); if (error) goto out_abort_free_ticket; diff --git a/fs/xfs/xfs_log_priv.h b/fs/xfs/xfs_log_priv.h index 2b0aec37e73e..b895e16460ee 100644 --- a/fs/xfs/xfs_log_priv.h +++ b/fs/xfs/xfs_log_priv.h @@ -439,14 +439,10 @@ xlog_write_adv_cnt(void **ptr, int *len, int *off, size_t bytes) void xlog_print_tic_res(struct xfs_mount *mp, struct xlog_ticket *ticket); void xlog_print_trans(struct xfs_trans *); -int -xlog_write( - struct xlog *log, - struct xfs_log_vec *log_vector, - struct xlog_ticket *tic, - xfs_lsn_t *start_lsn, - struct xlog_in_core **commit_iclog, - uint flags); +int xlog_write(struct xlog *log, struct xfs_log_vec *log_vector, + struct xlog_ticket *tic, xfs_lsn_t *start_lsn, + struct xlog_in_core **commit_iclog, uint flags, + bool need_start_rec); /* * When we crack an atomic LSN, we sample it first so that the value will not From 9590e9c68449055c97e954118699bc9d470d7e30 Mon Sep 17 00:00:00 2001 From: Dave Chinner Date: Wed, 25 Mar 2020 18:18:21 -0700 Subject: 
[PATCH 013/280] xfs: re-order initial space accounting checks in xlog_write Commit and unmount records do not need start records to be written, so rearrange the logic in xlog_write() to remove the need to check for XLOG_TIC_INITED to determine if we should account for the space used by a start record. Signed-off-by: Dave Chinner Signed-off-by: Christoph Hellwig Reviewed-by: Brian Foster Reviewed-by: Darrick J. Wong Signed-off-by: Darrick J. Wong --- fs/xfs/xfs_log.c | 31 ++++++++++--------------------- 1 file changed, 10 insertions(+), 21 deletions(-) diff --git a/fs/xfs/xfs_log.c b/fs/xfs/xfs_log.c index 33f089aa2a25..3a4fe4e2e736 100644 --- a/fs/xfs/xfs_log.c +++ b/fs/xfs/xfs_log.c @@ -2356,10 +2356,10 @@ xlog_write( bool need_start_rec) { struct xlog_in_core *iclog = NULL; - struct xfs_log_iovec *vecp; - struct xfs_log_vec *lv; + struct xfs_log_vec *lv = log_vector; + struct xfs_log_iovec *vecp = lv->lv_iovecp; + int index = 0; int len; - int index; int partial_copy = 0; int partial_copy_len = 0; int contwr = 0; @@ -2367,24 +2367,16 @@ xlog_write( int data_cnt = 0; int error = 0; - *start_lsn = 0; - - /* - * Region headers and bytes are already accounted for. We only need to - * take into account start records and split regions in this function. + * If this is a commit or unmount transaction, we don't need a start + * record to be written. We do, however, have to account for the + * commit or unmount header that gets written. Hence we always have + * to account for an extra xlog_op_header here. */ - if (ticket->t_flags & XLOG_TIC_INITED) { - ticket->t_curr_res -= sizeof(struct xlog_op_header); + ticket->t_curr_res -= sizeof(struct xlog_op_header); + if (ticket->t_flags & XLOG_TIC_INITED) ticket->t_flags &= ~XLOG_TIC_INITED; - } - /* - * Commit record headers and unmount records need to be accounted for. - * These come in as separate writes so are easy to detect.
- */ - if (!need_start_rec) - ticket->t_curr_res -= sizeof(struct xlog_op_header); if (ticket->t_curr_res < 0) { xfs_alert_tag(log->l_mp, XFS_PTAG_LOGRES, "ctx ticket reservation ran out. Need to up reservation"); @@ -2393,10 +2385,7 @@ xlog_write( } len = xlog_write_calc_vec_length(ticket, log_vector, need_start_rec); - - index = 0; - lv = log_vector; - vecp = lv->lv_iovecp; + *start_lsn = 0; while (lv && (!lv->lv_niovecs || index < lv->lv_niovecs)) { void *ptr; int log_offset; From dd401770b0ff68f896002649c593bbb9560f916d Mon Sep 17 00:00:00 2001 From: Dave Chinner Date: Wed, 25 Mar 2020 18:18:21 -0700 Subject: [PATCH 014/280] xfs: refactor and split xfs_log_done() xfs_log_done() does two separate things. Firstly, it triggers commit records to be written for permanent transactions, and secondly it releases or regrants transaction reservation space. Since delayed logging was introduced, transactions no longer write directly to the log, hence they never have the XLOG_TIC_INITED flag cleared on them. Hence transactions never write commit records to the log and only need to modify reservation space. Split up xfs_log_done into two parts, and only call the parts of the operation needed for the context xfs_log_done() is currently being called from. Signed-off-by: Dave Chinner Signed-off-by: Christoph Hellwig Reviewed-by: Brian Foster Reviewed-by: Darrick J. Wong Signed-off-by: Darrick J. Wong --- fs/xfs/xfs_log.c | 66 +++++++++++++++---------------------------- fs/xfs/xfs_log.h | 4 --- fs/xfs/xfs_log_cil.c | 13 +++++---- fs/xfs/xfs_log_priv.h | 4 +++ fs/xfs/xfs_trans.c | 24 ++++++++-------- 5 files changed, 45 insertions(+), 66 deletions(-) diff --git a/fs/xfs/xfs_log.c b/fs/xfs/xfs_log.c index 3a4fe4e2e736..910bcff6b50a 100644 --- a/fs/xfs/xfs_log.c +++ b/fs/xfs/xfs_log.c @@ -487,62 +487,40 @@ xfs_log_reserve( */ /* - * This routine is called when a user of a log manager ticket is done with - * the reservation. 
If the ticket was ever used, then a commit record for - * the associated transaction is written out as a log operation header with - * no data. The flag XLOG_TIC_INITED is set when the first write occurs with - * a given ticket. If the ticket was one with a permanent reservation, then - * a few operations are done differently. Permanent reservation tickets by - * default don't release the reservation. They just commit the current - * transaction with the belief that the reservation is still needed. A flag - * must be passed in before permanent reservations are actually released. - * When these type of tickets are not released, they need to be set into - * the inited state again. By doing this, a start record will be written - * out when the next write occurs. + * Write a commit record to the log to close off a running log write. */ -xfs_lsn_t -xfs_log_done( - struct xfs_mount *mp, +int +xlog_write_done( + struct xlog *log, struct xlog_ticket *ticket, struct xlog_in_core **iclog, + xfs_lsn_t *lsn) +{ + if (XLOG_FORCED_SHUTDOWN(log)) + return -EIO; + + return xlog_commit_record(log, ticket, iclog, lsn); +} + +/* + * Release or regrant the ticket reservation now the transaction is done with + * it depending on caller context. Rolling transactions need the ticket + * regranted, otherwise we release it completely. + */ +void +xlog_ticket_done( + struct xlog *log, + struct xlog_ticket *ticket, bool regrant) { - struct xlog *log = mp->m_log; - xfs_lsn_t lsn = 0; - - if (XLOG_FORCED_SHUTDOWN(log) || - /* - * If nothing was ever written, don't write out commit record. - * If we get an error, just continue and give back the log ticket. 
- */ - (((ticket->t_flags & XLOG_TIC_INITED) == 0) && - (xlog_commit_record(log, ticket, iclog, &lsn)))) { - lsn = (xfs_lsn_t) -1; - regrant = false; - } - - - if (!regrant) { + if (!regrant || XLOG_FORCED_SHUTDOWN(log)) { trace_xfs_log_done_nonperm(log, ticket); - - /* - * Release ticket if not permanent reservation or a specific - * request has been made to release a permanent reservation. - */ xlog_ungrant_log_space(log, ticket); } else { trace_xfs_log_done_perm(log, ticket); - xlog_regrant_reserve_log_space(log, ticket); - /* If this ticket was a permanent reservation and we aren't - * trying to release it, reset the inited flags; so next time - * we write, a start record will be written out. - */ - ticket->t_flags |= XLOG_TIC_INITED; } - xfs_log_ticket_put(ticket); - return lsn; } static bool diff --git a/fs/xfs/xfs_log.h b/fs/xfs/xfs_log.h index cc77cc36560a..1412d6993f1e 100644 --- a/fs/xfs/xfs_log.h +++ b/fs/xfs/xfs_log.h @@ -105,10 +105,6 @@ struct xfs_log_item; struct xfs_item_ops; struct xfs_trans; -xfs_lsn_t xfs_log_done(struct xfs_mount *mp, - struct xlog_ticket *ticket, - struct xlog_in_core **iclog, - bool regrant); int xfs_log_force(struct xfs_mount *mp, uint flags); int xfs_log_force_lsn(struct xfs_mount *mp, xfs_lsn_t lsn, uint flags, int *log_forced); diff --git a/fs/xfs/xfs_log_cil.c b/fs/xfs/xfs_log_cil.c index e0aeb316ce6c..666041ef058f 100644 --- a/fs/xfs/xfs_log_cil.c +++ b/fs/xfs/xfs_log_cil.c @@ -839,10 +839,11 @@ xlog_cil_push_work( } spin_unlock(&cil->xc_push_lock); - /* xfs_log_done always frees the ticket on error. 
*/ - commit_lsn = xfs_log_done(log->l_mp, tic, &commit_iclog, false); - if (commit_lsn == -1) - goto out_abort; + error = xlog_write_done(log, tic, &commit_iclog, &commit_lsn); + if (error) + goto out_abort_free_ticket; + + xlog_ticket_done(log, tic, false); spin_lock(&commit_iclog->ic_callback_lock); if (commit_iclog->ic_state == XLOG_STATE_IOERROR) { @@ -875,7 +876,7 @@ xlog_cil_push_work( return; out_abort_free_ticket: - xfs_log_ticket_put(tic); + xlog_ticket_done(log, tic, false); out_abort: ASSERT(XLOG_FORCED_SHUTDOWN(log)); xlog_cil_committed(ctx); @@ -1007,7 +1008,7 @@ xfs_log_commit_cil( if (commit_lsn) *commit_lsn = xc_commit_lsn; - xfs_log_done(mp, tp->t_ticket, NULL, regrant); + xlog_ticket_done(log, tp->t_ticket, regrant); tp->t_ticket = NULL; xfs_trans_unreserve_and_mod_sb(tp); diff --git a/fs/xfs/xfs_log_priv.h b/fs/xfs/xfs_log_priv.h index b895e16460ee..1f450ea6192c 100644 --- a/fs/xfs/xfs_log_priv.h +++ b/fs/xfs/xfs_log_priv.h @@ -443,6 +443,10 @@ int xlog_write(struct xlog *log, struct xfs_log_vec *log_vector, struct xlog_ticket *tic, xfs_lsn_t *start_lsn, struct xlog_in_core **commit_iclog, uint flags, bool need_start_rec); +int xlog_write_done(struct xlog *log, struct xlog_ticket *ticket, + struct xlog_in_core **iclog, xfs_lsn_t *lsn); +void xlog_ticket_done(struct xlog *log, struct xlog_ticket *ticket, + bool regrant); /* * When we crack an atomic LSN, we sample it first so that the value will not diff --git a/fs/xfs/xfs_trans.c b/fs/xfs/xfs_trans.c index 1adc6bc53a56..e20c759f4884 100644 --- a/fs/xfs/xfs_trans.c +++ b/fs/xfs/xfs_trans.c @@ -9,6 +9,7 @@ #include "xfs_shared.h" #include "xfs_format.h" #include "xfs_log_format.h" +#include "xfs_log_priv.h" #include "xfs_trans_resv.h" #include "xfs_mount.h" #include "xfs_extent_busy.h" @@ -150,8 +151,9 @@ xfs_trans_reserve( uint blocks, uint rtextents) { - int error = 0; - bool rsvd = (tp->t_flags & XFS_TRANS_RESERVE) != 0; + struct xfs_mount *mp = tp->t_mountp; + int error = 0; + bool rsvd = 
(tp->t_flags & XFS_TRANS_RESERVE) != 0; /* Mark this thread as being in a transaction */ current_set_flags_nested(&tp->t_pflags, PF_MEMALLOC_NOFS); @@ -162,7 +164,7 @@ xfs_trans_reserve( * fail if the count would go below zero. */ if (blocks > 0) { - error = xfs_mod_fdblocks(tp->t_mountp, -((int64_t)blocks), rsvd); + error = xfs_mod_fdblocks(mp, -((int64_t)blocks), rsvd); if (error != 0) { current_restore_flags_nested(&tp->t_pflags, PF_MEMALLOC_NOFS); return -ENOSPC; @@ -191,9 +193,9 @@ xfs_trans_reserve( if (tp->t_ticket != NULL) { ASSERT(resp->tr_logflags & XFS_TRANS_PERM_LOG_RES); - error = xfs_log_regrant(tp->t_mountp, tp->t_ticket); + error = xfs_log_regrant(mp, tp->t_ticket); } else { - error = xfs_log_reserve(tp->t_mountp, + error = xfs_log_reserve(mp, resp->tr_logres, resp->tr_logcount, &tp->t_ticket, XFS_TRANSACTION, @@ -213,7 +215,7 @@ xfs_trans_reserve( * fail if the count would go below zero. */ if (rtextents > 0) { - error = xfs_mod_frextents(tp->t_mountp, -((int64_t)rtextents)); + error = xfs_mod_frextents(mp, -((int64_t)rtextents)); if (error) { error = -ENOSPC; goto undo_log; @@ -229,7 +231,7 @@ xfs_trans_reserve( */ undo_log: if (resp->tr_logres > 0) { - xfs_log_done(tp->t_mountp, tp->t_ticket, NULL, false); + xlog_ticket_done(mp->m_log, tp->t_ticket, false); tp->t_ticket = NULL; tp->t_log_res = 0; tp->t_flags &= ~XFS_TRANS_PERM_LOG_RES; @@ -237,7 +239,7 @@ xfs_trans_reserve( undo_blocks: if (blocks > 0) { - xfs_mod_fdblocks(tp->t_mountp, (int64_t)blocks, rsvd); + xfs_mod_fdblocks(mp, (int64_t)blocks, rsvd); tp->t_blk_res = 0; } @@ -1004,9 +1006,7 @@ __xfs_trans_commit( */ xfs_trans_unreserve_and_mod_dquots(tp); if (tp->t_ticket) { - commit_lsn = xfs_log_done(mp, tp->t_ticket, NULL, regrant); - if (commit_lsn == -1 && !error) - error = -EIO; + xlog_ticket_done(mp->m_log, tp->t_ticket, regrant); tp->t_ticket = NULL; } current_restore_flags_nested(&tp->t_pflags, PF_MEMALLOC_NOFS); @@ -1065,7 +1065,7 @@ xfs_trans_cancel( 
xfs_trans_unreserve_and_mod_dquots(tp); if (tp->t_ticket) { - xfs_log_done(mp, tp->t_ticket, NULL, false); + xlog_ticket_done(mp->m_log, tp->t_ticket, false); tp->t_ticket = NULL; } From 70e42f2d4797d4d3f09bc0f6df57e8b8c5597e27 Mon Sep 17 00:00:00 2001 From: Dave Chinner Date: Wed, 25 Mar 2020 18:18:22 -0700 Subject: [PATCH 015/280] xfs: kill XLOG_TIC_INITED It is no longer used or checked by anything, so remove the last traces from the log ticket code. Signed-off-by: Dave Chinner Signed-off-by: Christoph Hellwig Reviewed-by: Brian Foster Reviewed-by: Darrick J. Wong Signed-off-by: Darrick J. Wong --- fs/xfs/xfs_log.c | 4 ---- fs/xfs/xfs_log_priv.h | 6 ++---- 2 files changed, 2 insertions(+), 8 deletions(-) diff --git a/fs/xfs/xfs_log.c b/fs/xfs/xfs_log.c index 910bcff6b50a..3ccc3ca3c701 100644 --- a/fs/xfs/xfs_log.c +++ b/fs/xfs/xfs_log.c @@ -2352,9 +2352,6 @@ xlog_write( * to account for an extra xlog_op_header here. */ ticket->t_curr_res -= sizeof(struct xlog_op_header); - if (ticket->t_flags & XLOG_TIC_INITED) - ticket->t_flags &= ~XLOG_TIC_INITED; - if (ticket->t_curr_res < 0) { xfs_alert_tag(log->l_mp, XFS_PTAG_LOGRES, "ctx ticket reservation ran out.
Need to up reservation"); @@ -3493,7 +3490,6 @@ xlog_ticket_alloc( tic->t_ocnt = cnt; tic->t_tid = prandom_u32(); tic->t_clientid = client; - tic->t_flags = XLOG_TIC_INITED; if (permanent) tic->t_flags |= XLOG_TIC_PERM_RESERV; diff --git a/fs/xfs/xfs_log_priv.h b/fs/xfs/xfs_log_priv.h index 1f450ea6192c..fd63400fff6d 100644 --- a/fs/xfs/xfs_log_priv.h +++ b/fs/xfs/xfs_log_priv.h @@ -51,13 +51,11 @@ enum xlog_iclog_state { }; /* - * Flags to log ticket + * Log ticket flags */ -#define XLOG_TIC_INITED 0x1 /* has been initialized */ -#define XLOG_TIC_PERM_RESERV 0x2 /* permanent reservation */ +#define XLOG_TIC_PERM_RESERV 0x1 /* permanent reservation */ #define XLOG_TIC_FLAGS \ - { XLOG_TIC_INITED, "XLOG_TIC_INITED" }, \ { XLOG_TIC_PERM_RESERV, "XLOG_TIC_PERM_RESERV" } /* From 8b41e3f98e6ca17ed54615bb7a419c499d370a85 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Wed, 25 Mar 2020 18:18:23 -0700 Subject: [PATCH 016/280] xfs: split xlog_ticket_done Remove xlog_ticket_done and just call the renamed low-level helpers for ungranting or regranting log space directly. To make that a little easier, the reference put on the ticket and all tracing is moved into the actual helpers. Signed-off-by: Christoph Hellwig Reviewed-by: Dave Chinner Reviewed-by: Brian Foster Reviewed-by: Darrick J. Wong Signed-off-by: Darrick J.
Wong --- fs/xfs/xfs_log.c | 84 ++++++++++++++----------------------------- fs/xfs/xfs_log_cil.c | 9 +++-- fs/xfs/xfs_log_priv.h | 4 +-- fs/xfs/xfs_trace.h | 14 ++++---- fs/xfs/xfs_trans.c | 9 +++-- 5 files changed, 47 insertions(+), 73 deletions(-) diff --git a/fs/xfs/xfs_log.c b/fs/xfs/xfs_log.c index 3ccc3ca3c701..a22962e35beb 100644 --- a/fs/xfs/xfs_log.c +++ b/fs/xfs/xfs_log.c @@ -66,14 +66,6 @@ xlog_grant_push_ail( struct xlog *log, int need_bytes); STATIC void -xlog_regrant_reserve_log_space( - struct xlog *log, - struct xlog_ticket *ticket); -STATIC void -xlog_ungrant_log_space( - struct xlog *log, - struct xlog_ticket *ticket); -STATIC void xlog_sync( struct xlog *log, struct xlog_in_core *iclog); @@ -502,27 +494,6 @@ xlog_write_done( return xlog_commit_record(log, ticket, iclog, lsn); } -/* - * Release or regrant the ticket reservation now the transaction is done with - * it depending on caller context. Rolling transactions need the ticket - * regranted, otherwise we release it completely. - */ -void -xlog_ticket_done( - struct xlog *log, - struct xlog_ticket *ticket, - bool regrant) -{ - if (!regrant || XLOG_FORCED_SHUTDOWN(log)) { - trace_xfs_log_done_nonperm(log, ticket); - xlog_ungrant_log_space(log, ticket); - } else { - trace_xfs_log_done_perm(log, ticket); - xlog_regrant_reserve_log_space(log, ticket); - } - xfs_log_ticket_put(ticket); -} - static bool __xlog_state_release_iclog( struct xlog *log, @@ -921,8 +892,7 @@ xfs_log_write_unmount_record( if (tic) { trace_xfs_log_umount_write(log, tic); - xlog_ungrant_log_space(log, tic); - xfs_log_ticket_put(tic); + xfs_log_ticket_ungrant(log, tic); } } @@ -2992,19 +2962,18 @@ xlog_state_get_iclog_space( return 0; } /* xlog_state_get_iclog_space */ -/* The first cnt-1 times through here we don't need to - * move the grant write head because the permanent - * reservation has reserved cnt times the unit amount. 
- * Release part of current permanent unit reservation and - * reset current reservation to be one units worth. Also - * move grant reservation head forward. +/* + * The first cnt-1 times through here we don't need to move the grant write head + * because the permanent reservation has reserved cnt times the unit amount. + * Release part of current permanent unit reservation and reset current + * reservation to be one units worth. Also move grant reservation head forward. */ -STATIC void -xlog_regrant_reserve_log_space( +void +xfs_log_ticket_regrant( struct xlog *log, struct xlog_ticket *ticket) { - trace_xfs_log_regrant_reserve_enter(log, ticket); + trace_xfs_log_ticket_regrant(log, ticket); if (ticket->t_cnt > 0) ticket->t_cnt--; @@ -3016,21 +2985,20 @@ xlog_regrant_reserve_log_space( ticket->t_curr_res = ticket->t_unit_res; xlog_tic_reset_res(ticket); - trace_xfs_log_regrant_reserve_sub(log, ticket); + trace_xfs_log_ticket_regrant_sub(log, ticket); /* just return if we still have some of the pre-reserved space */ - if (ticket->t_cnt > 0) - return; + if (!ticket->t_cnt) { + xlog_grant_add_space(log, &log->l_reserve_head.grant, + ticket->t_unit_res); + trace_xfs_log_ticket_regrant_exit(log, ticket); - xlog_grant_add_space(log, &log->l_reserve_head.grant, - ticket->t_unit_res); - - trace_xfs_log_regrant_reserve_exit(log, ticket); - - ticket->t_curr_res = ticket->t_unit_res; - xlog_tic_reset_res(ticket); -} /* xlog_regrant_reserve_log_space */ + ticket->t_curr_res = ticket->t_unit_res; + xlog_tic_reset_res(ticket); + } + xfs_log_ticket_put(ticket); +} /* * Give back the space left from a reservation. @@ -3046,18 +3014,19 @@ xlog_regrant_reserve_log_space( * space, the count will stay at zero and the only space remaining will be * in the current reservation field. 
*/ -STATIC void -xlog_ungrant_log_space( +void +xfs_log_ticket_ungrant( struct xlog *log, struct xlog_ticket *ticket) { - int bytes; + int bytes; + + trace_xfs_log_ticket_ungrant(log, ticket); if (ticket->t_cnt > 0) ticket->t_cnt--; - trace_xfs_log_ungrant_enter(log, ticket); - trace_xfs_log_ungrant_sub(log, ticket); + trace_xfs_log_ticket_ungrant_sub(log, ticket); /* * If this is a permanent reservation ticket, we may be able to free @@ -3072,9 +3041,10 @@ xlog_ungrant_log_space( xlog_grant_sub_space(log, &log->l_reserve_head.grant, bytes); xlog_grant_sub_space(log, &log->l_write_head.grant, bytes); - trace_xfs_log_ungrant_exit(log, ticket); + trace_xfs_log_ticket_ungrant_exit(log, ticket); xfs_log_space_wake(log->l_mp); + xfs_log_ticket_put(ticket); } /* diff --git a/fs/xfs/xfs_log_cil.c b/fs/xfs/xfs_log_cil.c index 666041ef058f..0ae187fa9af2 100644 --- a/fs/xfs/xfs_log_cil.c +++ b/fs/xfs/xfs_log_cil.c @@ -843,7 +843,7 @@ xlog_cil_push_work( if (error) goto out_abort_free_ticket; - xlog_ticket_done(log, tic, false); + xfs_log_ticket_ungrant(log, tic); spin_lock(&commit_iclog->ic_callback_lock); if (commit_iclog->ic_state == XLOG_STATE_IOERROR) { @@ -876,7 +876,7 @@ xlog_cil_push_work( return; out_abort_free_ticket: - xlog_ticket_done(log, tic, false); + xfs_log_ticket_ungrant(log, tic); out_abort: ASSERT(XLOG_FORCED_SHUTDOWN(log)); xlog_cil_committed(ctx); @@ -1008,7 +1008,10 @@ xfs_log_commit_cil( if (commit_lsn) *commit_lsn = xc_commit_lsn; - xlog_ticket_done(log, tp->t_ticket, regrant); + if (regrant && !XLOG_FORCED_SHUTDOWN(log)) + xfs_log_ticket_regrant(log, tp->t_ticket); + else + xfs_log_ticket_ungrant(log, tp->t_ticket); tp->t_ticket = NULL; xfs_trans_unreserve_and_mod_sb(tp); diff --git a/fs/xfs/xfs_log_priv.h b/fs/xfs/xfs_log_priv.h index fd63400fff6d..0941b465de9e 100644 --- a/fs/xfs/xfs_log_priv.h +++ b/fs/xfs/xfs_log_priv.h @@ -443,8 +443,8 @@ int xlog_write(struct xlog *log, struct xfs_log_vec *log_vector, bool need_start_rec); int 
xlog_write_done(struct xlog *log, struct xlog_ticket *ticket, struct xlog_in_core **iclog, xfs_lsn_t *lsn); -void xlog_ticket_done(struct xlog *log, struct xlog_ticket *ticket, - bool regrant); +void xfs_log_ticket_ungrant(struct xlog *log, struct xlog_ticket *ticket); +void xfs_log_ticket_regrant(struct xlog *log, struct xlog_ticket *ticket); /* * When we crack an atomic LSN, we sample it first so that the value will not diff --git a/fs/xfs/xfs_trace.h b/fs/xfs/xfs_trace.h index efc7751550d9..f7f12d312fd3 100644 --- a/fs/xfs/xfs_trace.h +++ b/fs/xfs/xfs_trace.h @@ -1001,8 +1001,6 @@ DECLARE_EVENT_CLASS(xfs_loggrant_class, DEFINE_EVENT(xfs_loggrant_class, name, \ TP_PROTO(struct xlog *log, struct xlog_ticket *tic), \ TP_ARGS(log, tic)) -DEFINE_LOGGRANT_EVENT(xfs_log_done_nonperm); -DEFINE_LOGGRANT_EVENT(xfs_log_done_perm); DEFINE_LOGGRANT_EVENT(xfs_log_umount_write); DEFINE_LOGGRANT_EVENT(xfs_log_grant_sleep); DEFINE_LOGGRANT_EVENT(xfs_log_grant_wake); @@ -1011,12 +1009,12 @@ DEFINE_LOGGRANT_EVENT(xfs_log_reserve); DEFINE_LOGGRANT_EVENT(xfs_log_reserve_exit); DEFINE_LOGGRANT_EVENT(xfs_log_regrant); DEFINE_LOGGRANT_EVENT(xfs_log_regrant_exit); -DEFINE_LOGGRANT_EVENT(xfs_log_regrant_reserve_enter); -DEFINE_LOGGRANT_EVENT(xfs_log_regrant_reserve_exit); -DEFINE_LOGGRANT_EVENT(xfs_log_regrant_reserve_sub); -DEFINE_LOGGRANT_EVENT(xfs_log_ungrant_enter); -DEFINE_LOGGRANT_EVENT(xfs_log_ungrant_exit); -DEFINE_LOGGRANT_EVENT(xfs_log_ungrant_sub); +DEFINE_LOGGRANT_EVENT(xfs_log_ticket_regrant); +DEFINE_LOGGRANT_EVENT(xfs_log_ticket_regrant_exit); +DEFINE_LOGGRANT_EVENT(xfs_log_ticket_regrant_sub); +DEFINE_LOGGRANT_EVENT(xfs_log_ticket_ungrant); +DEFINE_LOGGRANT_EVENT(xfs_log_ticket_ungrant_sub); +DEFINE_LOGGRANT_EVENT(xfs_log_ticket_ungrant_exit); DECLARE_EVENT_CLASS(xfs_log_item_class, TP_PROTO(struct xfs_log_item *lip), diff --git a/fs/xfs/xfs_trans.c b/fs/xfs/xfs_trans.c index e20c759f4884..28b983ff8b11 100644 --- a/fs/xfs/xfs_trans.c +++ b/fs/xfs/xfs_trans.c @@ -231,7 
+231,7 @@ xfs_trans_reserve( */ undo_log: if (resp->tr_logres > 0) { - xlog_ticket_done(mp->m_log, tp->t_ticket, false); + xfs_log_ticket_ungrant(mp->m_log, tp->t_ticket); tp->t_ticket = NULL; tp->t_log_res = 0; tp->t_flags &= ~XFS_TRANS_PERM_LOG_RES; @@ -1006,7 +1006,10 @@ __xfs_trans_commit( */ xfs_trans_unreserve_and_mod_dquots(tp); if (tp->t_ticket) { - xlog_ticket_done(mp->m_log, tp->t_ticket, regrant); + if (regrant && !XLOG_FORCED_SHUTDOWN(mp->m_log)) + xfs_log_ticket_regrant(mp->m_log, tp->t_ticket); + else + xfs_log_ticket_ungrant(mp->m_log, tp->t_ticket); tp->t_ticket = NULL; } current_restore_flags_nested(&tp->t_pflags, PF_MEMALLOC_NOFS); @@ -1065,7 +1068,7 @@ xfs_trans_cancel( xfs_trans_unreserve_and_mod_dquots(tp); if (tp->t_ticket) { - xlog_ticket_done(mp->m_log, tp->t_ticket, false); + xfs_log_ticket_ungrant(mp->m_log, tp->t_ticket); tp->t_ticket = NULL; } From f10e925def9a6d916b291e8c1e704df5a2976f8a Mon Sep 17 00:00:00 2001 From: Dave Chinner Date: Wed, 25 Mar 2020 18:18:23 -0700 Subject: [PATCH 017/280] xfs: merge xlog_commit_record with xlog_write_done xlog_write_done() is just a thin wrapper around xlog_commit_record(), so they can be merged together easily. Signed-off-by: Dave Chinner Signed-off-by: Christoph Hellwig Reviewed-by: Brian Foster Reviewed-by: Darrick J. Wong Signed-off-by: Darrick J. 
Wong --- fs/xfs/xfs_log.c | 43 ++++++++++--------------------------------- fs/xfs/xfs_log_cil.c | 2 +- fs/xfs/xfs_log_priv.h | 2 +- 3 files changed, 12 insertions(+), 35 deletions(-) diff --git a/fs/xfs/xfs_log.c b/fs/xfs/xfs_log.c index a22962e35beb..ccbc9d8e6e3c 100644 --- a/fs/xfs/xfs_log.c +++ b/fs/xfs/xfs_log.c @@ -24,13 +24,6 @@ kmem_zone_t *xfs_log_ticket_zone; /* Local miscellaneous function prototypes */ -STATIC int -xlog_commit_record( - struct xlog *log, - struct xlog_ticket *ticket, - struct xlog_in_core **iclog, - xfs_lsn_t *commitlsnp); - STATIC struct xlog * xlog_alloc_log( struct xfs_mount *mp, @@ -478,22 +471,6 @@ xfs_log_reserve( * marked as with WANT_SYNC. */ -/* - * Write a commit record to the log to close off a running log write. - */ -int -xlog_write_done( - struct xlog *log, - struct xlog_ticket *ticket, - struct xlog_in_core **iclog, - xfs_lsn_t *lsn) -{ - if (XLOG_FORCED_SHUTDOWN(log)) - return -EIO; - - return xlog_commit_record(log, ticket, iclog, lsn); -} - static bool __xlog_state_release_iclog( struct xlog *log, @@ -1463,20 +1440,17 @@ xlog_alloc_log( return ERR_PTR(error); } /* xlog_alloc_log */ - /* * Write out the commit record of a transaction associated with the given - * ticket. Return the lsn of the commit record. + * ticket to close off a running log write. Return the lsn of the commit record. 
*/ -STATIC int +int xlog_commit_record( struct xlog *log, struct xlog_ticket *ticket, struct xlog_in_core **iclog, - xfs_lsn_t *commitlsnp) + xfs_lsn_t *lsn) { - struct xfs_mount *mp = log->l_mp; - int error; struct xfs_log_iovec reg = { .i_addr = NULL, .i_len = 0, @@ -1486,12 +1460,15 @@ xlog_commit_record( .lv_niovecs = 1, .lv_iovecp = &reg, }; + int error; - ASSERT_ALWAYS(iclog); - error = xlog_write(log, &vec, ticket, commitlsnp, iclog, - XLOG_COMMIT_TRANS, false); + if (XLOG_FORCED_SHUTDOWN(log)) + return -EIO; + + error = xlog_write(log, &vec, ticket, lsn, iclog, XLOG_COMMIT_TRANS, + false); if (error) - xfs_force_shutdown(mp, SHUTDOWN_LOG_IO_ERROR); + xfs_force_shutdown(log->l_mp, SHUTDOWN_LOG_IO_ERROR); return error; } diff --git a/fs/xfs/xfs_log_cil.c b/fs/xfs/xfs_log_cil.c index 0ae187fa9af2..e3dd405ea767 100644 --- a/fs/xfs/xfs_log_cil.c +++ b/fs/xfs/xfs_log_cil.c @@ -839,7 +839,7 @@ xlog_cil_push_work( } spin_unlock(&cil->xc_push_lock); - error = xlog_write_done(log, tic, &commit_iclog, &commit_lsn); + error = xlog_commit_record(log, tic, &commit_iclog, &commit_lsn); if (error) goto out_abort_free_ticket; diff --git a/fs/xfs/xfs_log_priv.h b/fs/xfs/xfs_log_priv.h index 0941b465de9e..f4a54469d7d0 100644 --- a/fs/xfs/xfs_log_priv.h +++ b/fs/xfs/xfs_log_priv.h @@ -441,7 +441,7 @@ int xlog_write(struct xlog *log, struct xfs_log_vec *log_vector, struct xlog_ticket *tic, xfs_lsn_t *start_lsn, struct xlog_in_core **commit_iclog, uint flags, bool need_start_rec); -int xlog_write_done(struct xlog *log, struct xlog_ticket *ticket, +int xlog_commit_record(struct xlog *log, struct xlog_ticket *ticket, struct xlog_in_core **iclog, xfs_lsn_t *lsn); void xfs_log_ticket_ungrant(struct xlog *log, struct xlog_ticket *ticket); void xfs_log_ticket_regrant(struct xlog *log, struct xlog_ticket *ticket); From 3c702f95909ac6dfb2d3ea1b897c46fc9717d1a5 Mon Sep 17 00:00:00 2001 From: Dave Chinner Date: Wed, 25 Mar 2020 18:18:24 -0700 Subject: [PATCH 018/280] xfs: refactor unmount
record writing Separate out the unmount record writing from the rest of the ticket and log state futzing necessary to make it work. This is a no-op, just makes the code cleaner and places the unmount record formatting and writing alongside the commit record formatting and writing code. We can also get rid of the ticket flag clearing before the xlog_write() call because it no longer cares about the state of XLOG_TIC_INITED. Signed-off-by: Dave Chinner Signed-off-by: Christoph Hellwig Reviewed-by: Brian Foster Reviewed-by: Darrick J. Wong Signed-off-by: Darrick J. Wong --- fs/xfs/xfs_log.c | 49 ++++++++++++++++++++++++++++-------------------- 1 file changed, 29 insertions(+), 20 deletions(-) diff --git a/fs/xfs/xfs_log.c b/fs/xfs/xfs_log.c index ccbc9d8e6e3c..210048ca6edd 100644 --- a/fs/xfs/xfs_log.c +++ b/fs/xfs/xfs_log.c @@ -795,32 +795,44 @@ xlog_wait_on_iclog( } /* - * Final log writes as part of unmount. - * - * Mark the filesystem clean as unmount happens. Note that during relocation - * this routine needs to be executed as part of source-bag while the - * deallocation must not be done until source-end. + * Write out an unmount record using the ticket provided. We have to account for + * the data space used in the unmount ticket as this write is not done from a + * transaction context that has already done the accounting for us. */ - -/* Actually write the unmount record to disk. 
*/ -static void -xfs_log_write_unmount_record( - struct xfs_mount *mp) +static int +xlog_write_unmount_record( + struct xlog *log, + struct xlog_ticket *ticket, + xfs_lsn_t *lsn, + uint flags) { - /* the data section must be 32 bit size aligned */ - struct xfs_unmount_log_format magic = { + struct xfs_unmount_log_format ulf = { .magic = XLOG_UNMOUNT_TYPE, }; struct xfs_log_iovec reg = { - .i_addr = &magic, - .i_len = sizeof(magic), + .i_addr = &ulf, + .i_len = sizeof(ulf), .i_type = XLOG_REG_TYPE_UNMOUNT, }; struct xfs_log_vec vec = { .lv_niovecs = 1, .lv_iovecp = &reg, }; - struct xlog *log = mp->m_log; + + /* account for space used by record data */ + ticket->t_curr_res -= sizeof(ulf); + return xlog_write(log, &vec, ticket, lsn, NULL, flags, false); +} + +/* + * Mark the filesystem clean by writing an unmount record to the head of the + * log. + */ +static void +xlog_unmount_write( + struct xlog *log) +{ + struct xfs_mount *mp = log->l_mp; struct xlog_in_core *iclog; struct xlog_ticket *tic = NULL; xfs_lsn_t lsn; @@ -844,10 +856,7 @@ xfs_log_write_unmount_record( flags &= ~XLOG_UNMOUNT_TRANS; } - /* remove inited flag, and account for space used */ - tic->t_flags = 0; - tic->t_curr_res -= sizeof(magic); - error = xlog_write(log, &vec, tic, &lsn, NULL, flags, false); + error = xlog_write_unmount_record(log, tic, &lsn, flags); /* * At this point, we're umounting anyway, so there's no point in * transitioning log state to IOERROR. Just continue... @@ -913,7 +922,7 @@ xfs_log_unmount_write( if (XLOG_FORCED_SHUTDOWN(log)) return; xfs_log_unmount_verify_iclog(log); - xfs_log_write_unmount_record(mp); + xlog_unmount_write(log); } /* From b843299ba5f9a430dd26ecd02ee2fef805f19844 Mon Sep 17 00:00:00 2001 From: Dave Chinner Date: Wed, 25 Mar 2020 18:18:24 -0700 Subject: [PATCH 019/280] xfs: remove some stale comments from the log code Signed-off-by: Dave Chinner Signed-off-by: Christoph Hellwig Reviewed-by: Brian Foster Reviewed-by: Darrick J. Wong Signed-off-by: Darrick J.
Wong --- fs/xfs/xfs_log.c | 59 +++++++++++------------------------------------- 1 file changed, 13 insertions(+), 46 deletions(-) diff --git a/fs/xfs/xfs_log.c b/fs/xfs/xfs_log.c index 210048ca6edd..7d1355a9cc43 100644 --- a/fs/xfs/xfs_log.c +++ b/fs/xfs/xfs_log.c @@ -463,14 +463,6 @@ xfs_log_reserve( return error; } - -/* - * NOTES: - * - * 1. currblock field gets updated at startup and after in-core logs - * marked as with WANT_SYNC. - */ - static bool __xlog_state_release_iclog( struct xlog *log, @@ -1915,7 +1907,7 @@ xlog_dealloc_log( log->l_mp->m_log = NULL; destroy_workqueue(log->l_ioend_workqueue); kmem_free(log); -} /* xlog_dealloc_log */ +} /* * Update counters atomically now that memcpy is done. @@ -2458,14 +2450,6 @@ xlog_write( return error; } - -/***************************************************************************** - * - * State Machine functions - * - ***************************************************************************** - */ - static void xlog_state_activate_iclog( struct xlog_in_core *iclog, @@ -2826,7 +2810,7 @@ xlog_state_done_syncing( */ wake_up_all(&iclog->ic_write_wait); spin_unlock(&log->l_icloglock); - xlog_state_do_callback(log); /* also cleans log */ + xlog_state_do_callback(log); } /* @@ -2946,13 +2930,14 @@ xlog_state_get_iclog_space( *logoffsetp = log_offset; return 0; -} /* xlog_state_get_iclog_space */ +} /* - * The first cnt-1 times through here we don't need to move the grant write head - * because the permanent reservation has reserved cnt times the unit amount. - * Release part of current permanent unit reservation and reset current - * reservation to be one units worth. Also move grant reservation head forward. + * The first cnt-1 times a ticket goes through here we don't need to move the + * grant write head because the permanent reservation has reserved cnt times the + * unit amount. Release part of current permanent unit reservation and reset + * current reservation to be one units worth. 
Also move grant reservation head + * forward. */ void xfs_log_ticket_regrant( @@ -3034,12 +3019,8 @@ xfs_log_ticket_ungrant( } /* - * Mark the current iclog in the ring as WANT_SYNC and move the current iclog - * pointer to the next iclog in the ring. - * - * When called from xlog_state_get_iclog_space(), the exact size of the iclog - * has not yet been determined, all we know is that we have run out of space in - * the current iclog. + * This routine will mark the current iclog in the ring as WANT_SYNC and move + * the current iclog pointer to the next iclog in the ring. */ STATIC void xlog_state_switch_iclogs( @@ -3084,7 +3065,7 @@ xlog_state_switch_iclogs( } ASSERT(iclog == log->l_iclog); log->l_iclog = iclog->ic_next; -} /* xlog_state_switch_iclogs */ +} /* * Write out all data in the in-core log as of this exact moment in time. @@ -3291,13 +3272,6 @@ xfs_log_force_lsn( return ret; } -/***************************************************************************** - * - * TICKET functions - * - ***************************************************************************** - */ - /* * Free a used ticket when its refcount falls to zero. */ @@ -3454,13 +3428,6 @@ xlog_ticket_alloc( return tic; } - -/****************************************************************************** - * - * Log debug routines - * - ****************************************************************************** - */ #if defined(DEBUG) /* * Make sure that the destination ptr is within the valid data region of @@ -3546,7 +3513,7 @@ xlog_verify_tail_lsn( if (blocks < BTOBB(iclog->ic_offset) + 1) xfs_emerg(log->l_mp, "%s: ran out of log space", __func__); } -} /* xlog_verify_tail_lsn */ +} /* * Perform a number of checks on the iclog before writing to disk. 
@@ -3649,7 +3616,7 @@ xlog_verify_iclog( } ptr += sizeof(xlog_op_header_t) + op_len; } -} /* xlog_verify_iclog */ +} #endif /* From 108a42358a05312b2128533c6462a3fdeb410bdf Mon Sep 17 00:00:00 2001 From: Dave Chinner Date: Tue, 24 Mar 2020 20:10:26 -0700 Subject: [PATCH 020/280] xfs: Lower CIL flush limit for large logs The current CIL size aggregation limit is 1/8th the log size. This means for large logs we might be aggregating at least 250MB of dirty objects in memory before the CIL is flushed to the journal. With CIL shadow buffers sitting around, this means the CIL is often consuming >500MB of temporary memory that is all allocated under GFP_NOFS conditions. Flushing the CIL can take some time to do if there is other IO ongoing, and can introduce substantial log force latency by itself. It also pins the memory until the objects are in the AIL and can be written back and reclaimed by shrinkers. Hence this threshold also tends to determine the minimum amount of memory XFS can operate in under heavy modification without triggering the OOM killer. Modify the CIL space limit to prevent such huge amounts of pinned metadata from aggregating. We can have 2MB of log IO in flight at once, so limit aggregation to 16x this size. This threshold was chosen as it has little impact on performance (on 16-way fsmark) or log traffic but pins a lot less memory on large logs especially under heavy memory pressure. An aggregation limit of 8x had 5-10% performance degradation and a 50% increase in log throughput for the same workload, so clearly that was too small for highly concurrent workloads on large logs. This was found via trace analysis of AIL behaviour. e.g.
insertion from a single CIL flush: xfs_ail_insert: old lsn 0/0 new lsn 1/3033090 type XFS_LI_INODE flags IN_AIL $ grep xfs_ail_insert /mnt/scratch/s.t |grep "new lsn 1/3033090" |wc -l 1721823 $ So there were 1.7 million objects inserted into the AIL from this CIL checkpoint, the first at 2323.392108, the last at 2325.667566 which was the end of the trace (i.e. it hadn't finished). Clearly a major problem. Signed-off-by: Dave Chinner Reviewed-by: Brian Foster Reviewed-by: Allison Collins Reviewed-by: Darrick J. Wong Signed-off-by: Darrick J. Wong --- fs/xfs/xfs_log_priv.h | 29 +++++++++++++++++++++++------ 1 file changed, 23 insertions(+), 6 deletions(-) diff --git a/fs/xfs/xfs_log_priv.h b/fs/xfs/xfs_log_priv.h index f4a54469d7d0..938edd19a8a6 100644 --- a/fs/xfs/xfs_log_priv.h +++ b/fs/xfs/xfs_log_priv.h @@ -316,13 +316,30 @@ struct xfs_cil { * tries to keep 25% of the log free, so we need to keep below that limit or we * risk running out of free log space to start any new transactions. * - * In order to keep background CIL push efficient, we will set a lower - * threshold at which background pushing is attempted without blocking current - * transaction commits. A separate, higher bound defines when CIL pushes are - * enforced to ensure we stay within our maximum checkpoint size bounds. - * threshold, yet give us plenty of space for aggregation on large logs. + * In order to keep background CIL push efficient, we only need to ensure the + * CIL is large enough to maintain sufficient in-memory relogging to avoid + * repeated physical writes of frequently modified metadata. If we allow the CIL + * to grow to a substantial fraction of the log, then we may be pinning hundreds + * of megabytes of metadata in memory until the CIL flushes. This can cause + * issues when we are running low on memory - pinned memory cannot be reclaimed, + * and the CIL consumes a lot of memory. 
Hence we need to set an upper physical + * size limit for the CIL that limits the maximum amount of memory pinned by the + * CIL but does not limit performance by reducing relogging efficiency + * significantly. + * + * As such, the CIL push threshold ends up being the smaller of two thresholds: + * - a threshold large enough that it allows CIL to be pushed and progress to be + * made without excessive blocking of incoming transaction commits. This is + * defined to be 12.5% of the log space - half the 25% push threshold of the + * AIL. + * - small enough that it doesn't pin excessive amounts of memory but maintains + * close to peak relogging efficiency. This is defined to be 16x the iclog + * buffer window (32MB) as measurements have shown this to be roughly the + * point of diminishing performance increases under highly concurrent + * modification workloads. */ -#define XLOG_CIL_SPACE_LIMIT(log) (log->l_logsize >> 3) +#define XLOG_CIL_SPACE_LIMIT(log) \ + min_t(int, (log)->l_logsize >> 3, BBTOB(XLOG_TOTAL_REC_SHIFT(log)) << 4) /* * ticket grant locks, queues and accounting have their own cachlines From 0e7ab7efe77451cba4cbecb6c9f5ef83cf32b36b Mon Sep 17 00:00:00 2001 From: Dave Chinner Date: Tue, 24 Mar 2020 20:10:27 -0700 Subject: [PATCH 021/280] xfs: Throttle commits on delayed background CIL push In certain situations the background CIL push can be indefinitely delayed. While we have workarounds from the obvious cases now, it doesn't solve the underlying issue. This issue is that there is no upper limit on the CIL where we will either force or wait for a background push to start, hence allowing the CIL to grow without bound until it consumes all log space. To fix this, add a new wait queue to the CIL which allows background pushes to wait for the CIL context to be switched out. This happens when the push starts, so it will allow us to block incoming transaction commit completion until the push has started. 
This will only affect processes that are running modifications, and only when the CIL threshold has been significantly overrun. This has no apparent impact on performance, and doesn't even trigger until over 45 million inodes had been created in a 16-way fsmark test on a 2GB log. That was limiting at 64MB of log space used, so the active CIL size is only about 3% of the total log in that case. The concurrent removal of those files did not trigger the background sleep at all. Signed-off-by: Dave Chinner Reviewed-by: Allison Collins Reviewed-by: Darrick J. Wong Signed-off-by: Darrick J. Wong --- fs/xfs/xfs_log_cil.c | 37 +++++++++++++++++++++++++++++++++---- fs/xfs/xfs_log_priv.h | 24 ++++++++++++++++++++++++ fs/xfs/xfs_trace.h | 1 + 3 files changed, 58 insertions(+), 4 deletions(-) diff --git a/fs/xfs/xfs_log_cil.c b/fs/xfs/xfs_log_cil.c index e3dd405ea767..b43f0e8f43f2 100644 --- a/fs/xfs/xfs_log_cil.c +++ b/fs/xfs/xfs_log_cil.c @@ -668,6 +668,11 @@ xlog_cil_push_work( push_seq = cil->xc_push_seq; ASSERT(push_seq <= ctx->sequence); + /* + * Wake up any background push waiters now this context is being pushed. + */ + wake_up_all(&ctx->push_wait); + /* * Check if we've anything to push. If there is nothing, then we don't * move on to a new sequence number and so we have to be able to push @@ -744,6 +749,7 @@ xlog_cil_push_work( */ INIT_LIST_HEAD(&new_ctx->committing); INIT_LIST_HEAD(&new_ctx->busy_extents); + init_waitqueue_head(&new_ctx->push_wait); new_ctx->sequence = ctx->sequence + 1; new_ctx->cil = cil; cil->xc_ctx = new_ctx; @@ -891,7 +897,7 @@ xlog_cil_push_work( */ static void xlog_cil_push_background( - struct xlog *log) + struct xlog *log) __releases(cil->xc_ctx_lock) { struct xfs_cil *cil = log->l_cilp; @@ -905,14 +911,36 @@ xlog_cil_push_background( * don't do a background push if we haven't used up all the * space available yet. 
*/ - if (cil->xc_ctx->space_used < XLOG_CIL_SPACE_LIMIT(log)) + if (cil->xc_ctx->space_used < XLOG_CIL_SPACE_LIMIT(log)) { + up_read(&cil->xc_ctx_lock); return; + } spin_lock(&cil->xc_push_lock); if (cil->xc_push_seq < cil->xc_current_sequence) { cil->xc_push_seq = cil->xc_current_sequence; queue_work(log->l_mp->m_cil_workqueue, &cil->xc_push_work); } + + /* + * Drop the context lock now, we can't hold that if we need to sleep + * because we are over the blocking threshold. The push_lock is still + * held, so blocking threshold sleep/wakeup is still correctly + * serialised here. + */ + up_read(&cil->xc_ctx_lock); + + /* + * If we are well over the space limit, throttle the work that is being + * done until the push work on this context has begun. + */ + if (cil->xc_ctx->space_used >= XLOG_CIL_BLOCKING_SPACE_LIMIT(log)) { + trace_xfs_log_cil_wait(log, cil->xc_ctx->ticket); + ASSERT(cil->xc_ctx->space_used < log->l_logsize); + xlog_wait(&cil->xc_ctx->push_wait, &cil->xc_push_lock); + return; + } + spin_unlock(&cil->xc_push_lock); } @@ -1032,9 +1060,9 @@ xfs_log_commit_cil( if (lip->li_ops->iop_committing) lip->li_ops->iop_committing(lip, xc_commit_lsn); } - xlog_cil_push_background(log); - up_read(&cil->xc_ctx_lock); + /* xlog_cil_push_background() releases cil->xc_ctx_lock */ + xlog_cil_push_background(log); } /* @@ -1193,6 +1221,7 @@ xlog_cil_init( INIT_LIST_HEAD(&ctx->committing); INIT_LIST_HEAD(&ctx->busy_extents); + init_waitqueue_head(&ctx->push_wait); ctx->sequence = 1; ctx->cil = cil; cil->xc_ctx = ctx; diff --git a/fs/xfs/xfs_log_priv.h b/fs/xfs/xfs_log_priv.h index 938edd19a8a6..ec22c7a3867f 100644 --- a/fs/xfs/xfs_log_priv.h +++ b/fs/xfs/xfs_log_priv.h @@ -240,6 +240,7 @@ struct xfs_cil_ctx { struct xfs_log_vec *lv_chain; /* logvecs being pushed */ struct list_head iclog_entry; struct list_head committing; /* ctx committing list */ + wait_queue_head_t push_wait; /* background push throttle */ struct work_struct discard_endio_work; }; @@ -337,10 +338,33 @@ 
struct xfs_cil { * buffer window (32MB) as measurements have shown this to be roughly the * point of diminishing performance increases under highly concurrent * modification workloads. + * + * To prevent the CIL from overflowing upper commit size bounds, we introduce a + * new threshold at which we block committing transactions until the background + * CIL commit commences and switches to a new context. While this is not a hard + * limit, it forces the process committing a transaction to the CIL to block and + * yield the CPU, giving the CIL push work a chance to be scheduled and start + * work. This prevents a process running lots of transactions from overfilling + * the CIL because it is not yielding the CPU. We set the blocking limit at + * twice the background push space threshold so we keep in line with the AIL + * push thresholds. + * + * Note: this is not a -hard- limit as blocking is applied after the transaction + * is inserted into the CIL and the push has been triggered. It is largely a + * throttling mechanism that allows the CIL push to be scheduled and run. A hard + * limit will be difficult to implement without introducing global serialisation + * in the CIL commit fast path, and it's not at all clear that we actually need + * such hard limits given the ~7 years we've run without a hard limit before + * finding the first situation where a checkpoint size overflow actually + * occurred. Hence the simple throttle, and an ASSERT check to tell us that + * we've overrun the max size. */ #define XLOG_CIL_SPACE_LIMIT(log) \ min_t(int, (log)->l_logsize >> 3, BBTOB(XLOG_TOTAL_REC_SHIFT(log)) << 4) +#define XLOG_CIL_BLOCKING_SPACE_LIMIT(log) \ + (XLOG_CIL_SPACE_LIMIT(log) * 2) + /* * ticket grant locks, queues and accounting have their own cachlines * as these are quite hot and can be operated on concurrently.
diff --git a/fs/xfs/xfs_trace.h b/fs/xfs/xfs_trace.h index f7f12d312fd3..a4323a63438d 100644 --- a/fs/xfs/xfs_trace.h +++ b/fs/xfs/xfs_trace.h @@ -1015,6 +1015,7 @@ DEFINE_LOGGRANT_EVENT(xfs_log_ticket_regrant_sub); DEFINE_LOGGRANT_EVENT(xfs_log_ticket_ungrant); DEFINE_LOGGRANT_EVENT(xfs_log_ticket_ungrant_sub); DEFINE_LOGGRANT_EVENT(xfs_log_ticket_ungrant_exit); +DEFINE_LOGGRANT_EVENT(xfs_log_cil_wait); DECLARE_EVENT_CLASS(xfs_log_item_class, TP_PROTO(struct xfs_log_item *lip), From 2def2845cc33390e39b51440508043e4981e10ee Mon Sep 17 00:00:00 2001 From: Dave Chinner Date: Tue, 24 Mar 2020 20:10:27 -0700 Subject: [PATCH 022/280] xfs: don't allow log IO to be throttled Running metadata intensive workloads, I've been seeing the AIL pushing getting stuck on pinned buffers and triggering log forces. The log force is taking a long time to run because the log IO is getting throttled by wbt_wait() - the block layer writeback throttle. It's being throttled because there is a huge amount of metadata writeback going on which is filling the request queue. IOWs, we have a priority inversion problem here. Mark the log IO bios with REQ_IDLE so they don't get throttled by the block layer writeback throttle. When we are forcing the CIL, we are likely to need to do tens of log IOs, and they are issued as fast as they can be built and IO completed. Hence REQ_IDLE is appropriate - it's an indication that more IO will follow shortly. And because we also set REQ_SYNC, the writeback throttle will now treat log IO the same way it treats direct IO writes - it will not throttle them at all. Hence we solve the priority inversion problem caused by the writeback throttle being unable to distinguish between high priority log IO and background metadata writeback. Signed-off-by: Dave Chinner Reviewed-by: Brian Foster Reviewed-by: Christoph Hellwig Reviewed-by: Allison Collins Reviewed-by: Darrick J. Wong Signed-off-by: Darrick J.
Wong --- fs/xfs/xfs_log.c | 10 +++++++++- 1 file changed, 9 insertions(+), 1 deletion(-) diff --git a/fs/xfs/xfs_log.c b/fs/xfs/xfs_log.c index 7d1355a9cc43..46108ca20d85 100644 --- a/fs/xfs/xfs_log.c +++ b/fs/xfs/xfs_log.c @@ -1687,7 +1687,15 @@ xlog_write_iclog( iclog->ic_bio.bi_iter.bi_sector = log->l_logBBstart + bno; iclog->ic_bio.bi_end_io = xlog_bio_end_io; iclog->ic_bio.bi_private = iclog; - iclog->ic_bio.bi_opf = REQ_OP_WRITE | REQ_META | REQ_SYNC | REQ_FUA; + + /* + * We use REQ_SYNC | REQ_IDLE here to tell the block layer there are more + * IOs coming immediately after this one. This prevents the block layer + * writeback throttle from throttling log writes behind background + * metadata writeback and causing priority inversions. + */ + iclog->ic_bio.bi_opf = REQ_OP_WRITE | REQ_META | REQ_SYNC | + REQ_IDLE | REQ_FUA; if (need_flush) iclog->ic_bio.bi_opf |= REQ_PREFLUSH; From 12eba65b28b085ddebcedbda4a6aa1b9eb94ce20 Mon Sep 17 00:00:00 2001 From: Dave Chinner Date: Tue, 24 Mar 2020 20:10:28 -0700 Subject: [PATCH 023/280] xfs: Improve metadata buffer reclaim accountability The buffer cache shrinker frees more than just the xfs_buf slab objects - it also frees the pages attached to the buffers. Make sure the memory reclaim code accounts for this memory being freed correctly, similar to how the inode shrinker accounts for pages freed from the page cache due to mapping invalidation. We also need to make sure that the mm subsystem knows these are reclaimable objects. We provide the memory reclaim subsystem with a shrinker to reclaim xfs_bufs, so we should really mark the slab that way. We also have a lot of xfs_bufs in a busy system, spread them around like we do inodes. Signed-off-by: Dave Chinner Reviewed-by: Allison Collins Reviewed-by: Brian Foster Reviewed-by: Darrick J. Wong Signed-off-by: Darrick J.
Wong --- fs/xfs/xfs_buf.c | 11 ++++++++--- 1 file changed, 8 insertions(+), 3 deletions(-) diff --git a/fs/xfs/xfs_buf.c b/fs/xfs/xfs_buf.c index f880141a2268..9ec3eaf1c618 100644 --- a/fs/xfs/xfs_buf.c +++ b/fs/xfs/xfs_buf.c @@ -327,6 +327,9 @@ xfs_buf_free( __free_page(page); } + if (current->reclaim_state) + current->reclaim_state->reclaimed_slab += + bp->b_page_count; } else if (bp->b_flags & _XBF_KMEM) kmem_free(bp->b_addr); _xfs_buf_free_pages(bp); @@ -2114,9 +2117,11 @@ xfs_buf_delwri_pushbuf( int __init xfs_buf_init(void) { - xfs_buf_zone = kmem_cache_create("xfs_buf", - sizeof(struct xfs_buf), 0, - SLAB_HWCACHE_ALIGN, NULL); + xfs_buf_zone = kmem_cache_create("xfs_buf", sizeof(struct xfs_buf), 0, + SLAB_HWCACHE_ALIGN | + SLAB_RECLAIM_ACCOUNT | + SLAB_MEM_SPREAD, + NULL); if (!xfs_buf_zone) goto out; From d59eadaea2b9945095d4d6d44367ebabd604395c Mon Sep 17 00:00:00 2001 From: Dave Chinner Date: Tue, 24 Mar 2020 20:10:28 -0700 Subject: [PATCH 024/280] xfs: correctly acount for reclaimable slabs The XFS inode item slab is actually reclaimed by inode shrinker callbacks from the memory reclaim subsystem. These should be marked as reclaimable so the mm subsystem has the full picture of how much memory it can actually reclaim from the XFS slab caches. Signed-off-by: Dave Chinner Reviewed-by: Brian Foster Reviewed-by: Allison Collins Reviewed-by: Darrick J. Wong Signed-off-by: Darrick J.
Wong --- fs/xfs/xfs_super.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/fs/xfs/xfs_super.c b/fs/xfs/xfs_super.c index 2094386af8ac..68fea439d974 100644 --- a/fs/xfs/xfs_super.c +++ b/fs/xfs/xfs_super.c @@ -1861,7 +1861,8 @@ xfs_init_zones(void) xfs_ili_zone = kmem_cache_create("xfs_ili", sizeof(struct xfs_inode_log_item), 0, - SLAB_MEM_SPREAD, NULL); + SLAB_RECLAIM_ACCOUNT | SLAB_MEM_SPREAD, + NULL); if (!xfs_ili_zone) goto out_destroy_inode_zone; From 4165994ac9672d91134675caa6de3645a9ace6c8 Mon Sep 17 00:00:00 2001 From: Dave Chinner Date: Tue, 24 Mar 2020 20:10:29 -0700 Subject: [PATCH 025/280] xfs: factor common AIL item deletion code Factor the common AIL deletion code that does all the wakeups into a helper so we only have one copy of this somewhat tricky code to interface with all the wakeups necessary when the LSN of the log tail changes. Signed-off-by: Dave Chinner Reviewed-by: Christoph Hellwig Reviewed-by: Allison Collins Reviewed-by: Brian Foster Reviewed-by: Darrick J. Wong Signed-off-by: Darrick J. 
Wong --- fs/xfs/xfs_inode_item.c | 12 +---------- fs/xfs/xfs_trans_ail.c | 48 ++++++++++++++++++++++------------------- fs/xfs/xfs_trans_priv.h | 4 +++- 3 files changed, 30 insertions(+), 34 deletions(-) diff --git a/fs/xfs/xfs_inode_item.c b/fs/xfs/xfs_inode_item.c index 4a3d13d4a022..bd8c36809870 100644 --- a/fs/xfs/xfs_inode_item.c +++ b/fs/xfs/xfs_inode_item.c @@ -742,17 +742,7 @@ xfs_iflush_done( xfs_clear_li_failed(blip); } } - - if (mlip_changed) { - if (!XFS_FORCED_SHUTDOWN(ailp->ail_mount)) - xlog_assign_tail_lsn_locked(ailp->ail_mount); - if (list_empty(&ailp->ail_head)) - wake_up_all(&ailp->ail_empty); - } - spin_unlock(&ailp->ail_lock); - - if (mlip_changed) - xfs_log_space_wake(ailp->ail_mount); + xfs_ail_update_finish(ailp, mlip_changed); } /* diff --git a/fs/xfs/xfs_trans_ail.c b/fs/xfs/xfs_trans_ail.c index 2ef0dfbfb303..26d2e7928121 100644 --- a/fs/xfs/xfs_trans_ail.c +++ b/fs/xfs/xfs_trans_ail.c @@ -681,6 +681,27 @@ xfs_ail_push_all_sync( finish_wait(&ailp->ail_empty, &wait); } +void +xfs_ail_update_finish( + struct xfs_ail *ailp, + bool do_tail_update) __releases(ailp->ail_lock) +{ + struct xfs_mount *mp = ailp->ail_mount; + + if (!do_tail_update) { + spin_unlock(&ailp->ail_lock); + return; + } + + if (!XFS_FORCED_SHUTDOWN(mp)) + xlog_assign_tail_lsn_locked(mp); + + if (list_empty(&ailp->ail_head)) + wake_up_all(&ailp->ail_empty); + spin_unlock(&ailp->ail_lock); + xfs_log_space_wake(mp); +} + /* * xfs_trans_ail_update - bulk AIL insertion operation. 
* @@ -740,15 +761,7 @@ xfs_trans_ail_update_bulk( if (!list_empty(&tmp)) xfs_ail_splice(ailp, cur, &tmp, lsn); - if (mlip_changed) { - if (!XFS_FORCED_SHUTDOWN(ailp->ail_mount)) - xlog_assign_tail_lsn_locked(ailp->ail_mount); - spin_unlock(&ailp->ail_lock); - - xfs_log_space_wake(ailp->ail_mount); - } else { - spin_unlock(&ailp->ail_lock); - } + xfs_ail_update_finish(ailp, mlip_changed); } bool @@ -792,10 +805,10 @@ void xfs_trans_ail_delete( struct xfs_ail *ailp, struct xfs_log_item *lip, - int shutdown_type) __releases(ailp->ail_lock) + int shutdown_type) { struct xfs_mount *mp = ailp->ail_mount; - bool mlip_changed; + bool need_update; if (!test_bit(XFS_LI_IN_AIL, &lip->li_flags)) { spin_unlock(&ailp->ail_lock); @@ -808,17 +821,8 @@ xfs_trans_ail_delete( return; } - mlip_changed = xfs_ail_delete_one(ailp, lip); - if (mlip_changed) { - if (!XFS_FORCED_SHUTDOWN(mp)) - xlog_assign_tail_lsn_locked(mp); - if (list_empty(&ailp->ail_head)) - wake_up_all(&ailp->ail_empty); - } - - spin_unlock(&ailp->ail_lock); - if (mlip_changed) - xfs_log_space_wake(ailp->ail_mount); + need_update = xfs_ail_delete_one(ailp, lip); + xfs_ail_update_finish(ailp, need_update); } int diff --git a/fs/xfs/xfs_trans_priv.h b/fs/xfs/xfs_trans_priv.h index 2e073c1c4614..64ffa746730e 100644 --- a/fs/xfs/xfs_trans_priv.h +++ b/fs/xfs/xfs_trans_priv.h @@ -92,8 +92,10 @@ xfs_trans_ail_update( } bool xfs_ail_delete_one(struct xfs_ail *ailp, struct xfs_log_item *lip); +void xfs_ail_update_finish(struct xfs_ail *ailp, bool do_tail_update) + __releases(ailp->ail_lock); void xfs_trans_ail_delete(struct xfs_ail *ailp, struct xfs_log_item *lip, - int shutdown_type) __releases(ailp->ail_lock); + int shutdown_type); static inline void xfs_trans_ail_remove( From 8eb807bd839938b45bf7a97f0568d2a845ba6929 Mon Sep 17 00:00:00 2001 From: Dave Chinner Date: Tue, 24 Mar 2020 20:10:29 -0700 Subject: [PATCH 026/280] xfs: tail updates only need to occur when LSN changes We currently wake anything waiting on the log 
tail to move whenever the log item at the tail of the log is removed. Historically this was fine behaviour because there were very few items at any given LSN. But with delayed logging, there may be thousands of items at any given LSN, and we can't move the tail until they are all gone. Hence if we are removing them in near tail-first order, we might be waking up processes waiting on the tail LSN to change (e.g. log space waiters) repeatedly without them being able to make progress. This also occurs with the new sync push waiters, and can result in thousands of spurious wakeups every second when under heavy direct reclaim pressure. To fix this, check that the tail LSN has actually changed on the AIL before triggering wakeups. This will reduce the number of spurious wakeups when doing bulk AIL removal and make this code much more efficient. Signed-off-by: Dave Chinner Reviewed-by: Christoph Hellwig Reviewed-by: Brian Foster Reviewed-by: Allison Collins Reviewed-by: Darrick J. Wong Signed-off-by: Darrick J. Wong --- fs/xfs/xfs_inode_item.c | 18 ++++++++++---- fs/xfs/xfs_trans_ail.c | 52 ++++++++++++++++++++++++++++------------- fs/xfs/xfs_trans_priv.h | 4 ++-- 3 files changed, 51 insertions(+), 23 deletions(-) diff --git a/fs/xfs/xfs_inode_item.c b/fs/xfs/xfs_inode_item.c index bd8c36809870..a627cb951dc6 100644 --- a/fs/xfs/xfs_inode_item.c +++ b/fs/xfs/xfs_inode_item.c @@ -730,19 +730,27 @@ xfs_iflush_done( * holding the lock before removing the inode from the AIL. 
*/ if (need_ail) { - bool mlip_changed = false; + xfs_lsn_t tail_lsn = 0; /* this is an opencoded batch version of xfs_trans_ail_delete */ spin_lock(&ailp->ail_lock); list_for_each_entry(blip, &tmp, li_bio_list) { if (INODE_ITEM(blip)->ili_logged && - blip->li_lsn == INODE_ITEM(blip)->ili_flush_lsn) - mlip_changed |= xfs_ail_delete_one(ailp, blip); - else { + blip->li_lsn == INODE_ITEM(blip)->ili_flush_lsn) { + /* + * xfs_ail_update_finish() only cares about the + * lsn of the first tail item removed, any + * others will be at the same or higher lsn so + * we just ignore them. + */ + xfs_lsn_t lsn = xfs_ail_delete_one(ailp, blip); + if (!tail_lsn && lsn) + tail_lsn = lsn; + } else { xfs_clear_li_failed(blip); } } - xfs_ail_update_finish(ailp, mlip_changed); + xfs_ail_update_finish(ailp, tail_lsn); } /* diff --git a/fs/xfs/xfs_trans_ail.c b/fs/xfs/xfs_trans_ail.c index 26d2e7928121..564253550b75 100644 --- a/fs/xfs/xfs_trans_ail.c +++ b/fs/xfs/xfs_trans_ail.c @@ -109,17 +109,25 @@ xfs_ail_next( * We need the AIL lock in order to get a coherent read of the lsn of the last * item in the AIL. */ +static xfs_lsn_t +__xfs_ail_min_lsn( + struct xfs_ail *ailp) +{ + struct xfs_log_item *lip = xfs_ail_min(ailp); + + if (lip) + return lip->li_lsn; + return 0; +} + xfs_lsn_t xfs_ail_min_lsn( struct xfs_ail *ailp) { - xfs_lsn_t lsn = 0; - struct xfs_log_item *lip; + xfs_lsn_t lsn; spin_lock(&ailp->ail_lock); - lip = xfs_ail_min(ailp); - if (lip) - lsn = lip->li_lsn; + lsn = __xfs_ail_min_lsn(ailp); spin_unlock(&ailp->ail_lock); return lsn; @@ -684,11 +692,12 @@ xfs_ail_push_all_sync( void xfs_ail_update_finish( struct xfs_ail *ailp, - bool do_tail_update) __releases(ailp->ail_lock) + xfs_lsn_t old_lsn) __releases(ailp->ail_lock) { struct xfs_mount *mp = ailp->ail_mount; - if (!do_tail_update) { + /* if the tail lsn hasn't changed, don't do updates or wakeups. 
*/ + if (!old_lsn || old_lsn == __xfs_ail_min_lsn(ailp)) { spin_unlock(&ailp->ail_lock); return; } @@ -733,7 +742,7 @@ xfs_trans_ail_update_bulk( xfs_lsn_t lsn) __releases(ailp->ail_lock) { struct xfs_log_item *mlip; - int mlip_changed = 0; + xfs_lsn_t tail_lsn = 0; int i; LIST_HEAD(tmp); @@ -748,9 +757,10 @@ xfs_trans_ail_update_bulk( continue; trace_xfs_ail_move(lip, lip->li_lsn, lsn); + if (mlip == lip && !tail_lsn) + tail_lsn = lip->li_lsn; + xfs_ail_delete(ailp, lip); - if (mlip == lip) - mlip_changed = 1; } else { trace_xfs_ail_insert(lip, 0, lsn); } @@ -761,15 +771,23 @@ xfs_trans_ail_update_bulk( if (!list_empty(&tmp)) xfs_ail_splice(ailp, cur, &tmp, lsn); - xfs_ail_update_finish(ailp, mlip_changed); + xfs_ail_update_finish(ailp, tail_lsn); } -bool +/* + * Delete one log item from the AIL. + * + * If this item was at the tail of the AIL, return the LSN of the log item so + * that we can use it to check if the LSN of the tail of the log has moved + * when finishing up the AIL delete process in xfs_ail_update_finish(). 
+ */ +xfs_lsn_t xfs_ail_delete_one( struct xfs_ail *ailp, struct xfs_log_item *lip) { struct xfs_log_item *mlip = xfs_ail_min(ailp); + xfs_lsn_t lsn = lip->li_lsn; trace_xfs_ail_delete(lip, mlip->li_lsn, lip->li_lsn); xfs_ail_delete(ailp, lip); @@ -777,7 +795,9 @@ xfs_ail_delete_one( clear_bit(XFS_LI_IN_AIL, &lip->li_flags); lip->li_lsn = 0; - return mlip == lip; + if (mlip == lip) + return lsn; + return 0; } /** @@ -808,7 +828,7 @@ xfs_trans_ail_delete( int shutdown_type) { struct xfs_mount *mp = ailp->ail_mount; - bool need_update; + xfs_lsn_t tail_lsn; if (!test_bit(XFS_LI_IN_AIL, &lip->li_flags)) { spin_unlock(&ailp->ail_lock); @@ -821,8 +841,8 @@ xfs_trans_ail_delete( return; } - need_update = xfs_ail_delete_one(ailp, lip); - xfs_ail_update_finish(ailp, need_update); + tail_lsn = xfs_ail_delete_one(ailp, lip); + xfs_ail_update_finish(ailp, tail_lsn); } int diff --git a/fs/xfs/xfs_trans_priv.h b/fs/xfs/xfs_trans_priv.h index 64ffa746730e..35655eac01a6 100644 --- a/fs/xfs/xfs_trans_priv.h +++ b/fs/xfs/xfs_trans_priv.h @@ -91,8 +91,8 @@ xfs_trans_ail_update( xfs_trans_ail_update_bulk(ailp, NULL, &lip, 1, lsn); } -bool xfs_ail_delete_one(struct xfs_ail *ailp, struct xfs_log_item *lip); -void xfs_ail_update_finish(struct xfs_ail *ailp, bool do_tail_update) +xfs_lsn_t xfs_ail_delete_one(struct xfs_ail *ailp, struct xfs_log_item *lip); +void xfs_ail_update_finish(struct xfs_ail *ailp, xfs_lsn_t old_lsn) __releases(ailp->ail_lock); void xfs_trans_ail_delete(struct xfs_ail *ailp, struct xfs_log_item *lip, int shutdown_type); From 5806165a6663544ea41bc3216f5c5effbde4799e Mon Sep 17 00:00:00 2001 From: Dave Chinner Date: Tue, 24 Mar 2020 20:10:30 -0700 Subject: [PATCH 027/280] xfs: factor inode lookup from xfs_ifree_cluster There's lots of indent in this code which makes it a bit hard to follow. We are also going to completely rework the inode lookup code as part of the inode reclaim rework, so factor out the inode lookup code from the inode cluster freeing code. 
Based on prototype code from Christoph Hellwig. Signed-off-by: Dave Chinner Reviewed-by: Brian Foster Reviewed-by: Darrick J. Wong Signed-off-by: Darrick J. Wong --- fs/xfs/xfs_inode.c | 152 +++++++++++++++++++++++++-------------------- 1 file changed, 84 insertions(+), 68 deletions(-) diff --git a/fs/xfs/xfs_inode.c b/fs/xfs/xfs_inode.c index 14b922f2a6db..5c930863ed5b 100644 --- a/fs/xfs/xfs_inode.c +++ b/fs/xfs/xfs_inode.c @@ -2503,6 +2503,88 @@ xfs_iunlink_remove( return error; } +/* + * Look up the inode number specified and mark it stale if it is found. If it is + * dirty, return the inode so it can be attached to the cluster buffer so it can + * be processed appropriately when the cluster free transaction completes. + */ +static struct xfs_inode * +xfs_ifree_get_one_inode( + struct xfs_perag *pag, + struct xfs_inode *free_ip, + int inum) +{ + struct xfs_mount *mp = pag->pag_mount; + struct xfs_inode *ip; + +retry: + rcu_read_lock(); + ip = radix_tree_lookup(&pag->pag_ici_root, XFS_INO_TO_AGINO(mp, inum)); + + /* Inode not in memory, nothing to do */ + if (!ip) + goto out_rcu_unlock; + + /* + * because this is an RCU protected lookup, we could find a recently + * freed or even reallocated inode during the lookup. We need to check + * under the i_flags_lock for a valid inode here. Skip it if it is not + * valid, the wrong inode or stale. + */ + spin_lock(&ip->i_flags_lock); + if (ip->i_ino != inum || __xfs_iflags_test(ip, XFS_ISTALE)) { + spin_unlock(&ip->i_flags_lock); + goto out_rcu_unlock; + } + spin_unlock(&ip->i_flags_lock); + + /* + * Don't try to lock/unlock the current inode, but we _cannot_ skip the + * other inodes that we did not find in the list attached to the buffer + * and are not already marked stale. If we can't lock it, back off and + * retry. 
+ */ + if (ip != free_ip) { + if (!xfs_ilock_nowait(ip, XFS_ILOCK_EXCL)) { + rcu_read_unlock(); + delay(1); + goto retry; + } + + /* + * Check the inode number again in case we're racing with + * freeing in xfs_reclaim_inode(). See the comments in that + * function for more information as to why the initial check is + * not sufficient. + */ + if (ip->i_ino != inum) { + xfs_iunlock(ip, XFS_ILOCK_EXCL); + goto out_rcu_unlock; + } + } + rcu_read_unlock(); + + xfs_iflock(ip); + xfs_iflags_set(ip, XFS_ISTALE); + + /* + * We don't need to attach clean inodes or those only with unlogged + * changes (which we throw away, anyway). + */ + if (!ip->i_itemp || xfs_inode_clean(ip)) { + ASSERT(ip != free_ip); + xfs_ifunlock(ip); + xfs_iunlock(ip, XFS_ILOCK_EXCL); + goto out_no_inode; + } + return ip; + +out_rcu_unlock: + rcu_read_unlock(); +out_no_inode: + return NULL; +} + /* * A big issue when freeing the inode cluster is that we _cannot_ skip any * inodes that are in memory - they all must be marked stale and attached to @@ -2603,77 +2685,11 @@ xfs_ifree_cluster( * even trying to lock them. */ for (i = 0; i < igeo->inodes_per_cluster; i++) { -retry: - rcu_read_lock(); - ip = radix_tree_lookup(&pag->pag_ici_root, - XFS_INO_TO_AGINO(mp, (inum + i))); - - /* Inode not in memory, nothing to do */ - if (!ip) { - rcu_read_unlock(); + ip = xfs_ifree_get_one_inode(pag, free_ip, inum + i); + if (!ip) continue; - } - /* - * because this is an RCU protected lookup, we could - * find a recently freed or even reallocated inode - * during the lookup. We need to check under the - * i_flags_lock for a valid inode here. Skip it if it - * is not valid, the wrong inode or stale. 
- */ - spin_lock(&ip->i_flags_lock); - if (ip->i_ino != inum + i || - __xfs_iflags_test(ip, XFS_ISTALE)) { - spin_unlock(&ip->i_flags_lock); - rcu_read_unlock(); - continue; - } - spin_unlock(&ip->i_flags_lock); - - /* - * Don't try to lock/unlock the current inode, but we - * _cannot_ skip the other inodes that we did not find - * in the list attached to the buffer and are not - * already marked stale. If we can't lock it, back off - * and retry. - */ - if (ip != free_ip) { - if (!xfs_ilock_nowait(ip, XFS_ILOCK_EXCL)) { - rcu_read_unlock(); - delay(1); - goto retry; - } - - /* - * Check the inode number again in case we're - * racing with freeing in xfs_reclaim_inode(). - * See the comments in that function for more - * information as to why the initial check is - * not sufficient. - */ - if (ip->i_ino != inum + i) { - xfs_iunlock(ip, XFS_ILOCK_EXCL); - rcu_read_unlock(); - continue; - } - } - rcu_read_unlock(); - - xfs_iflock(ip); - xfs_iflags_set(ip, XFS_ISTALE); - - /* - * we don't need to attach clean inodes or those only - * with unlogged changes (which we throw away, anyway). - */ iip = ip->i_itemp; - if (!iip || xfs_inode_clean(ip)) { - ASSERT(ip != free_ip); - xfs_ifunlock(ip); - xfs_iunlock(ip, XFS_ILOCK_EXCL); - continue; - } - iip->ili_last_fields = iip->ili_fields; iip->ili_fields = 0; iip->ili_fsync_fields = 0; From 5cc3c006eb45524860c4d1dd4dd7ad4a506bf3f5 Mon Sep 17 00:00:00 2001 From: "Darrick J. Wong" Date: Thu, 26 Mar 2020 10:26:44 -0700 Subject: [PATCH 028/280] xfs: don't write a corrupt unmount record to force summary counter recalc In commit f467cad95f5e3, I added the ability to force a recalculation of the filesystem summary counters if they seemed incorrect. This was done (not entirely correctly) by tweaking the log code to write an unmount record without the UMOUNT_TRANS flag set. At next mount, the log recovery code will fail to find the unmount record and go into recovery, which triggers the recalculation. 
What actually gets written to the log is what ought to be an unmount record, but without any flags set to indicate what kind of record it actually is. This worked to trigger the recalculation, but we shouldn't write bogus log records when we could simply write nothing. Fixes: f467cad95f5e3 ("xfs: force summary counter recalc at next mount") Signed-off-by: Darrick J. Wong Reviewed-by: Christoph Hellwig Reviewed-by: Brian Foster --- fs/xfs/xfs_log.c | 27 ++++++++++++++------------- 1 file changed, 14 insertions(+), 13 deletions(-) diff --git a/fs/xfs/xfs_log.c b/fs/xfs/xfs_log.c index 46108ca20d85..00fda2e8e738 100644 --- a/fs/xfs/xfs_log.c +++ b/fs/xfs/xfs_log.c @@ -835,19 +835,6 @@ xlog_unmount_write( if (error) goto out_err; - /* - * If we think the summary counters are bad, clear the unmount header - * flag in the unmount record so that the summary counters will be - * recalculated during log recovery at next mount. Refer to - * xlog_check_unmount_rec for more details. - */ - if (XFS_TEST_ERROR(xfs_fs_has_sickness(mp, XFS_SICK_FS_COUNTERS), mp, - XFS_ERRTAG_FORCE_SUMMARY_RECALC)) { - xfs_alert(mp, "%s: will fix summary counters at next mount", - __func__); - flags &= ~XLOG_UNMOUNT_TRANS; - } - error = xlog_write_unmount_record(log, tic, &lsn, flags); /* * At this point, we're umounting anyway, so there's no point in @@ -913,6 +900,20 @@ xfs_log_unmount_write( if (XLOG_FORCED_SHUTDOWN(log)) return; + + /* + * If we think the summary counters are bad, avoid writing the unmount + * record to force log recovery at next mount, after which the summary + * counters will be recalculated. Refer to xlog_check_unmount_rec for + * more details. 
+ */ + if (XFS_TEST_ERROR(xfs_fs_has_sickness(mp, XFS_SICK_FS_COUNTERS), mp, + XFS_ERRTAG_FORCE_SUMMARY_RECALC)) { + xfs_alert(mp, "%s: will fix summary counters at next mount", + __func__); + return; + } + xfs_log_unmount_verify_iclog(log); xlog_unmount_write(log); } From 63337b63e7dab667bc0b4c3d468eb7e0dcf5f384 Mon Sep 17 00:00:00 2001 From: Kaixu Xia Date: Fri, 27 Mar 2020 08:28:39 -0700 Subject: [PATCH 029/280] xfs: remove unnecessary ternary from xfs_create Since the "no-allocation" reservations for file creations have been removed, the resblks value should be larger than zero, so remove the unnecessary ternary conditional. Signed-off-by: Kaixu Xia Reviewed-by: Christoph Hellwig Reviewed-by: Darrick J. Wong [darrick: s/judgment/ternary/] Signed-off-by: Darrick J. Wong --- fs/xfs/xfs_inode.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/fs/xfs/xfs_inode.c b/fs/xfs/xfs_inode.c index 5c930863ed5b..0cac0d37e3ae 100644 --- a/fs/xfs/xfs_inode.c +++ b/fs/xfs/xfs_inode.c @@ -1200,8 +1200,7 @@ xfs_create( unlock_dp_on_error = false; error = xfs_dir_createname(tp, dp, name, ip->i_ino, - resblks ? - resblks - XFS_IALLOC_SPACE_RES(mp) : 0); + resblks - XFS_IALLOC_SPACE_RES(mp)); if (error) { ASSERT(error != -ENOSPC); goto out_trans_cancel; From 8d3d7e2b35ea7d91d6e085c93b5efecfb0fba307 Mon Sep 17 00:00:00 2001 From: Brian Foster Date: Fri, 27 Mar 2020 08:29:45 -0700 Subject: [PATCH 030/280] xfs: trylock underlying buffer on dquot flush A dquot flush currently blocks on the buffer lock for the underlying dquot buffer. In turn, this causes xfsaild to block rather than continue processing other items in the meantime. Update xfs_qm_dqflush() to trylock the buffer, similar to how inode buffers are handled, and return -EAGAIN if the lock fails. Fix up any callers that don't currently handle the error properly. Signed-off-by: Brian Foster Reviewed-by: Christoph Hellwig Reviewed-by: Darrick J. Wong Signed-off-by: Darrick J.
Wong --- fs/xfs/xfs_dquot.c | 6 +++--- fs/xfs/xfs_dquot_item.c | 3 ++- fs/xfs/xfs_qm.c | 14 +++++++++----- 3 files changed, 14 insertions(+), 9 deletions(-) diff --git a/fs/xfs/xfs_dquot.c b/fs/xfs/xfs_dquot.c index 711376ca269f..af2c8e5ceea0 100644 --- a/fs/xfs/xfs_dquot.c +++ b/fs/xfs/xfs_dquot.c @@ -1105,8 +1105,8 @@ xfs_qm_dqflush( * Get the buffer containing the on-disk dquot */ error = xfs_trans_read_buf(mp, NULL, mp->m_ddev_targp, dqp->q_blkno, - mp->m_quotainfo->qi_dqchunklen, 0, &bp, - &xfs_dquot_buf_ops); + mp->m_quotainfo->qi_dqchunklen, XBF_TRYLOCK, + &bp, &xfs_dquot_buf_ops); if (error) goto out_unlock; @@ -1177,7 +1177,7 @@ xfs_qm_dqflush( out_unlock: xfs_dqfunlock(dqp); - return -EIO; + return error; } /* diff --git a/fs/xfs/xfs_dquot_item.c b/fs/xfs/xfs_dquot_item.c index cf65e2e43c6e..baad1748d0d1 100644 --- a/fs/xfs/xfs_dquot_item.c +++ b/fs/xfs/xfs_dquot_item.c @@ -189,7 +189,8 @@ xfs_qm_dquot_logitem_push( if (!xfs_buf_delwri_queue(bp, buffer_list)) rval = XFS_ITEM_FLUSHING; xfs_buf_relse(bp); - } + } else if (error == -EAGAIN) + rval = XFS_ITEM_LOCKED; spin_lock(&lip->li_ailp->ail_lock); out_unlock: diff --git a/fs/xfs/xfs_qm.c b/fs/xfs/xfs_qm.c index cabdb755adae..c225691fad15 100644 --- a/fs/xfs/xfs_qm.c +++ b/fs/xfs/xfs_qm.c @@ -121,12 +121,11 @@ xfs_qm_dqpurge( { struct xfs_mount *mp = dqp->q_mount; struct xfs_quotainfo *qi = mp->m_quotainfo; + int error = -EAGAIN; xfs_dqlock(dqp); - if ((dqp->dq_flags & XFS_DQ_FREEING) || dqp->q_nrefs != 0) { - xfs_dqunlock(dqp); - return -EAGAIN; - } + if ((dqp->dq_flags & XFS_DQ_FREEING) || dqp->q_nrefs != 0) + goto out_unlock; dqp->dq_flags |= XFS_DQ_FREEING; @@ -139,7 +138,6 @@ xfs_qm_dqpurge( */ if (XFS_DQ_IS_DIRTY(dqp)) { struct xfs_buf *bp = NULL; - int error; /* * We don't care about getting disk errors here. 
We need @@ -149,6 +147,8 @@ xfs_qm_dqpurge( if (!error) { error = xfs_bwrite(bp); xfs_buf_relse(bp); + } else if (error == -EAGAIN) { + goto out_unlock; } xfs_dqflock(dqp); } @@ -174,6 +174,10 @@ xfs_qm_dqpurge( xfs_qm_dqdestroy(dqp); return 0; + +out_unlock: + xfs_dqunlock(dqp); + return error; } /* From d4bc4c5fd177066b38e3a39ac751399e8dff80cf Mon Sep 17 00:00:00 2001 From: Brian Foster Date: Fri, 27 Mar 2020 08:29:55 -0700 Subject: [PATCH 031/280] xfs: return locked status of inode buffer on xfsaild push If the inode buffer backing a particular inode is locked, xfs_iflush() returns -EAGAIN and xfs_inode_item_push() skips the inode. It still returns success to xfsaild, however, which bypasses the xfsaild backoff heuristic. Update xfs_inode_item_push() to return locked status if the inode buffer couldn't be locked. Signed-off-by: Brian Foster Reviewed-by: Christoph Hellwig Reviewed-by: Darrick J. Wong Signed-off-by: Darrick J. Wong --- fs/xfs/xfs_inode_item.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/fs/xfs/xfs_inode_item.c b/fs/xfs/xfs_inode_item.c index a627cb951dc6..f779cca2346f 100644 --- a/fs/xfs/xfs_inode_item.c +++ b/fs/xfs/xfs_inode_item.c @@ -552,7 +552,8 @@ xfs_inode_item_push( if (!xfs_buf_delwri_queue(bp, buffer_list)) rval = XFS_ITEM_FLUSHING; xfs_buf_relse(bp); - } + } else if (error == -EAGAIN) + rval = XFS_ITEM_LOCKED; spin_lock(&lip->li_ailp->ail_lock); out_unlock: From 840eda9602d30342486e85e7e96499f565572e4b Mon Sep 17 00:00:00 2001 From: James Smart Date: Sun, 22 Mar 2020 11:13:00 -0700 Subject: [PATCH 032/280] scsi: lpfc: Fix erroneous cpu limit of 128 on I/O statistics The cpu io statistics were capped by a hard define limit of 128. This effectively was a max number of CPUs, not an actual CPU count, nor actual CPU numbers which can be even larger than both of those values. This made stats off/misleading and on large CPU count systems, wrong. Fix the stats so that all CPUs can have a stats struct. 
Fix the looping such that it loops by hdwq, finds CPUs that used the hdwq, sums the stats, and then displays them. Link: https://lore.kernel.org/r/20200322181304.37655-9-jsmart2021@gmail.com Signed-off-by: James Smart Signed-off-by: Dick Kennedy Signed-off-by: Martin K. Petersen --- drivers/scsi/lpfc/lpfc.h | 9 +- drivers/scsi/lpfc/lpfc_debugfs.c | 208 ++++++++++++++++++------------- drivers/scsi/lpfc/lpfc_debugfs.h | 1 - drivers/scsi/lpfc/lpfc_init.c | 28 +++++ drivers/scsi/lpfc/lpfc_nvme.c | 45 ++++--- drivers/scsi/lpfc/lpfc_nvmet.c | 55 ++++---- drivers/scsi/lpfc/lpfc_scsi.c | 23 +--- drivers/scsi/lpfc/lpfc_sli4.h | 19 +-- 8 files changed, 217 insertions(+), 171 deletions(-) diff --git a/drivers/scsi/lpfc/lpfc.h b/drivers/scsi/lpfc/lpfc.h index e940a49f9f02..e4924b9fa69c 100644 --- a/drivers/scsi/lpfc/lpfc.h +++ b/drivers/scsi/lpfc/lpfc.h @@ -481,7 +481,7 @@ struct lpfc_vport { struct dentry *debug_nvmestat; struct dentry *debug_scsistat; struct dentry *debug_nvmektime; - struct dentry *debug_cpucheck; + struct dentry *debug_hdwqstat; struct dentry *vport_debugfs_root; struct lpfc_debugfs_trc *disc_trc; atomic_t disc_trc_cnt; @@ -1175,12 +1175,11 @@ struct lpfc_hba { uint16_t sfp_warning; #ifdef CONFIG_SCSI_LPFC_DEBUG_FS - uint16_t cpucheck_on; + uint16_t hdwqstat_on; #define LPFC_CHECK_OFF 0 #define LPFC_CHECK_NVME_IO 1 -#define LPFC_CHECK_NVMET_RCV 2 -#define LPFC_CHECK_NVMET_IO 4 -#define LPFC_CHECK_SCSI_IO 8 +#define LPFC_CHECK_NVMET_IO 2 +#define LPFC_CHECK_SCSI_IO 4 uint16_t ktime_on; uint64_t ktime_data_samples; uint64_t ktime_status_samples; diff --git a/drivers/scsi/lpfc/lpfc_debugfs.c b/drivers/scsi/lpfc/lpfc_debugfs.c index 819335b16c2e..1b8be1006cbe 100644 --- a/drivers/scsi/lpfc/lpfc_debugfs.c +++ b/drivers/scsi/lpfc/lpfc_debugfs.c @@ -1603,42 +1603,50 @@ lpfc_debugfs_nvmeio_trc_data(struct lpfc_hba *phba, char *buf, int size) } /** - * lpfc_debugfs_cpucheck_data - Dump target node list to a buffer + * lpfc_debugfs_hdwqstat_data - Dump I/O stats to a
buffer * @vport: The vport to gather target node info from. * @buf: The buffer to dump log into. * @size: The maximum amount of data to process. * * Description: - * This routine dumps the NVME statistics associated with @vport + * This routine dumps the NVME + SCSI statistics associated with @vport * * Return Value: * This routine returns the amount of bytes that were dumped into @buf and will * not exceed @size. **/ static int -lpfc_debugfs_cpucheck_data(struct lpfc_vport *vport, char *buf, int size) +lpfc_debugfs_hdwqstat_data(struct lpfc_vport *vport, char *buf, int size) { struct lpfc_hba *phba = vport->phba; struct lpfc_sli4_hdw_queue *qp; - int i, j, max_cnt; - int len = 0; + struct lpfc_hdwq_stat *c_stat; + int i, j, len; uint32_t tot_xmt; uint32_t tot_rcv; uint32_t tot_cmpl; + char tmp[LPFC_MAX_SCSI_INFO_TMP_LEN] = {0}; - len += scnprintf(buf + len, PAGE_SIZE - len, - "CPUcheck %s ", - (phba->cpucheck_on & LPFC_CHECK_NVME_IO ? - "Enabled" : "Disabled")); - if (phba->nvmet_support) { - len += scnprintf(buf + len, PAGE_SIZE - len, - "%s\n", - (phba->cpucheck_on & LPFC_CHECK_NVMET_RCV ? - "Rcv Enabled\n" : "Rcv Disabled\n")); - } else { - len += scnprintf(buf + len, PAGE_SIZE - len, "\n"); - } - max_cnt = size - LPFC_DEBUG_OUT_LINE_SZ; + scnprintf(tmp, sizeof(tmp), "HDWQ Stats:\n\n"); + if (strlcat(buf, tmp, size) >= size) + goto buffer_done; + + scnprintf(tmp, sizeof(tmp), "(NVME Accounting: %s) ", + (phba->hdwqstat_on & + (LPFC_CHECK_NVME_IO | LPFC_CHECK_NVMET_IO) ? + "Enabled" : "Disabled")); + if (strlcat(buf, tmp, size) >= size) + goto buffer_done; + + scnprintf(tmp, sizeof(tmp), "(SCSI Accounting: %s) ", + (phba->hdwqstat_on & LPFC_CHECK_SCSI_IO ? 
+ "Enabled" : "Disabled")); + if (strlcat(buf, tmp, size) >= size) + goto buffer_done; + + scnprintf(tmp, sizeof(tmp), "\n\n"); + if (strlcat(buf, tmp, size) >= size) + goto buffer_done; for (i = 0; i < phba->cfg_hdw_queue; i++) { qp = &phba->sli4_hba.hdwq[i]; @@ -1646,46 +1654,76 @@ lpfc_debugfs_cpucheck_data(struct lpfc_vport *vport, char *buf, int size) tot_rcv = 0; tot_xmt = 0; tot_cmpl = 0; - for (j = 0; j < LPFC_CHECK_CPU_CNT; j++) { - tot_xmt += qp->cpucheck_xmt_io[j]; - tot_cmpl += qp->cpucheck_cmpl_io[j]; + + for_each_present_cpu(j) { + c_stat = per_cpu_ptr(phba->sli4_hba.c_stat, j); + + /* Only display for this HDWQ */ + if (i != c_stat->hdwq_no) + continue; + + /* Only display non-zero counters */ + if (!c_stat->xmt_io && !c_stat->cmpl_io && + !c_stat->rcv_io) + continue; + + if (!tot_xmt && !tot_cmpl && !tot_rcv) { + /* Print HDWQ string only the first time */ + scnprintf(tmp, sizeof(tmp), "[HDWQ %d]:\t", i); + if (strlcat(buf, tmp, size) >= size) + goto buffer_done; + } + + tot_xmt += c_stat->xmt_io; + tot_cmpl += c_stat->cmpl_io; if (phba->nvmet_support) - tot_rcv += qp->cpucheck_rcv_io[j]; + tot_rcv += c_stat->rcv_io; + + scnprintf(tmp, sizeof(tmp), "| [CPU %d]: ", j); + if (strlcat(buf, tmp, size) >= size) + goto buffer_done; + + if (phba->nvmet_support) { + scnprintf(tmp, sizeof(tmp), + "XMT 0x%x CMPL 0x%x RCV 0x%x |", + c_stat->xmt_io, c_stat->cmpl_io, + c_stat->rcv_io); + if (strlcat(buf, tmp, size) >= size) + goto buffer_done; + } else { + scnprintf(tmp, sizeof(tmp), + "XMT 0x%x CMPL 0x%x |", + c_stat->xmt_io, c_stat->cmpl_io); + if (strlcat(buf, tmp, size) >= size) + goto buffer_done; + } } - /* Only display Hardware Qs with something */ + /* Check if nothing to display */ if (!tot_xmt && !tot_cmpl && !tot_rcv) continue; - len += scnprintf(buf + len, PAGE_SIZE - len, - "HDWQ %03d: ", i); - for (j = 0; j < LPFC_CHECK_CPU_CNT; j++) { - /* Only display non-zero counters */ - if (!qp->cpucheck_xmt_io[j] && - !qp->cpucheck_cmpl_io[j] && - 
!qp->cpucheck_rcv_io[j]) - continue; - if (phba->nvmet_support) { - len += scnprintf(buf + len, PAGE_SIZE - len, - "CPU %03d: %x/%x/%x ", j, - qp->cpucheck_rcv_io[j], - qp->cpucheck_xmt_io[j], - qp->cpucheck_cmpl_io[j]); - } else { - len += scnprintf(buf + len, PAGE_SIZE - len, - "CPU %03d: %x/%x ", j, - qp->cpucheck_xmt_io[j], - qp->cpucheck_cmpl_io[j]); - } - } - len += scnprintf(buf + len, PAGE_SIZE - len, - "Total: %x\n", tot_xmt); - if (len >= max_cnt) { - len += scnprintf(buf + len, PAGE_SIZE - len, - "Truncated ...\n"); - return len; + scnprintf(tmp, sizeof(tmp), "\t->\t[HDWQ Total: "); + if (strlcat(buf, tmp, size) >= size) + goto buffer_done; + + if (phba->nvmet_support) { + scnprintf(tmp, sizeof(tmp), + "XMT 0x%x CMPL 0x%x RCV 0x%x]\n\n", + tot_xmt, tot_cmpl, tot_rcv); + if (strlcat(buf, tmp, size) >= size) + goto buffer_done; + } else { + scnprintf(tmp, sizeof(tmp), + "XMT 0x%x CMPL 0x%x]\n\n", + tot_xmt, tot_cmpl); + if (strlcat(buf, tmp, size) >= size) + goto buffer_done; } } + +buffer_done: + len = strnlen(buf, size); return len; } @@ -2921,7 +2959,7 @@ lpfc_debugfs_nvmeio_trc_write(struct file *file, const char __user *buf, } static int -lpfc_debugfs_cpucheck_open(struct inode *inode, struct file *file) +lpfc_debugfs_hdwqstat_open(struct inode *inode, struct file *file) { struct lpfc_vport *vport = inode->i_private; struct lpfc_debug *debug; @@ -2932,14 +2970,14 @@ lpfc_debugfs_cpucheck_open(struct inode *inode, struct file *file) goto out; /* Round to page boundary */ - debug->buffer = kmalloc(LPFC_CPUCHECK_SIZE, GFP_KERNEL); + debug->buffer = kcalloc(1, LPFC_SCSISTAT_SIZE, GFP_KERNEL); if (!debug->buffer) { kfree(debug); goto out; } - debug->len = lpfc_debugfs_cpucheck_data(vport, debug->buffer, - LPFC_CPUCHECK_SIZE); + debug->len = lpfc_debugfs_hdwqstat_data(vport, debug->buffer, + LPFC_SCSISTAT_SIZE); debug->i_private = inode->i_private; file->private_data = debug; @@ -2950,16 +2988,16 @@ lpfc_debugfs_cpucheck_open(struct inode *inode, struct 
file *file) } static ssize_t -lpfc_debugfs_cpucheck_write(struct file *file, const char __user *buf, +lpfc_debugfs_hdwqstat_write(struct file *file, const char __user *buf, size_t nbytes, loff_t *ppos) { struct lpfc_debug *debug = file->private_data; struct lpfc_vport *vport = (struct lpfc_vport *)debug->i_private; struct lpfc_hba *phba = vport->phba; - struct lpfc_sli4_hdw_queue *qp; + struct lpfc_hdwq_stat *c_stat; char mybuf[64]; char *pbuf; - int i, j; + int i; if (nbytes > 64) nbytes = 64; @@ -2972,41 +3010,39 @@ lpfc_debugfs_cpucheck_write(struct file *file, const char __user *buf, if ((strncmp(pbuf, "on", sizeof("on") - 1) == 0)) { if (phba->nvmet_support) - phba->cpucheck_on |= LPFC_CHECK_NVMET_IO; + phba->hdwqstat_on |= LPFC_CHECK_NVMET_IO; else - phba->cpucheck_on |= (LPFC_CHECK_NVME_IO | + phba->hdwqstat_on |= (LPFC_CHECK_NVME_IO | LPFC_CHECK_SCSI_IO); return strlen(pbuf); } else if ((strncmp(pbuf, "nvme_on", sizeof("nvme_on") - 1) == 0)) { if (phba->nvmet_support) - phba->cpucheck_on |= LPFC_CHECK_NVMET_IO; + phba->hdwqstat_on |= LPFC_CHECK_NVMET_IO; else - phba->cpucheck_on |= LPFC_CHECK_NVME_IO; + phba->hdwqstat_on |= LPFC_CHECK_NVME_IO; return strlen(pbuf); } else if ((strncmp(pbuf, "scsi_on", sizeof("scsi_on") - 1) == 0)) { - phba->cpucheck_on |= LPFC_CHECK_SCSI_IO; + if (!phba->nvmet_support) + phba->hdwqstat_on |= LPFC_CHECK_SCSI_IO; return strlen(pbuf); - } else if ((strncmp(pbuf, "rcv", - sizeof("rcv") - 1) == 0)) { - if (phba->nvmet_support) - phba->cpucheck_on |= LPFC_CHECK_NVMET_RCV; - else - return -EINVAL; + } else if ((strncmp(pbuf, "nvme_off", sizeof("nvme_off") - 1) == 0)) { + phba->hdwqstat_on &= ~(LPFC_CHECK_NVME_IO | + LPFC_CHECK_NVMET_IO); + return strlen(pbuf); + } else if ((strncmp(pbuf, "scsi_off", sizeof("scsi_off") - 1) == 0)) { + phba->hdwqstat_on &= ~LPFC_CHECK_SCSI_IO; return strlen(pbuf); } else if ((strncmp(pbuf, "off", sizeof("off") - 1) == 0)) { - phba->cpucheck_on = LPFC_CHECK_OFF; + phba->hdwqstat_on = LPFC_CHECK_OFF; 
return strlen(pbuf); } else if ((strncmp(pbuf, "zero", sizeof("zero") - 1) == 0)) { - for (i = 0; i < phba->cfg_hdw_queue; i++) { - qp = &phba->sli4_hba.hdwq[i]; - - for (j = 0; j < LPFC_CHECK_CPU_CNT; j++) { - qp->cpucheck_rcv_io[j] = 0; - qp->cpucheck_xmt_io[j] = 0; - qp->cpucheck_cmpl_io[j] = 0; - } + for_each_present_cpu(i) { + c_stat = per_cpu_ptr(phba->sli4_hba.c_stat, i); + c_stat->xmt_io = 0; + c_stat->cmpl_io = 0; + c_stat->rcv_io = 0; } return strlen(pbuf); } @@ -5451,13 +5487,13 @@ static const struct file_operations lpfc_debugfs_op_nvmeio_trc = { .release = lpfc_debugfs_release, }; -#undef lpfc_debugfs_op_cpucheck -static const struct file_operations lpfc_debugfs_op_cpucheck = { +#undef lpfc_debugfs_op_hdwqstat +static const struct file_operations lpfc_debugfs_op_hdwqstat = { .owner = THIS_MODULE, - .open = lpfc_debugfs_cpucheck_open, + .open = lpfc_debugfs_hdwqstat_open, .llseek = lpfc_debugfs_lseek, .read = lpfc_debugfs_read, - .write = lpfc_debugfs_cpucheck_write, + .write = lpfc_debugfs_hdwqstat_write, .release = lpfc_debugfs_release, }; @@ -6081,11 +6117,11 @@ lpfc_debugfs_initialize(struct lpfc_vport *vport) vport->vport_debugfs_root, vport, &lpfc_debugfs_op_nvmektime); - snprintf(name, sizeof(name), "cpucheck"); - vport->debug_cpucheck = + snprintf(name, sizeof(name), "hdwqstat"); + vport->debug_hdwqstat = debugfs_create_file(name, 0644, vport->vport_debugfs_root, - vport, &lpfc_debugfs_op_cpucheck); + vport, &lpfc_debugfs_op_hdwqstat); /* * The following section is for additional directories/files for the @@ -6219,8 +6255,8 @@ lpfc_debugfs_terminate(struct lpfc_vport *vport) debugfs_remove(vport->debug_nvmektime); /* nvmektime */ vport->debug_nvmektime = NULL; - debugfs_remove(vport->debug_cpucheck); /* cpucheck */ - vport->debug_cpucheck = NULL; + debugfs_remove(vport->debug_hdwqstat); /* hdwqstat */ + vport->debug_hdwqstat = NULL; if (vport->vport_debugfs_root) { debugfs_remove(vport->vport_debugfs_root); /* vportX */ diff --git 
a/drivers/scsi/lpfc/lpfc_debugfs.h b/drivers/scsi/lpfc/lpfc_debugfs.h index 20f2537af511..6643b9bfd4f3 100644 --- a/drivers/scsi/lpfc/lpfc_debugfs.h +++ b/drivers/scsi/lpfc/lpfc_debugfs.h @@ -47,7 +47,6 @@ /* nvmestat output buffer size */ #define LPFC_NVMESTAT_SIZE 8192 #define LPFC_NVMEKTIME_SIZE 8192 -#define LPFC_CPUCHECK_SIZE 8192 #define LPFC_NVMEIO_TRC_SIZE 8192 /* scsistat output buffer size */ diff --git a/drivers/scsi/lpfc/lpfc_init.c b/drivers/scsi/lpfc/lpfc_init.c index 1dadf247a0aa..4104bdcdbb6f 100644 --- a/drivers/scsi/lpfc/lpfc_init.c +++ b/drivers/scsi/lpfc/lpfc_init.c @@ -6951,6 +6951,17 @@ lpfc_sli4_driver_resource_setup(struct lpfc_hba *phba) rc = -ENOMEM; goto out_free_hba_cpu_map; } + +#ifdef CONFIG_SCSI_LPFC_DEBUG_FS + phba->sli4_hba.c_stat = alloc_percpu(struct lpfc_hdwq_stat); + if (!phba->sli4_hba.c_stat) { + lpfc_printf_log(phba, KERN_ERR, LOG_INIT, + "3332 Failed allocating per cpu hdwq stats\n"); + rc = -ENOMEM; + goto out_free_hba_eq_info; + } +#endif + /* * Enable sr-iov virtual functions if supported and configured * through the module parameter. 
@@ -6970,6 +6981,10 @@ lpfc_sli4_driver_resource_setup(struct lpfc_hba *phba) return 0; +#ifdef CONFIG_SCSI_LPFC_DEBUG_FS +out_free_hba_eq_info: + free_percpu(phba->sli4_hba.eq_info); +#endif out_free_hba_cpu_map: kfree(phba->sli4_hba.cpu_map); out_free_hba_eq_hdl: @@ -7008,6 +7023,9 @@ lpfc_sli4_driver_resource_unset(struct lpfc_hba *phba) struct lpfc_fcf_conn_entry *conn_entry, *next_conn_entry; free_percpu(phba->sli4_hba.eq_info); +#ifdef CONFIG_SCSI_LPFC_DEBUG_FS + free_percpu(phba->sli4_hba.c_stat); +#endif /* Free memory allocated for msi-x interrupt vector to CPU mapping */ kfree(phba->sli4_hba.cpu_map); @@ -10848,6 +10866,9 @@ lpfc_cpu_affinity_check(struct lpfc_hba *phba, int vectors) #ifdef CONFIG_X86 struct cpuinfo_x86 *cpuinfo; #endif +#ifdef CONFIG_SCSI_LPFC_DEBUG_FS + struct lpfc_hdwq_stat *c_stat; +#endif max_phys_id = 0; min_phys_id = LPFC_VECTOR_MAP_EMPTY; @@ -11099,10 +11120,17 @@ lpfc_cpu_affinity_check(struct lpfc_hba *phba, int vectors) idx = 0; for_each_possible_cpu(cpu) { cpup = &phba->sli4_hba.cpu_map[cpu]; +#ifdef CONFIG_SCSI_LPFC_DEBUG_FS + c_stat = per_cpu_ptr(phba->sli4_hba.c_stat, cpu); + c_stat->hdwq_no = cpup->hdwq; +#endif if (cpup->hdwq != LPFC_VECTOR_MAP_EMPTY) continue; cpup->hdwq = idx++ % phba->cfg_hdw_queue; +#ifdef CONFIG_SCSI_LPFC_DEBUG_FS + c_stat->hdwq_no = cpup->hdwq; +#endif lpfc_printf_log(phba, KERN_INFO, LOG_INIT, "3340 Set Affinity: not present " "CPU %d hdwq %d\n", diff --git a/drivers/scsi/lpfc/lpfc_nvme.c b/drivers/scsi/lpfc/lpfc_nvme.c index 32b28651039e..38936b7ce043 100644 --- a/drivers/scsi/lpfc/lpfc_nvme.c +++ b/drivers/scsi/lpfc/lpfc_nvme.c @@ -1012,6 +1012,9 @@ lpfc_nvme_io_cmd_wqe_cmpl(struct lpfc_hba *phba, struct lpfc_iocbq *pwqeIn, uint32_t code, status, idx; uint16_t cid, sqhd, data; uint32_t *ptr; +#ifdef CONFIG_SCSI_LPFC_DEBUG_FS + int cpu; +#endif /* Sanity check on return of outstanding command */ if (!lpfc_ncmd) { @@ -1184,19 +1187,15 @@ lpfc_nvme_io_cmd_wqe_cmpl(struct lpfc_hba *phba, struct 
lpfc_iocbq *pwqeIn, phba->ktime_last_cmd = lpfc_ncmd->ts_data_nvme; lpfc_nvme_ktime(phba, lpfc_ncmd); } - if (unlikely(phba->cpucheck_on & LPFC_CHECK_NVME_IO)) { - uint32_t cpu; - idx = lpfc_ncmd->cur_iocbq.hba_wqidx; + if (unlikely(phba->hdwqstat_on & LPFC_CHECK_NVME_IO)) { cpu = raw_smp_processor_id(); - if (cpu < LPFC_CHECK_CPU_CNT) { - if (lpfc_ncmd->cpu != cpu) - lpfc_printf_vlog(vport, - KERN_INFO, LOG_NVME_IOERR, - "6701 CPU Check cmpl: " - "cpu %d expect %d\n", - cpu, lpfc_ncmd->cpu); - phba->sli4_hba.hdwq[idx].cpucheck_cmpl_io[cpu]++; - } + this_cpu_inc(phba->sli4_hba.c_stat->cmpl_io); + if (lpfc_ncmd->cpu != cpu) + lpfc_printf_vlog(vport, + KERN_INFO, LOG_NVME_IOERR, + "6701 CPU Check cmpl: " + "cpu %d expect %d\n", + cpu, lpfc_ncmd->cpu); } #endif @@ -1745,19 +1744,17 @@ lpfc_nvme_fcp_io_submit(struct nvme_fc_local_port *pnvme_lport, if (lpfc_ncmd->ts_cmd_start) lpfc_ncmd->ts_cmd_wqput = ktime_get_ns(); - if (phba->cpucheck_on & LPFC_CHECK_NVME_IO) { + if (phba->hdwqstat_on & LPFC_CHECK_NVME_IO) { cpu = raw_smp_processor_id(); - if (cpu < LPFC_CHECK_CPU_CNT) { - lpfc_ncmd->cpu = cpu; - if (idx != cpu) - lpfc_printf_vlog(vport, - KERN_INFO, LOG_NVME_IOERR, - "6702 CPU Check cmd: " - "cpu %d wq %d\n", - lpfc_ncmd->cpu, - lpfc_queue_info->index); - phba->sli4_hba.hdwq[idx].cpucheck_xmt_io[cpu]++; - } + this_cpu_inc(phba->sli4_hba.c_stat->xmt_io); + lpfc_ncmd->cpu = cpu; + if (idx != cpu) + lpfc_printf_vlog(vport, + KERN_INFO, LOG_NVME_IOERR, + "6702 CPU Check cmd: " + "cpu %d wq %d\n", + lpfc_ncmd->cpu, + lpfc_queue_info->index); } #endif return 0; diff --git a/drivers/scsi/lpfc/lpfc_nvmet.c b/drivers/scsi/lpfc/lpfc_nvmet.c index ae89d1450912..565419bf8d74 100644 --- a/drivers/scsi/lpfc/lpfc_nvmet.c +++ b/drivers/scsi/lpfc/lpfc_nvmet.c @@ -707,7 +707,7 @@ lpfc_nvmet_xmt_fcp_op_cmp(struct lpfc_hba *phba, struct lpfc_iocbq *cmdwqe, struct lpfc_nvmet_rcv_ctx *ctxp; uint32_t status, result, op, start_clean, logerr; #ifdef CONFIG_SCSI_LPFC_DEBUG_FS - uint32_t 
id; + int id; #endif ctxp = cmdwqe->context2; @@ -814,16 +814,14 @@ lpfc_nvmet_xmt_fcp_op_cmp(struct lpfc_hba *phba, struct lpfc_iocbq *cmdwqe, rsp->done(rsp); } #ifdef CONFIG_SCSI_LPFC_DEBUG_FS - if (phba->cpucheck_on & LPFC_CHECK_NVMET_IO) { + if (phba->hdwqstat_on & LPFC_CHECK_NVMET_IO) { id = raw_smp_processor_id(); - if (id < LPFC_CHECK_CPU_CNT) { - if (ctxp->cpu != id) - lpfc_printf_log(phba, KERN_INFO, LOG_NVME_IOERR, - "6704 CPU Check cmdcmpl: " - "cpu %d expect %d\n", - id, ctxp->cpu); - phba->sli4_hba.hdwq[rsp->hwqid].cpucheck_cmpl_io[id]++; - } + this_cpu_inc(phba->sli4_hba.c_stat->cmpl_io); + if (ctxp->cpu != id) + lpfc_printf_log(phba, KERN_INFO, LOG_NVME_IOERR, + "6704 CPU Check cmdcmpl: " + "cpu %d expect %d\n", + id, ctxp->cpu); } #endif } @@ -931,6 +929,9 @@ lpfc_nvmet_xmt_fcp_op(struct nvmet_fc_target_port *tgtport, struct lpfc_sli_ring *pring; unsigned long iflags; int rc; +#ifdef CONFIG_SCSI_LPFC_DEBUG_FS + int id; +#endif if (phba->pport->load_flag & FC_UNLOADING) { rc = -ENODEV; @@ -954,16 +955,14 @@ lpfc_nvmet_xmt_fcp_op(struct nvmet_fc_target_port *tgtport, if (!ctxp->hdwq) ctxp->hdwq = &phba->sli4_hba.hdwq[rsp->hwqid]; - if (phba->cpucheck_on & LPFC_CHECK_NVMET_IO) { - int id = raw_smp_processor_id(); - if (id < LPFC_CHECK_CPU_CNT) { - if (rsp->hwqid != id) - lpfc_printf_log(phba, KERN_INFO, LOG_NVME_IOERR, - "6705 CPU Check OP: " - "cpu %d expect %d\n", - id, rsp->hwqid); - phba->sli4_hba.hdwq[rsp->hwqid].cpucheck_xmt_io[id]++; - } + if (phba->hdwqstat_on & LPFC_CHECK_NVMET_IO) { + id = raw_smp_processor_id(); + this_cpu_inc(phba->sli4_hba.c_stat->xmt_io); + if (rsp->hwqid != id) + lpfc_printf_log(phba, KERN_INFO, LOG_NVME_IOERR, + "6705 CPU Check OP: " + "cpu %d expect %d\n", + id, rsp->hwqid); ctxp->cpu = id; /* Setup cpu for cmpl check */ } #endif @@ -2270,15 +2269,13 @@ lpfc_nvmet_unsol_fcp_buffer(struct lpfc_hba *phba, size = nvmebuf->bytes_recv; #ifdef CONFIG_SCSI_LPFC_DEBUG_FS - if (phba->cpucheck_on & LPFC_CHECK_NVMET_RCV) { - if 
(current_cpu < LPFC_CHECK_CPU_CNT) { - if (idx != current_cpu) - lpfc_printf_log(phba, KERN_INFO, LOG_NVME_IOERR, - "6703 CPU Check rcv: " - "cpu %d expect %d\n", - current_cpu, idx); - phba->sli4_hba.hdwq[idx].cpucheck_rcv_io[current_cpu]++; - } + if (phba->hdwqstat_on & LPFC_CHECK_NVMET_IO) { + this_cpu_inc(phba->sli4_hba.c_stat->rcv_io); + if (idx != current_cpu) + lpfc_printf_log(phba, KERN_INFO, LOG_NVME_IOERR, + "6703 CPU Check rcv: " + "cpu %d expect %d\n", + current_cpu, idx); } #endif diff --git a/drivers/scsi/lpfc/lpfc_scsi.c b/drivers/scsi/lpfc/lpfc_scsi.c index be62795715f7..3caa4fd2b55f 100644 --- a/drivers/scsi/lpfc/lpfc_scsi.c +++ b/drivers/scsi/lpfc/lpfc_scsi.c @@ -3805,9 +3805,6 @@ lpfc_scsi_cmd_iocb_cmpl(struct lpfc_hba *phba, struct lpfc_iocbq *pIocbIn, struct Scsi_Host *shost; int idx; uint32_t logit = LOG_FCP; -#ifdef CONFIG_SCSI_LPFC_DEBUG_FS - int cpu; -#endif /* Guard against abort handler being called at same time */ spin_lock(&lpfc_cmd->buf_lock); @@ -3826,11 +3823,8 @@ lpfc_scsi_cmd_iocb_cmpl(struct lpfc_hba *phba, struct lpfc_iocbq *pIocbIn, phba->sli4_hba.hdwq[idx].scsi_cstat.io_cmpls++; #ifdef CONFIG_SCSI_LPFC_DEBUG_FS - if (unlikely(phba->cpucheck_on & LPFC_CHECK_SCSI_IO)) { - cpu = raw_smp_processor_id(); - if (cpu < LPFC_CHECK_CPU_CNT && phba->sli4_hba.hdwq) - phba->sli4_hba.hdwq[idx].cpucheck_cmpl_io[cpu]++; - } + if (unlikely(phba->hdwqstat_on & LPFC_CHECK_SCSI_IO)) + this_cpu_inc(phba->sli4_hba.c_stat->cmpl_io); #endif shost = cmd->device->host; @@ -4503,9 +4497,6 @@ lpfc_queuecommand(struct Scsi_Host *shost, struct scsi_cmnd *cmnd) struct lpfc_io_buf *lpfc_cmd; struct fc_rport *rport = starget_to_rport(scsi_target(cmnd->device)); int err, idx; -#ifdef CONFIG_SCSI_LPFC_DEBUG_FS - int cpu; -#endif rdata = lpfc_rport_data_from_scsi_device(cmnd->device); @@ -4626,14 +4617,8 @@ lpfc_queuecommand(struct Scsi_Host *shost, struct scsi_cmnd *cmnd) lpfc_scsi_prep_cmnd(vport, lpfc_cmd, ndlp); #ifdef CONFIG_SCSI_LPFC_DEBUG_FS - if 
(unlikely(phba->cpucheck_on & LPFC_CHECK_SCSI_IO)) { - cpu = raw_smp_processor_id(); - if (cpu < LPFC_CHECK_CPU_CNT) { - struct lpfc_sli4_hdw_queue *hdwq = - &phba->sli4_hba.hdwq[lpfc_cmd->hdwq_no]; - hdwq->cpucheck_xmt_io[cpu]++; - } - } + if (unlikely(phba->hdwqstat_on & LPFC_CHECK_SCSI_IO)) + this_cpu_inc(phba->sli4_hba.c_stat->xmt_io); #endif err = lpfc_sli_issue_iocb(phba, LPFC_FCP_RING, &lpfc_cmd->cur_iocbq, SLI_IOCB_RET_IOCB); diff --git a/drivers/scsi/lpfc/lpfc_sli4.h b/drivers/scsi/lpfc/lpfc_sli4.h index d963ca871383..8da7429e385a 100644 --- a/drivers/scsi/lpfc/lpfc_sli4.h +++ b/drivers/scsi/lpfc/lpfc_sli4.h @@ -697,13 +697,6 @@ struct lpfc_sli4_hdw_queue { struct lpfc_lock_stat lock_conflict; #endif -#ifdef CONFIG_SCSI_LPFC_DEBUG_FS -#define LPFC_CHECK_CPU_CNT 128 - uint32_t cpucheck_rcv_io[LPFC_CHECK_CPU_CNT]; - uint32_t cpucheck_xmt_io[LPFC_CHECK_CPU_CNT]; - uint32_t cpucheck_cmpl_io[LPFC_CHECK_CPU_CNT]; -#endif - /* Per HDWQ pool resources */ struct list_head sgl_list; struct list_head cmd_rsp_buf_list; @@ -740,6 +733,15 @@ struct lpfc_sli4_hdw_queue { #define lpfc_qp_spin_lock(lock, qp, lstat) spin_lock(lock) #endif +#ifdef CONFIG_SCSI_LPFC_DEBUG_FS +struct lpfc_hdwq_stat { + u32 hdwq_no; + u32 rcv_io; + u32 xmt_io; + u32 cmpl_io; +}; +#endif + struct lpfc_sli4_hba { void __iomem *conf_regs_memmap_p; /* Kernel memory mapped address for * config space registers @@ -921,6 +923,9 @@ struct lpfc_sli4_hba { struct cpumask numa_mask; uint16_t curr_disp_cpu; struct lpfc_eq_intr_info __percpu *eq_info; +#ifdef CONFIG_SCSI_LPFC_DEBUG_FS + struct lpfc_hdwq_stat __percpu *c_stat; +#endif uint32_t conf_trunk; #define lpfc_conf_trunk_port0_WORD conf_trunk #define lpfc_conf_trunk_port0_SHIFT 0 From 2fcbc569b9f51810d01f23b780ce5f5ef72410b9 Mon Sep 17 00:00:00 2001 From: James Smart Date: Sun, 22 Mar 2020 11:13:02 -0700 Subject: [PATCH 033/280] scsi: lpfc: Make debugfs ktime stats generic for NVME and SCSI Currently driver ktime stats, measuring code paths, is 
NVME-specific. Convert the stats routines such that the code paths are generic, providing status for NVME and SCSI. Added ktime stat calls in SCSI queuecommand and cmpl routines. Link: https://lore.kernel.org/r/20200322181304.37655-11-jsmart2021@gmail.com Signed-off-by: James Smart Signed-off-by: Dick Kennedy Signed-off-by: Martin K. Petersen --- drivers/scsi/lpfc/lpfc.h | 2 +- drivers/scsi/lpfc/lpfc_crtn.h | 1 + drivers/scsi/lpfc/lpfc_debugfs.c | 129 +++++++++++++++++++++++++------ drivers/scsi/lpfc/lpfc_debugfs.h | 2 +- drivers/scsi/lpfc/lpfc_nvme.c | 88 +-------------------- drivers/scsi/lpfc/lpfc_scsi.c | 23 ++++++ drivers/scsi/lpfc/lpfc_sli.h | 2 +- 7 files changed, 137 insertions(+), 110 deletions(-) diff --git a/drivers/scsi/lpfc/lpfc.h b/drivers/scsi/lpfc/lpfc.h index e4924b9fa69c..747eda6ff8a4 100644 --- a/drivers/scsi/lpfc/lpfc.h +++ b/drivers/scsi/lpfc/lpfc.h @@ -480,7 +480,7 @@ struct lpfc_vport { struct dentry *debug_nodelist; struct dentry *debug_nvmestat; struct dentry *debug_scsistat; - struct dentry *debug_nvmektime; + struct dentry *debug_ioktime; struct dentry *debug_hdwqstat; struct dentry *vport_debugfs_root; struct lpfc_debugfs_trc *disc_trc; diff --git a/drivers/scsi/lpfc/lpfc_crtn.h b/drivers/scsi/lpfc/lpfc_crtn.h index a0ef3bac0612..76dc8d9493d2 100644 --- a/drivers/scsi/lpfc/lpfc_crtn.h +++ b/drivers/scsi/lpfc/lpfc_crtn.h @@ -588,6 +588,7 @@ struct lpfc_io_buf *lpfc_get_io_buf(struct lpfc_hba *phba, int); void lpfc_release_io_buf(struct lpfc_hba *phba, struct lpfc_io_buf *ncmd, struct lpfc_sli4_hdw_queue *qp); +void lpfc_io_ktime(struct lpfc_hba *phba, struct lpfc_io_buf *ncmd); void lpfc_nvme_cmd_template(void); void lpfc_nvmet_cmd_template(void); void lpfc_nvme_cancel_iocb(struct lpfc_hba *phba, struct lpfc_iocbq *pwqeIn); diff --git a/drivers/scsi/lpfc/lpfc_debugfs.c b/drivers/scsi/lpfc/lpfc_debugfs.c index 1b8be1006cbe..8a6e02aa553f 100644 --- a/drivers/scsi/lpfc/lpfc_debugfs.c +++ b/drivers/scsi/lpfc/lpfc_debugfs.c @@ -1300,8 +1300,88 
@@ lpfc_debugfs_scsistat_data(struct lpfc_vport *vport, char *buf, int size) return len; } +void +lpfc_io_ktime(struct lpfc_hba *phba, struct lpfc_io_buf *lpfc_cmd) +{ + uint64_t seg1, seg2, seg3, seg4; + uint64_t segsum; + + if (!lpfc_cmd->ts_last_cmd || + !lpfc_cmd->ts_cmd_start || + !lpfc_cmd->ts_cmd_wqput || + !lpfc_cmd->ts_isr_cmpl || + !lpfc_cmd->ts_data_io) + return; + + if (lpfc_cmd->ts_data_io < lpfc_cmd->ts_cmd_start) + return; + if (lpfc_cmd->ts_cmd_start < lpfc_cmd->ts_last_cmd) + return; + if (lpfc_cmd->ts_cmd_wqput < lpfc_cmd->ts_cmd_start) + return; + if (lpfc_cmd->ts_isr_cmpl < lpfc_cmd->ts_cmd_wqput) + return; + if (lpfc_cmd->ts_data_io < lpfc_cmd->ts_isr_cmpl) + return; + /* + * Segment 1 - Time from Last FCP command cmpl is handed + * off to NVME Layer to start of next command. + * Segment 2 - Time from Driver receives a IO cmd start + * from NVME Layer to WQ put is done on IO cmd. + * Segment 3 - Time from Driver WQ put is done on IO cmd + * to MSI-X ISR for IO cmpl. + * Segment 4 - Time from MSI-X ISR for IO cmpl to when + * cmpl is handled off to the NVME Layer. 
+ */ + seg1 = lpfc_cmd->ts_cmd_start - lpfc_cmd->ts_last_cmd; + if (seg1 > 5000000) /* 5 ms - for sequential IOs only */ + seg1 = 0; + + /* Calculate times relative to start of IO */ + seg2 = (lpfc_cmd->ts_cmd_wqput - lpfc_cmd->ts_cmd_start); + segsum = seg2; + seg3 = lpfc_cmd->ts_isr_cmpl - lpfc_cmd->ts_cmd_start; + if (segsum > seg3) + return; + seg3 -= segsum; + segsum += seg3; + + seg4 = lpfc_cmd->ts_data_io - lpfc_cmd->ts_cmd_start; + if (segsum > seg4) + return; + seg4 -= segsum; + + phba->ktime_data_samples++; + phba->ktime_seg1_total += seg1; + if (seg1 < phba->ktime_seg1_min) + phba->ktime_seg1_min = seg1; + else if (seg1 > phba->ktime_seg1_max) + phba->ktime_seg1_max = seg1; + phba->ktime_seg2_total += seg2; + if (seg2 < phba->ktime_seg2_min) + phba->ktime_seg2_min = seg2; + else if (seg2 > phba->ktime_seg2_max) + phba->ktime_seg2_max = seg2; + phba->ktime_seg3_total += seg3; + if (seg3 < phba->ktime_seg3_min) + phba->ktime_seg3_min = seg3; + else if (seg3 > phba->ktime_seg3_max) + phba->ktime_seg3_max = seg3; + phba->ktime_seg4_total += seg4; + if (seg4 < phba->ktime_seg4_min) + phba->ktime_seg4_min = seg4; + else if (seg4 > phba->ktime_seg4_max) + phba->ktime_seg4_max = seg4; + + lpfc_cmd->ts_last_cmd = 0; + lpfc_cmd->ts_cmd_start = 0; + lpfc_cmd->ts_cmd_wqput = 0; + lpfc_cmd->ts_isr_cmpl = 0; + lpfc_cmd->ts_data_io = 0; +} + /** - * lpfc_debugfs_nvmektime_data - Dump target node list to a buffer + * lpfc_debugfs_ioktime_data - Dump target node list to a buffer * @vport: The vport to gather target node info from. * @buf: The buffer to dump log into. * @size: The maximum amount of data to process. @@ -1314,13 +1394,13 @@ lpfc_debugfs_scsistat_data(struct lpfc_vport *vport, char *buf, int size) * not exceed @size. 
**/ static int -lpfc_debugfs_nvmektime_data(struct lpfc_vport *vport, char *buf, int size) +lpfc_debugfs_ioktime_data(struct lpfc_vport *vport, char *buf, int size) { struct lpfc_hba *phba = vport->phba; int len = 0; if (phba->nvmet_support == 0) { - /* NVME Initiator */ + /* Initiator */ len += scnprintf(buf + len, PAGE_SIZE - len, "ktime %s: Total Samples: %lld\n", (phba->ktime_on ? "Enabled" : "Disabled"), @@ -1330,8 +1410,8 @@ lpfc_debugfs_nvmektime_data(struct lpfc_vport *vport, char *buf, int size) len += scnprintf( buf + len, PAGE_SIZE - len, - "Segment 1: Last NVME Cmd cmpl " - "done -to- Start of next NVME cnd (in driver)\n"); + "Segment 1: Last Cmd cmpl " + "done -to- Start of next Cmd (in driver)\n"); len += scnprintf( buf + len, PAGE_SIZE - len, "avg:%08lld min:%08lld max %08lld\n", @@ -1341,7 +1421,7 @@ lpfc_debugfs_nvmektime_data(struct lpfc_vport *vport, char *buf, int size) phba->ktime_seg1_max); len += scnprintf( buf + len, PAGE_SIZE - len, - "Segment 2: Driver start of NVME cmd " + "Segment 2: Driver start of Cmd " "-to- Firmware WQ doorbell\n"); len += scnprintf( buf + len, PAGE_SIZE - len, @@ -1364,7 +1444,7 @@ lpfc_debugfs_nvmektime_data(struct lpfc_vport *vport, char *buf, int size) len += scnprintf( buf + len, PAGE_SIZE - len, "Segment 4: MSI-X ISR cmpl -to- " - "NVME cmpl done\n"); + "Cmd cmpl done\n"); len += scnprintf( buf + len, PAGE_SIZE - len, "avg:%08lld min:%08lld max %08lld\n", @@ -2727,7 +2807,7 @@ lpfc_debugfs_scsistat_write(struct file *file, const char __user *buf, } static int -lpfc_debugfs_nvmektime_open(struct inode *inode, struct file *file) +lpfc_debugfs_ioktime_open(struct inode *inode, struct file *file) { struct lpfc_vport *vport = inode->i_private; struct lpfc_debug *debug; @@ -2738,14 +2818,14 @@ lpfc_debugfs_nvmektime_open(struct inode *inode, struct file *file) goto out; /* Round to page boundary */ - debug->buffer = kmalloc(LPFC_NVMEKTIME_SIZE, GFP_KERNEL); + debug->buffer = kmalloc(LPFC_IOKTIME_SIZE, GFP_KERNEL); if 
(!debug->buffer) { kfree(debug); goto out; } - debug->len = lpfc_debugfs_nvmektime_data(vport, debug->buffer, - LPFC_NVMEKTIME_SIZE); + debug->len = lpfc_debugfs_ioktime_data(vport, debug->buffer, + LPFC_IOKTIME_SIZE); debug->i_private = inode->i_private; file->private_data = debug; @@ -2756,8 +2836,8 @@ lpfc_debugfs_nvmektime_open(struct inode *inode, struct file *file) } static ssize_t -lpfc_debugfs_nvmektime_write(struct file *file, const char __user *buf, - size_t nbytes, loff_t *ppos) +lpfc_debugfs_ioktime_write(struct file *file, const char __user *buf, + size_t nbytes, loff_t *ppos) { struct lpfc_debug *debug = file->private_data; struct lpfc_vport *vport = (struct lpfc_vport *)debug->i_private; @@ -5467,13 +5547,13 @@ static const struct file_operations lpfc_debugfs_op_scsistat = { .release = lpfc_debugfs_release, }; -#undef lpfc_debugfs_op_nvmektime -static const struct file_operations lpfc_debugfs_op_nvmektime = { +#undef lpfc_debugfs_op_ioktime +static const struct file_operations lpfc_debugfs_op_ioktime = { .owner = THIS_MODULE, - .open = lpfc_debugfs_nvmektime_open, + .open = lpfc_debugfs_ioktime_open, .llseek = lpfc_debugfs_lseek, .read = lpfc_debugfs_read, - .write = lpfc_debugfs_nvmektime_write, + .write = lpfc_debugfs_ioktime_write, .release = lpfc_debugfs_release, }; @@ -6111,11 +6191,16 @@ lpfc_debugfs_initialize(struct lpfc_vport *vport) goto debug_failed; } - snprintf(name, sizeof(name), "nvmektime"); - vport->debug_nvmektime = + snprintf(name, sizeof(name), "ioktime"); + vport->debug_ioktime = debugfs_create_file(name, 0644, vport->vport_debugfs_root, - vport, &lpfc_debugfs_op_nvmektime); + vport, &lpfc_debugfs_op_ioktime); + if (!vport->debug_ioktime) { + lpfc_printf_vlog(vport, KERN_ERR, LOG_INIT, + "0815 Cannot create debugfs ioktime\n"); + goto debug_failed; + } snprintf(name, sizeof(name), "hdwqstat"); vport->debug_hdwqstat = @@ -6252,8 +6337,8 @@ lpfc_debugfs_terminate(struct lpfc_vport *vport) debugfs_remove(vport->debug_scsistat); /* 
scsistat */ vport->debug_scsistat = NULL; - debugfs_remove(vport->debug_nvmektime); /* nvmektime */ - vport->debug_nvmektime = NULL; + debugfs_remove(vport->debug_ioktime); /* ioktime */ + vport->debug_ioktime = NULL; debugfs_remove(vport->debug_hdwqstat); /* hdwqstat */ vport->debug_hdwqstat = NULL; diff --git a/drivers/scsi/lpfc/lpfc_debugfs.h b/drivers/scsi/lpfc/lpfc_debugfs.h index 6643b9bfd4f3..7ab6d3b08698 100644 --- a/drivers/scsi/lpfc/lpfc_debugfs.h +++ b/drivers/scsi/lpfc/lpfc_debugfs.h @@ -46,7 +46,7 @@ /* nvmestat output buffer size */ #define LPFC_NVMESTAT_SIZE 8192 -#define LPFC_NVMEKTIME_SIZE 8192 +#define LPFC_IOKTIME_SIZE 8192 #define LPFC_NVMEIO_TRC_SIZE 8192 /* scsistat output buffer size */ diff --git a/drivers/scsi/lpfc/lpfc_nvme.c b/drivers/scsi/lpfc/lpfc_nvme.c index 38936b7ce043..0db052a5f542 100644 --- a/drivers/scsi/lpfc/lpfc_nvme.c +++ b/drivers/scsi/lpfc/lpfc_nvme.c @@ -899,88 +899,6 @@ lpfc_nvme_adj_fcp_sgls(struct lpfc_vport *vport, sgl->sge_len = cpu_to_le32(nCmd->rsplen); } -#ifdef CONFIG_SCSI_LPFC_DEBUG_FS -static void -lpfc_nvme_ktime(struct lpfc_hba *phba, - struct lpfc_io_buf *lpfc_ncmd) -{ - uint64_t seg1, seg2, seg3, seg4; - uint64_t segsum; - - if (!lpfc_ncmd->ts_last_cmd || - !lpfc_ncmd->ts_cmd_start || - !lpfc_ncmd->ts_cmd_wqput || - !lpfc_ncmd->ts_isr_cmpl || - !lpfc_ncmd->ts_data_nvme) - return; - - if (lpfc_ncmd->ts_data_nvme < lpfc_ncmd->ts_cmd_start) - return; - if (lpfc_ncmd->ts_cmd_start < lpfc_ncmd->ts_last_cmd) - return; - if (lpfc_ncmd->ts_cmd_wqput < lpfc_ncmd->ts_cmd_start) - return; - if (lpfc_ncmd->ts_isr_cmpl < lpfc_ncmd->ts_cmd_wqput) - return; - if (lpfc_ncmd->ts_data_nvme < lpfc_ncmd->ts_isr_cmpl) - return; - /* - * Segment 1 - Time from Last FCP command cmpl is handed - * off to NVME Layer to start of next command. - * Segment 2 - Time from Driver receives a IO cmd start - * from NVME Layer to WQ put is done on IO cmd. - * Segment 3 - Time from Driver WQ put is done on IO cmd - * to MSI-X ISR for IO cmpl. 
- * Segment 4 - Time from MSI-X ISR for IO cmpl to when - * cmpl is handled off to the NVME Layer. - */ - seg1 = lpfc_ncmd->ts_cmd_start - lpfc_ncmd->ts_last_cmd; - if (seg1 > 5000000) /* 5 ms - for sequential IOs only */ - seg1 = 0; - - /* Calculate times relative to start of IO */ - seg2 = (lpfc_ncmd->ts_cmd_wqput - lpfc_ncmd->ts_cmd_start); - segsum = seg2; - seg3 = lpfc_ncmd->ts_isr_cmpl - lpfc_ncmd->ts_cmd_start; - if (segsum > seg3) - return; - seg3 -= segsum; - segsum += seg3; - - seg4 = lpfc_ncmd->ts_data_nvme - lpfc_ncmd->ts_cmd_start; - if (segsum > seg4) - return; - seg4 -= segsum; - - phba->ktime_data_samples++; - phba->ktime_seg1_total += seg1; - if (seg1 < phba->ktime_seg1_min) - phba->ktime_seg1_min = seg1; - else if (seg1 > phba->ktime_seg1_max) - phba->ktime_seg1_max = seg1; - phba->ktime_seg2_total += seg2; - if (seg2 < phba->ktime_seg2_min) - phba->ktime_seg2_min = seg2; - else if (seg2 > phba->ktime_seg2_max) - phba->ktime_seg2_max = seg2; - phba->ktime_seg3_total += seg3; - if (seg3 < phba->ktime_seg3_min) - phba->ktime_seg3_min = seg3; - else if (seg3 > phba->ktime_seg3_max) - phba->ktime_seg3_max = seg3; - phba->ktime_seg4_total += seg4; - if (seg4 < phba->ktime_seg4_min) - phba->ktime_seg4_min = seg4; - else if (seg4 > phba->ktime_seg4_max) - phba->ktime_seg4_max = seg4; - - lpfc_ncmd->ts_last_cmd = 0; - lpfc_ncmd->ts_cmd_start = 0; - lpfc_ncmd->ts_cmd_wqput = 0; - lpfc_ncmd->ts_isr_cmpl = 0; - lpfc_ncmd->ts_data_nvme = 0; -} -#endif /** * lpfc_nvme_io_cmd_wqe_cmpl - Complete an NVME-over-FCP IO @@ -1183,9 +1101,9 @@ lpfc_nvme_io_cmd_wqe_cmpl(struct lpfc_hba *phba, struct lpfc_iocbq *pwqeIn, #ifdef CONFIG_SCSI_LPFC_DEBUG_FS if (lpfc_ncmd->ts_cmd_start) { lpfc_ncmd->ts_isr_cmpl = pwqeIn->isr_timestamp; - lpfc_ncmd->ts_data_nvme = ktime_get_ns(); - phba->ktime_last_cmd = lpfc_ncmd->ts_data_nvme; - lpfc_nvme_ktime(phba, lpfc_ncmd); + lpfc_ncmd->ts_data_io = ktime_get_ns(); + phba->ktime_last_cmd = lpfc_ncmd->ts_data_io; + lpfc_io_ktime(phba, 
lpfc_ncmd); } if (unlikely(phba->hdwqstat_on & LPFC_CHECK_NVME_IO)) { cpu = raw_smp_processor_id(); diff --git a/drivers/scsi/lpfc/lpfc_scsi.c b/drivers/scsi/lpfc/lpfc_scsi.c index 3caa4fd2b55f..ad62fb3f3a54 100644 --- a/drivers/scsi/lpfc/lpfc_scsi.c +++ b/drivers/scsi/lpfc/lpfc_scsi.c @@ -4025,6 +4025,14 @@ lpfc_scsi_cmd_iocb_cmpl(struct lpfc_hba *phba, struct lpfc_iocbq *pIocbIn, lpfc_cmd->pCmd = NULL; spin_unlock(&lpfc_cmd->buf_lock); +#ifdef CONFIG_SCSI_LPFC_DEBUG_FS + if (lpfc_cmd->ts_cmd_start) { + lpfc_cmd->ts_isr_cmpl = pIocbIn->isr_timestamp; + lpfc_cmd->ts_data_io = ktime_get_ns(); + phba->ktime_last_cmd = lpfc_cmd->ts_data_io; + lpfc_io_ktime(phba, lpfc_cmd); + } +#endif /* The sdev is not guaranteed to be valid post scsi_done upcall. */ cmd->scsi_done(cmd); @@ -4497,6 +4505,12 @@ lpfc_queuecommand(struct Scsi_Host *shost, struct scsi_cmnd *cmnd) struct lpfc_io_buf *lpfc_cmd; struct fc_rport *rport = starget_to_rport(scsi_target(cmnd->device)); int err, idx; +#ifdef CONFIG_SCSI_LPFC_DEBUG_FS + uint64_t start = 0L; + + if (phba->ktime_on) + start = ktime_get_ns(); +#endif rdata = lpfc_rport_data_from_scsi_device(cmnd->device); @@ -4622,6 +4636,15 @@ lpfc_queuecommand(struct Scsi_Host *shost, struct scsi_cmnd *cmnd) #endif err = lpfc_sli_issue_iocb(phba, LPFC_FCP_RING, &lpfc_cmd->cur_iocbq, SLI_IOCB_RET_IOCB); +#ifdef CONFIG_SCSI_LPFC_DEBUG_FS + if (start) { + lpfc_cmd->ts_cmd_start = start; + lpfc_cmd->ts_last_cmd = phba->ktime_last_cmd; + lpfc_cmd->ts_cmd_wqput = ktime_get_ns(); + } else { + lpfc_cmd->ts_cmd_start = 0; + } +#endif if (err) { lpfc_printf_vlog(vport, KERN_INFO, LOG_FCP, "3376 FCP could not issue IOCB err %x" diff --git a/drivers/scsi/lpfc/lpfc_sli.h b/drivers/scsi/lpfc/lpfc_sli.h index 7bcf922a8be2..93d976ea8c5d 100644 --- a/drivers/scsi/lpfc/lpfc_sli.h +++ b/drivers/scsi/lpfc/lpfc_sli.h @@ -446,6 +446,6 @@ struct lpfc_io_buf { uint64_t ts_last_cmd; uint64_t ts_cmd_wqput; uint64_t ts_isr_cmpl; - uint64_t ts_data_nvme; + uint64_t 
ts_data_io; #endif }; From 0e75461a684f64c07da090da24491b6c589a7d15 Mon Sep 17 00:00:00 2001 From: James Smart Date: Sun, 22 Mar 2020 11:13:03 -0700 Subject: [PATCH 034/280] scsi: lpfc: Remove prototype FIPS/DSS options from SLI-3 During code review, identified dss feature that was a prototype only and was never productized in SLI3. They shouldn't be there and prevents reuse of the command areas. Remove any code in the driver to deal with dss, including code to deal with fips, which is associated with the dss feature. Link: https://lore.kernel.org/r/20200322181304.37655-12-jsmart2021@gmail.com Signed-off-by: James Smart Signed-off-by: Dick Kennedy Signed-off-by: Martin K. Petersen --- drivers/scsi/lpfc/lpfc.h | 9 +---- drivers/scsi/lpfc/lpfc_attr.c | 69 ----------------------------------- drivers/scsi/lpfc/lpfc_hw.h | 20 +++------- drivers/scsi/lpfc/lpfc_mbox.c | 2 - drivers/scsi/lpfc/lpfc_sli.c | 17 --------- 5 files changed, 8 insertions(+), 109 deletions(-) diff --git a/drivers/scsi/lpfc/lpfc.h b/drivers/scsi/lpfc/lpfc.h index 747eda6ff8a4..8e2a356911a9 100644 --- a/drivers/scsi/lpfc/lpfc.h +++ b/drivers/scsi/lpfc/lpfc.h @@ -207,8 +207,7 @@ typedef struct lpfc_vpd { } rev; struct { #ifdef __BIG_ENDIAN_BITFIELD - uint32_t rsvd3 :19; /* Reserved */ - uint32_t cdss : 1; /* Configure Data Security SLI */ + uint32_t rsvd3 :20; /* Reserved */ uint32_t rsvd2 : 3; /* Reserved */ uint32_t cbg : 1; /* Configure BlockGuard */ uint32_t cmv : 1; /* Configure Max VPIs */ @@ -230,8 +229,7 @@ typedef struct lpfc_vpd { uint32_t cmv : 1; /* Configure Max VPIs */ uint32_t cbg : 1; /* Configure BlockGuard */ uint32_t rsvd2 : 3; /* Reserved */ - uint32_t cdss : 1; /* Configure Data Security SLI */ - uint32_t rsvd3 :19; /* Reserved */ + uint32_t rsvd3 :20; /* Reserved */ #endif } sli3Feat; } lpfc_vpd_t; @@ -887,7 +885,6 @@ struct lpfc_hba { #define LPFC_INITIALIZE_LINK 0 /* do normal init_link mbox */ #define LPFC_DELAY_INIT_LINK 1 /* layered driver hold off */ #define 
LPFC_DELAY_INIT_LINK_INDEFINITELY 2 /* wait, manual intervention */ - uint32_t cfg_enable_dss; uint32_t cfg_fdmi_on; #define LPFC_FDMI_NO_SUPPORT 0 /* FDMI not supported */ #define LPFC_FDMI_SUPPORT 1 /* FDMI supported? */ @@ -1156,8 +1153,6 @@ struct lpfc_hba { uint32_t iocb_cnt; uint32_t iocb_max; atomic_t sdev_cnt; - uint8_t fips_spec_rev; - uint8_t fips_level; spinlock_t devicelock; /* lock for luns list */ mempool_t *device_data_mem_pool; struct list_head luns; diff --git a/drivers/scsi/lpfc/lpfc_attr.c b/drivers/scsi/lpfc/lpfc_attr.c index 4317c9ce7eca..3260e76e55b9 100644 --- a/drivers/scsi/lpfc/lpfc_attr.c +++ b/drivers/scsi/lpfc/lpfc_attr.c @@ -2230,66 +2230,6 @@ lpfc_poll_store(struct device *dev, struct device_attribute *attr, return strlen(buf); } -/** - * lpfc_fips_level_show - Return the current FIPS level for the HBA - * @dev: class unused variable. - * @attr: device attribute, not used. - * @buf: on return contains the module description text. - * - * Returns: size of formatted string. - **/ -static ssize_t -lpfc_fips_level_show(struct device *dev, struct device_attribute *attr, - char *buf) -{ - struct Scsi_Host *shost = class_to_shost(dev); - struct lpfc_vport *vport = (struct lpfc_vport *) shost->hostdata; - struct lpfc_hba *phba = vport->phba; - - return scnprintf(buf, PAGE_SIZE, "%d\n", phba->fips_level); -} - -/** - * lpfc_fips_rev_show - Return the FIPS Spec revision for the HBA - * @dev: class unused variable. - * @attr: device attribute, not used. - * @buf: on return contains the module description text. - * - * Returns: size of formatted string. 
- **/ -static ssize_t -lpfc_fips_rev_show(struct device *dev, struct device_attribute *attr, - char *buf) -{ - struct Scsi_Host *shost = class_to_shost(dev); - struct lpfc_vport *vport = (struct lpfc_vport *) shost->hostdata; - struct lpfc_hba *phba = vport->phba; - - return scnprintf(buf, PAGE_SIZE, "%d\n", phba->fips_spec_rev); -} - -/** - * lpfc_dss_show - Return the current state of dss and the configured state - * @dev: class converted to a Scsi_host structure. - * @attr: device attribute, not used. - * @buf: on return contains the formatted text. - * - * Returns: size of formatted string. - **/ -static ssize_t -lpfc_dss_show(struct device *dev, struct device_attribute *attr, - char *buf) -{ - struct Scsi_Host *shost = class_to_shost(dev); - struct lpfc_vport *vport = (struct lpfc_vport *) shost->hostdata; - struct lpfc_hba *phba = vport->phba; - - return scnprintf(buf, PAGE_SIZE, "%s - %sOperational\n", - (phba->cfg_enable_dss) ? "Enabled" : "Disabled", - (phba->sli3_options & LPFC_SLI3_DSS_ENABLED) ? - "" : "Not "); -} - /** * lpfc_sriov_hw_max_virtfn_show - Return maximum number of virtual functions * @dev: class converted to a Scsi_host structure. 
@@ -2705,9 +2645,6 @@ static DEVICE_ATTR(max_xri, S_IRUGO, lpfc_max_xri_show, NULL); static DEVICE_ATTR(used_xri, S_IRUGO, lpfc_used_xri_show, NULL); static DEVICE_ATTR(npiv_info, S_IRUGO, lpfc_npiv_info_show, NULL); static DEVICE_ATTR_RO(lpfc_temp_sensor); -static DEVICE_ATTR_RO(lpfc_fips_level); -static DEVICE_ATTR_RO(lpfc_fips_rev); -static DEVICE_ATTR_RO(lpfc_dss); static DEVICE_ATTR_RO(lpfc_sriov_hw_max_virtfn); static DEVICE_ATTR(protocol, S_IRUGO, lpfc_sli4_protocol_show, NULL); static DEVICE_ATTR(lpfc_xlane_supported, S_IRUGO, lpfc_oas_supported_show, @@ -6251,9 +6188,6 @@ struct device_attribute *lpfc_hba_attrs[] = { &dev_attr_pt, &dev_attr_txq_hw, &dev_attr_txcmplq_hw, - &dev_attr_lpfc_fips_level, - &dev_attr_lpfc_fips_rev, - &dev_attr_lpfc_dss, &dev_attr_lpfc_sriov_hw_max_virtfn, &dev_attr_protocol, &dev_attr_lpfc_xlane_supported, @@ -6289,8 +6223,6 @@ struct device_attribute *lpfc_vport_attrs[] = { &dev_attr_lpfc_max_scsicmpl_time, &dev_attr_lpfc_stat_data_ctrl, &dev_attr_lpfc_static_vport, - &dev_attr_lpfc_fips_level, - &dev_attr_lpfc_fips_rev, NULL, }; @@ -7399,7 +7331,6 @@ lpfc_get_cfgparam(struct lpfc_hba *phba) lpfc_suppress_link_up_init(phba, lpfc_suppress_link_up); lpfc_delay_discovery_init(phba, lpfc_delay_discovery); lpfc_sli_mode_init(phba, lpfc_sli_mode); - phba->cfg_enable_dss = 1; lpfc_enable_mds_diags_init(phba, lpfc_enable_mds_diags); lpfc_ras_fwlog_buffsize_init(phba, lpfc_ras_fwlog_buffsize); lpfc_ras_fwlog_level_init(phba, lpfc_ras_fwlog_level); diff --git a/drivers/scsi/lpfc/lpfc_hw.h b/drivers/scsi/lpfc/lpfc_hw.h index ae51c0dbba0a..c20034b3101c 100644 --- a/drivers/scsi/lpfc/lpfc_hw.h +++ b/drivers/scsi/lpfc/lpfc_hw.h @@ -3262,8 +3262,7 @@ typedef struct { #endif #ifdef __BIG_ENDIAN_BITFIELD - uint32_t rsvd1 : 19; /* Reserved */ - uint32_t cdss : 1; /* Configure Data Security SLI */ + uint32_t rsvd1 : 20; /* Reserved */ uint32_t casabt : 1; /* Configure async abts status notice */ uint32_t rsvd2 : 2; /* Reserved */ uint32_t cbg : 1; 
/* Configure BlockGuard */ @@ -3287,12 +3286,10 @@ typedef struct { uint32_t cbg : 1; /* Configure BlockGuard */ uint32_t rsvd2 : 2; /* Reserved */ uint32_t casabt : 1; /* Configure async abts status notice */ - uint32_t cdss : 1; /* Configure Data Security SLI */ - uint32_t rsvd1 : 19; /* Reserved */ + uint32_t rsvd1 : 20; /* Reserved */ #endif #ifdef __BIG_ENDIAN_BITFIELD - uint32_t rsvd3 : 19; /* Reserved */ - uint32_t gdss : 1; /* Configure Data Security SLI */ + uint32_t rsvd3 : 20; /* Reserved */ uint32_t gasabt : 1; /* Grant async abts status notice */ uint32_t rsvd4 : 2; /* Reserved */ uint32_t gbg : 1; /* Grant BlockGuard */ @@ -3316,8 +3313,7 @@ typedef struct { uint32_t gbg : 1; /* Grant BlockGuard */ uint32_t rsvd4 : 2; /* Reserved */ uint32_t gasabt : 1; /* Grant async abts status notice */ - uint32_t gdss : 1; /* Configure Data Security SLI */ - uint32_t rsvd3 : 19; /* Reserved */ + uint32_t rsvd3 : 20; /* Reserved */ #endif #ifdef __BIG_ENDIAN_BITFIELD @@ -3339,15 +3335,11 @@ typedef struct { uint32_t rsvd6; /* Reserved */ #ifdef __BIG_ENDIAN_BITFIELD - uint32_t fips_rev : 3; /* FIPS Spec Revision */ - uint32_t fips_level : 4; /* FIPS Level */ - uint32_t sec_err : 9; /* security crypto error */ + uint32_t rsvd7 : 16; uint32_t max_vpi : 16; /* Max number of virt N-Ports */ #else /* __LITTLE_ENDIAN */ uint32_t max_vpi : 16; /* Max number of virt N-Ports */ - uint32_t sec_err : 9; /* security crypto error */ - uint32_t fips_level : 4; /* FIPS Level */ - uint32_t fips_rev : 3; /* FIPS Spec Revision */ + uint32_t rsvd7 : 16; #endif } CONFIG_PORT_VAR; diff --git a/drivers/scsi/lpfc/lpfc_mbox.c b/drivers/scsi/lpfc/lpfc_mbox.c index d1773c01d2b3..e35b52b66d6c 100644 --- a/drivers/scsi/lpfc/lpfc_mbox.c +++ b/drivers/scsi/lpfc/lpfc_mbox.c @@ -1299,8 +1299,6 @@ lpfc_config_port(struct lpfc_hba *phba, LPFC_MBOXQ_t *pmb) if (phba->sli_rev == LPFC_SLI_REV3 && phba->vpd.sli3Feat.cerbm) { if (phba->cfg_enable_bg) mb->un.varCfgPort.cbg = 1; /* configure BlockGuard */ 
- if (phba->cfg_enable_dss) - mb->un.varCfgPort.cdss = 1; /* Configure Security */ mb->un.varCfgPort.cerbm = 1; /* Request HBQs */ mb->un.varCfgPort.ccrp = 1; /* Command Ring Polling */ mb->un.varCfgPort.max_hbq = lpfc_sli_hbq_count(); diff --git a/drivers/scsi/lpfc/lpfc_sli.c b/drivers/scsi/lpfc/lpfc_sli.c index 52ccaebd6f2c..b6fb665e6ec4 100644 --- a/drivers/scsi/lpfc/lpfc_sli.c +++ b/drivers/scsi/lpfc/lpfc_sli.c @@ -5032,23 +5032,6 @@ lpfc_sli_config_port(struct lpfc_hba *phba, int sli_mode) } else phba->max_vpi = 0; - phba->fips_level = 0; - phba->fips_spec_rev = 0; - if (pmb->u.mb.un.varCfgPort.gdss) { - phba->sli3_options |= LPFC_SLI3_DSS_ENABLED; - phba->fips_level = pmb->u.mb.un.varCfgPort.fips_level; - phba->fips_spec_rev = pmb->u.mb.un.varCfgPort.fips_rev; - lpfc_printf_log(phba, KERN_INFO, LOG_INIT, - "2850 Security Crypto Active. FIPS x%d " - "(Spec Rev: x%d)", - phba->fips_level, phba->fips_spec_rev); - } - if (pmb->u.mb.un.varCfgPort.sec_err) { - lpfc_printf_log(phba, KERN_ERR, LOG_INIT, - "2856 Config Port Security Crypto " - "Error: x%x ", - pmb->u.mb.un.varCfgPort.sec_err); - } if (pmb->u.mb.un.varCfgPort.gerbm) phba->sli3_options |= LPFC_SLI3_HBQ_ENABLED; if (pmb->u.mb.un.varCfgPort.gcrp) From d75e119e60ceaaaaf32624f5a82842df4d78aed7 Mon Sep 17 00:00:00 2001 From: James Smart Date: Sun, 22 Mar 2020 11:13:04 -0700 Subject: [PATCH 035/280] scsi: lpfc: Update lpfc version to 12.8.0.0 Update lpfc version to 12.8.0.0 Link: https://lore.kernel.org/r/20200322181304.37655-13-jsmart2021@gmail.com Signed-off-by: James Smart Signed-off-by: Dick Kennedy Signed-off-by: Martin K. Petersen --- drivers/scsi/lpfc/lpfc_version.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/scsi/lpfc/lpfc_version.h b/drivers/scsi/lpfc/lpfc_version.h index c4ab006e6ecc..ca40c47cfbe0 100644 --- a/drivers/scsi/lpfc/lpfc_version.h +++ b/drivers/scsi/lpfc/lpfc_version.h @@ -20,7 +20,7 @@ * included with this package. 
* *******************************************************************/ -#define LPFC_DRIVER_VERSION "12.6.0.4" +#define LPFC_DRIVER_VERSION "12.8.0.0" #define LPFC_DRIVER_NAME "lpfc" /* Used for SLI 2/3 */ From 394b949f2d919cef8e2ff490862572c74df09de8 Mon Sep 17 00:00:00 2001 From: Subhash Jadavani Date: Thu, 26 Mar 2020 02:25:40 -0700 Subject: [PATCH 036/280] scsi: ufs: Clean up ufshcd_scale_clks() and clock scaling error out path This change introduces a func ufshcd_set_clk_freq() to explicitly set clock frequency so that it can be used in reset_and_restore path and in ufshcd_scale_clks(). This change also cleans up the clock scaling error out path. [mkp: commit desc] Link: https://lore.kernel.org/r/1585214742-5466-2-git-send-email-cang@codeaurora.org Fixes: a3cd5ec55f6c ("scsi: ufs: add load based scaling of UFS gear") Reviewed-by: Bean Huo Acked-by: Avri Altman Signed-off-by: Subhash Jadavani Signed-off-by: Can Guo Signed-off-by: Martin K. Petersen --- drivers/scsi/ufs/ufshcd.c | 65 ++++++++++++++++++++++++++------------- 1 file changed, 44 insertions(+), 21 deletions(-) diff --git a/drivers/scsi/ufs/ufshcd.c b/drivers/scsi/ufs/ufshcd.c index 5f20922101cf..79cbd35675cc 100644 --- a/drivers/scsi/ufs/ufshcd.c +++ b/drivers/scsi/ufs/ufshcd.c @@ -868,28 +868,29 @@ static bool ufshcd_is_unipro_pa_params_tuning_req(struct ufs_hba *hba) return false; } -static int ufshcd_scale_clks(struct ufs_hba *hba, bool scale_up) +/** + * ufshcd_set_clk_freq - set UFS controller clock frequencies + * @hba: per adapter instance + * @scale_up: If True, set max possible frequency othewise set low frequency + * + * Returns 0 if successful + * Returns < 0 for any other errors + */ +static int ufshcd_set_clk_freq(struct ufs_hba *hba, bool scale_up) { int ret = 0; struct ufs_clk_info *clki; struct list_head *head = &hba->clk_list_head; - ktime_t start = ktime_get(); - bool clk_state_changed = false; if (list_empty(head)) goto out; - ret = ufshcd_vops_clk_scale_notify(hba, scale_up, 
PRE_CHANGE); - if (ret) - return ret; - list_for_each_entry(clki, head, list) { if (!IS_ERR_OR_NULL(clki->clk)) { if (scale_up && clki->max_freq) { if (clki->curr_freq == clki->max_freq) continue; - clk_state_changed = true; ret = clk_set_rate(clki->clk, clki->max_freq); if (ret) { dev_err(hba->dev, "%s: %s clk set rate(%dHz) failed, %d\n", @@ -908,7 +909,6 @@ static int ufshcd_scale_clks(struct ufs_hba *hba, bool scale_up) if (clki->curr_freq == clki->min_freq) continue; - clk_state_changed = true; ret = clk_set_rate(clki->clk, clki->min_freq); if (ret) { dev_err(hba->dev, "%s: %s clk set rate(%dHz) failed, %d\n", @@ -927,11 +927,37 @@ static int ufshcd_scale_clks(struct ufs_hba *hba, bool scale_up) clki->name, clk_get_rate(clki->clk)); } +out: + return ret; +} + +/** + * ufshcd_scale_clks - scale up or scale down UFS controller clocks + * @hba: per adapter instance + * @scale_up: True if scaling up and false if scaling down + * + * Returns 0 if successful + * Returns < 0 for any other errors + */ +static int ufshcd_scale_clks(struct ufs_hba *hba, bool scale_up) +{ + int ret = 0; + ktime_t start = ktime_get(); + + ret = ufshcd_vops_clk_scale_notify(hba, scale_up, PRE_CHANGE); + if (ret) + goto out; + + ret = ufshcd_set_clk_freq(hba, scale_up); + if (ret) + goto out; + ret = ufshcd_vops_clk_scale_notify(hba, scale_up, POST_CHANGE); + if (ret) + ufshcd_set_clk_freq(hba, !scale_up); out: - if (clk_state_changed) - trace_ufshcd_profile_clk_scaling(dev_name(hba->dev), + trace_ufshcd_profile_clk_scaling(dev_name(hba->dev), (scale_up ? 
"up" : "down"), ktime_to_us(ktime_sub(ktime_get(), start)), ret); return ret; @@ -1119,35 +1145,32 @@ static int ufshcd_devfreq_scale(struct ufs_hba *hba, bool scale_up) ret = ufshcd_clock_scaling_prepare(hba); if (ret) - return ret; + goto out; /* scale down the gear before scaling down clocks */ if (!scale_up) { ret = ufshcd_scale_gear(hba, false); if (ret) - goto out; + goto out_unprepare; } ret = ufshcd_scale_clks(hba, scale_up); if (ret) { if (!scale_up) ufshcd_scale_gear(hba, true); - goto out; + goto out_unprepare; } /* scale up the gear after scaling up clocks */ if (scale_up) { ret = ufshcd_scale_gear(hba, true); - if (ret) { + if (ret) ufshcd_scale_clks(hba, false); - goto out; - } } - ret = ufshcd_vops_clk_scale_notify(hba, scale_up, POST_CHANGE); - -out: +out_unprepare: ufshcd_clock_scaling_unprepare(hba); +out: ufshcd_release(hba); return ret; } @@ -6292,7 +6315,7 @@ static int ufshcd_host_reset_and_restore(struct ufs_hba *hba) spin_unlock_irqrestore(hba->host->host_lock, flags); /* scale up clocks to max frequency before full reinitialization */ - ufshcd_scale_clks(hba, true); + ufshcd_set_clk_freq(hba, true); err = ufshcd_hba_enable(hba); if (err) From 087c5efafa48661ac98184fbf8826a1a79dc5784 Mon Sep 17 00:00:00 2001 From: Stanley Chu Date: Fri, 27 Mar 2020 17:53:28 +0800 Subject: [PATCH 037/280] scsi: ufs: export ufshcd_link_recovery Export ufshcd_link_recovery to allow vendors to recover failed link in vendor's callbacks. Link: https://lore.kernel.org/r/20200327095329.10083-2-stanley.chu@mediatek.com Reviewed-by: Avri Altman Signed-off-by: Stanley Chu Signed-off-by: Martin K. 
Petersen --- drivers/scsi/ufs/ufshcd.c | 3 ++- drivers/scsi/ufs/ufshcd.h | 1 + 2 files changed, 3 insertions(+), 1 deletion(-) diff --git a/drivers/scsi/ufs/ufshcd.c b/drivers/scsi/ufs/ufshcd.c index 79cbd35675cc..8d1c70ac44b8 100644 --- a/drivers/scsi/ufs/ufshcd.c +++ b/drivers/scsi/ufs/ufshcd.c @@ -3808,7 +3808,7 @@ static int ufshcd_uic_change_pwr_mode(struct ufs_hba *hba, u8 mode) return ret; } -static int ufshcd_link_recovery(struct ufs_hba *hba) +int ufshcd_link_recovery(struct ufs_hba *hba) { int ret; unsigned long flags; @@ -3835,6 +3835,7 @@ static int ufshcd_link_recovery(struct ufs_hba *hba) return ret; } +EXPORT_SYMBOL_GPL(ufshcd_link_recovery); static int __ufshcd_uic_hibern8_enter(struct ufs_hba *hba) { diff --git a/drivers/scsi/ufs/ufshcd.h b/drivers/scsi/ufs/ufshcd.h index dd1ee277069a..fafc781fead9 100644 --- a/drivers/scsi/ufs/ufshcd.h +++ b/drivers/scsi/ufs/ufshcd.h @@ -788,6 +788,7 @@ int ufshcd_alloc_host(struct device *, struct ufs_hba **); void ufshcd_dealloc_host(struct ufs_hba *); int ufshcd_hba_enable(struct ufs_hba *hba); int ufshcd_init(struct ufs_hba * , void __iomem * , unsigned int); +int ufshcd_link_recovery(struct ufs_hba *hba); int ufshcd_make_hba_operational(struct ufs_hba *hba); void ufshcd_remove(struct ufs_hba *); int ufshcd_uic_hibern8_exit(struct ufs_hba *hba); From 8033824bbf3c15e6321045a3f51d6f6947d89d47 Mon Sep 17 00:00:00 2001 From: Stanley Chu Date: Fri, 27 Mar 2020 17:53:29 +0800 Subject: [PATCH 038/280] scsi: ufs-mediatek: add error recovery for suspend and resume Once fail happens during suspend and resume flow if the desired low power link state is H8, link recovery is required for MediaTek UFS controller. For resume flow, since power and clocks are already enabled before invoking vendor's resume callback, simply using ufshcd_link_recovery() inside callback is fine. 
For suspend flow, the device power enters low power mode or is disabled before suspend callback, thus ufshcd_link_recovery() can not be directly used in vendor callback. One solution is to set the link to off state and then ufshcd_host_reset_and_restore() will be executed by ufshcd_suspend(). Link: https://lore.kernel.org/r/20200327095329.10083-3-stanley.chu@mediatek.com Reviewed-by: Avri Altman Signed-off-by: Stanley Chu Signed-off-by: Martin K. Petersen --- drivers/scsi/ufs/ufs-mediatek.c | 13 +++++++++++-- 1 file changed, 11 insertions(+), 2 deletions(-) diff --git a/drivers/scsi/ufs/ufs-mediatek.c b/drivers/scsi/ufs/ufs-mediatek.c index 40a66b31b31f..673c16596fb2 100644 --- a/drivers/scsi/ufs/ufs-mediatek.c +++ b/drivers/scsi/ufs/ufs-mediatek.c @@ -499,8 +499,15 @@ static int ufs_mtk_suspend(struct ufs_hba *hba, enum ufs_pm_op pm_op) if (ufshcd_is_link_hibern8(hba)) { err = ufs_mtk_link_set_lpm(hba); - if (err) + if (err) { + /* + * Set link as off state enforcedly to trigger + * ufshcd_host_reset_and_restore() in ufshcd_suspend() + * for completed host reset. + */ + ufshcd_set_link_off(hba); return -EAGAIN; + } } if (!ufshcd_is_link_active(hba)) @@ -519,8 +526,10 @@ static int ufs_mtk_resume(struct ufs_hba *hba, enum ufs_pm_op pm_op) if (ufshcd_is_link_hibern8(hba)) { err = ufs_mtk_link_set_hpm(hba); - if (err) + if (err) { + err = ufshcd_link_recovery(hba); return err; + } } return 0; From 95b1b51e776d89882ee67077976784b8812fbf9f Mon Sep 17 00:00:00 2001 From: David Disseldorp Date: Fri, 27 Mar 2020 15:19:50 +0100 Subject: [PATCH 039/280] scsi: target: use #define for xcopy descriptor len Link: https://lore.kernel.org/r/20200327141954.955-2-ddiss@suse.de Reviewed-by: Christoph Hellwig Signed-off-by: David Disseldorp Signed-off-by: Martin K. 
Petersen --- drivers/target/target_core_xcopy.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/target/target_core_xcopy.c b/drivers/target/target_core_xcopy.c index 425c1070de08..7e5b13da0c20 100644 --- a/drivers/target/target_core_xcopy.c +++ b/drivers/target/target_core_xcopy.c @@ -134,7 +134,7 @@ static int target_xcopy_parse_tiddesc_e4(struct se_cmd *se_cmd, struct xcopy_op * Assigned designator */ desig_len = desc[7]; - if (desig_len != 16) { + if (desig_len != XCOPY_NAA_IEEE_REGEX_LEN) { pr_err("XCOPY 0xe4: invalid desig_len: %d\n", (int)desig_len); return -EINVAL; } From 267fc83f8836ad6168ed264a0823543439abb3be Mon Sep 17 00:00:00 2001 From: David Disseldorp Date: Fri, 27 Mar 2020 15:19:51 +0100 Subject: [PATCH 040/280] scsi: target: drop xcopy DISK BLOCK LENGTH debug The DISK BLOCK LENGTH field is carried with XCOPY target descriptors on the wire, but is currently unmarshalled during 0x02 segment descriptor passing. The unmarshalled value is currently unused, so drop it. Link: https://lore.kernel.org/r/20200327141954.955-3-ddiss@suse.de Reviewed-by: Christoph Hellwig Signed-off-by: David Disseldorp Signed-off-by: Martin K. 
Petersen --- drivers/target/target_core_xcopy.c | 5 ----- drivers/target/target_core_xcopy.h | 1 - 2 files changed, 6 deletions(-) diff --git a/drivers/target/target_core_xcopy.c b/drivers/target/target_core_xcopy.c index 7e5b13da0c20..66b68295c50f 100644 --- a/drivers/target/target_core_xcopy.c +++ b/drivers/target/target_core_xcopy.c @@ -315,11 +315,6 @@ static int target_xcopy_parse_segdesc_02(struct se_cmd *se_cmd, struct xcopy_op xop->nolb, (unsigned long long)xop->src_lba, (unsigned long long)xop->dst_lba); - if (dc != 0) { - xop->dbl = get_unaligned_be24(&desc[29]); - - pr_debug("XCOPY seg desc 0x02: DC=1 w/ dbl: %u\n", xop->dbl); - } return 0; } diff --git a/drivers/target/target_core_xcopy.h b/drivers/target/target_core_xcopy.h index 26ba4c3c9cff..0840b03e8faa 100644 --- a/drivers/target/target_core_xcopy.h +++ b/drivers/target/target_core_xcopy.h @@ -35,7 +35,6 @@ struct xcopy_op { unsigned short stdi; unsigned short dtdi; unsigned short nolb; - unsigned int dbl; struct xcopy_pt_cmd *src_pt_cmd; struct xcopy_pt_cmd *dst_pt_cmd; From 0ad08996da05b6b735d4963dceab7d2a4043607c Mon Sep 17 00:00:00 2001 From: David Disseldorp Date: Fri, 27 Mar 2020 15:19:52 +0100 Subject: [PATCH 041/280] scsi: target: avoid per-loop XCOPY buffer allocations The main target_xcopy_do_work() loop unnecessarily allocates an I/O buffer with each synchronous READ / WRITE pair. This commit significantly reduces allocations by reusing the XCOPY I/O buffer when possible. Link: https://lore.kernel.org/r/20200327141954.955-4-ddiss@suse.de Reviewed-by: Christoph Hellwig Signed-off-by: David Disseldorp Signed-off-by: Martin K. 
Petersen --- drivers/target/target_core_xcopy.c | 96 ++++++++++-------------------- drivers/target/target_core_xcopy.h | 1 + 2 files changed, 31 insertions(+), 66 deletions(-) diff --git a/drivers/target/target_core_xcopy.c b/drivers/target/target_core_xcopy.c index 66b68295c50f..d61c41f33f81 100644 --- a/drivers/target/target_core_xcopy.c +++ b/drivers/target/target_core_xcopy.c @@ -499,7 +499,6 @@ void target_xcopy_release_pt(void) * @cdb: SCSI CDB to be copied into @xpt_cmd. * @remote_port: If false, use the LUN through which the XCOPY command has * been received. If true, use @se_dev->xcopy_lun. - * @alloc_mem: Whether or not to allocate an SGL list. * * Set up a SCSI command (READ or WRITE) that will be used to execute an * XCOPY command. @@ -509,12 +508,9 @@ static int target_xcopy_setup_pt_cmd( struct xcopy_op *xop, struct se_device *se_dev, unsigned char *cdb, - bool remote_port, - bool alloc_mem) + bool remote_port) { struct se_cmd *cmd = &xpt_cmd->se_cmd; - sense_reason_t sense_rc; - int ret = 0, rc; /* * Setup LUN+port to honor reservations based upon xop->op_origin for @@ -530,46 +526,17 @@ static int target_xcopy_setup_pt_cmd( cmd->se_cmd_flags |= SCF_SE_LUN_CMD; cmd->tag = 0; - sense_rc = target_setup_cmd_from_cdb(cmd, cdb); - if (sense_rc) { - ret = -EINVAL; - goto out; - } + if (target_setup_cmd_from_cdb(cmd, cdb)) + return -EINVAL; - if (alloc_mem) { - rc = target_alloc_sgl(&cmd->t_data_sg, &cmd->t_data_nents, - cmd->data_length, false, false); - if (rc < 0) { - ret = rc; - goto out; - } - /* - * Set this bit so that transport_free_pages() allows the - * caller to release SGLs + physical memory allocated by - * transport_generic_get_mem().. - */ - cmd->se_cmd_flags |= SCF_PASSTHROUGH_SG_TO_MEM_NOALLOC; - } else { - /* - * Here the previously allocated SGLs for the internal READ - * are mapped zero-copy to the internal WRITE. 
- */ - sense_rc = transport_generic_map_mem_to_cmd(cmd, - xop->xop_data_sg, xop->xop_data_nents, - NULL, 0); - if (sense_rc) { - ret = -EINVAL; - goto out; - } + if (transport_generic_map_mem_to_cmd(cmd, xop->xop_data_sg, + xop->xop_data_nents, NULL, 0)) + return -EINVAL; - pr_debug("Setup PASSTHROUGH_NOALLOC t_data_sg: %p t_data_nents:" - " %u\n", cmd->t_data_sg, cmd->t_data_nents); - } + pr_debug("Setup PASSTHROUGH_NOALLOC t_data_sg: %p t_data_nents:" + " %u\n", cmd->t_data_sg, cmd->t_data_nents); return 0; - -out: - return ret; } static int target_xcopy_issue_pt_cmd(struct xcopy_pt_cmd *xpt_cmd) @@ -626,15 +593,13 @@ static int target_xcopy_read_source( xop->src_pt_cmd = xpt_cmd; rc = target_xcopy_setup_pt_cmd(xpt_cmd, xop, src_dev, &cdb[0], - remote_port, true); + remote_port); if (rc < 0) { ec_cmd->scsi_status = xpt_cmd->se_cmd.scsi_status; transport_generic_free_cmd(se_cmd, 0); return rc; } - xop->xop_data_sg = se_cmd->t_data_sg; - xop->xop_data_nents = se_cmd->t_data_nents; pr_debug("XCOPY-READ: Saved xop->xop_data_sg: %p, num: %u for READ" " memory\n", xop->xop_data_sg, xop->xop_data_nents); @@ -644,12 +609,6 @@ static int target_xcopy_read_source( transport_generic_free_cmd(se_cmd, 0); return rc; } - /* - * Clear off the allocated t_data_sg, that has been saved for - * zero-copy WRITE submission reuse in struct xcopy_op.. - */ - se_cmd->t_data_sg = NULL; - se_cmd->t_data_nents = 0; return 0; } @@ -688,19 +647,9 @@ static int target_xcopy_write_destination( xop->dst_pt_cmd = xpt_cmd; rc = target_xcopy_setup_pt_cmd(xpt_cmd, xop, dst_dev, &cdb[0], - remote_port, false); + remote_port); if (rc < 0) { - struct se_cmd *src_cmd = &xop->src_pt_cmd->se_cmd; ec_cmd->scsi_status = xpt_cmd->se_cmd.scsi_status; - /* - * If the failure happened before the t_mem_list hand-off in - * target_xcopy_setup_pt_cmd(), Reset memory + clear flag so that - * core releases this memory on error during X-COPY WRITE I/O. 
- */ - src_cmd->se_cmd_flags &= ~SCF_PASSTHROUGH_SG_TO_MEM_NOALLOC; - src_cmd->t_data_sg = xop->xop_data_sg; - src_cmd->t_data_nents = xop->xop_data_nents; - transport_generic_free_cmd(se_cmd, 0); return rc; } @@ -708,7 +657,6 @@ static int target_xcopy_write_destination( rc = target_xcopy_issue_pt_cmd(xpt_cmd); if (rc < 0) { ec_cmd->scsi_status = xpt_cmd->se_cmd.scsi_status; - se_cmd->se_cmd_flags &= ~SCF_PASSTHROUGH_SG_TO_MEM_NOALLOC; transport_generic_free_cmd(se_cmd, 0); return rc; } @@ -724,7 +672,7 @@ static void target_xcopy_do_work(struct work_struct *work) sector_t src_lba, dst_lba, end_lba; unsigned int max_sectors; int rc = 0; - unsigned short nolb, cur_nolb, max_nolb, copied_nolb = 0; + unsigned short nolb, max_nolb, copied_nolb = 0; if (target_parse_xcopy_cmd(xop) != TCM_NO_SENSE) goto err_free; @@ -754,7 +702,23 @@ static void target_xcopy_do_work(struct work_struct *work) (unsigned long long)src_lba, (unsigned long long)dst_lba); while (src_lba < end_lba) { - cur_nolb = min(nolb, max_nolb); + unsigned short cur_nolb = min(nolb, max_nolb); + u32 cur_bytes = cur_nolb * src_dev->dev_attrib.block_size; + + if (cur_bytes != xop->xop_data_bytes) { + /* + * (Re)allocate a buffer large enough to hold the XCOPY + * I/O size, which can be reused each read / write loop. 
+ */ + target_free_sgl(xop->xop_data_sg, xop->xop_data_nents); + rc = target_alloc_sgl(&xop->xop_data_sg, + &xop->xop_data_nents, + cur_bytes, + false, false); + if (rc < 0) + goto out; + xop->xop_data_bytes = cur_bytes; + } pr_debug("target_xcopy_do_work: Calling read src_dev: %p src_lba: %llu," " cur_nolb: %hu\n", src_dev, (unsigned long long)src_lba, cur_nolb); @@ -785,12 +749,11 @@ static void target_xcopy_do_work(struct work_struct *work) nolb -= cur_nolb; transport_generic_free_cmd(&xop->src_pt_cmd->se_cmd, 0); - xop->dst_pt_cmd->se_cmd.se_cmd_flags &= ~SCF_PASSTHROUGH_SG_TO_MEM_NOALLOC; - transport_generic_free_cmd(&xop->dst_pt_cmd->se_cmd, 0); } xcopy_pt_undepend_remotedev(xop); + target_free_sgl(xop->xop_data_sg, xop->xop_data_nents); kfree(xop); pr_debug("target_xcopy_do_work: Final src_lba: %llu, dst_lba: %llu\n", @@ -804,6 +767,7 @@ static void target_xcopy_do_work(struct work_struct *work) out: xcopy_pt_undepend_remotedev(xop); + target_free_sgl(xop->xop_data_sg, xop->xop_data_nents); err_free: kfree(xop); diff --git a/drivers/target/target_core_xcopy.h b/drivers/target/target_core_xcopy.h index 0840b03e8faa..9558974185ea 100644 --- a/drivers/target/target_core_xcopy.h +++ b/drivers/target/target_core_xcopy.h @@ -39,6 +39,7 @@ struct xcopy_op { struct xcopy_pt_cmd *src_pt_cmd; struct xcopy_pt_cmd *dst_pt_cmd; + u32 xop_data_bytes; u32 xop_data_nents; struct scatterlist *xop_data_sg; struct work_struct xop_work; From 5f306aff34809fca68bb36476e66552ac241a9c4 Mon Sep 17 00:00:00 2001 From: David Disseldorp Date: Fri, 27 Mar 2020 15:19:53 +0100 Subject: [PATCH 042/280] scsi: target: increase XCOPY I/O size The I/O size is already bound by dev_attrib.hw_max_sectors, so increase the hardcoded XCOPY_MAX_SECTORS maximum to improve performance against backstores with high-latency. Link: https://lore.kernel.org/r/20200327141954.955-5-ddiss@suse.de Reviewed-by: Christoph Hellwig Signed-off-by: David Disseldorp Signed-off-by: Martin K. 
Petersen --- drivers/target/target_core_xcopy.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/target/target_core_xcopy.h b/drivers/target/target_core_xcopy.h index 9558974185ea..f1aaf7140798 100644 --- a/drivers/target/target_core_xcopy.h +++ b/drivers/target/target_core_xcopy.h @@ -5,7 +5,7 @@ #define XCOPY_TARGET_DESC_LEN 32 #define XCOPY_SEGMENT_DESC_LEN 28 #define XCOPY_NAA_IEEE_REGEX_LEN 16 -#define XCOPY_MAX_SECTORS 1024 +#define XCOPY_MAX_SECTORS 4096 /* * SPC4r37 6.4.6.1 From b92fcfcb687de7d08278a557faff3a7b4a672cc7 Mon Sep 17 00:00:00 2001 From: David Disseldorp Date: Fri, 27 Mar 2020 15:19:54 +0100 Subject: [PATCH 043/280] scsi: target: use the stack for XCOPY passthrough cmds Reads and writes in the XCOPY loop are synchronous, so needn't be heap allocated / freed with each loop. Link: https://lore.kernel.org/r/20200327141954.955-6-ddiss@suse.de Reviewed-by: Christoph Hellwig Signed-off-by: David Disseldorp Signed-off-by: Martin K. Petersen --- drivers/target/target_core_xcopy.c | 84 +++++++++++------------------- drivers/target/target_core_xcopy.h | 5 -- 2 files changed, 31 insertions(+), 58 deletions(-) diff --git a/drivers/target/target_core_xcopy.c b/drivers/target/target_core_xcopy.c index d61c41f33f81..bd3ed6ce7571 100644 --- a/drivers/target/target_core_xcopy.c +++ b/drivers/target/target_core_xcopy.c @@ -410,7 +410,8 @@ static void xcopy_pt_release_cmd(struct se_cmd *se_cmd) struct xcopy_pt_cmd *xpt_cmd = container_of(se_cmd, struct xcopy_pt_cmd, se_cmd); - kfree(xpt_cmd); + /* xpt_cmd is on the stack, nothing to free here */ + pr_debug("xpt_cmd done: %p\n", xpt_cmd); } static int xcopy_pt_check_stop_free(struct se_cmd *se_cmd) @@ -566,20 +567,15 @@ static int target_xcopy_read_source( sector_t src_lba, u32 src_sectors) { - struct xcopy_pt_cmd *xpt_cmd; - struct se_cmd *se_cmd; + struct xcopy_pt_cmd xpt_cmd; + struct se_cmd *se_cmd = &xpt_cmd.se_cmd; u32 length = (src_sectors * src_dev->dev_attrib.block_size); int rc; 
unsigned char cdb[16]; bool remote_port = (xop->op_origin == XCOL_DEST_RECV_OP); - xpt_cmd = kzalloc(sizeof(struct xcopy_pt_cmd), GFP_KERNEL); - if (!xpt_cmd) { - pr_err("Unable to allocate xcopy_pt_cmd\n"); - return -ENOMEM; - } - init_completion(&xpt_cmd->xpt_passthrough_sem); - se_cmd = &xpt_cmd->se_cmd; + memset(&xpt_cmd, 0, sizeof(xpt_cmd)); + init_completion(&xpt_cmd.xpt_passthrough_sem); memset(&cdb[0], 0, 16); cdb[0] = READ_16; @@ -589,28 +585,24 @@ static int target_xcopy_read_source( (unsigned long long)src_lba, src_sectors, length); transport_init_se_cmd(se_cmd, &xcopy_pt_tfo, &xcopy_pt_sess, length, - DMA_FROM_DEVICE, 0, &xpt_cmd->sense_buffer[0]); - xop->src_pt_cmd = xpt_cmd; + DMA_FROM_DEVICE, 0, &xpt_cmd.sense_buffer[0]); - rc = target_xcopy_setup_pt_cmd(xpt_cmd, xop, src_dev, &cdb[0], + rc = target_xcopy_setup_pt_cmd(&xpt_cmd, xop, src_dev, &cdb[0], remote_port); if (rc < 0) { - ec_cmd->scsi_status = xpt_cmd->se_cmd.scsi_status; - transport_generic_free_cmd(se_cmd, 0); - return rc; + ec_cmd->scsi_status = se_cmd->scsi_status; + goto out; } pr_debug("XCOPY-READ: Saved xop->xop_data_sg: %p, num: %u for READ" " memory\n", xop->xop_data_sg, xop->xop_data_nents); - rc = target_xcopy_issue_pt_cmd(xpt_cmd); - if (rc < 0) { - ec_cmd->scsi_status = xpt_cmd->se_cmd.scsi_status; - transport_generic_free_cmd(se_cmd, 0); - return rc; - } - - return 0; + rc = target_xcopy_issue_pt_cmd(&xpt_cmd); + if (rc < 0) + ec_cmd->scsi_status = se_cmd->scsi_status; +out: + transport_generic_free_cmd(se_cmd, 0); + return rc; } static int target_xcopy_write_destination( @@ -620,20 +612,15 @@ static int target_xcopy_write_destination( sector_t dst_lba, u32 dst_sectors) { - struct xcopy_pt_cmd *xpt_cmd; - struct se_cmd *se_cmd; + struct xcopy_pt_cmd xpt_cmd; + struct se_cmd *se_cmd = &xpt_cmd.se_cmd; u32 length = (dst_sectors * dst_dev->dev_attrib.block_size); int rc; unsigned char cdb[16]; bool remote_port = (xop->op_origin == XCOL_SOURCE_RECV_OP); - xpt_cmd = 
kzalloc(sizeof(struct xcopy_pt_cmd), GFP_KERNEL); - if (!xpt_cmd) { - pr_err("Unable to allocate xcopy_pt_cmd\n"); - return -ENOMEM; - } - init_completion(&xpt_cmd->xpt_passthrough_sem); - se_cmd = &xpt_cmd->se_cmd; + memset(&xpt_cmd, 0, sizeof(xpt_cmd)); + init_completion(&xpt_cmd.xpt_passthrough_sem); memset(&cdb[0], 0, 16); cdb[0] = WRITE_16; @@ -643,25 +630,21 @@ static int target_xcopy_write_destination( (unsigned long long)dst_lba, dst_sectors, length); transport_init_se_cmd(se_cmd, &xcopy_pt_tfo, &xcopy_pt_sess, length, - DMA_TO_DEVICE, 0, &xpt_cmd->sense_buffer[0]); - xop->dst_pt_cmd = xpt_cmd; + DMA_TO_DEVICE, 0, &xpt_cmd.sense_buffer[0]); - rc = target_xcopy_setup_pt_cmd(xpt_cmd, xop, dst_dev, &cdb[0], + rc = target_xcopy_setup_pt_cmd(&xpt_cmd, xop, dst_dev, &cdb[0], remote_port); if (rc < 0) { - ec_cmd->scsi_status = xpt_cmd->se_cmd.scsi_status; - transport_generic_free_cmd(se_cmd, 0); - return rc; + ec_cmd->scsi_status = se_cmd->scsi_status; + goto out; } - rc = target_xcopy_issue_pt_cmd(xpt_cmd); - if (rc < 0) { - ec_cmd->scsi_status = xpt_cmd->se_cmd.scsi_status; - transport_generic_free_cmd(se_cmd, 0); - return rc; - } - - return 0; + rc = target_xcopy_issue_pt_cmd(&xpt_cmd); + if (rc < 0) + ec_cmd->scsi_status = se_cmd->scsi_status; +out: + transport_generic_free_cmd(se_cmd, 0); + return rc; } static void target_xcopy_do_work(struct work_struct *work) @@ -736,10 +719,8 @@ static void target_xcopy_do_work(struct work_struct *work) rc = target_xcopy_write_destination(ec_cmd, xop, dst_dev, dst_lba, cur_nolb); - if (rc < 0) { - transport_generic_free_cmd(&xop->src_pt_cmd->se_cmd, 0); + if (rc < 0) goto out; - } dst_lba += cur_nolb; pr_debug("target_xcopy_do_work: Incremented WRITE dst_lba to %llu\n", @@ -747,9 +728,6 @@ static void target_xcopy_do_work(struct work_struct *work) copied_nolb += cur_nolb; nolb -= cur_nolb; - - transport_generic_free_cmd(&xop->src_pt_cmd->se_cmd, 0); - transport_generic_free_cmd(&xop->dst_pt_cmd->se_cmd, 0); } 
xcopy_pt_undepend_remotedev(xop); diff --git a/drivers/target/target_core_xcopy.h b/drivers/target/target_core_xcopy.h index f1aaf7140798..c56a1bde9417 100644 --- a/drivers/target/target_core_xcopy.h +++ b/drivers/target/target_core_xcopy.h @@ -18,8 +18,6 @@ enum xcopy_origin_list { XCOL_DEST_RECV_OP = 0x02, }; -struct xcopy_pt_cmd; - struct xcopy_op { int op_origin; @@ -36,9 +34,6 @@ struct xcopy_op { unsigned short dtdi; unsigned short nolb; - struct xcopy_pt_cmd *src_pt_cmd; - struct xcopy_pt_cmd *dst_pt_cmd; - u32 xop_data_bytes; u32 xop_data_nents; struct scatterlist *xop_data_sg; From 77331115e220925af1f52e18ac99e37a0b0c10ad Mon Sep 17 00:00:00 2001 From: Javed Hasan Date: Thu, 26 Mar 2020 22:48:47 -0700 Subject: [PATCH 044/280] scsi: bnx2fc: Process the RQE with CQE in interrupt context Filesystem goes to read-only after continuous error injection because RQE was handled in deferred context, leading to mismatch between CQE and RQE. Specifically, this patch makes the following changes: - Process the RQE with CQE in interrupt context, before putting it into the work queue. - Producer and consumer indices are also updated in the interrupt context to guarantee the order of processing. [mkp: fixed bad indentation] Link: https://lore.kernel.org/r/20200327054849.15947-2-skashyap@marvell.com Signed-off-by: Javed Hasan Signed-off-by: Saurav Kashyap Signed-off-by: Martin K.
Petersen --- drivers/scsi/bnx2fc/bnx2fc.h | 11 +++- drivers/scsi/bnx2fc/bnx2fc_fcoe.c | 8 ++- drivers/scsi/bnx2fc/bnx2fc_hwi.c | 103 ++++++++++++++++++++++-------- drivers/scsi/bnx2fc/bnx2fc_io.c | 28 ++------ 4 files changed, 96 insertions(+), 54 deletions(-) diff --git a/drivers/scsi/bnx2fc/bnx2fc.h b/drivers/scsi/bnx2fc/bnx2fc.h index 3b84db8d13a9..15fa8e2a9ad8 100644 --- a/drivers/scsi/bnx2fc/bnx2fc.h +++ b/drivers/scsi/bnx2fc/bnx2fc.h @@ -482,7 +482,10 @@ struct io_bdt { struct bnx2fc_work { struct list_head list; struct bnx2fc_rport *tgt; + struct fcoe_task_ctx_entry *task; + unsigned char rq_data[BNX2FC_RQ_BUF_SZ]; u16 wqe; + u8 num_rq; }; struct bnx2fc_unsol_els { struct fc_lport *lport; @@ -550,7 +553,7 @@ void bnx2fc_rport_event_handler(struct fc_lport *lport, enum fc_rport_event event); void bnx2fc_process_scsi_cmd_compl(struct bnx2fc_cmd *io_req, struct fcoe_task_ctx_entry *task, - u8 num_rq); + u8 num_rq, unsigned char *rq_data); void bnx2fc_process_cleanup_compl(struct bnx2fc_cmd *io_req, struct fcoe_task_ctx_entry *task, u8 num_rq); @@ -559,7 +562,7 @@ void bnx2fc_process_abts_compl(struct bnx2fc_cmd *io_req, u8 num_rq); void bnx2fc_process_tm_compl(struct bnx2fc_cmd *io_req, struct fcoe_task_ctx_entry *task, - u8 num_rq); + u8 num_rq, unsigned char *rq_data); void bnx2fc_process_els_compl(struct bnx2fc_cmd *els_req, struct fcoe_task_ctx_entry *task, u8 num_rq); @@ -577,7 +580,9 @@ struct fc_seq *bnx2fc_elsct_send(struct fc_lport *lport, u32 did, void *arg, u32 timeout); void bnx2fc_arm_cq(struct bnx2fc_rport *tgt); int bnx2fc_process_new_cqes(struct bnx2fc_rport *tgt); -void bnx2fc_process_cq_compl(struct bnx2fc_rport *tgt, u16 wqe); +void bnx2fc_process_cq_compl(struct bnx2fc_rport *tgt, u16 wqe, + unsigned char *rq_data, u8 num_rq, + struct fcoe_task_ctx_entry *task); struct bnx2fc_rport *bnx2fc_tgt_lookup(struct fcoe_port *port, u32 port_id); void bnx2fc_process_l2_frame_compl(struct bnx2fc_rport *tgt, diff --git 
a/drivers/scsi/bnx2fc/bnx2fc_fcoe.c b/drivers/scsi/bnx2fc/bnx2fc_fcoe.c index b4bfab5edf8f..1cbb431fa682 100644 --- a/drivers/scsi/bnx2fc/bnx2fc_fcoe.c +++ b/drivers/scsi/bnx2fc/bnx2fc_fcoe.c @@ -660,7 +660,10 @@ static int bnx2fc_percpu_io_thread(void *arg) list_for_each_entry_safe(work, tmp, &work_list, list) { list_del_init(&work->list); - bnx2fc_process_cq_compl(work->tgt, work->wqe); + bnx2fc_process_cq_compl(work->tgt, work->wqe, + work->rq_data, + work->num_rq, + work->task); kfree(work); } @@ -2655,7 +2658,8 @@ static int bnx2fc_cpu_offline(unsigned int cpu) /* Free all work in the list */ list_for_each_entry_safe(work, tmp, &p->work_list, list) { list_del_init(&work->list); - bnx2fc_process_cq_compl(work->tgt, work->wqe); + bnx2fc_process_cq_compl(work->tgt, work->wqe, work->rq_data, + work->num_rq, work->task); kfree(work); } diff --git a/drivers/scsi/bnx2fc/bnx2fc_hwi.c b/drivers/scsi/bnx2fc/bnx2fc_hwi.c index 6f8335ddb1f2..eb41b0080f57 100644 --- a/drivers/scsi/bnx2fc/bnx2fc_hwi.c +++ b/drivers/scsi/bnx2fc/bnx2fc_hwi.c @@ -863,36 +863,22 @@ static void bnx2fc_process_unsol_compl(struct bnx2fc_rport *tgt, u16 wqe) } } -void bnx2fc_process_cq_compl(struct bnx2fc_rport *tgt, u16 wqe) +void bnx2fc_process_cq_compl(struct bnx2fc_rport *tgt, u16 wqe, + unsigned char *rq_data, u8 num_rq, + struct fcoe_task_ctx_entry *task) { - struct fcoe_task_ctx_entry *task; - struct fcoe_task_ctx_entry *task_page; struct fcoe_port *port = tgt->port; struct bnx2fc_interface *interface = port->priv; struct bnx2fc_hba *hba = interface->hba; struct bnx2fc_cmd *io_req; - int task_idx, index; + u16 xid; u8 cmd_type; u8 rx_state = 0; - u8 num_rq; spin_lock_bh(&tgt->tgt_lock); + xid = wqe & FCOE_PEND_WQ_CQE_TASK_ID; - if (xid >= hba->max_tasks) { - printk(KERN_ERR PFX "ERROR:xid out of range\n"); - spin_unlock_bh(&tgt->tgt_lock); - return; - } - task_idx = xid / BNX2FC_TASKS_PER_PAGE; - index = xid % BNX2FC_TASKS_PER_PAGE; - task_page = (struct fcoe_task_ctx_entry 
*)hba->task_ctx[task_idx]; - task = &(task_page[index]); - - num_rq = ((task->rxwr_txrd.var_ctx.rx_flags & - FCOE_TCE_RX_WR_TX_RD_VAR_NUM_RQ_WQE) >> - FCOE_TCE_RX_WR_TX_RD_VAR_NUM_RQ_WQE_SHIFT); - io_req = (struct bnx2fc_cmd *)hba->cmd_mgr->cmds[xid]; if (io_req == NULL) { @@ -912,7 +898,8 @@ void bnx2fc_process_cq_compl(struct bnx2fc_rport *tgt, u16 wqe) switch (cmd_type) { case BNX2FC_SCSI_CMD: if (rx_state == FCOE_TASK_RX_STATE_COMPLETED) { - bnx2fc_process_scsi_cmd_compl(io_req, task, num_rq); + bnx2fc_process_scsi_cmd_compl(io_req, task, num_rq, + rq_data); spin_unlock_bh(&tgt->tgt_lock); return; } @@ -929,7 +916,7 @@ void bnx2fc_process_cq_compl(struct bnx2fc_rport *tgt, u16 wqe) case BNX2FC_TASK_MGMT_CMD: BNX2FC_IO_DBG(io_req, "Processing TM complete\n"); - bnx2fc_process_tm_compl(io_req, task, num_rq); + bnx2fc_process_tm_compl(io_req, task, num_rq, rq_data); break; case BNX2FC_ABTS: @@ -987,7 +974,9 @@ void bnx2fc_arm_cq(struct bnx2fc_rport *tgt) } -static struct bnx2fc_work *bnx2fc_alloc_work(struct bnx2fc_rport *tgt, u16 wqe) +static struct bnx2fc_work *bnx2fc_alloc_work(struct bnx2fc_rport *tgt, u16 wqe, + unsigned char *rq_data, u8 num_rq, + struct fcoe_task_ctx_entry *task) { struct bnx2fc_work *work; work = kzalloc(sizeof(struct bnx2fc_work), GFP_ATOMIC); @@ -997,29 +986,87 @@ static struct bnx2fc_work *bnx2fc_alloc_work(struct bnx2fc_rport *tgt, u16 wqe) INIT_LIST_HEAD(&work->list); work->tgt = tgt; work->wqe = wqe; + work->num_rq = num_rq; + work->task = task; + if (rq_data) + memcpy(work->rq_data, rq_data, BNX2FC_RQ_BUF_SZ); + return work; } /* Pending work request completion */ -static void bnx2fc_pending_work(struct bnx2fc_rport *tgt, unsigned int wqe) +static bool bnx2fc_pending_work(struct bnx2fc_rport *tgt, unsigned int wqe) { unsigned int cpu = wqe % num_possible_cpus(); struct bnx2fc_percpu_s *fps; struct bnx2fc_work *work; + struct fcoe_task_ctx_entry *task; + struct fcoe_task_ctx_entry *task_page; + struct fcoe_port *port = tgt->port; + 
struct bnx2fc_interface *interface = port->priv; + struct bnx2fc_hba *hba = interface->hba; + unsigned char *rq_data = NULL; + unsigned char rq_data_buff[BNX2FC_RQ_BUF_SZ]; + int task_idx, index; + unsigned char *dummy; + u16 xid; + u8 num_rq; + int i; + + xid = wqe & FCOE_PEND_WQ_CQE_TASK_ID; + if (xid >= hba->max_tasks) { + pr_err(PFX "ERROR:xid out of range\n"); + return 0; + } + + task_idx = xid / BNX2FC_TASKS_PER_PAGE; + index = xid % BNX2FC_TASKS_PER_PAGE; + task_page = (struct fcoe_task_ctx_entry *)hba->task_ctx[task_idx]; + task = &task_page[index]; + + num_rq = ((task->rxwr_txrd.var_ctx.rx_flags & + FCOE_TCE_RX_WR_TX_RD_VAR_NUM_RQ_WQE) >> + FCOE_TCE_RX_WR_TX_RD_VAR_NUM_RQ_WQE_SHIFT); + + memset(rq_data_buff, 0, BNX2FC_RQ_BUF_SZ); + + if (!num_rq) + goto num_rq_zero; + + rq_data = bnx2fc_get_next_rqe(tgt, 1); + + if (num_rq > 1) { + /* We do not need extra sense data */ + for (i = 1; i < num_rq; i++) + dummy = bnx2fc_get_next_rqe(tgt, 1); + } + + if (rq_data) + memcpy(rq_data_buff, rq_data, BNX2FC_RQ_BUF_SZ); + + /* return RQ entries */ + for (i = 0; i < num_rq; i++) + bnx2fc_return_rqe(tgt, 1); + +num_rq_zero: fps = &per_cpu(bnx2fc_percpu, cpu); spin_lock_bh(&fps->fp_work_lock); if (fps->iothread) { - work = bnx2fc_alloc_work(tgt, wqe); + work = bnx2fc_alloc_work(tgt, wqe, rq_data_buff, + num_rq, task); if (work) { list_add_tail(&work->list, &fps->work_list); wake_up_process(fps->iothread); spin_unlock_bh(&fps->fp_work_lock); - return; + return 1; } } spin_unlock_bh(&fps->fp_work_lock); - bnx2fc_process_cq_compl(tgt, wqe); + bnx2fc_process_cq_compl(tgt, wqe, + rq_data_buff, num_rq, task); + + return 1; } int bnx2fc_process_new_cqes(struct bnx2fc_rport *tgt) @@ -1056,8 +1103,8 @@ int bnx2fc_process_new_cqes(struct bnx2fc_rport *tgt) /* Unsolicited event notification */ bnx2fc_process_unsol_compl(tgt, wqe); } else { - bnx2fc_pending_work(tgt, wqe); - num_free_sqes++; + if (bnx2fc_pending_work(tgt, wqe)) + num_free_sqes++; } cqe++; tgt->cq_cons_idx++; diff 
--git a/drivers/scsi/bnx2fc/bnx2fc_io.c b/drivers/scsi/bnx2fc/bnx2fc_io.c index 4c8122a82322..9ab915240a2a 100644 --- a/drivers/scsi/bnx2fc/bnx2fc_io.c +++ b/drivers/scsi/bnx2fc/bnx2fc_io.c @@ -24,7 +24,7 @@ static void bnx2fc_unmap_sg_list(struct bnx2fc_cmd *io_req); static void bnx2fc_free_mp_resc(struct bnx2fc_cmd *io_req); static void bnx2fc_parse_fcp_rsp(struct bnx2fc_cmd *io_req, struct fcoe_fcp_rsp_payload *fcp_rsp, - u8 num_rq); + u8 num_rq, unsigned char *rq_data); void bnx2fc_cmd_timer_set(struct bnx2fc_cmd *io_req, unsigned int timer_msec) @@ -1518,7 +1518,8 @@ static void bnx2fc_tgt_reset_cmpl(struct bnx2fc_cmd *io_req) } void bnx2fc_process_tm_compl(struct bnx2fc_cmd *io_req, - struct fcoe_task_ctx_entry *task, u8 num_rq) + struct fcoe_task_ctx_entry *task, u8 num_rq, + unsigned char *rq_data) { struct bnx2fc_mp_req *tm_req; struct fc_frame_header *fc_hdr; @@ -1557,7 +1558,7 @@ void bnx2fc_process_tm_compl(struct bnx2fc_cmd *io_req, if (fc_hdr->fh_r_ctl == FC_RCTL_DD_CMD_STATUS) { bnx2fc_parse_fcp_rsp(io_req, (struct fcoe_fcp_rsp_payload *) - rsp_buf, num_rq); + rsp_buf, num_rq, rq_data); if (io_req->fcp_rsp_code == 0) { /* TM successful */ if (tm_req->tm_flags & FCP_TMF_LUN_RESET) @@ -1755,15 +1756,11 @@ void bnx2fc_build_fcp_cmnd(struct bnx2fc_cmd *io_req, static void bnx2fc_parse_fcp_rsp(struct bnx2fc_cmd *io_req, struct fcoe_fcp_rsp_payload *fcp_rsp, - u8 num_rq) + u8 num_rq, unsigned char *rq_data) { struct scsi_cmnd *sc_cmd = io_req->sc_cmd; - struct bnx2fc_rport *tgt = io_req->tgt; u8 rsp_flags = fcp_rsp->fcp_flags.flags; u32 rq_buff_len = 0; - int i; - unsigned char *rq_data; - unsigned char *dummy; int fcp_sns_len = 0; int fcp_rsp_len = 0; @@ -1809,14 +1806,6 @@ static void bnx2fc_parse_fcp_rsp(struct bnx2fc_cmd *io_req, rq_buff_len = num_rq * BNX2FC_RQ_BUF_SZ; } - rq_data = bnx2fc_get_next_rqe(tgt, 1); - - if (num_rq > 1) { - /* We do not need extra sense data */ - for (i = 1; i < num_rq; i++) - dummy = bnx2fc_get_next_rqe(tgt, 1); - } - /* 
fetch fcp_rsp_code */ if ((fcp_rsp_len == 4) || (fcp_rsp_len == 8)) { /* Only for task management function */ @@ -1837,9 +1826,6 @@ static void bnx2fc_parse_fcp_rsp(struct bnx2fc_cmd *io_req, if (fcp_sns_len) memcpy(sc_cmd->sense_buffer, rq_data, fcp_sns_len); - /* return RQ entries */ - for (i = 0; i < num_rq; i++) - bnx2fc_return_rqe(tgt, 1); } } @@ -1918,7 +1904,7 @@ int bnx2fc_queuecommand(struct Scsi_Host *host, void bnx2fc_process_scsi_cmd_compl(struct bnx2fc_cmd *io_req, struct fcoe_task_ctx_entry *task, - u8 num_rq) + u8 num_rq, unsigned char *rq_data) { struct fcoe_fcp_rsp_payload *fcp_rsp; struct bnx2fc_rport *tgt = io_req->tgt; @@ -1950,7 +1936,7 @@ void bnx2fc_process_scsi_cmd_compl(struct bnx2fc_cmd *io_req, &(task->rxwr_only.union_ctx.comp_info.fcp_rsp.payload); /* parse fcp_rsp and obtain sense data from RQ if available */ - bnx2fc_parse_fcp_rsp(io_req, fcp_rsp, num_rq); + bnx2fc_parse_fcp_rsp(io_req, fcp_rsp, num_rq, rq_data); if (!sc_cmd->SCp.ptr) { printk(KERN_ERR PFX "SCp.ptr is NULL\n"); From 30e73671a41658193a23cbba1d3038af4fbf5bc6 Mon Sep 17 00:00:00 2001 From: Saurav Kashyap Date: Thu, 26 Mar 2020 22:48:48 -0700 Subject: [PATCH 045/280] scsi: bnx2fc: Fix SCSI command completion after cleanup is posted Driver received a SCSI completion after it posted the cleanup request. This leads to a problem where one ref count wasn't released, causing flush_active_ios to get stuck. The callback from libfc never returned and other ports were not processed leading to APD. Decrease the refcnt as well as try to complete if something is waiting for completion. Link: https://lore.kernel.org/r/20200327054849.15947-3-skashyap@marvell.com Signed-off-by: Saurav Kashyap Signed-off-by: Martin K.
Petersen --- drivers/scsi/bnx2fc/bnx2fc_io.c | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/drivers/scsi/bnx2fc/bnx2fc_io.c b/drivers/scsi/bnx2fc/bnx2fc_io.c index 9ab915240a2a..2b070f0835df 100644 --- a/drivers/scsi/bnx2fc/bnx2fc_io.c +++ b/drivers/scsi/bnx2fc/bnx2fc_io.c @@ -1917,6 +1917,12 @@ void bnx2fc_process_scsi_cmd_compl(struct bnx2fc_cmd *io_req, /* we will not receive ABTS response for this IO */ BNX2FC_IO_DBG(io_req, "Timer context finished processing " "this scsi cmd\n"); + if (test_and_clear_bit(BNX2FC_FLAG_IO_CLEANUP, + &io_req->req_flags)) { + BNX2FC_IO_DBG(io_req, + "Actual completion after cleanup request cleaning up\n"); + bnx2fc_process_cleanup_compl(io_req, task, num_rq); + } return; } From 955225db1bd2872d2421e30705244c756a3b7469 Mon Sep 17 00:00:00 2001 From: Saurav Kashyap Date: Thu, 26 Mar 2020 22:48:49 -0700 Subject: [PATCH 046/280] scsi: bnx2fc: Update the driver version to 2.12.13 Update version to 2.12.13. Link: https://lore.kernel.org/r/20200327054849.15947-4-skashyap@marvell.com Signed-off-by: Saurav Kashyap Signed-off-by: Martin K. Petersen --- drivers/scsi/bnx2fc/bnx2fc.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/scsi/bnx2fc/bnx2fc.h b/drivers/scsi/bnx2fc/bnx2fc.h index 15fa8e2a9ad8..b6e8ed757252 100644 --- a/drivers/scsi/bnx2fc/bnx2fc.h +++ b/drivers/scsi/bnx2fc/bnx2fc.h @@ -66,7 +66,7 @@ #include "bnx2fc_constants.h" #define BNX2FC_NAME "bnx2fc" -#define BNX2FC_VERSION "2.12.10" +#define BNX2FC_VERSION "2.12.13" #define PFX "bnx2fc: " From 45e544bfdab2014d11c7595b8ccc3c4715a09015 Mon Sep 17 00:00:00 2001 From: Javed Hasan Date: Thu, 26 Mar 2020 23:02:07 -0700 Subject: [PATCH 047/280] scsi: libfc: If PRLI rejected, move rport to PLOGI state If PRLI reject code indicates "rejected status", move rport state machine back to PLOGI state. Link: https://lore.kernel.org/r/20200327060208.17104-2-skashyap@marvell.com Signed-off-by: Javed Hasan Signed-off-by: Saurav Kashyap Signed-off-by: Martin K. 
Petersen --- drivers/scsi/libfc/fc_rport.c | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/drivers/scsi/libfc/fc_rport.c b/drivers/scsi/libfc/fc_rport.c index da6e97d8dc3b..6bb8917b99a1 100644 --- a/drivers/scsi/libfc/fc_rport.c +++ b/drivers/scsi/libfc/fc_rport.c @@ -1208,9 +1208,15 @@ static void fc_rport_prli_resp(struct fc_seq *sp, struct fc_frame *fp, rjt = fc_frame_payload_get(fp, sizeof(*rjt)); if (!rjt) FC_RPORT_DBG(rdata, "PRLI bad response\n"); - else + else { FC_RPORT_DBG(rdata, "PRLI ELS rejected, reason %x expl %x\n", rjt->er_reason, rjt->er_explan); + if (rjt->er_reason == ELS_RJT_UNAB && + rjt->er_explan == ELS_EXPL_PLOGI_REQD) { + fc_rport_enter_plogi(rdata); + goto out; + } + } fc_rport_error_retry(rdata, FC_EX_ELS_RJT); } From c6e085c8813300cae73553bc276dbff015c1b0de Mon Sep 17 00:00:00 2001 From: Javed Hasan Date: Thu, 26 Mar 2020 23:02:08 -0700 Subject: [PATCH 048/280] scsi: libfc: rport state move to PLOGI if all PRLI retry exhausted After all PRLI retries are exhausted, move rport state machine back to PLOGI state. Link: https://lore.kernel.org/r/20200327060208.17104-3-skashyap@marvell.com Signed-off-by: Javed Hasan Signed-off-by: Saurav Kashyap Signed-off-by: Martin K. 
Petersen --- drivers/scsi/libfc/fc_rport.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/scsi/libfc/fc_rport.c b/drivers/scsi/libfc/fc_rport.c index 6bb8917b99a1..773c45af9387 100644 --- a/drivers/scsi/libfc/fc_rport.c +++ b/drivers/scsi/libfc/fc_rport.c @@ -632,6 +632,8 @@ static void fc_rport_error(struct fc_rport_priv *rdata, int err) fc_rport_enter_ready(rdata); break; case RPORT_ST_PRLI: + fc_rport_enter_plogi(rdata); + break; case RPORT_ST_ADISC: fc_rport_enter_logo(rdata); break; From 763a18cbf79b0395ae09e63dc4d77335e4f88368 Mon Sep 17 00:00:00 2001 From: Dick Kennedy Date: Mon, 23 Mar 2020 09:19:35 -0700 Subject: [PATCH 049/280] scsi: lpfc: Change default SCSI LUN QD to 64 The default lun queue depth by the driver has been 30 for many years. However, this value, when used with more recent hardware, has actually throttled some tests that concentrate io on a lun. Increase the default lun queue depth to 64. Queue full handling, reported by the target, remains in effect and unchanged. Link: https://lore.kernel.org/r/20200323161935.40341-1-jsmart2021@gmail.com Signed-off-by: Dick Kennedy Signed-off-by: James Smart Signed-off-by: Martin K. Petersen --- drivers/scsi/lpfc/lpfc_attr.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/scsi/lpfc/lpfc_attr.c b/drivers/scsi/lpfc/lpfc_attr.c index 3260e76e55b9..fae1c93353ac 100644 --- a/drivers/scsi/lpfc/lpfc_attr.c +++ b/drivers/scsi/lpfc/lpfc_attr.c @@ -3805,9 +3805,9 @@ LPFC_VPORT_ATTR_R(enable_da_id, 1, 0, 1, /* # lun_queue_depth: This parameter is used to limit the number of outstanding -# commands per FCP LUN. Value range is [1,512]. Default value is 30. +# commands per FCP LUN. 
*/ -LPFC_VPORT_ATTR_R(lun_queue_depth, 30, 1, 512, +LPFC_VPORT_ATTR_R(lun_queue_depth, 64, 1, 512, "Max number of FCP commands we can queue to a specific LUN"); /* From f3ca3f5b0966cfc2737178cca8a7a65b09991898 Mon Sep 17 00:00:00 2001 From: Fabio Estevam Date: Fri, 27 Mar 2020 12:57:21 -0300 Subject: [PATCH 050/280] dt-bindings: sound: cs42l51: Remove unneeded I2C unit name The following warning is seen with 'make dt_binding_check': Documentation/devicetree/bindings/sound/cirrus,cs42l51.example.dts:18.15-34.11: Warning (unit_address_vs_reg): /example-0/i2c@0: node has a unit name, but no reg or ranges property Fix it by removing the unneeded i2c unit name. Signed-off-by: Fabio Estevam Acked-by: Rob Herring Link: https://lore.kernel.org/r/20200327155721.7596-1-festevam@gmail.com Signed-off-by: Mark Brown --- Documentation/devicetree/bindings/sound/cirrus,cs42l51.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Documentation/devicetree/bindings/sound/cirrus,cs42l51.yaml b/Documentation/devicetree/bindings/sound/cirrus,cs42l51.yaml index efce847a3408..83f44f07ac3f 100644 --- a/Documentation/devicetree/bindings/sound/cirrus,cs42l51.yaml +++ b/Documentation/devicetree/bindings/sound/cirrus,cs42l51.yaml @@ -49,7 +49,7 @@ required: examples: - | #include - i2c@0 { + i2c { #address-cells = <1>; #size-cells = <0>; From 76385a665ff5cfdf0a6dda43ec9bce7e7d5387d3 Mon Sep 17 00:00:00 2001 From: Takashi Iwai Date: Mon, 30 Mar 2020 15:56:45 +0200 Subject: [PATCH 051/280] ASoC: bcm: Fix pointer cast warning The NULL check can be done gracefully without cast. 
It fixes a compile warning like: sound/soc/bcm/bcm63xx-pcm-whistler.c:184:6: warning: cast to pointer from integer of different size [-Wint-to-pointer-cast] Fixes: 88eb404ccc3e ("ASoC: brcm: Add DSL/PON SoC audio driver") Signed-off-by: Takashi Iwai Link: https://lore.kernel.org/r/20200330135645.9707-1-tiwai@suse.de Signed-off-by: Mark Brown --- sound/soc/bcm/bcm63xx-pcm-whistler.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sound/soc/bcm/bcm63xx-pcm-whistler.c b/sound/soc/bcm/bcm63xx-pcm-whistler.c index e46c390683e7..b7a1efc7406e 100644 --- a/sound/soc/bcm/bcm63xx-pcm-whistler.c +++ b/sound/soc/bcm/bcm63xx-pcm-whistler.c @@ -181,7 +181,7 @@ bcm63xx_pcm_pointer(struct snd_soc_component *component, snd_pcm_uframes_t x; struct bcm63xx_runtime_data *prtd = substream->runtime->private_data; - if ((void *)prtd->dma_addr_next == NULL) + if (!prtd->dma_addr_next) prtd->dma_addr_next = substream->runtime->dma_addr; x = bytes_to_frames(substream->runtime, From a91ab6509cd382dae4b7953155f47f276ff0d22f Mon Sep 17 00:00:00 2001 From: Akshu Agrawal Date: Sat, 28 Mar 2020 03:39:16 -0600 Subject: [PATCH 052/280] ASoC: AMD: Clear format bits before setting them This avoids residual bits from the previous format when the format is changed, so that the resulting format is not an invalid one.
Signed-off-by: Akshu Agrawal Signed-off-by: Ravulapati Vishnu vardhan rao Link: https://lore.kernel.org/r/20200328093921.32211-1-akshu.agrawal@amd.com Signed-off-by: Mark Brown --- sound/soc/amd/raven/acp3x-i2s.c | 1 + sound/soc/amd/raven/acp3x.h | 2 ++ 2 files changed, 3 insertions(+) diff --git a/sound/soc/amd/raven/acp3x-i2s.c b/sound/soc/amd/raven/acp3x-i2s.c index 3a3c47e820ab..f160d35a6832 100644 --- a/sound/soc/amd/raven/acp3x-i2s.c +++ b/sound/soc/amd/raven/acp3x-i2s.c @@ -139,6 +139,7 @@ static int acp3x_i2s_hwparams(struct snd_pcm_substream *substream, rv_writel(adata->tdm_fmt, rtd->acp3x_base + frmt_reg); } val = rv_readl(rtd->acp3x_base + reg_val); + val &= ~ACP3x_ITER_IRER_SAMP_LEN_MASK; val = val | (rtd->xfer_resolution << 3); rv_writel(val, rtd->acp3x_base + reg_val); return 0; diff --git a/sound/soc/amd/raven/acp3x.h b/sound/soc/amd/raven/acp3x.h index 21e7ac017f2b..03fe93913e12 100644 --- a/sound/soc/amd/raven/acp3x.h +++ b/sound/soc/amd/raven/acp3x.h @@ -76,6 +76,8 @@ #define ACP_POWERED_OFF 0x02 #define ACP_POWER_OFF_IN_PROGRESS 0x03 +#define ACP3x_ITER_IRER_SAMP_LEN_MASK 0x38 + struct acp3x_platform_info { u16 play_i2s_instance; u16 cap_i2s_instance; From 5df5a577a6b4e92c74931e19c2b1f5820c834171 Mon Sep 17 00:00:00 2001 From: Dafna Hirschfeld Date: Mon, 27 Jan 2020 11:34:41 +0100 Subject: [PATCH 053/280] dt-bindings: pwm: Convert google,cros-ec-pwm.txt to YAML format Convert the binding file google,cros-ec-pwm.txt to YAML format. 
This was tested and verified on ARM64 with: make dt_binding_check DT_SCHEMA_FILES=Documentation/devicetree/bindings/pwm/google,cros-ec-pwm.yaml make dtbs_check DT_SCHEMA_FILES=Documentation/devicetree/bindings/pwm/google,cros-ec-pwm.yaml Signed-off-by: Dafna Hirschfeld Reviewed-by: Rob Herring Signed-off-by: Thierry Reding --- .../bindings/pwm/google,cros-ec-pwm.txt | 23 ----------- .../bindings/pwm/google,cros-ec-pwm.yaml | 40 +++++++++++++++++++ 2 files changed, 40 insertions(+), 23 deletions(-) delete mode 100644 Documentation/devicetree/bindings/pwm/google,cros-ec-pwm.txt create mode 100644 Documentation/devicetree/bindings/pwm/google,cros-ec-pwm.yaml diff --git a/Documentation/devicetree/bindings/pwm/google,cros-ec-pwm.txt b/Documentation/devicetree/bindings/pwm/google,cros-ec-pwm.txt deleted file mode 100644 index 472bd46ab5a4..000000000000 --- a/Documentation/devicetree/bindings/pwm/google,cros-ec-pwm.txt +++ /dev/null @@ -1,23 +0,0 @@ -* PWM controlled by ChromeOS EC - -Google's ChromeOS EC PWM is a simple PWM attached to the Embedded Controller -(EC) and controlled via a host-command interface. - -An EC PWM node should be only found as a sub-node of the EC node (see -Documentation/devicetree/bindings/mfd/cros-ec.txt). - -Required properties: -- compatible: Must contain "google,cros-ec-pwm" -- #pwm-cells: Should be 1. The cell specifies the PWM index. - -Example: - cros-ec@0 { - compatible = "google,cros-ec-spi"; - - ... 
- - cros_ec_pwm: ec-pwm { - compatible = "google,cros-ec-pwm"; - #pwm-cells = <1>; - }; - }; diff --git a/Documentation/devicetree/bindings/pwm/google,cros-ec-pwm.yaml b/Documentation/devicetree/bindings/pwm/google,cros-ec-pwm.yaml new file mode 100644 index 000000000000..24c217b76580 --- /dev/null +++ b/Documentation/devicetree/bindings/pwm/google,cros-ec-pwm.yaml @@ -0,0 +1,40 @@ +# SPDX-License-Identifier: GPL-2.0 +%YAML 1.2 +--- +$id: http://devicetree.org/schemas/pwm/google,cros-ec-pwm.yaml# +$schema: http://devicetree.org/meta-schemas/core.yaml# + +title: PWM controlled by ChromeOS EC + +maintainers: + - Thierry Reding + - '"Uwe Kleine-König" ' + +description: | + Google's ChromeOS EC PWM is a simple PWM attached to the Embedded Controller + (EC) and controlled via a host-command interface. + An EC PWM node should be only found as a sub-node of the EC node (see + Documentation/devicetree/bindings/mfd/cros-ec.txt). + +properties: + compatible: + const: google,cros-ec-pwm + "#pwm-cells": + description: The cell specifies the PWM index. + const: 1 + +required: + - compatible + - '#pwm-cells' + +additionalProperties: false + +examples: + - | + cros-ec@0 { + compatible = "google,cros-ec-spi"; + cros_ec_pwm: ec-pwm { + compatible = "google,cros-ec-pwm"; + #pwm-cells = <1>; + }; + }; From 2c25b07e5ec119cab609e41407a1fb3fa61442f5 Mon Sep 17 00:00:00 2001 From: Florian Fainelli Date: Mon, 3 Feb 2020 13:35:35 -0800 Subject: [PATCH 054/280] pwm: bcm2835: Dynamically allocate base MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The newer 2711 and 7211 chips have two PWM controllers and failure to dynamically allocate the PWM base would prevent the second PWM controller instance being probed for succeeding with an -EEXIST error from alloc_pwms(). 
Fixes: e5a06dc5ac1f ("pwm: Add BCM2835 PWM driver") Signed-off-by: Florian Fainelli Acked-by: Uwe Kleine-König Reviewed-by: Nicolas Saenz Julienne Signed-off-by: Thierry Reding --- drivers/pwm/pwm-bcm2835.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/pwm/pwm-bcm2835.c b/drivers/pwm/pwm-bcm2835.c index 91e24f01b54e..d78f86f8e462 100644 --- a/drivers/pwm/pwm-bcm2835.c +++ b/drivers/pwm/pwm-bcm2835.c @@ -166,6 +166,7 @@ static int bcm2835_pwm_probe(struct platform_device *pdev) pc->chip.dev = &pdev->dev; pc->chip.ops = &bcm2835_pwm_ops; + pc->chip.base = -1; pc->chip.npwm = 2; pc->chip.of_xlate = of_pwm_xlate_with_flags; pc->chip.of_pwm_n_cells = 3; From 15d4dbd601591858611184f9ddeb5bf21569159c Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Uwe=20Kleine-K=C3=B6nig?= Date: Sun, 9 Feb 2020 22:31:06 +0100 Subject: [PATCH 055/280] pwm: imx27: Fix clock handling in pwm_imx27_apply() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit pwm_imx27_apply() enables the clocks if the previous PWM state was disabled. Given that the clocks are supposed to be left on iff the PWM is running, the decision to disable the clocks at the end of the function must not depend on the previous state. Without this fix the enable count of the two affected clocks increases by one whenever ->apply() changes from one disabled state to another. 
Fixes: bd88d319abe9 ("pwm: imx27: Unconditionally write state to hardware") Signed-off-by: Uwe Kleine-König Signed-off-by: Thierry Reding --- drivers/pwm/pwm-imx27.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/pwm/pwm-imx27.c b/drivers/pwm/pwm-imx27.c index 35a7ac42269c..7e5ed0152977 100644 --- a/drivers/pwm/pwm-imx27.c +++ b/drivers/pwm/pwm-imx27.c @@ -289,7 +289,7 @@ static int pwm_imx27_apply(struct pwm_chip *chip, struct pwm_device *pwm, writel(cr, imx->mmio_base + MX3_PWMCR); - if (!state->enabled && cstate.enabled) + if (!state->enabled) pwm_imx27_clk_disable_unprepare(chip); return 0; From aad4e530c241cbe862aff445f13db9099c9980eb Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Uwe=20Kleine-K=C3=B6nig?= Date: Mon, 10 Feb 2020 22:22:38 +0100 Subject: [PATCH 056/280] pwm: imx27: Simplify helper function to enable and disable clocks MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit pwm_imx27_clk_prepare_enable() took a pointer to a struct pwm_chip just to convert it to a struct pwm_imx27_chip pointer while all callers already have the latter. Ditto for pwm_imx27_clk_disable_unprepare(). 
Signed-off-by: Uwe Kleine-König Signed-off-by: Thierry Reding --- drivers/pwm/pwm-imx27.c | 17 +++++++---------- 1 file changed, 7 insertions(+), 10 deletions(-) diff --git a/drivers/pwm/pwm-imx27.c b/drivers/pwm/pwm-imx27.c index 7e5ed0152977..e04ae566bbf9 100644 --- a/drivers/pwm/pwm-imx27.c +++ b/drivers/pwm/pwm-imx27.c @@ -96,9 +96,8 @@ struct pwm_imx27_chip { #define to_pwm_imx27_chip(chip) container_of(chip, struct pwm_imx27_chip, chip) -static int pwm_imx27_clk_prepare_enable(struct pwm_chip *chip) +static int pwm_imx27_clk_prepare_enable(struct pwm_imx27_chip *imx) { - struct pwm_imx27_chip *imx = to_pwm_imx27_chip(chip); int ret; ret = clk_prepare_enable(imx->clk_ipg); @@ -114,10 +113,8 @@ static int pwm_imx27_clk_prepare_enable(struct pwm_chip *chip) return 0; } -static void pwm_imx27_clk_disable_unprepare(struct pwm_chip *chip) +static void pwm_imx27_clk_disable_unprepare(struct pwm_imx27_chip *imx) { - struct pwm_imx27_chip *imx = to_pwm_imx27_chip(chip); - clk_disable_unprepare(imx->clk_per); clk_disable_unprepare(imx->clk_ipg); } @@ -130,7 +127,7 @@ static void pwm_imx27_get_state(struct pwm_chip *chip, u64 tmp; int ret; - ret = pwm_imx27_clk_prepare_enable(chip); + ret = pwm_imx27_clk_prepare_enable(imx); if (ret < 0) return; @@ -175,7 +172,7 @@ static void pwm_imx27_get_state(struct pwm_chip *chip, state->duty_cycle = DIV_ROUND_CLOSEST_ULL(tmp, pwm_clk); if (!state->enabled) - pwm_imx27_clk_disable_unprepare(chip); + pwm_imx27_clk_disable_unprepare(imx); } static void pwm_imx27_sw_reset(struct pwm_chip *chip) @@ -259,7 +256,7 @@ static int pwm_imx27_apply(struct pwm_chip *chip, struct pwm_device *pwm, if (cstate.enabled) { pwm_imx27_wait_fifo_slot(chip, pwm); } else { - ret = pwm_imx27_clk_prepare_enable(chip); + ret = pwm_imx27_clk_prepare_enable(imx); if (ret) return ret; @@ -290,7 +287,7 @@ static int pwm_imx27_apply(struct pwm_chip *chip, struct pwm_device *pwm, writel(cr, imx->mmio_base + MX3_PWMCR); if (!state->enabled) - 
pwm_imx27_clk_disable_unprepare(chip); + pwm_imx27_clk_disable_unprepare(imx); return 0; } @@ -361,7 +358,7 @@ static int pwm_imx27_remove(struct platform_device *pdev) imx = platform_get_drvdata(pdev); - pwm_imx27_clk_disable_unprepare(&imx->chip); + pwm_imx27_clk_disable_unprepare(imx); return pwmchip_remove(&imx->chip); } From 4563654fddc05a572f889373ba22abc616b3aa1f Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Uwe=20Kleine-K=C3=B6nig?= Date: Mon, 10 Feb 2020 22:22:39 +0100 Subject: [PATCH 057/280] pwm: imx27: Don't disable clocks at device remove time MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The .remove() callback is not supposed to modify hardware state. This is in the responsibility of the PWM consumer. After the PWM was disabled the clocks are off (apart from a bug that is fixed in the next patch), so unbinding the driver either stops the PWM (which it should not) or disables already disabled clocks yielding warnings from the clk core. So just drop the call to disable the clocks. (Which BTW was also in the wrong order because the call makes the PWM unfunctional and so should have come only after pwmchip_remove()). 
Fixes: 9f4c8f9607c3 ("pwm: imx: Add ipg clock operation") Signed-off-by: Uwe Kleine-König Signed-off-by: Thierry Reding --- drivers/pwm/pwm-imx27.c | 2 -- 1 file changed, 2 deletions(-) diff --git a/drivers/pwm/pwm-imx27.c b/drivers/pwm/pwm-imx27.c index e04ae566bbf9..fb142813d455 100644 --- a/drivers/pwm/pwm-imx27.c +++ b/drivers/pwm/pwm-imx27.c @@ -358,8 +358,6 @@ static int pwm_imx27_remove(struct platform_device *pdev) imx = platform_get_drvdata(pdev); - pwm_imx27_clk_disable_unprepare(imx); - return pwmchip_remove(&imx->chip); } From 2cb5cd90f4cd3f819178bb47f3d1ef86ce612db7 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Uwe=20Kleine-K=C3=B6nig?= Date: Mon, 10 Feb 2020 22:22:40 +0100 Subject: [PATCH 058/280] pwm: imx27: Ensure clocks being on iff the PWM is on MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Up to now the .probe() function didn't enable clocks and relied on the core to call the .get_state() callback to have the clock running. The latter enabled the needed clocks and kept them running if the PWM was enabled. This only works correctly if the .get_state() callback is called exactly once and this single call happens before unused clocks are disabled by the clk core. The former wasn't true for a short period while commit 01ccf903edd6 ("pwm: Let pwm_get_state() return the last implemented state") was applied and not yet reverted and might become wrong in the future. The latter isn't true any more since commit cfc4c189bc70 ("pwm: Read initial hardware state at request time") which results in a running PWM being stopped at boot time if for example the consumer lives in a kernel module that is only loaded after the clk core disabled unused clocks. So ensure .probe() is left with the clocks on if the PWM is running and .get_state() disables everything it enabled.
Signed-off-by: Uwe Kleine-König Signed-off-by: Thierry Reding --- drivers/pwm/pwm-imx27.c | 14 ++++++++++++-- 1 file changed, 12 insertions(+), 2 deletions(-) diff --git a/drivers/pwm/pwm-imx27.c b/drivers/pwm/pwm-imx27.c index fb142813d455..e83c077bb7cc 100644 --- a/drivers/pwm/pwm-imx27.c +++ b/drivers/pwm/pwm-imx27.c @@ -171,8 +171,7 @@ static void pwm_imx27_get_state(struct pwm_chip *chip, tmp = NSEC_PER_SEC * (u64)(val); state->duty_cycle = DIV_ROUND_CLOSEST_ULL(tmp, pwm_clk); - if (!state->enabled) - pwm_imx27_clk_disable_unprepare(imx); + pwm_imx27_clk_disable_unprepare(imx); } static void pwm_imx27_sw_reset(struct pwm_chip *chip) @@ -307,6 +306,8 @@ MODULE_DEVICE_TABLE(of, pwm_imx27_dt_ids); static int pwm_imx27_probe(struct platform_device *pdev) { struct pwm_imx27_chip *imx; + int ret; + u32 pwmcr; imx = devm_kzalloc(&pdev->dev, sizeof(*imx), GFP_KERNEL); if (imx == NULL) @@ -349,6 +350,15 @@ static int pwm_imx27_probe(struct platform_device *pdev) if (IS_ERR(imx->mmio_base)) return PTR_ERR(imx->mmio_base); + ret = pwm_imx27_clk_prepare_enable(imx); + if (ret) + return ret; + + /* keep clks on if pwm is running */ + pwmcr = readl(imx->mmio_base + MX3_PWMCR); + if (!(pwmcr & MX3_PWMCR_EN)) + pwm_imx27_clk_disable_unprepare(imx); + return pwmchip_add(&imx->chip); } From 3ad1f3a33286dc67d595f6fab3a3a9e583bc738a Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Uwe=20Kleine-K=C3=B6nig?= Date: Mon, 10 Feb 2020 22:35:18 +0100 Subject: [PATCH 059/280] pwm: Implement some checks for lowlevel drivers MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit There are some expectations which the callbacks provided by lowlevel drivers should fulfill. Implement checks that help driver authors to get these semantics right. As these have some overhead the checks can be disabled using a Kconfig setting. 
Signed-off-by: Uwe Kleine-König Signed-off-by: Thierry Reding --- drivers/pwm/Kconfig | 9 +++ drivers/pwm/core.c | 135 +++++++++++++++++++++++++++++++++++++++++--- include/linux/pwm.h | 4 +- 3 files changed, 140 insertions(+), 8 deletions(-) diff --git a/drivers/pwm/Kconfig b/drivers/pwm/Kconfig index 30190beeb6e9..e21834f44a29 100644 --- a/drivers/pwm/Kconfig +++ b/drivers/pwm/Kconfig @@ -33,6 +33,15 @@ config PWM_SYSFS bool default y if SYSFS +config PWM_DEBUG + bool "PWM lowlevel drivers additional checks and debug messages" + depends on DEBUG_KERNEL + help + This option enables some additional checks to help lowlevel driver + authors to get their callbacks implemented correctly. + It is expected to introduce some runtime overhead and diagnostic + output to the kernel log, so only enable while working on a driver. + config PWM_AB8500 tristate "AB8500 PWM support" depends on AB8500_CORE && ARCH_U8500 diff --git a/drivers/pwm/core.c b/drivers/pwm/core.c index 5a7f6598c05f..e9b9283cff28 100644 --- a/drivers/pwm/core.c +++ b/drivers/pwm/core.c @@ -120,6 +120,9 @@ static int pwm_device_request(struct pwm_device *pwm, const char *label) if (pwm->chip->ops->get_state) { pwm->chip->ops->get_state(pwm->chip, pwm, &pwm->state); trace_pwm_get(pwm, &pwm->state); + + if (IS_ENABLED(PWM_DEBUG)) + pwm->last = pwm->state; } set_bit(PWMF_REQUESTED, &pwm->flags); @@ -232,17 +235,28 @@ void *pwm_get_chip_data(struct pwm_device *pwm) } EXPORT_SYMBOL_GPL(pwm_get_chip_data); -static bool pwm_ops_check(const struct pwm_ops *ops) +static bool pwm_ops_check(const struct pwm_chip *chip) { + + const struct pwm_ops *ops = chip->ops; + /* driver supports legacy, non-atomic operation */ - if (ops->config && ops->enable && ops->disable) - return true; + if (ops->config && ops->enable && ops->disable) { + if (IS_ENABLED(CONFIG_PWM_DEBUG)) + dev_warn(chip->dev, + "Driver needs updating to atomic API\n"); - /* driver supports atomic operation */ - if (ops->apply) return true; + } - return false; 
+ if (!ops->apply) + return false; + + if (IS_ENABLED(CONFIG_PWM_DEBUG) && !ops->get_state) + dev_warn(chip->dev, + "Please implement the .get_state() callback\n"); + + return true; } /** @@ -266,7 +280,7 @@ int pwmchip_add_with_polarity(struct pwm_chip *chip, if (!chip || !chip->dev || !chip->ops || !chip->npwm) return -EINVAL; - if (!pwm_ops_check(chip->ops)) + if (!pwm_ops_check(chip)) return -EINVAL; mutex_lock(&pwm_lock); @@ -450,6 +464,107 @@ void pwm_free(struct pwm_device *pwm) } EXPORT_SYMBOL_GPL(pwm_free); +void pwm_apply_state_debug(struct pwm_device *pwm, + const struct pwm_state *state) +{ + struct pwm_state *last = &pwm->last; + struct pwm_chip *chip = pwm->chip; + struct pwm_state s1, s2; + int err; + + if (!IS_ENABLED(CONFIG_PWM_DEBUG)) + return; + + /* No reasonable diagnosis possible without .get_state() */ + if (!chip->ops->get_state) + return; + + /* + * *state was just applied. Read out the hardware state and do some + * checks. + */ + + chip->ops->get_state(chip, pwm, &s1); + trace_pwm_get(pwm, &s1); + + /* + * The lowlevel driver either ignored .polarity (which is a bug) or as + * best effort inverted .polarity and fixed .duty_cycle respectively. + * Undo this inversion and fixup for further tests. 
+ */ + if (s1.enabled && s1.polarity != state->polarity) { + s2.polarity = state->polarity; + s2.duty_cycle = s1.period - s1.duty_cycle; + s2.period = s1.period; + s2.enabled = s1.enabled; + } else { + s2 = s1; + } + + if (s2.polarity != state->polarity && + state->duty_cycle < state->period) + dev_warn(chip->dev, ".apply ignored .polarity\n"); + + if (state->enabled && + last->polarity == state->polarity && + last->period > s2.period && + last->period <= state->period) + dev_warn(chip->dev, + ".apply didn't pick the best available period (requested: %u, applied: %u, possible: %u)\n", + state->period, s2.period, last->period); + + if (state->enabled && state->period < s2.period) + dev_warn(chip->dev, + ".apply is supposed to round down period (requested: %u, applied: %u)\n", + state->period, s2.period); + + if (state->enabled && + last->polarity == state->polarity && + last->period == s2.period && + last->duty_cycle > s2.duty_cycle && + last->duty_cycle <= state->duty_cycle) + dev_warn(chip->dev, + ".apply didn't pick the best available duty cycle (requested: %u/%u, applied: %u/%u, possible: %u/%u)\n", + state->duty_cycle, state->period, + s2.duty_cycle, s2.period, + last->duty_cycle, last->period); + + if (state->enabled && state->duty_cycle < s2.duty_cycle) + dev_warn(chip->dev, + ".apply is supposed to round down duty_cycle (requested: %u/%u, applied: %u/%u)\n", + state->duty_cycle, state->period, + s2.duty_cycle, s2.period); + + if (!state->enabled && s2.enabled && s2.duty_cycle > 0) + dev_warn(chip->dev, + "requested disabled, but yielded enabled with duty > 0"); + + /* reapply the state that the driver reported being configured. 
*/ + err = chip->ops->apply(chip, pwm, &s1); + if (err) { + *last = s1; + dev_err(chip->dev, "failed to reapply current setting\n"); + return; + } + + trace_pwm_apply(pwm, &s1); + + chip->ops->get_state(chip, pwm, last); + trace_pwm_get(pwm, last); + + /* reapplication of the current state should give an exact match */ + if (s1.enabled != last->enabled || + s1.polarity != last->polarity || + (s1.enabled && s1.period != last->period) || + (s1.enabled && s1.duty_cycle != last->duty_cycle)) { + dev_err(chip->dev, + ".apply is not idempotent (ena=%d pol=%d %u/%u) -> (ena=%d pol=%d %u/%u)\n", + s1.enabled, s1.polarity, s1.duty_cycle, s1.period, + last->enabled, last->polarity, last->duty_cycle, + last->period); + } +} + /** * pwm_apply_state() - atomically apply a new state to a PWM device * @pwm: PWM device @@ -480,6 +595,12 @@ int pwm_apply_state(struct pwm_device *pwm, const struct pwm_state *state) trace_pwm_apply(pwm, state); pwm->state = *state; + + /* + * only do this after pwm->state was applied as some + * implementations of .get_state depend on this + */ + pwm_apply_state_debug(pwm, state); } else { /* * FIXME: restore the initial state in case of error. 
diff --git a/include/linux/pwm.h b/include/linux/pwm.h index 0ef808d925bb..2635b2a55090 100644 --- a/include/linux/pwm.h +++ b/include/linux/pwm.h @@ -71,7 +71,8 @@ struct pwm_state { * @chip: PWM chip providing this PWM device * @chip_data: chip-private data associated with the PWM device * @args: PWM arguments - * @state: curent PWM channel state + * @state: last applied state + * @last: last implemented state (for PWM_DEBUG) */ struct pwm_device { const char *label; @@ -83,6 +84,7 @@ struct pwm_device { struct pwm_args args; struct pwm_state state; + struct pwm_state last; }; /** From c0adbd1cdfcdf78f48b1d7797c775b9d36d665a9 Mon Sep 17 00:00:00 2001 From: Anson Huang Date: Fri, 21 Feb 2020 15:45:27 +0800 Subject: [PATCH 060/280] pwm: imx-tpm: Remove unused includes MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit There is nothing in use from log2.h/of_address.h, remove them. Signed-off-by: Anson Huang Acked-by: Uwe Kleine-König Signed-off-by: Thierry Reding --- drivers/pwm/pwm-imx-tpm.c | 2 -- 1 file changed, 2 deletions(-) diff --git a/drivers/pwm/pwm-imx-tpm.c b/drivers/pwm/pwm-imx-tpm.c index 9145f6160649..5f3d7f7e6aef 100644 --- a/drivers/pwm/pwm-imx-tpm.c +++ b/drivers/pwm/pwm-imx-tpm.c @@ -18,10 +18,8 @@ #include #include #include -#include #include #include -#include #include #include #include From cf7987320a1aba758f2f3e9f71e1bc23e9558e83 Mon Sep 17 00:00:00 2001 From: Anson Huang Date: Fri, 21 Feb 2020 15:45:28 +0800 Subject: [PATCH 061/280] pwm: imx27: Remove unused include of of_device.h MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit There is nothing in use from of_device.h, remove it. 
Signed-off-by: Anson Huang Acked-by: Uwe Kleine-König Signed-off-by: Thierry Reding --- drivers/pwm/pwm-imx27.c | 1 - 1 file changed, 1 deletion(-) diff --git a/drivers/pwm/pwm-imx27.c b/drivers/pwm/pwm-imx27.c index e83c077bb7cc..a6e40d4c485f 100644 --- a/drivers/pwm/pwm-imx27.c +++ b/drivers/pwm/pwm-imx27.c @@ -18,7 +18,6 @@ #include #include #include -#include #include #include #include From cef6df8bcdda10a0b2984030f161dcd01e2e934c Mon Sep 17 00:00:00 2001 From: Anson Huang Date: Fri, 21 Feb 2020 15:45:29 +0800 Subject: [PATCH 062/280] pwm: mxs: Remove unused include of of_address.h MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit There is nothing in use from of_address.h, remove it. Signed-off-by: Anson Huang Acked-by: Uwe Kleine-König Signed-off-by: Thierry Reding --- drivers/pwm/pwm-mxs.c | 1 - 1 file changed, 1 deletion(-) diff --git a/drivers/pwm/pwm-mxs.c b/drivers/pwm/pwm-mxs.c index f2e57fcf8f8b..7ce616923c52 100644 --- a/drivers/pwm/pwm-mxs.c +++ b/drivers/pwm/pwm-mxs.c @@ -9,7 +9,6 @@ #include #include #include -#include #include #include #include From 69ee15f1b7a506b0484d58f2ee6add598fbca29f Mon Sep 17 00:00:00 2001 From: Matthias Schiffer Date: Wed, 26 Feb 2020 14:52:26 +0100 Subject: [PATCH 063/280] pwm: pca9685: Remove unused duty_cycle struct element duty_cycle was only set, never read. 
Signed-off-by: Matthias Schiffer Signed-off-by: Thierry Reding --- drivers/pwm/pwm-pca9685.c | 4 ---- 1 file changed, 4 deletions(-) diff --git a/drivers/pwm/pwm-pca9685.c b/drivers/pwm/pwm-pca9685.c index b07bdca3d510..19ac97108a64 100644 --- a/drivers/pwm/pwm-pca9685.c +++ b/drivers/pwm/pwm-pca9685.c @@ -69,7 +69,6 @@ struct pca9685 { struct pwm_chip chip; struct regmap *regmap; - int duty_ns; int period_ns; #if IS_ENABLED(CONFIG_GPIOLIB) struct mutex lock; @@ -272,8 +271,6 @@ static int pca9685_pwm_config(struct pwm_chip *chip, struct pwm_device *pwm, } } - pca->duty_ns = duty_ns; - if (duty_ns < 1) { if (pwm->hwpwm >= PCA9685_MAXCHAN) reg = PCA9685_ALL_LED_OFF_H; @@ -449,7 +446,6 @@ static int pca9685_pwm_probe(struct i2c_client *client, ret); return ret; } - pca->duty_ns = 0; pca->period_ns = PCA9685_DEFAULT_PERIOD; i2c_set_clientdata(client, pca); From e96c0ff4b1e013a4e9174344b0fcda0d566d3689 Mon Sep 17 00:00:00 2001 From: Krzysztof Kozlowski Date: Tue, 3 Mar 2020 21:24:47 +0100 Subject: [PATCH 064/280] pwm: Enable compile testing for some of drivers MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Some of the PWM drivers can be compile tested to increase build coverage. The Meson PWM driver requires COMMON_CLK dependency. Signed-off-by: Krzysztof Kozlowski Acked-by: Florian Fainelli # For Broadcoam Reviewed-by: Martin Blumenstingl # For Meson Acked-by: Claudiu Beznea # For Atmel Acked-by: Uwe Kleine-König Signed-off-by: Thierry Reding --- drivers/pwm/Kconfig | 47 ++++++++++++++++++++++++--------------------- 1 file changed, 25 insertions(+), 22 deletions(-) diff --git a/drivers/pwm/Kconfig b/drivers/pwm/Kconfig index e21834f44a29..0e64c62d4b6b 100644 --- a/drivers/pwm/Kconfig +++ b/drivers/pwm/Kconfig @@ -53,7 +53,8 @@ config PWM_AB8500 config PWM_ATMEL tristate "Atmel PWM support" - depends on ARCH_AT91 && OF + depends on OF + depends on ARCH_AT91 || COMPILE_TEST help Generic PWM framework driver for Atmel SoC. 
@@ -109,7 +110,7 @@ config PWM_BCM_KONA config PWM_BCM2835 tristate "BCM2835 PWM support" - depends on ARCH_BCM2835 || ARCH_BRCMSTB + depends on ARCH_BCM2835 || ARCH_BRCMSTB || COMPILE_TEST help PWM framework driver for BCM2835 controller (Raspberry Pi) @@ -118,7 +119,7 @@ config PWM_BCM2835 config PWM_BERLIN tristate "Marvell Berlin PWM support" - depends on ARCH_BERLIN + depends on ARCH_BERLIN || COMPILE_TEST help PWM framework driver for Marvell Berlin SoCs. @@ -127,7 +128,7 @@ config PWM_BERLIN config PWM_BRCMSTB tristate "Broadcom STB PWM support" - depends on ARCH_BRCMSTB || BMIPS_GENERIC + depends on ARCH_BRCMSTB || BMIPS_GENERIC || COMPILE_TEST help Generic PWM framework driver for the Broadcom Set-top-Box SoCs (BCM7xxx). @@ -161,7 +162,7 @@ config PWM_CROS_EC config PWM_EP93XX tristate "Cirrus Logic EP93xx PWM support" - depends on ARCH_EP93XX + depends on ARCH_EP93XX || COMPILE_TEST help Generic PWM framework driver for Cirrus Logic EP93xx. @@ -204,7 +205,7 @@ config PWM_IMG config PWM_IMX1 tristate "i.MX1 PWM support" - depends on ARCH_MXC + depends on ARCH_MXC || COMPILE_TEST help Generic PWM framework driver for i.MX1 and i.MX21 @@ -213,7 +214,7 @@ config PWM_IMX1 config PWM_IMX27 tristate "i.MX27 PWM support" - depends on ARCH_MXC + depends on ARCH_MXC || COMPILE_TEST help Generic PWM framework driver for i.MX27 and later i.MX SoCs. @@ -253,7 +254,7 @@ config PWM_LP3943 config PWM_LPC18XX_SCT tristate "LPC18xx/43xx PWM/SCT support" - depends on ARCH_LPC18XX + depends on ARCH_LPC18XX || COMPILE_TEST help Generic PWM framework driver for NXP LPC18xx PWM/SCT which supports 16 channels. @@ -265,7 +266,7 @@ config PWM_LPC18XX_SCT config PWM_LPC32XX tristate "LPC32XX PWM support" - depends on ARCH_LPC32XX + depends on ARCH_LPC32XX || COMPILE_TEST help Generic PWM framework driver for LPC32XX. The LPC32XX SOC has two PWM controllers. 
@@ -298,7 +299,8 @@ config PWM_LPSS_PLATFORM config PWM_MESON tristate "Amlogic Meson PWM driver" - depends on ARCH_MESON + depends on ARCH_MESON || COMPILE_TEST + depends on COMMON_CLK help The platform driver for Amlogic Meson PWM controller. @@ -327,7 +329,8 @@ config PWM_MEDIATEK config PWM_MXS tristate "Freescale MXS PWM support" - depends on ARCH_MXS && OF + depends on OF + depends on ARCH_MXS || COMPILE_TEST select STMP_DEVICE help Generic PWM framework driver for Freescale MXS. @@ -366,7 +369,7 @@ config PWM_PUV3 config PWM_PXA tristate "PXA PWM support" - depends on ARCH_PXA + depends on ARCH_PXA || COMPILE_TEST help Generic PWM framework driver for PXA. @@ -397,14 +400,14 @@ config PWM_RENESAS_TPU config PWM_ROCKCHIP tristate "Rockchip PWM support" - depends on ARCH_ROCKCHIP + depends on ARCH_ROCKCHIP || COMPILE_TEST help Generic PWM framework driver for the PWM controller found on Rockchip SoCs. config PWM_SAMSUNG tristate "Samsung PWM support" - depends on PLAT_SAMSUNG || ARCH_EXYNOS + depends on PLAT_SAMSUNG || ARCH_EXYNOS || COMPILE_TEST help Generic PWM framework driver for Samsung. @@ -424,7 +427,7 @@ config PWM_SIFIVE config PWM_SPEAR tristate "STMicroelectronics SPEAr PWM support" - depends on PLAT_SPEAR + depends on PLAT_SPEAR || COMPILE_TEST depends on OF help Generic PWM framework driver for the PWM controller on ST @@ -446,7 +449,7 @@ config PWM_SPRD config PWM_STI tristate "STiH4xx PWM support" - depends on ARCH_STI + depends on ARCH_STI || COMPILE_TEST depends on OF help Generic PWM framework driver for STiH4xx SoCs. @@ -456,7 +459,7 @@ config PWM_STI config PWM_STM32 tristate "STMicroelectronics STM32 PWM" - depends on MFD_STM32_TIMERS + depends on MFD_STM32_TIMERS || COMPILE_TEST help Generic PWM framework driver for STM32 SoCs. 
@@ -492,7 +495,7 @@ config PWM_SUN4I config PWM_TEGRA tristate "NVIDIA Tegra PWM support" - depends on ARCH_TEGRA + depends on ARCH_TEGRA || COMPILE_TEST help Generic PWM framework driver for the PWFM controller found on NVIDIA Tegra SoCs. @@ -502,7 +505,7 @@ config PWM_TEGRA config PWM_TIECAP tristate "ECAP PWM support" - depends on ARCH_OMAP2PLUS || ARCH_DAVINCI_DA8XX || ARCH_KEYSTONE || ARCH_K3 + depends on ARCH_OMAP2PLUS || ARCH_DAVINCI_DA8XX || ARCH_KEYSTONE || ARCH_K3 || COMPILE_TEST help PWM driver support for the ECAP APWM controller found on TI SOCs @@ -511,7 +514,7 @@ config PWM_TIECAP config PWM_TIEHRPWM tristate "EHRPWM PWM support" - depends on ARCH_OMAP2PLUS || ARCH_DAVINCI_DA8XX || ARCH_K3 + depends on ARCH_OMAP2PLUS || ARCH_DAVINCI_DA8XX || ARCH_K3 || COMPILE_TEST help PWM driver support for the EHRPWM controller found on TI SOCs @@ -538,7 +541,7 @@ config PWM_TWL_LED config PWM_VT8500 tristate "vt8500 PWM support" - depends on ARCH_VT8500 + depends on ARCH_VT8500 || COMPILE_TEST help Generic PWM framework driver for vt8500. @@ -547,7 +550,7 @@ config PWM_VT8500 config PWM_ZX tristate "ZTE ZX PWM support" - depends on ARCH_ZX + depends on ARCH_ZX || COMPILE_TEST help Generic PWM framework driver for ZTE ZX family SoCs. From 2d0c08fcd67c23cf8433344544fb5a6c059c2572 Mon Sep 17 00:00:00 2001 From: Sandipan Patra Date: Thu, 5 Mar 2020 16:57:33 +0530 Subject: [PATCH 065/280] pwm: tegra: Add support for Tegra194 MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Tegra194 has multiple PWM controllers with each having only one output. Also the maximum frequency is higher than earlier SoCs. Add support for Tegra194 and specify the number of PWM outputs and maximum supported frequency using device tree match data.
Signed-off-by: Sandipan Patra Acked-by: Uwe Kleine-König Acked-by: Laxman Dewangan Acked-by: Rob Herring Acked-by: Jon Hunter Signed-off-by: Thierry Reding --- .../devicetree/bindings/pwm/nvidia,tegra20-pwm.txt | 1 + drivers/pwm/pwm-tegra.c | 6 ++++++ 2 files changed, 7 insertions(+) diff --git a/Documentation/devicetree/bindings/pwm/nvidia,tegra20-pwm.txt b/Documentation/devicetree/bindings/pwm/nvidia,tegra20-pwm.txt index 0a69eadf44ce..74c41e34c3b6 100644 --- a/Documentation/devicetree/bindings/pwm/nvidia,tegra20-pwm.txt +++ b/Documentation/devicetree/bindings/pwm/nvidia,tegra20-pwm.txt @@ -9,6 +9,7 @@ Required properties: - "nvidia,tegra132-pwm", "nvidia,tegra20-pwm": for Tegra132 - "nvidia,tegra210-pwm", "nvidia,tegra20-pwm": for Tegra210 - "nvidia,tegra186-pwm": for Tegra186 + - "nvidia,tegra194-pwm": for Tegra194 - reg: physical base address and length of the controller's registers - #pwm-cells: should be 2. See pwm.yaml in this directory for a description of the cells format. diff --git a/drivers/pwm/pwm-tegra.c b/drivers/pwm/pwm-tegra.c index aa12fb3ed92e..d26ed8f579ff 100644 --- a/drivers/pwm/pwm-tegra.c +++ b/drivers/pwm/pwm-tegra.c @@ -282,9 +282,15 @@ static const struct tegra_pwm_soc tegra186_pwm_soc = { .max_frequency = 102000000UL, }; +static const struct tegra_pwm_soc tegra194_pwm_soc = { + .num_channels = 1, + .max_frequency = 408000000UL, +}; + static const struct of_device_id tegra_pwm_of_match[] = { { .compatible = "nvidia,tegra20-pwm", .data = &tegra20_pwm_soc }, { .compatible = "nvidia,tegra186-pwm", .data = &tegra186_pwm_soc }, + { .compatible = "nvidia,tegra194-pwm", .data = &tegra194_pwm_soc }, { } }; MODULE_DEVICE_TABLE(of, tegra_pwm_of_match); From 408a7591d91a8bb935109776e889bbaa42787c09 Mon Sep 17 00:00:00 2001 From: Rishi Gupta Date: Wed, 11 Mar 2020 21:13:49 +0530 Subject: [PATCH 066/280] pwm: pca9685: Replace CONFIG_PM with __maybe_unused MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The 
__maybe_unused attribute is preferred over CONFIG_PM to prevent potential build time issues. This commit replaces CONFIG_PM with this attribute. Signed-off-by: Rishi Gupta Acked-by: Uwe Kleine-König Signed-off-by: Thierry Reding --- drivers/pwm/pwm-pca9685.c | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/drivers/pwm/pwm-pca9685.c b/drivers/pwm/pwm-pca9685.c index 19ac97108a64..9467fb68442d 100644 --- a/drivers/pwm/pwm-pca9685.c +++ b/drivers/pwm/pwm-pca9685.c @@ -508,8 +508,7 @@ static int pca9685_pwm_remove(struct i2c_client *client) return 0; } -#ifdef CONFIG_PM -static int pca9685_pwm_runtime_suspend(struct device *dev) +static int __maybe_unused pca9685_pwm_runtime_suspend(struct device *dev) { struct i2c_client *client = to_i2c_client(dev); struct pca9685 *pca = i2c_get_clientdata(client); @@ -518,7 +517,7 @@ static int pca9685_pwm_runtime_suspend(struct device *dev) return 0; } -static int pca9685_pwm_runtime_resume(struct device *dev) +static int __maybe_unused pca9685_pwm_runtime_resume(struct device *dev) { struct i2c_client *client = to_i2c_client(dev); struct pca9685 *pca = i2c_get_clientdata(client); @@ -526,7 +525,6 @@ static int pca9685_pwm_runtime_resume(struct device *dev) pca9685_set_sleep_mode(pca, false); return 0; } -#endif static const struct i2c_device_id pca9685_id[] = { { "pca9685", 0 }, From a37507d5bfec9224e425a055683101560514dff8 Mon Sep 17 00:00:00 2001 From: Rishi Gupta Date: Wed, 11 Mar 2020 21:22:20 +0530 Subject: [PATCH 067/280] pwm: pca9685: Use gpio core provided macro GPIO_LINE_DIRECTION_OUT MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit GPIO core recently added macro to uniformly specify direction of a GPIO line, so use it. 
Signed-off-by: Rishi Gupta Acked-by: Uwe Kleine-König Signed-off-by: Thierry Reding --- drivers/pwm/pwm-pca9685.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/pwm/pwm-pca9685.c b/drivers/pwm/pwm-pca9685.c index 9467fb68442d..20bdc59a0cbb 100644 --- a/drivers/pwm/pwm-pca9685.c +++ b/drivers/pwm/pwm-pca9685.c @@ -167,7 +167,7 @@ static int pca9685_pwm_gpio_get_direction(struct gpio_chip *chip, unsigned int offset) { /* Always out */ - return 0; + return GPIO_LINE_DIRECTION_OUT; } static int pca9685_pwm_gpio_direction_input(struct gpio_chip *gpio, From b33d232e6112aa175f08e8105de0a4da14f5dcbb Mon Sep 17 00:00:00 2001 From: Krzysztof Kozlowski Date: Sat, 14 Mar 2020 12:35:24 +0100 Subject: [PATCH 068/280] pwm: meson: Fix confusing indentation Fix indentation of return block. Smatch warning: drivers/pwm/pwm-meson.c:139 meson_pwm_request() warn: inconsistent indenting Reported-by: kbuild test robot Signed-off-by: Krzysztof Kozlowski Acked-by: Neil Armstrong Fixes: 211ed630753d ("pwm: Add support for Meson PWM Controller") Signed-off-by: Thierry Reding --- drivers/pwm/pwm-meson.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/pwm/pwm-meson.c b/drivers/pwm/pwm-meson.c index 6245bbdb6e6c..8cf9129caa39 100644 --- a/drivers/pwm/pwm-meson.c +++ b/drivers/pwm/pwm-meson.c @@ -136,7 +136,7 @@ static int meson_pwm_request(struct pwm_chip *chip, struct pwm_device *pwm) dev_err(dev, "failed to set parent %s for %s: %d\n", __clk_get_name(channel->clk_parent), __clk_get_name(channel->clk), err); - return err; + return err; } } From 5928ce02a7d95b400f9b9a455448e228d4635198 Mon Sep 17 00:00:00 2001 From: Geert Uytterhoeven Date: Mon, 16 Mar 2020 11:14:53 +0100 Subject: [PATCH 069/280] dt-bindings: pwm: renesas-tpu: Document more R-Car Gen2 support All R-Car Gen2 SoCs have a Renesas Timer Pulse Unit. Document support for the missing variants. 
Signed-off-by: Geert Uytterhoeven Reviewed-by: Laurent Pinchart Signed-off-by: Thierry Reding --- Documentation/devicetree/bindings/pwm/renesas,tpu-pwm.yaml | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/Documentation/devicetree/bindings/pwm/renesas,tpu-pwm.yaml b/Documentation/devicetree/bindings/pwm/renesas,tpu-pwm.yaml index 4969a954993c..4bf62a3d5bba 100644 --- a/Documentation/devicetree/bindings/pwm/renesas,tpu-pwm.yaml +++ b/Documentation/devicetree/bindings/pwm/renesas,tpu-pwm.yaml @@ -19,6 +19,10 @@ properties: - renesas,tpu-r8a7744 # RZ/G1N - renesas,tpu-r8a7745 # RZ/G1E - renesas,tpu-r8a7790 # R-Car H2 + - renesas,tpu-r8a7791 # R-Car M2-W + - renesas,tpu-r8a7792 # R-Car V2H + - renesas,tpu-r8a7793 # R-Car M2-N + - renesas,tpu-r8a7794 # R-Car E2 - renesas,tpu-r8a7795 # R-Car H3 - renesas,tpu-r8a7796 # R-Car M3-W - renesas,tpu-r8a77965 # R-Car M3-N From 1451a3eed24b5fd6a604683f0b6995e0e7e16c79 Mon Sep 17 00:00:00 2001 From: Geert Uytterhoeven Date: Mon, 16 Mar 2020 11:32:14 +0100 Subject: [PATCH 070/280] pwm: rcar: Fix late Runtime PM enablement MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Runtime PM should be enabled before calling pwmchip_add(), as PWM users can appear immediately after the PWM chip has been added. Likewise, Runtime PM should be disabled after the removal of the PWM chip. 
Fixes: ed6c1476bf7f16d5 ("pwm: Add support for R-Car PWM Timer") Signed-off-by: Geert Uytterhoeven Reviewed-by: Uwe Kleine-König Reviewed-by: Laurent Pinchart Signed-off-by: Thierry Reding --- drivers/pwm/pwm-rcar.c | 10 +++++++--- 1 file changed, 7 insertions(+), 3 deletions(-) diff --git a/drivers/pwm/pwm-rcar.c b/drivers/pwm/pwm-rcar.c index 2685577b6dd4..7ab9eb6616d9 100644 --- a/drivers/pwm/pwm-rcar.c +++ b/drivers/pwm/pwm-rcar.c @@ -229,24 +229,28 @@ static int rcar_pwm_probe(struct platform_device *pdev) rcar_pwm->chip.base = -1; rcar_pwm->chip.npwm = 1; + pm_runtime_enable(&pdev->dev); + ret = pwmchip_add(&rcar_pwm->chip); if (ret < 0) { dev_err(&pdev->dev, "failed to register PWM chip: %d\n", ret); + pm_runtime_disable(&pdev->dev); return ret; } - pm_runtime_enable(&pdev->dev); - return 0; } static int rcar_pwm_remove(struct platform_device *pdev) { struct rcar_pwm_chip *rcar_pwm = platform_get_drvdata(pdev); + int ret; + + ret = pwmchip_remove(&rcar_pwm->chip); pm_runtime_disable(&pdev->dev); - return pwmchip_remove(&rcar_pwm->chip); + return ret; } static const struct of_device_id rcar_pwm_of_table[] = { From d5a3c7a4536e1329a758e14340efd0e65252bd3d Mon Sep 17 00:00:00 2001 From: Geert Uytterhoeven Date: Mon, 16 Mar 2020 11:32:15 +0100 Subject: [PATCH 071/280] pwm: renesas-tpu: Fix late Runtime PM enablement Runtime PM should be enabled before calling pwmchip_add(), as PWM users can appear immediately after the PWM chip has been added. Likewise, Runtime PM should always be disabled after the removal of the PWM chip, even if the latter failed. 
Fixes: 99b82abb0a35b073 ("pwm: Add Renesas TPU PWM driver") Signed-off-by: Geert Uytterhoeven Signed-off-by: Thierry Reding --- drivers/pwm/pwm-renesas-tpu.c | 9 ++++----- 1 file changed, 4 insertions(+), 5 deletions(-) diff --git a/drivers/pwm/pwm-renesas-tpu.c b/drivers/pwm/pwm-renesas-tpu.c index 4a855a21b782..8032acc84161 100644 --- a/drivers/pwm/pwm-renesas-tpu.c +++ b/drivers/pwm/pwm-renesas-tpu.c @@ -415,16 +415,17 @@ static int tpu_probe(struct platform_device *pdev) tpu->chip.base = -1; tpu->chip.npwm = TPU_CHANNEL_MAX; + pm_runtime_enable(&pdev->dev); + ret = pwmchip_add(&tpu->chip); if (ret < 0) { dev_err(&pdev->dev, "failed to register PWM chip\n"); + pm_runtime_disable(&pdev->dev); return ret; } dev_info(&pdev->dev, "TPU PWM %d registered\n", tpu->pdev->id); - pm_runtime_enable(&pdev->dev); - return 0; } @@ -434,12 +435,10 @@ static int tpu_remove(struct platform_device *pdev) int ret; ret = pwmchip_remove(&tpu->chip); - if (ret) - return ret; pm_runtime_disable(&pdev->dev); - return 0; + return ret; } #ifdef CONFIG_OF From a1098c13a3ecd61015f63753c80c212ceaea7fb4 Mon Sep 17 00:00:00 2001 From: Geert Uytterhoeven Date: Mon, 16 Mar 2020 11:32:16 +0100 Subject: [PATCH 072/280] pwm: renesas-tpu: Drop confusing registered message MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit During device probe, the message TPU PWM -1 registered is printed. While the "-1" looks suspicious, it is perfectly normal for a device instantiated from DT. Remove the message, as there are no non-DT users left, and other drivers don't print such messages either. 
Signed-off-by: Geert Uytterhoeven Reviewed-by: Laurent Pinchart Acked-by: Uwe Kleine-König Signed-off-by: Thierry Reding --- drivers/pwm/pwm-renesas-tpu.c | 2 -- 1 file changed, 2 deletions(-) diff --git a/drivers/pwm/pwm-renesas-tpu.c b/drivers/pwm/pwm-renesas-tpu.c index 8032acc84161..81ad5a551455 100644 --- a/drivers/pwm/pwm-renesas-tpu.c +++ b/drivers/pwm/pwm-renesas-tpu.c @@ -424,8 +424,6 @@ static int tpu_probe(struct platform_device *pdev) return ret; } - dev_info(&pdev->dev, "TPU PWM %d registered\n", tpu->pdev->id); - return 0; } From 54091b5f195b45a9a7d394008c06d2b9646ab126 Mon Sep 17 00:00:00 2001 From: Lokesh Vutla Date: Thu, 12 Mar 2020 09:52:06 +0530 Subject: [PATCH 073/280] pwm: omap-dmtimer: Drop unused header file MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit pwm_omap_dmtimer.h is used only: - to typedef struct omap_dm_timer to pwm_omap_dmtimer - for macro PWM_OMAP_DMTIMER_TRIGGER_OVERFLOW_AND_COMPARE Rest of the file is pretty much unused. So reuse omap_dm_timer and OMAP_TIMER_TRIGGER_OVERFLOW_AND_COMPARE in pwm-omap-dmtimer.c and delete the header file.
Acked-by: Tony Lindgren Signed-off-by: Lokesh Vutla Acked-by: Uwe Kleine-König Signed-off-by: Thierry Reding --- drivers/pwm/pwm-omap-dmtimer.c | 20 ++--- include/clocksource/timer-ti-dm.h | 3 +- .../linux/platform_data/pwm_omap_dmtimer.h | 90 ------------------- 3 files changed, 10 insertions(+), 103 deletions(-) delete mode 100644 include/linux/platform_data/pwm_omap_dmtimer.h diff --git a/drivers/pwm/pwm-omap-dmtimer.c b/drivers/pwm/pwm-omap-dmtimer.c index 9e4378dc6897..e4f5f710bfaa 100644 --- a/drivers/pwm/pwm-omap-dmtimer.c +++ b/drivers/pwm/pwm-omap-dmtimer.c @@ -20,8 +20,8 @@ #include #include #include +#include #include -#include #include #include #include @@ -34,7 +34,7 @@ struct pwm_omap_dmtimer_chip { struct pwm_chip chip; struct mutex mutex; - pwm_omap_dmtimer *dm_timer; + struct omap_dm_timer *dm_timer; const struct omap_dm_timer_ops *pdata; struct platform_device *dm_timer_pdev; }; @@ -190,10 +190,9 @@ static int pwm_omap_dmtimer_config(struct pwm_chip *chip, load_value, load_value, match_value, match_value); omap->pdata->set_pwm(omap->dm_timer, - pwm_get_polarity(pwm) == PWM_POLARITY_INVERSED, - true, - PWM_OMAP_DMTIMER_TRIGGER_OVERFLOW_AND_COMPARE, - true); + pwm_get_polarity(pwm) == PWM_POLARITY_INVERSED, + true, OMAP_TIMER_TRIGGER_OVERFLOW_AND_COMPARE, + true); /* If config was called while timer was running it must be reenabled. 
*/ if (timer_active) @@ -221,10 +220,9 @@ static int pwm_omap_dmtimer_set_polarity(struct pwm_chip *chip, */ mutex_lock(&omap->mutex); omap->pdata->set_pwm(omap->dm_timer, - polarity == PWM_POLARITY_INVERSED, - true, - PWM_OMAP_DMTIMER_TRIGGER_OVERFLOW_AND_COMPARE, - true); + polarity == PWM_POLARITY_INVERSED, + true, OMAP_TIMER_TRIGGER_OVERFLOW_AND_COMPARE, + true); mutex_unlock(&omap->mutex); return 0; @@ -246,7 +244,7 @@ static int pwm_omap_dmtimer_probe(struct platform_device *pdev) struct pwm_omap_dmtimer_chip *omap; struct dmtimer_platform_data *timer_pdata; const struct omap_dm_timer_ops *pdata; - pwm_omap_dmtimer *dm_timer; + struct omap_dm_timer *dm_timer; u32 v; int ret = 0; diff --git a/include/clocksource/timer-ti-dm.h b/include/clocksource/timer-ti-dm.h index 25f05235866e..531ca87fcd08 100644 --- a/include/clocksource/timer-ti-dm.h +++ b/include/clocksource/timer-ti-dm.h @@ -248,8 +248,7 @@ int omap_dm_timers_active(void); /* * The below are inlined to optimize code size for system timers. Other code - * should not need these at all, see - * include/linux/platform_data/pwm_omap_dmtimer.h + * should not need these at all. */ #if defined(CONFIG_ARCH_OMAP1) || defined(CONFIG_ARCH_OMAP2PLUS) static inline u32 __omap_dm_timer_read(struct omap_dm_timer *timer, u32 reg, diff --git a/include/linux/platform_data/pwm_omap_dmtimer.h b/include/linux/platform_data/pwm_omap_dmtimer.h deleted file mode 100644 index e7d521e48855..000000000000 --- a/include/linux/platform_data/pwm_omap_dmtimer.h +++ /dev/null @@ -1,90 +0,0 @@ -/* - * include/linux/platform_data/pwm_omap_dmtimer.h - * - * OMAP Dual-Mode Timer PWM platform data - * - * Copyright (C) 2010 Texas Instruments Incorporated - http://www.ti.com/ - * Tarun Kanti DebBarma - * Thara Gopinath - * - * Platform device conversion and hwmod support. - * - * Copyright (C) 2005 Nokia Corporation - * Author: Lauri Leukkunen - * PWM and clock framework support by Timo Teras. 
- * - * This program is free software; you can redistribute it and/or modify it - * under the terms of the GNU General Public License as published by the - * Free Software Foundation; either version 2 of the License, or (at your - * option) any later version. - * - * THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESS OR IMPLIED - * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF - * MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN - * NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, - * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT - * ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT - * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF - * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - * - * You should have received a copy of the GNU General Public License along - * with this program; if not, write to the Free Software Foundation, Inc., - * 675 Mass Ave, Cambridge, MA 02139, USA. 
- */ - -#ifndef __PWM_OMAP_DMTIMER_PDATA_H -#define __PWM_OMAP_DMTIMER_PDATA_H - -/* clock sources */ -#define PWM_OMAP_DMTIMER_SRC_SYS_CLK 0x00 -#define PWM_OMAP_DMTIMER_SRC_32_KHZ 0x01 -#define PWM_OMAP_DMTIMER_SRC_EXT_CLK 0x02 - -/* timer interrupt enable bits */ -#define PWM_OMAP_DMTIMER_INT_CAPTURE (1 << 2) -#define PWM_OMAP_DMTIMER_INT_OVERFLOW (1 << 1) -#define PWM_OMAP_DMTIMER_INT_MATCH (1 << 0) - -/* trigger types */ -#define PWM_OMAP_DMTIMER_TRIGGER_NONE 0x00 -#define PWM_OMAP_DMTIMER_TRIGGER_OVERFLOW 0x01 -#define PWM_OMAP_DMTIMER_TRIGGER_OVERFLOW_AND_COMPARE 0x02 - -struct omap_dm_timer; -typedef struct omap_dm_timer pwm_omap_dmtimer; - -struct pwm_omap_dmtimer_pdata { - pwm_omap_dmtimer *(*request_by_node)(struct device_node *np); - pwm_omap_dmtimer *(*request_specific)(int timer_id); - pwm_omap_dmtimer *(*request)(void); - - int (*free)(pwm_omap_dmtimer *timer); - - void (*enable)(pwm_omap_dmtimer *timer); - void (*disable)(pwm_omap_dmtimer *timer); - - int (*get_irq)(pwm_omap_dmtimer *timer); - int (*set_int_enable)(pwm_omap_dmtimer *timer, unsigned int value); - int (*set_int_disable)(pwm_omap_dmtimer *timer, u32 mask); - - struct clk *(*get_fclk)(pwm_omap_dmtimer *timer); - - int (*start)(pwm_omap_dmtimer *timer); - int (*stop)(pwm_omap_dmtimer *timer); - int (*set_source)(pwm_omap_dmtimer *timer, int source); - - int (*set_load)(pwm_omap_dmtimer *timer, int autoreload, - unsigned int value); - int (*set_match)(pwm_omap_dmtimer *timer, int enable, - unsigned int match); - int (*set_pwm)(pwm_omap_dmtimer *timer, int def_on, - int toggle, int trigger); - int (*set_prescaler)(pwm_omap_dmtimer *timer, int prescaler); - - unsigned int (*read_counter)(pwm_omap_dmtimer *timer); - int (*write_counter)(pwm_omap_dmtimer *timer, unsigned int value); - unsigned int (*read_status)(pwm_omap_dmtimer *timer); - int (*write_status)(pwm_omap_dmtimer *timer, unsigned int value); -}; - -#endif /* __PWM_OMAP_DMTIMER_PDATA_H */ From 
348fb6f7fb4cc7ebc35d1cde6e5c2ada64b683c6 Mon Sep 17 00:00:00 2001 From: Lokesh Vutla Date: Thu, 12 Mar 2020 09:52:07 +0530 Subject: [PATCH 074/280] pwm: omap-dmtimer: Update description for PWM OMAP DM timer MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Update the description with a brief about how PWM is generated using OMAP DM timer and add limitations for the PWM generations. Also add a link to the reference manual. Suggested-by: Uwe Kleine-König Acked-by: Tony Lindgren Signed-off-by: Lokesh Vutla Acked-by: Uwe Kleine-König Signed-off-by: Thierry Reding --- drivers/pwm/pwm-omap-dmtimer.c | 10 +++++++++- 1 file changed, 9 insertions(+), 1 deletion(-) diff --git a/drivers/pwm/pwm-omap-dmtimer.c b/drivers/pwm/pwm-omap-dmtimer.c index e4f5f710bfaa..92aac6c86b95 100644 --- a/drivers/pwm/pwm-omap-dmtimer.c +++ b/drivers/pwm/pwm-omap-dmtimer.c @@ -10,7 +10,15 @@ * * Description: * This file is the core OMAP support for the generic, Linux - * PWM driver / controller, using the OMAP's dual-mode timers. + * PWM driver / controller, using the OMAP's dual-mode timers + * with a timer counter that goes up. When it overflows it gets + * reloaded with the load value and the pwm output goes up. + * When counter matches with match register, the output goes down. + * Reference Manual: http://www.ti.com/lit/ug/spruh73q/spruh73q.pdf + * + * Limitations: + * - When PWM is stopped, timer counter gets stopped immediately. This + * doesn't allow the current PWM period to complete and stops abruptly. */ #include From 867beb60d131f7a5cde88ba375338285fdc6ddb8 Mon Sep 17 00:00:00 2001 From: Lokesh Vutla Date: Thu, 12 Mar 2020 09:52:08 +0530 Subject: [PATCH 075/280] pwm: omap-dmtimer: Fix PWM enabling sequence To configure DM timer in PWM mode the following needs to be set in OMAP_TIMER_CTRL_REG using set_pwm callback: - Set toggle mode on PORTIMERPWM output pin - Set trigger on overflow and match on PORTIMERPWM output pin. 
- Set auto reload This is a one time configuration and needs to be set before the start of the DM timer. But the current driver tries to set the same configuration for every period/duty cycle update, which is not needed. So move the PWM setup before enabling timer and do not update it in pwm_omap_dmtimer_config(). Tested-by: Tony Lindgren Signed-off-by: Lokesh Vutla Signed-off-by: Thierry Reding --- drivers/pwm/pwm-omap-dmtimer.c | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/drivers/pwm/pwm-omap-dmtimer.c b/drivers/pwm/pwm-omap-dmtimer.c index 92aac6c86b95..85b17b49980b 100644 --- a/drivers/pwm/pwm-omap-dmtimer.c +++ b/drivers/pwm/pwm-omap-dmtimer.c @@ -81,6 +81,11 @@ static int pwm_omap_dmtimer_enable(struct pwm_chip *chip, struct pwm_omap_dmtimer_chip *omap = to_pwm_omap_dmtimer_chip(chip); mutex_lock(&omap->mutex); + omap->pdata->set_pwm(omap->dm_timer, + pwm_get_polarity(pwm) == PWM_POLARITY_INVERSED, + true, OMAP_TIMER_TRIGGER_OVERFLOW_AND_COMPARE, + true); + pwm_omap_dmtimer_start(omap); mutex_unlock(&omap->mutex); @@ -197,11 +202,6 @@ static int pwm_omap_dmtimer_config(struct pwm_chip *chip, dev_dbg(chip->dev, "load value: %#08x (%d), match value: %#08x (%d)\n", load_value, load_value, match_value, match_value); - omap->pdata->set_pwm(omap->dm_timer, - pwm_get_polarity(pwm) == PWM_POLARITY_INVERSED, - true, OMAP_TIMER_TRIGGER_OVERFLOW_AND_COMPARE, - true); - /* If config was called while timer was running it must be reenabled. */ if (timer_active) pwm_omap_dmtimer_start(omap); From e793eef8062f4a5b24433d0e0aac2ddf9cf71da8 Mon Sep 17 00:00:00 2001 From: Lokesh Vutla Date: Thu, 12 Mar 2020 09:52:09 +0530 Subject: [PATCH 076/280] pwm: omap-dmtimer: Do not disable PWM before changing period/duty_cycle Only the Timer control register(TCLR) cannot be updated when the timer is running. Registers like Counter register (TCRR), loader register (TLDR) and match register (TMAR) can be updated while the counter is running. 
Since TCLR is not updated in pwm_omap_dmtimer_config(), do not stop the timer for period/duty_cycle update. Tested-by: Tony Lindgren Signed-off-by: Lokesh Vutla Signed-off-by: Thierry Reding --- drivers/pwm/pwm-omap-dmtimer.c | 21 +++++++-------------- 1 file changed, 7 insertions(+), 14 deletions(-) diff --git a/drivers/pwm/pwm-omap-dmtimer.c b/drivers/pwm/pwm-omap-dmtimer.c index 85b17b49980b..c56e7256e923 100644 --- a/drivers/pwm/pwm-omap-dmtimer.c +++ b/drivers/pwm/pwm-omap-dmtimer.c @@ -19,6 +19,13 @@ * Limitations: * - When PWM is stopped, timer counter gets stopped immediately. This * doesn't allow the current PWM period to complete and stops abruptly. + * - When PWM is running and changing both duty cycle and period, + * we cannot prevent in software that the output might produce + * a period with mixed settings. Especially when period/duty_cyle + * is updated while the pwm pin is high, current pwm period/duty_cycle + * can get updated as below based on the current timer counter: + * - period for current cycle = current_period + new period + * - duty_cycle for current period = current period + new duty_cycle. */ #include @@ -111,7 +118,6 @@ static int pwm_omap_dmtimer_config(struct pwm_chip *chip, u32 load_value, match_value; struct clk *fclk; unsigned long clk_rate; - bool timer_active; dev_dbg(chip->dev, "requested duty cycle: %d ns, period: %d ns\n", duty_ns, period_ns); @@ -187,25 +193,12 @@ static int pwm_omap_dmtimer_config(struct pwm_chip *chip, load_value = (DM_TIMER_MAX - period_cycles) + 1; match_value = load_value + duty_cycles - 1; - /* - * We MUST stop the associated dual-mode timer before attempting to - * write its registers, but calls to omap_dm_timer_start/stop must - * be balanced so check if timer is active before calling timer_stop. 
- */ - timer_active = pm_runtime_active(&omap->dm_timer_pdev->dev); - if (timer_active) - omap->pdata->stop(omap->dm_timer); - omap->pdata->set_load(omap->dm_timer, load_value); omap->pdata->set_match(omap->dm_timer, true, match_value); dev_dbg(chip->dev, "load value: %#08x (%d), match value: %#08x (%d)\n", load_value, load_value, match_value, match_value); - /* If config was called while timer was running it must be reenabled. */ - if (timer_active) - pwm_omap_dmtimer_start(omap); - mutex_unlock(&omap->mutex); return 0; From 6b28fb6f3ca30bd98815041b4eb795743706402d Mon Sep 17 00:00:00 2001 From: Lokesh Vutla Date: Thu, 12 Mar 2020 09:52:10 +0530 Subject: [PATCH 077/280] pwm: omap-dmtimer: Implement .apply callback Implement .apply callback and drop the legacy callbacks(enable, disable, config, set_polarity). In .apply() check for the current hardware status before changing the PWM configuration. Signed-off-by: Lokesh Vutla Tested-by: Tony Lindgren Signed-off-by: Thierry Reding --- drivers/pwm/pwm-omap-dmtimer.c | 180 +++++++++++++++++++++++---------- 1 file changed, 129 insertions(+), 51 deletions(-) diff --git a/drivers/pwm/pwm-omap-dmtimer.c b/drivers/pwm/pwm-omap-dmtimer.c index c56e7256e923..0d31833db2e2 100644 --- a/drivers/pwm/pwm-omap-dmtimer.c +++ b/drivers/pwm/pwm-omap-dmtimer.c @@ -26,6 +26,11 @@ * can get updated as below based on the current timer counter: * - period for current cycle = current_period + new period * - duty_cycle for current period = current period + new duty_cycle. + * - PWM OMAP DM timer cannot change the polarity when pwm is active. When + * user requests a change in polarity when in active state: + * - PWM is stopped abruptly(without completing the current cycle) + * - Polarity is changed + * - A fresh cycle is started. */ #include @@ -46,8 +51,18 @@ #define DM_TIMER_LOAD_MIN 0xfffffffe #define DM_TIMER_MAX 0xffffffff +/** + * struct pwm_omap_dmtimer_chip - Structure representing a pwm chip + * corresponding to omap dmtimer. 
+ * @chip: PWM chip structure representing PWM controller + * @mutex: Mutex to protect pwm apply state + * @dm_timer: Pointer to omap dm timer. + * @pdata: Pointer to omap dm timer ops. + * dm_timer_pdev: Pointer to omap dm timer platform device + */ struct pwm_omap_dmtimer_chip { struct pwm_chip chip; + /* Mutex to protect pwm apply state */ struct mutex mutex; struct omap_dm_timer *dm_timer; const struct omap_dm_timer_ops *pdata; @@ -60,11 +75,22 @@ to_pwm_omap_dmtimer_chip(struct pwm_chip *chip) return container_of(chip, struct pwm_omap_dmtimer_chip, chip); } +/** + * pwm_omap_dmtimer_get_clock_cycles() - Get clock cycles in a time frame + * @clk_rate: pwm timer clock rate + * @ns: time frame in nano seconds. + * + * Return number of clock cycles in a given period(ins ns). + */ static u32 pwm_omap_dmtimer_get_clock_cycles(unsigned long clk_rate, int ns) { return DIV_ROUND_CLOSEST_ULL((u64)clk_rate * ns, NSEC_PER_SEC); } +/** + * pwm_omap_dmtimer_start() - Start the pwm omap dm timer in pwm mode + * @omap: Pointer to pwm omap dm timer chip + */ static void pwm_omap_dmtimer_start(struct pwm_omap_dmtimer_chip *omap) { /* @@ -82,33 +108,46 @@ static void pwm_omap_dmtimer_start(struct pwm_omap_dmtimer_chip *omap) omap->pdata->start(omap->dm_timer); } -static int pwm_omap_dmtimer_enable(struct pwm_chip *chip, - struct pwm_device *pwm) +/** + * pwm_omap_dmtimer_is_enabled() - Detect if the pwm is enabled. + * @omap: Pointer to pwm omap dm timer chip + * + * Return true if pwm is enabled else false. 
+ */ +static bool pwm_omap_dmtimer_is_enabled(struct pwm_omap_dmtimer_chip *omap) { - struct pwm_omap_dmtimer_chip *omap = to_pwm_omap_dmtimer_chip(chip); + u32 status; - mutex_lock(&omap->mutex); - omap->pdata->set_pwm(omap->dm_timer, - pwm_get_polarity(pwm) == PWM_POLARITY_INVERSED, - true, OMAP_TIMER_TRIGGER_OVERFLOW_AND_COMPARE, - true); + status = omap->pdata->get_pwm_status(omap->dm_timer); - pwm_omap_dmtimer_start(omap); - mutex_unlock(&omap->mutex); - - return 0; + return !!(status & OMAP_TIMER_CTRL_ST); } -static void pwm_omap_dmtimer_disable(struct pwm_chip *chip, - struct pwm_device *pwm) +/** + * pwm_omap_dmtimer_polarity() - Detect the polarity of pwm. + * @omap: Pointer to pwm omap dm timer chip + * + * Return the polarity of pwm. + */ +static int pwm_omap_dmtimer_polarity(struct pwm_omap_dmtimer_chip *omap) { - struct pwm_omap_dmtimer_chip *omap = to_pwm_omap_dmtimer_chip(chip); + u32 status; - mutex_lock(&omap->mutex); - omap->pdata->stop(omap->dm_timer); - mutex_unlock(&omap->mutex); + status = omap->pdata->get_pwm_status(omap->dm_timer); + + return !!(status & OMAP_TIMER_CTRL_SCPWM); } +/** + * pwm_omap_dmtimer_config() - Update the configuration of pwm omap dm timer + * @chip: Pointer to PWM controller + * @pwm: Pointer to PWM channel + * @duty_ns: New duty cycle in nano seconds + * @period_ns: New period in nano seconds + * + * Return 0 if successfully changed the period/duty_cycle else appropriate + * error. 
+ */ static int pwm_omap_dmtimer_config(struct pwm_chip *chip, struct pwm_device *pwm, int duty_ns, int period_ns) @@ -116,30 +155,26 @@ static int pwm_omap_dmtimer_config(struct pwm_chip *chip, struct pwm_omap_dmtimer_chip *omap = to_pwm_omap_dmtimer_chip(chip); u32 period_cycles, duty_cycles; u32 load_value, match_value; - struct clk *fclk; unsigned long clk_rate; + struct clk *fclk; dev_dbg(chip->dev, "requested duty cycle: %d ns, period: %d ns\n", duty_ns, period_ns); - mutex_lock(&omap->mutex); if (duty_ns == pwm_get_duty_cycle(pwm) && - period_ns == pwm_get_period(pwm)) { - /* No change - don't cause any transients. */ - mutex_unlock(&omap->mutex); + period_ns == pwm_get_period(pwm)) return 0; - } fclk = omap->pdata->get_fclk(omap->dm_timer); if (!fclk) { dev_err(chip->dev, "invalid pmtimer fclk\n"); - goto err_einval; + return -EINVAL; } clk_rate = clk_get_rate(fclk); if (!clk_rate) { dev_err(chip->dev, "invalid pmtimer fclk rate\n"); - goto err_einval; + return -EINVAL; } dev_dbg(chip->dev, "clk rate: %luHz\n", clk_rate); @@ -167,7 +202,7 @@ static int pwm_omap_dmtimer_config(struct pwm_chip *chip, dev_info(chip->dev, "period %d ns too short for clock rate %lu Hz\n", period_ns, clk_rate); - goto err_einval; + return -EINVAL; } if (duty_cycles < 1) { @@ -199,55 +234,97 @@ static int pwm_omap_dmtimer_config(struct pwm_chip *chip, dev_dbg(chip->dev, "load value: %#08x (%d), match value: %#08x (%d)\n", load_value, load_value, match_value, match_value); - mutex_unlock(&omap->mutex); - return 0; - -err_einval: - mutex_unlock(&omap->mutex); - - return -EINVAL; } -static int pwm_omap_dmtimer_set_polarity(struct pwm_chip *chip, - struct pwm_device *pwm, - enum pwm_polarity polarity) +/** + * pwm_omap_dmtimer_set_polarity() - Changes the polarity of the pwm dm timer. 
+ * @chip: Pointer to PWM controller + * @pwm: Pointer to PWM channel + * @polarity: New pwm polarity to be set + */ +static void pwm_omap_dmtimer_set_polarity(struct pwm_chip *chip, + struct pwm_device *pwm, + enum pwm_polarity polarity) { struct pwm_omap_dmtimer_chip *omap = to_pwm_omap_dmtimer_chip(chip); + bool enabled; + + /* Disable the PWM before changing the polarity. */ + enabled = pwm_omap_dmtimer_is_enabled(omap); + if (enabled) + omap->pdata->stop(omap->dm_timer); - /* - * PWM core will not call set_polarity while PWM is enabled so it's - * safe to reconfigure the timer here without stopping it first. - */ - mutex_lock(&omap->mutex); omap->pdata->set_pwm(omap->dm_timer, polarity == PWM_POLARITY_INVERSED, true, OMAP_TIMER_TRIGGER_OVERFLOW_AND_COMPARE, true); + + if (enabled) + pwm_omap_dmtimer_start(omap); +} + +/** + * pwm_omap_dmtimer_apply() - Changes the state of the pwm omap dm timer. + * @chip: Pointer to PWM controller + * @pwm: Pointer to PWM channel + * @state: New state to apply + * + * Return 0 if successfully changed the state else appropriate error. 
+ */ +static int pwm_omap_dmtimer_apply(struct pwm_chip *chip, + struct pwm_device *pwm, + const struct pwm_state *state) +{ + struct pwm_omap_dmtimer_chip *omap = to_pwm_omap_dmtimer_chip(chip); + int ret = 0; + + mutex_lock(&omap->mutex); + + if (pwm_omap_dmtimer_is_enabled(omap) && !state->enabled) { + omap->pdata->stop(omap->dm_timer); + goto unlock_mutex; + } + + if (pwm_omap_dmtimer_polarity(omap) != state->polarity) + pwm_omap_dmtimer_set_polarity(chip, pwm, state->polarity); + + ret = pwm_omap_dmtimer_config(chip, pwm, state->duty_cycle, + state->period); + if (ret) + goto unlock_mutex; + + if (!pwm_omap_dmtimer_is_enabled(omap) && state->enabled) { + omap->pdata->set_pwm(omap->dm_timer, + state->polarity == PWM_POLARITY_INVERSED, + true, + OMAP_TIMER_TRIGGER_OVERFLOW_AND_COMPARE, + true); + pwm_omap_dmtimer_start(omap); + } + +unlock_mutex: mutex_unlock(&omap->mutex); - return 0; + return ret; } static const struct pwm_ops pwm_omap_dmtimer_ops = { - .enable = pwm_omap_dmtimer_enable, - .disable = pwm_omap_dmtimer_disable, - .config = pwm_omap_dmtimer_config, - .set_polarity = pwm_omap_dmtimer_set_polarity, + .apply = pwm_omap_dmtimer_apply, .owner = THIS_MODULE, }; static int pwm_omap_dmtimer_probe(struct platform_device *pdev) { struct device_node *np = pdev->dev.of_node; - struct device_node *timer; - struct platform_device *timer_pdev; - struct pwm_omap_dmtimer_chip *omap; struct dmtimer_platform_data *timer_pdata; const struct omap_dm_timer_ops *pdata; + struct platform_device *timer_pdev; + struct pwm_omap_dmtimer_chip *omap; struct omap_dm_timer *dm_timer; - u32 v; + struct device_node *timer; int ret = 0; + u32 v; timer = of_parse_phandle(np, "ti,timers", 0); if (!timer) @@ -280,6 +357,7 @@ static int pwm_omap_dmtimer_probe(struct platform_device *pdev) !pdata->set_load || !pdata->set_match || !pdata->set_pwm || + !pdata->get_pwm_status || !pdata->set_prescaler || !pdata->write_counter) { dev_err(&pdev->dev, "Incomplete dmtimer pdata structure\n"); 
From d3817a647059a3e5f8791e9b7225d194985aa75f Mon Sep 17 00:00:00 2001 From: Pascal Roeleven Date: Tue, 17 Mar 2020 16:59:03 +0100 Subject: [PATCH 078/280] pwm: sun4i: Remove redundant needs_delay 'needs_delay' does now always evaluate to true, so remove all occurrences. Signed-off-by: Pascal Roeleven Signed-off-by: Thierry Reding --- drivers/pwm/pwm-sun4i.c | 13 ++----------- 1 file changed, 2 insertions(+), 11 deletions(-) diff --git a/drivers/pwm/pwm-sun4i.c b/drivers/pwm/pwm-sun4i.c index 3e3efa6c768f..5c677c563349 100644 --- a/drivers/pwm/pwm-sun4i.c +++ b/drivers/pwm/pwm-sun4i.c @@ -90,7 +90,6 @@ struct sun4i_pwm_chip { spinlock_t ctrl_lock; const struct sun4i_pwm_data *data; unsigned long next_period[2]; - bool needs_delay[2]; }; static inline struct sun4i_pwm_chip *to_sun4i_pwm_chip(struct pwm_chip *chip) @@ -287,7 +286,6 @@ static int sun4i_pwm_apply(struct pwm_chip *chip, struct pwm_device *pwm, sun4i_pwm_writel(sun4i_pwm, val, PWM_CH_PRD(pwm->hwpwm)); sun4i_pwm->next_period[pwm->hwpwm] = jiffies + usecs_to_jiffies(cstate.period / 1000 + 1); - sun4i_pwm->needs_delay[pwm->hwpwm] = true; if (state->polarity != PWM_POLARITY_NORMAL) ctrl &= ~BIT_CH(PWM_ACT_STATE, pwm->hwpwm); @@ -298,7 +296,7 @@ static int sun4i_pwm_apply(struct pwm_chip *chip, struct pwm_device *pwm, if (state->enabled) { ctrl |= BIT_CH(PWM_EN, pwm->hwpwm); - } else if (!sun4i_pwm->needs_delay[pwm->hwpwm]) { + } else { ctrl &= ~BIT_CH(PWM_EN, pwm->hwpwm); ctrl &= ~BIT_CH(PWM_CLK_GATING, pwm->hwpwm); } @@ -310,15 +308,9 @@ static int sun4i_pwm_apply(struct pwm_chip *chip, struct pwm_device *pwm, if (state->enabled) return 0; - if (!sun4i_pwm->needs_delay[pwm->hwpwm]) { - clk_disable_unprepare(sun4i_pwm->clk); - return 0; - } - /* We need a full period to elapse before disabling the channel. 
*/ now = jiffies; - if (sun4i_pwm->needs_delay[pwm->hwpwm] && - time_before(now, sun4i_pwm->next_period[pwm->hwpwm])) { + if (time_before(now, sun4i_pwm->next_period[pwm->hwpwm])) { delay_us = jiffies_to_usecs(sun4i_pwm->next_period[pwm->hwpwm] - now); if ((delay_us / 500) > MAX_UDELAY_MS) @@ -326,7 +318,6 @@ static int sun4i_pwm_apply(struct pwm_chip *chip, struct pwm_device *pwm, else usleep_range(delay_us, delay_us * 2); } - sun4i_pwm->needs_delay[pwm->hwpwm] = false; spin_lock(&sun4i_pwm->ctrl_lock); ctrl = sun4i_pwm_readl(sun4i_pwm, PWM_CTRL_REG); From ce1f9cece057843a03a6b9de361a03eb37dd3fac Mon Sep 17 00:00:00 2001 From: Paul Cercueil Date: Mon, 23 Mar 2020 15:24:18 +0100 Subject: [PATCH 079/280] pwm: jz4740: Use clocks from TCU driver The ingenic-timer "TCU" driver provides us with clocks, that can be (un)gated, reparented or reclocked from devicetree, instead of having these settings hardcoded in this driver. The new code now uses a clk pointer per PWM (instead of a clk per pwm-chip before). So the pointer is stored in per-pwm data now. The calls to arch-specific timer code is replaced with standard clock API calls to start and stop each channel's clock. While this driver is devicetree-compatible, it is never (as of now) probed from devicetree, so this change does not introduce a ABI problem with current devicetree files. Signed-off-by: Paul Cercueil Tested-by: Mathieu Malaterre Tested-by: Artur Rojek Signed-off-by: Thierry Reding --- drivers/pwm/Kconfig | 1 + drivers/pwm/pwm-jz4740.c | 54 +++++++++++++++++++++++++++++----------- 2 files changed, 41 insertions(+), 14 deletions(-) diff --git a/drivers/pwm/Kconfig b/drivers/pwm/Kconfig index 0e64c62d4b6b..6729b0ba1252 100644 --- a/drivers/pwm/Kconfig +++ b/drivers/pwm/Kconfig @@ -235,6 +235,7 @@ config PWM_IMX_TPM config PWM_JZ4740 tristate "Ingenic JZ47xx PWM support" depends on MACH_INGENIC + depends on COMMON_CLK help Generic PWM framework driver for Ingenic JZ47xx based machines. 
diff --git a/drivers/pwm/pwm-jz4740.c b/drivers/pwm/pwm-jz4740.c index 9d78cc21cb12..2eb31f2c7717 100644 --- a/drivers/pwm/pwm-jz4740.c +++ b/drivers/pwm/pwm-jz4740.c @@ -24,7 +24,6 @@ struct jz4740_pwm_chip { struct pwm_chip chip; - struct clk *clk; }; static inline struct jz4740_pwm_chip *to_jz4740(struct pwm_chip *chip) @@ -34,6 +33,11 @@ static inline struct jz4740_pwm_chip *to_jz4740(struct pwm_chip *chip) static int jz4740_pwm_request(struct pwm_chip *chip, struct pwm_device *pwm) { + struct jz4740_pwm_chip *jz = to_jz4740(chip); + struct clk *clk; + char name[16]; + int err; + /* * Timers 0 and 1 are used for system tasks, so they are unavailable * for use as PWMs. @@ -41,16 +45,33 @@ static int jz4740_pwm_request(struct pwm_chip *chip, struct pwm_device *pwm) if (pwm->hwpwm < 2) return -EBUSY; - jz4740_timer_start(pwm->hwpwm); + snprintf(name, sizeof(name), "timer%u", pwm->hwpwm); + + clk = clk_get(chip->dev, name); + if (IS_ERR(clk)) { + if (PTR_ERR(clk) != -EPROBE_DEFER) + dev_err(chip->dev, "Failed to get clock: %pe", clk); + + return PTR_ERR(clk); + } + + err = clk_prepare_enable(clk); + if (err < 0) { + clk_put(clk); + return err; + } + + pwm_set_chip_data(pwm, clk); return 0; } static void jz4740_pwm_free(struct pwm_chip *chip, struct pwm_device *pwm) { - jz4740_timer_set_ctrl(pwm->hwpwm, 0); + struct clk *clk = pwm_get_chip_data(pwm); - jz4740_timer_stop(pwm->hwpwm); + clk_disable_unprepare(clk); + clk_put(clk); } static int jz4740_pwm_enable(struct pwm_chip *chip, struct pwm_device *pwm) @@ -91,17 +112,22 @@ static int jz4740_pwm_apply(struct pwm_chip *chip, struct pwm_device *pwm, const struct pwm_state *state) { struct jz4740_pwm_chip *jz4740 = to_jz4740(pwm->chip); + struct clk *clk = pwm_get_chip_data(pwm), + *parent_clk = clk_get_parent(clk); + unsigned long rate, period, duty; unsigned long long tmp; - unsigned long period, duty; unsigned int prescaler = 0; uint16_t ctrl; + int err; - tmp = (unsigned long long)clk_get_rate(jz4740->clk) * 
state->period; + rate = clk_get_rate(parent_clk); + tmp = (unsigned long long)rate * state->period; do_div(tmp, 1000000000); period = tmp; while (period > 0xffff && prescaler < 6) { period >>= 2; + rate >>= 2; ++prescaler; } @@ -117,14 +143,18 @@ static int jz4740_pwm_apply(struct pwm_chip *chip, struct pwm_device *pwm, jz4740_pwm_disable(chip, pwm); + err = clk_set_rate(clk, rate); + if (err) { + dev_err(chip->dev, "Unable to set rate: %d", err); + return err; + } + jz4740_timer_set_count(pwm->hwpwm, 0); jz4740_timer_set_duty(pwm->hwpwm, duty); jz4740_timer_set_period(pwm->hwpwm, period); - ctrl = JZ_TIMER_CTRL_PRESCALER(prescaler) | JZ_TIMER_CTRL_SRC_EXT | - JZ_TIMER_CTRL_PWM_ABBRUPT_SHUTDOWN; - - jz4740_timer_set_ctrl(pwm->hwpwm, ctrl); + ctrl = jz4740_timer_get_ctrl(pwm->hwpwm); + ctrl |= JZ_TIMER_CTRL_PWM_ABBRUPT_SHUTDOWN; switch (state->polarity) { case PWM_POLARITY_NORMAL: @@ -158,10 +188,6 @@ static int jz4740_pwm_probe(struct platform_device *pdev) if (!jz4740) return -ENOMEM; - jz4740->clk = devm_clk_get(&pdev->dev, "ext"); - if (IS_ERR(jz4740->clk)) - return PTR_ERR(jz4740->clk); - jz4740->chip.dev = &pdev->dev; jz4740->chip.ops = &jz4740_pwm_ops; jz4740->chip.npwm = NUM_PWM; From 485b56f08f3342f353369ee6f9c26053efb2b925 Mon Sep 17 00:00:00 2001 From: Paul Cercueil Date: Mon, 23 Mar 2020 15:24:19 +0100 Subject: [PATCH 080/280] pwm: jz4740: Improve algorithm of clock calculation The previous algorithm hardcoded details about how the TCU clocks work. The new algorithm will use clk_round_rate to find the perfect clock rate for the PWM channel. This code relies on the fact that clk_round_rate() will always round down, which is not a valid assumption given by the clk API, but only happens to be true with the clk drivers used for Ingenic SoCs. Right now, there is no alternative as the clk API does not have a round-down function (and won't have one for a while), but if it ever comes to light, a round-down function should be used instead. 
Signed-off-by: Paul Cercueil Tested-by: Mathieu Malaterre Tested-by: Artur Rojek Signed-off-by: Thierry Reding --- drivers/pwm/pwm-jz4740.c | 44 ++++++++++++++++++++++++++-------------- 1 file changed, 29 insertions(+), 15 deletions(-) diff --git a/drivers/pwm/pwm-jz4740.c b/drivers/pwm/pwm-jz4740.c index 2eb31f2c7717..f4c6c69d0bf6 100644 --- a/drivers/pwm/pwm-jz4740.c +++ b/drivers/pwm/pwm-jz4740.c @@ -112,28 +112,42 @@ static int jz4740_pwm_apply(struct pwm_chip *chip, struct pwm_device *pwm, const struct pwm_state *state) { struct jz4740_pwm_chip *jz4740 = to_jz4740(pwm->chip); - struct clk *clk = pwm_get_chip_data(pwm), - *parent_clk = clk_get_parent(clk); - unsigned long rate, period, duty; - unsigned long long tmp; - unsigned int prescaler = 0; + unsigned long long tmp = 0xffffull * NSEC_PER_SEC; + struct clk *clk = pwm_get_chip_data(pwm); + unsigned long period, duty; uint16_t ctrl; + long rate; int err; - rate = clk_get_rate(parent_clk); - tmp = (unsigned long long)rate * state->period; - do_div(tmp, 1000000000); - period = tmp; + /* + * Limit the clock to a maximum rate that still gives us a period value + * which fits in 16 bits. + */ + do_div(tmp, state->period); - while (period > 0xffff && prescaler < 6) { - period >>= 2; - rate >>= 2; - ++prescaler; + /* + * /!\ IMPORTANT NOTE: + * ------------------- + * This code relies on the fact that clk_round_rate() will always round + * down, which is not a valid assumption given by the clk API, but only + * happens to be true with the clk drivers used for Ingenic SoCs. + * + * Right now, there is no alternative as the clk API does not have a + * round-down function (and won't have one for a while), but if it ever + * comes to light, a round-down function should be used instead. 
+ */ + rate = clk_round_rate(clk, tmp); + if (rate < 0) { + dev_err(chip->dev, "Unable to round rate: %ld", rate); + return rate; } - if (prescaler == 6) - return -EINVAL; + /* Calculate period value */ + tmp = (unsigned long long)rate * state->period; + do_div(tmp, NSEC_PER_SEC); + period = (unsigned long)tmp; + /* Calculate duty value */ tmp = (unsigned long long)period * state->duty_cycle; do_div(tmp, state->period); duty = period - tmp; From c2693514a0a1ec3abcb4673775af3edc797e8a07 Mon Sep 17 00:00:00 2001 From: Paul Cercueil Date: Mon, 23 Mar 2020 15:24:20 +0100 Subject: [PATCH 081/280] pwm: jz4740: Obtain regmap from parent node The TCU registers are shared between a handful of drivers, accessing them through the same regmap. While this driver is devicetree-compatible, it is never (as of now) probed from devicetree, so this change does not introduce a ABI problem with current devicetree files. Signed-off-by: Paul Cercueil Tested-by: Mathieu Malaterre Tested-by: Artur Rojek Signed-off-by: Thierry Reding --- drivers/pwm/Kconfig | 1 + drivers/pwm/pwm-jz4740.c | 67 ++++++++++++++++++++++++++-------------- 2 files changed, 44 insertions(+), 24 deletions(-) diff --git a/drivers/pwm/Kconfig b/drivers/pwm/Kconfig index 6729b0ba1252..eebbc917ac97 100644 --- a/drivers/pwm/Kconfig +++ b/drivers/pwm/Kconfig @@ -236,6 +236,7 @@ config PWM_JZ4740 tristate "Ingenic JZ47xx PWM support" depends on MACH_INGENIC depends on COMMON_CLK + select MFD_SYSCON help Generic PWM framework driver for Ingenic JZ47xx based machines. 
diff --git a/drivers/pwm/pwm-jz4740.c b/drivers/pwm/pwm-jz4740.c index f4c6c69d0bf6..549e6c750bac 100644 --- a/drivers/pwm/pwm-jz4740.c +++ b/drivers/pwm/pwm-jz4740.c @@ -13,17 +13,19 @@ #include #include #include +#include +#include #include #include #include #include - -#include +#include #define NUM_PWM 8 struct jz4740_pwm_chip { struct pwm_chip chip; + struct regmap *map; }; static inline struct jz4740_pwm_chip *to_jz4740(struct pwm_chip *chip) @@ -76,36 +78,39 @@ static void jz4740_pwm_free(struct pwm_chip *chip, struct pwm_device *pwm) static int jz4740_pwm_enable(struct pwm_chip *chip, struct pwm_device *pwm) { - uint32_t ctrl = jz4740_timer_get_ctrl(pwm->pwm); + struct jz4740_pwm_chip *jz = to_jz4740(chip); - ctrl |= JZ_TIMER_CTRL_PWM_ENABLE; - jz4740_timer_set_ctrl(pwm->hwpwm, ctrl); - jz4740_timer_enable(pwm->hwpwm); + /* Enable PWM output */ + regmap_update_bits(jz->map, TCU_REG_TCSRc(pwm->hwpwm), + TCU_TCSR_PWM_EN, TCU_TCSR_PWM_EN); + + /* Start counter */ + regmap_write(jz->map, TCU_REG_TESR, BIT(pwm->hwpwm)); return 0; } static void jz4740_pwm_disable(struct pwm_chip *chip, struct pwm_device *pwm) { - uint32_t ctrl = jz4740_timer_get_ctrl(pwm->hwpwm); + struct jz4740_pwm_chip *jz = to_jz4740(chip); /* * Set duty > period. This trick allows the TCU channels in TCU2 mode to * properly return to their init level. */ - jz4740_timer_set_duty(pwm->hwpwm, 0xffff); - jz4740_timer_set_period(pwm->hwpwm, 0x0); + regmap_write(jz->map, TCU_REG_TDHRc(pwm->hwpwm), 0xffff); + regmap_write(jz->map, TCU_REG_TDFRc(pwm->hwpwm), 0x0); /* * Disable PWM output. * In TCU2 mode (channel 1/2 on JZ4750+), this must be done before the * counter is stopped, while in TCU1 mode the order does not matter. 
*/ - ctrl &= ~JZ_TIMER_CTRL_PWM_ENABLE; - jz4740_timer_set_ctrl(pwm->hwpwm, ctrl); + regmap_update_bits(jz->map, TCU_REG_TCSRc(pwm->hwpwm), + TCU_TCSR_PWM_EN, 0); /* Stop counter */ - jz4740_timer_disable(pwm->hwpwm); + regmap_write(jz->map, TCU_REG_TECR, BIT(pwm->hwpwm)); } static int jz4740_pwm_apply(struct pwm_chip *chip, struct pwm_device *pwm, @@ -115,7 +120,6 @@ static int jz4740_pwm_apply(struct pwm_chip *chip, struct pwm_device *pwm, unsigned long long tmp = 0xffffull * NSEC_PER_SEC; struct clk *clk = pwm_get_chip_data(pwm); unsigned long period, duty; - uint16_t ctrl; long rate; int err; @@ -163,24 +167,32 @@ static int jz4740_pwm_apply(struct pwm_chip *chip, struct pwm_device *pwm, return err; } - jz4740_timer_set_count(pwm->hwpwm, 0); - jz4740_timer_set_duty(pwm->hwpwm, duty); - jz4740_timer_set_period(pwm->hwpwm, period); + /* Reset counter to 0 */ + regmap_write(jz4740->map, TCU_REG_TCNTc(pwm->hwpwm), 0); - ctrl = jz4740_timer_get_ctrl(pwm->hwpwm); - ctrl |= JZ_TIMER_CTRL_PWM_ABBRUPT_SHUTDOWN; + /* Set duty */ + regmap_write(jz4740->map, TCU_REG_TDHRc(pwm->hwpwm), duty); + /* Set period */ + regmap_write(jz4740->map, TCU_REG_TDFRc(pwm->hwpwm), period); + + /* Set abrupt shutdown */ + regmap_update_bits(jz4740->map, TCU_REG_TCSRc(pwm->hwpwm), + TCU_TCSR_PWM_SD, TCU_TCSR_PWM_SD); + + /* Set polarity */ switch (state->polarity) { case PWM_POLARITY_NORMAL: - ctrl &= ~JZ_TIMER_CTRL_PWM_ACTIVE_LOW; + regmap_update_bits(jz4740->map, TCU_REG_TCSRc(pwm->hwpwm), + TCU_TCSR_PWM_INITL_HIGH, 0); break; case PWM_POLARITY_INVERSED: - ctrl |= JZ_TIMER_CTRL_PWM_ACTIVE_LOW; + regmap_update_bits(jz4740->map, TCU_REG_TCSRc(pwm->hwpwm), + TCU_TCSR_PWM_INITL_HIGH, + TCU_TCSR_PWM_INITL_HIGH); break; } - jz4740_timer_set_ctrl(pwm->hwpwm, ctrl); - if (state->enabled) jz4740_pwm_enable(chip, pwm); @@ -196,13 +208,20 @@ static const struct pwm_ops jz4740_pwm_ops = { static int jz4740_pwm_probe(struct platform_device *pdev) { + struct device *dev = &pdev->dev; struct 
jz4740_pwm_chip *jz4740; - jz4740 = devm_kzalloc(&pdev->dev, sizeof(*jz4740), GFP_KERNEL); + jz4740 = devm_kzalloc(dev, sizeof(*jz4740), GFP_KERNEL); if (!jz4740) return -ENOMEM; - jz4740->chip.dev = &pdev->dev; + jz4740->map = device_node_to_regmap(dev->parent->of_node); + if (IS_ERR(jz4740->map)) { + dev_err(dev, "regmap not found: %ld\n", PTR_ERR(jz4740->map)); + return PTR_ERR(jz4740->map); + } + + jz4740->chip.dev = dev; jz4740->chip.ops = &jz4740_pwm_ops; jz4740->chip.npwm = NUM_PWM; jz4740->chip.base = -1; From a2005fc791798f40cc9895b6abda8fbc7a168174 Mon Sep 17 00:00:00 2001 From: Paul Cercueil Date: Mon, 23 Mar 2020 15:24:21 +0100 Subject: [PATCH 082/280] pwm: jz4740: Allow selection of PWM channels 0 and 1 The TCU channels 0 and 1 were previously reserved for system tasks, and thus unavailable for PWM. Signed-off-by: Paul Cercueil Tested-by: Mathieu Malaterre Tested-by: Artur Rojek Signed-off-by: Thierry Reding --- drivers/pwm/pwm-jz4740.c | 19 ++++++++++++++----- 1 file changed, 14 insertions(+), 5 deletions(-) diff --git a/drivers/pwm/pwm-jz4740.c b/drivers/pwm/pwm-jz4740.c index 549e6c750bac..3cd5c054ad9a 100644 --- a/drivers/pwm/pwm-jz4740.c +++ b/drivers/pwm/pwm-jz4740.c @@ -33,6 +33,19 @@ static inline struct jz4740_pwm_chip *to_jz4740(struct pwm_chip *chip) return container_of(chip, struct jz4740_pwm_chip, chip); } +static bool jz4740_pwm_can_use_chn(struct jz4740_pwm_chip *jz, + unsigned int channel) +{ + /* Enable all TCU channels for PWM use by default except channels 0/1 */ + u32 pwm_channels_mask = GENMASK(NUM_PWM - 1, 2); + + device_property_read_u32(jz->chip.dev->parent, + "ingenic,pwm-channels-mask", + &pwm_channels_mask); + + return !!(pwm_channels_mask & BIT(channel)); +} + static int jz4740_pwm_request(struct pwm_chip *chip, struct pwm_device *pwm) { struct jz4740_pwm_chip *jz = to_jz4740(chip); @@ -40,11 +53,7 @@ static int jz4740_pwm_request(struct pwm_chip *chip, struct pwm_device *pwm) char name[16]; int err; - /* - * Timers 0 and 1 
are used for system tasks, so they are unavailable - * for use as PWMs. - */ - if (pwm->hwpwm < 2) + if (!jz4740_pwm_can_use_chn(jz, pwm->hwpwm)) return -EBUSY; snprintf(name, sizeof(name), "timer%u", pwm->hwpwm); From ec14b65ab6bcd583967880edd9688c7540cf5496 Mon Sep 17 00:00:00 2001 From: Cezary Rojewski Date: Thu, 19 Mar 2020 21:49:44 +0100 Subject: [PATCH 083/280] ASoC: Intel: broadwell: Remove ignore_suspend flag from SSP0 dai link As of commit: ASoC: soc-core: care .ignore_suspend for Component suspend function soc-core::snd_soc_suspend no longer ignores 'ignore_suspend' flag for dai links. While BE dai link for System Pin is supposed to follow standard suspend-resume flow, appended 'ignore_suspend' flag disturbs that flow and causes audio to break right after resume. Remove the flag to address this. Link to first message in conversation: https://lkml.org/lkml/2020/3/18/54 Reported-by: Dominik Brodowski Suggested-by: Mark Brown Signed-off-by: Cezary Rojewski Acked-by: Pierre-Louis Bossart Cc: Kuninori Morimoto Cc: Pierre-Louis Bossart Link: https://lore.kernel.org/r/20200319204947.18963-2-cezary.rojewski@intel.com Signed-off-by: Mark Brown --- sound/soc/intel/boards/broadwell.c | 1 - 1 file changed, 1 deletion(-) diff --git a/sound/soc/intel/boards/broadwell.c b/sound/soc/intel/boards/broadwell.c index acb4e36682cb..f9a8336a0541 100644 --- a/sound/soc/intel/boards/broadwell.c +++ b/sound/soc/intel/boards/broadwell.c @@ -217,7 +217,6 @@ static struct snd_soc_dai_link broadwell_rt286_dais[] = { .init = broadwell_rt286_codec_init, .dai_fmt = SND_SOC_DAIFMT_I2S | SND_SOC_DAIFMT_NB_NF | SND_SOC_DAIFMT_CBS_CFS, - .ignore_suspend = 1, .ignore_pmdown_time = 1, .be_hw_params_fixup = broadwell_ssp0_fixup, .ops = &broadwell_rt286_ops, From a99661531e129f41f356bcbf6f57aee3695b6940 Mon Sep 17 00:00:00 2001 From: Cezary Rojewski Date: Thu, 19 Mar 2020 21:49:45 +0100 Subject: [PATCH 084/280] ASoC: Intel: haswell: Remove ignore_suspend flag from SSP0 dai link As of commit: 
ASoC: soc-core: care .ignore_suspend for Component suspend function soc-core::snd_soc_suspend no longer ignores 'ignore_suspend' flag for dai links. While BE dai link for System Pin is supposed to follow standard suspend-resume flow, appended 'ignore_suspend' flag disturbs that flow and causes audio to break right after resume. Remove the flag to address this. Signed-off-by: Cezary Rojewski Acked-by: Pierre-Louis Bossart Cc: Kuninori Morimoto Cc: Pierre-Louis Bossart Cc: Dominik Brodowski Cc: Mark Brown Link: https://lore.kernel.org/r/20200319204947.18963-3-cezary.rojewski@intel.com Signed-off-by: Mark Brown --- sound/soc/intel/boards/haswell.c | 1 - 1 file changed, 1 deletion(-) diff --git a/sound/soc/intel/boards/haswell.c b/sound/soc/intel/boards/haswell.c index 3ed53d7db4e6..74af090f2657 100644 --- a/sound/soc/intel/boards/haswell.c +++ b/sound/soc/intel/boards/haswell.c @@ -162,7 +162,6 @@ static struct snd_soc_dai_link haswell_rt5640_dais[] = { .no_pcm = 1, .dai_fmt = SND_SOC_DAIFMT_I2S | SND_SOC_DAIFMT_NB_NF | SND_SOC_DAIFMT_CBS_CFS, - .ignore_suspend = 1, .ignore_pmdown_time = 1, .be_hw_params_fixup = haswell_ssp0_fixup, .ops = &haswell_rt5640_ops, From b0ada40cb80d7e427fb719a4e6029935639fa668 Mon Sep 17 00:00:00 2001 From: Cezary Rojewski Date: Thu, 19 Mar 2020 21:49:46 +0100 Subject: [PATCH 085/280] ASoC: Intel: bdw-rt5677: Remove ignore_suspend flag from SSP0 dai link As of commit: ASoC: soc-core: care .ignore_suspend for Component suspend function soc-core::snd_soc_suspend no longer ignores 'ignore_suspend' flag for dai links. While BE dai link for System Pin is supposed to follow standard suspend-resume flow, appended 'ignore_suspend' flag disturbs that flow and causes audio to break right after resume. Remove the flag to address this. 
Signed-off-by: Cezary Rojewski Acked-by: Pierre-Louis Bossart Cc: Kuninori Morimoto Cc: Pierre-Louis Bossart Cc: Dominik Brodowski Cc: Mark Brown Link: https://lore.kernel.org/r/20200319204947.18963-4-cezary.rojewski@intel.com Signed-off-by: Mark Brown --- sound/soc/intel/boards/bdw-rt5677.c | 1 - 1 file changed, 1 deletion(-) diff --git a/sound/soc/intel/boards/bdw-rt5677.c b/sound/soc/intel/boards/bdw-rt5677.c index 6b4b64098d36..cc41a348295e 100644 --- a/sound/soc/intel/boards/bdw-rt5677.c +++ b/sound/soc/intel/boards/bdw-rt5677.c @@ -340,7 +340,6 @@ static struct snd_soc_dai_link bdw_rt5677_dais[] = { .no_pcm = 1, .dai_fmt = SND_SOC_DAIFMT_I2S | SND_SOC_DAIFMT_NB_NF | SND_SOC_DAIFMT_CBS_CFS, - .ignore_suspend = 1, .ignore_pmdown_time = 1, .be_hw_params_fixup = broadwell_ssp0_fixup, .ops = &bdw_rt5677_ops, From 793012c6c586fefef3abd45c9d2b94df042907b0 Mon Sep 17 00:00:00 2001 From: Cezary Rojewski Date: Thu, 19 Mar 2020 21:49:47 +0100 Subject: [PATCH 086/280] ASoC: Intel: bdw-rt5650: Remove ignore_suspend flag from SSP0 dai link As of commit: ASoC: soc-core: care .ignore_suspend for Component suspend function soc-core::snd_soc_suspend no longer ignores 'ignore_suspend' flag for dai links. While BE dai link for System Pin is supposed to follow standard suspend-resume flow, appended 'ignore_suspend' flag disturbs that flow and causes audio to break right after resume. Remove the flag to address this. 
Signed-off-by: Cezary Rojewski Acked-by: Pierre-Louis Bossart Cc: Kuninori Morimoto Cc: Pierre-Louis Bossart Cc: Dominik Brodowski Cc: Mark Brown Link: https://lore.kernel.org/r/20200319204947.18963-5-cezary.rojewski@intel.com Signed-off-by: Mark Brown --- sound/soc/intel/boards/bdw-rt5650.c | 1 - 1 file changed, 1 deletion(-) diff --git a/sound/soc/intel/boards/bdw-rt5650.c b/sound/soc/intel/boards/bdw-rt5650.c index 6c2fdb5659ed..af2f50293208 100644 --- a/sound/soc/intel/boards/bdw-rt5650.c +++ b/sound/soc/intel/boards/bdw-rt5650.c @@ -254,7 +254,6 @@ static struct snd_soc_dai_link bdw_rt5650_dais[] = { .no_pcm = 1, .dai_fmt = SND_SOC_DAIFMT_DSP_B | SND_SOC_DAIFMT_NB_NF | SND_SOC_DAIFMT_CBS_CFS, - .ignore_suspend = 1, .ignore_pmdown_time = 1, .be_hw_params_fixup = broadwell_ssp0_fixup, .ops = &bdw_rt5650_ops, From 0ab070917afdc93670c2d0ea02ab6defb6246a7c Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=EC=9D=B4=EA=B2=BD=ED=83=9D?= Date: Mon, 30 Mar 2020 16:35:59 +0900 Subject: [PATCH 087/280] ASoC: fix regwmask If regwshift is 32 and the selected architecture compiles '<<' operator for signed int literal into rotating shift, '1<<regwshift' became 1 and it makes regwmask to 0x0. The literal is set to unsigned long to get intended regwmask. Signed-off-by: Gyeongtaek Lee Link: https://lore.kernel.org/r/001001d60665$db7af3e0$9270dba0$@samsung.com Signed-off-by: Mark Brown --- sound/soc/soc-ops.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/sound/soc/soc-ops.c b/sound/soc/soc-ops.c index 652657dc6809..55ffb34be95e 100644 --- a/sound/soc/soc-ops.c +++ b/sound/soc/soc-ops.c @@ -825,7 +825,7 @@ int snd_soc_get_xr_sx(struct snd_kcontrol *kcontrol, unsigned int regbase = mc->regbase; unsigned int regcount = mc->regcount; unsigned int regwshift = component->val_bytes * BITS_PER_BYTE; - unsigned int regwmask = (1<<regwshift)-1; + unsigned int regwmask = (1UL<<regwshift)-1; unsigned int invert = mc->invert; unsigned long mask = (1UL<<mc->nbits)-1; long min = mc->min; @@ -874,7 +874,7 @@ int snd_soc_put_xr_sx(struct snd_kcontrol *kcontrol, unsigned int regbase = mc->regbase; unsigned int regcount = mc->regcount; unsigned int regwshift = component->val_bytes * BITS_PER_BYTE; - unsigned int
regwmask = (1<<regwshift)-1; + unsigned int regwmask = (1UL<<regwshift)-1; unsigned int invert = mc->invert; unsigned long mask = (1UL<<mc->nbits)-1; long max = mc->max; From 1ba616bd1a6d5ebdb31ceaa9265b2a2bb670155b Mon Sep 17 00:00:00 2001 From: Pierre-Louis Bossart Date: Mon, 30 Mar 2020 11:06:02 -0500 Subject: [PATCH 088/280] ASoC: soc-dai: fix DAI startup/shutdown sequence MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The addition of a single flag to track the DAI status prevents the DAI startup sequence from being called on capture if the DAI is already used for playback. Fix by extending the existing code with one flag per direction. Fixes: b56be800f1292 ("ASoC: soc-pcm: call snd_soc_dai_startup()/shutdown() once") Reported-by: Amadeusz Sławiński Signed-off-by: Pierre-Louis Bossart Tested-by: Amadeusz Sławiński Link: https://lore.kernel.org/r/20200330160602.10180-1-pierre-louis.bossart@linux.intel.com Signed-off-by: Mark Brown --- include/sound/soc-dai.h | 2 +- sound/soc/soc-dai.c | 8 ++++---- 2 files changed, 5 insertions(+), 5 deletions(-) diff --git a/include/sound/soc-dai.h b/include/sound/soc-dai.h index 78bac995db15..d4825b82c7a3 100644 --- a/include/sound/soc-dai.h +++ b/include/sound/soc-dai.h @@ -351,7 +351,7 @@ struct snd_soc_dai { /* bit field */ unsigned int probed:1; - unsigned int started:1; + unsigned int started[SNDRV_PCM_STREAM_LAST + 1]; }; static inline struct snd_soc_pcm_stream * diff --git a/sound/soc/soc-dai.c b/sound/soc/soc-dai.c index 19142f6e533c..8f3cad8db89a 100644 --- a/sound/soc/soc-dai.c +++ b/sound/soc/soc-dai.c @@ -295,12 +295,12 @@ int snd_soc_dai_startup(struct snd_soc_dai *dai, { int ret = 0; - if (!dai->started && + if (!dai->started[substream->stream] && dai->driver->ops->startup) ret = dai->driver->ops->startup(substream, dai); if (ret == 0) - dai->started = 1; + dai->started[substream->stream] = 1; return ret; } @@ -308,11 +308,11 @@ int snd_soc_dai_startup(struct snd_soc_dai *dai, void snd_soc_dai_shutdown(struct snd_soc_dai *dai, struct snd_pcm_substream
*substream) { - if (dai->started && + if (dai->started[substream->stream] && dai->driver->ops->shutdown) dai->driver->ops->shutdown(substream, dai); - dai->started = 0; + dai->started[substream->stream] = 0; } int snd_soc_dai_prepare(struct snd_soc_dai *dai, From 70cbddb973859158731ce77ab20cd5e53822c089 Mon Sep 17 00:00:00 2001 From: Hu Haowen Date: Mon, 30 Mar 2020 12:54:36 +0800 Subject: [PATCH 089/280] arch/xtensa: fix grammar in Kconfig help text Spell "Don't" correctly in the XTENSA_VARIANT_CUSTOM_NAME help text. Signed-off-by: Hu Haowen Message-Id: <20200330045436.12645-1-xianfengting221@163.com> Signed-off-by: Max Filippov --- arch/xtensa/Kconfig | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/xtensa/Kconfig b/arch/xtensa/Kconfig index 32ee759a3fda..60ce5193af8a 100644 --- a/arch/xtensa/Kconfig +++ b/arch/xtensa/Kconfig @@ -122,7 +122,7 @@ config XTENSA_VARIANT_CUSTOM_NAME help Provide the name of a custom Xtensa processor variant. This CORENAME selects arch/xtensa/variant/CORENAME. - Dont forget you have to select MMU if you have one. + Don't forget you have to select MMU if you have one. config XTENSA_VARIANT_NAME string From c95b708d5fa65b4e51f088ee077d127fd5a57b70 Mon Sep 17 00:00:00 2001 From: Nick Bowler Date: Sat, 28 Mar 2020 01:09:09 -0400 Subject: [PATCH 090/280] nvme: fix compat address handling in several ioctls On a 32-bit kernel, the upper bits of userspace addresses passed via various ioctls are silently ignored by the nvme driver. However on a 64-bit kernel running a compat task, these upper bits are not ignored and are in fact required to be zero for the ioctls to work. Unfortunately, this difference matters. 32-bit smartctl submits the NVME_IOCTL_ADMIN_CMD ioctl with garbage in these upper bits because it seems the pointer value it puts into the nvme_passthru_cmd structure is sign extended. 
This works fine on 32-bit kernels but fails on a 64-bit one because (at least on my setup) the addresses smartctl uses are consistently above 2G. For example: # smartctl -x /dev/nvme0n1 smartctl 7.1 2019-12-30 r5022 [x86_64-linux-5.5.11] (local build) Copyright (C) 2002-19, Bruce Allen, Christian Franke, www.smartmontools.org Read NVMe Identify Controller failed: NVME_IOCTL_ADMIN_CMD: Bad address Since changing 32-bit kernels to actually check all of the submitted address bits now would break existing userspace, this patch fixes the compat problem by explicitly zeroing the upper bits in the compat case. This enables 32-bit smartctl to work on a 64-bit kernel. Signed-off-by: Nick Bowler Signed-off-by: Christoph Hellwig --- drivers/nvme/host/core.c | 27 ++++++++++++++++++++------- 1 file changed, 20 insertions(+), 7 deletions(-) diff --git a/drivers/nvme/host/core.c b/drivers/nvme/host/core.c index 4f907e3beda1..2db8563aeb2d 100644 --- a/drivers/nvme/host/core.c +++ b/drivers/nvme/host/core.c @@ -6,6 +6,7 @@ #include <linux/blkdev.h> #include <linux/blk-mq.h> +#include <linux/compat.h> #include <linux/delay.h> #include <linux/errno.h> #include <linux/hdreg.h> @@ -1252,6 +1253,18 @@ static void nvme_enable_aen(struct nvme_ctrl *ctrl) queue_work(nvme_wq, &ctrl->async_event_work); } +/* + * Convert integer values from ioctl structures to user pointers, silently + * ignoring the upper bits in the compat case to match behaviour of 32-bit + * kernels.
+ */ +static void __user *nvme_to_user_ptr(uintptr_t ptrval) +{ + if (in_compat_syscall()) + ptrval = (compat_uptr_t)ptrval; + return (void __user *)ptrval; +} + static int nvme_submit_io(struct nvme_ns *ns, struct nvme_user_io __user *uio) { struct nvme_user_io io; @@ -1275,7 +1288,7 @@ static int nvme_submit_io(struct nvme_ns *ns, struct nvme_user_io __user *uio) length = (io.nblocks + 1) << ns->lba_shift; meta_len = (io.nblocks + 1) * ns->ms; - metadata = (void __user *)(uintptr_t)io.metadata; + metadata = nvme_to_user_ptr(io.metadata); if (ns->ext) { length += meta_len; @@ -1298,7 +1311,7 @@ static int nvme_submit_io(struct nvme_ns *ns, struct nvme_user_io __user *uio) c.rw.appmask = cpu_to_le16(io.appmask); return nvme_submit_user_cmd(ns->queue, &c, - (void __user *)(uintptr_t)io.addr, length, + nvme_to_user_ptr(io.addr), length, metadata, meta_len, lower_32_bits(io.slba), NULL, 0); } @@ -1418,9 +1431,9 @@ static int nvme_user_cmd(struct nvme_ctrl *ctrl, struct nvme_ns *ns, effects = nvme_passthru_start(ctrl, ns, cmd.opcode); status = nvme_submit_user_cmd(ns ? ns->queue : ctrl->admin_q, &c, - (void __user *)(uintptr_t)cmd.addr, cmd.data_len, - (void __user *)(uintptr_t)cmd.metadata, - cmd.metadata_len, 0, &result, timeout); + nvme_to_user_ptr(cmd.addr), cmd.data_len, + nvme_to_user_ptr(cmd.metadata), cmd.metadata_len, + 0, &result, timeout); nvme_passthru_end(ctrl, effects); if (status >= 0) { @@ -1465,8 +1478,8 @@ static int nvme_user_cmd64(struct nvme_ctrl *ctrl, struct nvme_ns *ns, effects = nvme_passthru_start(ctrl, ns, cmd.opcode); status = nvme_submit_user_cmd(ns ? 
ns->queue : ctrl->admin_q, &c, - (void __user *)(uintptr_t)cmd.addr, cmd.data_len, - (void __user *)(uintptr_t)cmd.metadata, cmd.metadata_len, + nvme_to_user_ptr(cmd.addr), cmd.data_len, + nvme_to_user_ptr(cmd.metadata), cmd.metadata_len, 0, &cmd.result, timeout); nvme_passthru_end(ctrl, effects); From 38803fcffb5baf40cd403c1bd980f22308aefee8 Mon Sep 17 00:00:00 2001 From: James Smart Date: Wed, 18 Mar 2020 14:41:12 -0700 Subject: [PATCH 091/280] nvme-fcloop: fix deallocation of working context There's been a longstanding bug of LS completions which freed ls ops, particularly the disconnect LS, while executing on a work context that is in the memory being free. Not a good thing to do. Rework LS handling to make callbacks in the rport context rather than the ls_request context. Signed-off-by: James Smart Reviewed-by: Himanshu Madhani Reviewed-by: Hannes Reinecke Signed-off-by: Christoph Hellwig --- drivers/nvme/target/fcloop.c | 76 ++++++++++++++++++++++++------------ 1 file changed, 52 insertions(+), 24 deletions(-) diff --git a/drivers/nvme/target/fcloop.c b/drivers/nvme/target/fcloop.c index 1c50af6219f3..9861fcea39f6 100644 --- a/drivers/nvme/target/fcloop.c +++ b/drivers/nvme/target/fcloop.c @@ -198,10 +198,13 @@ struct fcloop_lport_priv { }; struct fcloop_rport { - struct nvme_fc_remote_port *remoteport; - struct nvmet_fc_target_port *targetport; - struct fcloop_nport *nport; - struct fcloop_lport *lport; + struct nvme_fc_remote_port *remoteport; + struct nvmet_fc_target_port *targetport; + struct fcloop_nport *nport; + struct fcloop_lport *lport; + spinlock_t lock; + struct list_head ls_list; + struct work_struct ls_work; }; struct fcloop_tport { @@ -224,11 +227,10 @@ struct fcloop_nport { }; struct fcloop_lsreq { - struct fcloop_tport *tport; struct nvmefc_ls_req *lsreq; - struct work_struct work; struct nvmefc_tgt_ls_req tgt_ls_req; int status; + struct list_head ls_list; /* fcloop_rport->ls_list */ }; struct fcloop_rscn { @@ -292,21 +294,32 @@ 
fcloop_delete_queue(struct nvme_fc_local_port *localport, { } - -/* - * Transmit of LS RSP done (e.g. buffers all set). call back up - * initiator "done" flows. - */ static void -fcloop_tgt_lsrqst_done_work(struct work_struct *work) +fcloop_rport_lsrqst_work(struct work_struct *work) { - struct fcloop_lsreq *tls_req = - container_of(work, struct fcloop_lsreq, work); - struct fcloop_tport *tport = tls_req->tport; - struct nvmefc_ls_req *lsreq = tls_req->lsreq; + struct fcloop_rport *rport = + container_of(work, struct fcloop_rport, ls_work); + struct fcloop_lsreq *tls_req; - if (!tport || tport->remoteport) - lsreq->done(lsreq, tls_req->status); + spin_lock(&rport->lock); + for (;;) { + tls_req = list_first_entry_or_null(&rport->ls_list, + struct fcloop_lsreq, ls_list); + if (!tls_req) + break; + + list_del(&tls_req->ls_list); + spin_unlock(&rport->lock); + + tls_req->lsreq->done(tls_req->lsreq, tls_req->status); + /* + * callee may free memory containing tls_req. + * do not reference lsreq after this. 
+ */ + + spin_lock(&rport->lock); + } + spin_unlock(&rport->lock); } static int @@ -319,17 +332,18 @@ fcloop_ls_req(struct nvme_fc_local_port *localport, int ret = 0; tls_req->lsreq = lsreq; - INIT_WORK(&tls_req->work, fcloop_tgt_lsrqst_done_work); + INIT_LIST_HEAD(&tls_req->ls_list); if (!rport->targetport) { tls_req->status = -ECONNREFUSED; - tls_req->tport = NULL; - schedule_work(&tls_req->work); + spin_lock(&rport->lock); + list_add_tail(&rport->ls_list, &tls_req->ls_list); + spin_unlock(&rport->lock); + schedule_work(&rport->ls_work); return ret; } tls_req->status = 0; - tls_req->tport = rport->targetport->private; ret = nvmet_fc_rcv_ls_req(rport->targetport, &tls_req->tgt_ls_req, lsreq->rqstaddr, lsreq->rqstlen); @@ -337,18 +351,28 @@ fcloop_ls_req(struct nvme_fc_local_port *localport, } static int -fcloop_xmt_ls_rsp(struct nvmet_fc_target_port *tport, +fcloop_xmt_ls_rsp(struct nvmet_fc_target_port *targetport, struct nvmefc_tgt_ls_req *tgt_lsreq) { struct fcloop_lsreq *tls_req = tgt_ls_req_to_lsreq(tgt_lsreq); struct nvmefc_ls_req *lsreq = tls_req->lsreq; + struct fcloop_tport *tport = targetport->private; + struct nvme_fc_remote_port *remoteport = tport->remoteport; + struct fcloop_rport *rport; memcpy(lsreq->rspaddr, tgt_lsreq->rspbuf, ((lsreq->rsplen < tgt_lsreq->rsplen) ? 
lsreq->rsplen : tgt_lsreq->rsplen)); + tgt_lsreq->done(tgt_lsreq); - schedule_work(&tls_req->work); + if (remoteport) { + rport = remoteport->private; + spin_lock(&rport->lock); + list_add_tail(&rport->ls_list, &tls_req->ls_list); + spin_unlock(&rport->lock); + schedule_work(&rport->ls_work); + } return 0; } @@ -834,6 +858,7 @@ fcloop_remoteport_delete(struct nvme_fc_remote_port *remoteport) { struct fcloop_rport *rport = remoteport->private; + flush_work(&rport->ls_work); fcloop_nport_put(rport->nport); } @@ -1136,6 +1161,9 @@ fcloop_create_remote_port(struct device *dev, struct device_attribute *attr, rport->nport = nport; rport->lport = nport->lport; nport->rport = rport; + spin_lock_init(&rport->lock); + INIT_WORK(&rport->ls_work, fcloop_rport_lsrqst_work); + INIT_LIST_HEAD(&rport->ls_list); return count; } From a62315b83664dc9a7a6c6170ba2d174bb9fbed3c Mon Sep 17 00:00:00 2001 From: Israel Rukshin Date: Tue, 31 Mar 2020 15:46:33 +0300 Subject: [PATCH 092/280] nvme-rdma: Replace comma with a semicolon Use a semicolon at the end of an assignment expression. 
Signed-off-by: Israel Rukshin Reviewed-by: Max Gurtovoy Signed-off-by: Christoph Hellwig --- drivers/nvme/host/rdma.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/nvme/host/rdma.c b/drivers/nvme/host/rdma.c index 86603d9b0cef..f704e7227d05 100644 --- a/drivers/nvme/host/rdma.c +++ b/drivers/nvme/host/rdma.c @@ -1350,7 +1350,7 @@ static int nvme_rdma_post_send(struct nvme_rdma_queue *queue, int ret; sge->addr = qe->dma; - sge->length = sizeof(struct nvme_command), + sge->length = sizeof(struct nvme_command); sge->lkey = queue->device->pd->local_dma_lkey; wr.next = NULL; From d038dd815fc56cd77ae8a51bb6d1d11e3aab9609 Mon Sep 17 00:00:00 2001 From: James Smart Date: Wed, 18 Mar 2020 14:40:43 -0700 Subject: [PATCH 093/280] nvmet-fc: fix typo in comment Fix typo in comment: about should be abort Signed-off-by: James Smart Reviewed-by: Sagi Grimberg Reviewed-by: Chiatanya Kulkarni Reviewed-by: Himanshu Madhani Reviewed-by: Hannes Reinecke --- drivers/nvme/target/fc.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/nvme/target/fc.c b/drivers/nvme/target/fc.c index a0db6371b43e..a8ceb7721640 100644 --- a/drivers/nvme/target/fc.c +++ b/drivers/nvme/target/fc.c @@ -684,7 +684,7 @@ nvmet_fc_delete_target_queue(struct nvmet_fc_tgt_queue *queue) disconnect = atomic_xchg(&queue->connected, 0); spin_lock_irqsave(&queue->qlock, flags); - /* about outstanding io's */ + /* abort outstanding io's */ for (i = 0; i < queue->sqsize; fod++, i++) { if (fod->active) { spin_lock(&fod->flock); From 0558955373023b08f638c9ede36741b0e4200f58 Mon Sep 17 00:00:00 2001 From: Xiaoguang Wang Date: Tue, 31 Mar 2020 14:05:18 +0800 Subject: [PATCH 094/280] io_uring: refactor file register/unregister/update handling While diving into io_uring fileset register/unregister/update codes, we found one bug in the fileset update handling. 
io_uring fileset update uses a percpu_ref variable to check whether we can put the previously registered file, only when the refcnt of the percpu_ref variable reaches zero, can we safely put these files. But this doesn't work so well. If applications always issue requests continually, this percpu_ref will never have a chance to reach zero, and it'll always be in atomic mode, also will defeat the gains introduced by fileset register/unregister/update feature, which are used to reduce the atomic operation overhead of fput/fget. To fix this issue, while applications do IORING_REGISTER_FILES or IORING_REGISTER_FILES_UPDATE operations, we allocate a new percpu_ref and kill the old percpu_ref, new requests will use the new percpu_ref. Once all previous old requests complete, old percpu_refs will be dropped and registered files will be put safely. Link: https://lore.kernel.org/io-uring/5a8dac33-4ca2-4847-b091-f7dcd3ad0ff3@linux.alibaba.com/T/#t Signed-off-by: Xiaoguang Wang Signed-off-by: Jens Axboe --- fs/io_uring.c | 206 ++++++++++++++++++++++++++++++-------------------- 1 file changed, 125 insertions(+), 81 deletions(-) diff --git a/fs/io_uring.c b/fs/io_uring.c index 358f97be9c7b..7b5087904640 100644 --- a/fs/io_uring.c +++ b/fs/io_uring.c @@ -186,14 +186,23 @@ struct fixed_file_table { struct file **files; }; +struct fixed_file_ref_node { + struct percpu_ref refs; + struct list_head node; + struct list_head file_list; + struct fixed_file_data *file_data; + struct work_struct work; +}; + struct fixed_file_data { struct fixed_file_table *table; struct io_ring_ctx *ctx; + struct percpu_ref *cur_refs; struct percpu_ref refs; - struct llist_head put_llist; - struct work_struct ref_work; struct completion done; + struct list_head ref_list; + spinlock_t lock; }; struct io_buffer { @@ -618,6 +627,8 @@ struct io_kiocb { struct list_head inflight_entry; + struct percpu_ref *fixed_file_refs; + union { /* * Only commands that never go async can use the below fields, @@ -848,7
+859,6 @@ static int __io_sqe_files_update(struct io_ring_ctx *ctx, struct io_uring_files_update *ip, unsigned nr_args); static int io_grab_files(struct io_kiocb *req); -static void io_ring_file_ref_flush(struct fixed_file_data *data); static void io_cleanup_req(struct io_kiocb *req); static int io_file_get(struct io_submit_state *state, struct io_kiocb *req, int fd, struct file **out_file, bool fixed); @@ -1341,7 +1351,7 @@ static inline void io_put_file(struct io_kiocb *req, struct file *file, bool fixed) { if (fixed) - percpu_ref_put(&req->ctx->file_data->refs); + percpu_ref_put(req->fixed_file_refs); else fput(file); } @@ -1393,21 +1403,18 @@ struct req_batch { static void io_free_req_many(struct io_ring_ctx *ctx, struct req_batch *rb) { - int fixed_refs = rb->to_free; - if (!rb->to_free) return; if (rb->need_iter) { int i, inflight = 0; unsigned long flags; - fixed_refs = 0; for (i = 0; i < rb->to_free; i++) { struct io_kiocb *req = rb->reqs[i]; if (req->flags & REQ_F_FIXED_FILE) { req->file = NULL; - fixed_refs++; + percpu_ref_put(req->fixed_file_refs); } if (req->flags & REQ_F_INFLIGHT) inflight++; @@ -1433,8 +1440,6 @@ static void io_free_req_many(struct io_ring_ctx *ctx, struct req_batch *rb) } do_free: kmem_cache_free_bulk(req_cachep, rb->to_free, rb->reqs); - if (fixed_refs) - percpu_ref_put_many(&ctx->file_data->refs, fixed_refs); percpu_ref_put_many(&ctx->refs, rb->to_free); rb->to_free = rb->need_iter = 0; } @@ -5331,7 +5336,8 @@ static int io_file_get(struct io_submit_state *state, struct io_kiocb *req, file = io_file_from_index(ctx, fd); if (!file) return -EBADF; - percpu_ref_get(&ctx->file_data->refs); + req->fixed_file_refs = ctx->file_data->cur_refs; + percpu_ref_get(req->fixed_file_refs); } else { trace_io_uring_file_get(ctx, fd); file = __io_file_get(state, fd); @@ -6124,43 +6130,36 @@ static void io_file_ref_kill(struct percpu_ref *ref) complete(&data->done); } -static void io_file_ref_exit_and_free(struct work_struct *work) -{ - struct 
fixed_file_data *data; - - data = container_of(work, struct fixed_file_data, ref_work); - - /* - * Ensure any percpu-ref atomic switch callback has run, it could have - * been in progress when the files were being unregistered. Once - * that's done, we can safely exit and free the ref and containing - * data structure. - */ - rcu_barrier(); - percpu_ref_exit(&data->refs); - kfree(data); -} - static int io_sqe_files_unregister(struct io_ring_ctx *ctx) { struct fixed_file_data *data = ctx->file_data; + struct fixed_file_ref_node *ref_node = NULL; unsigned nr_tables, i; + unsigned long flags; if (!data) return -ENXIO; - percpu_ref_kill_and_confirm(&data->refs, io_file_ref_kill); - flush_work(&data->ref_work); + spin_lock_irqsave(&data->lock, flags); + if (!list_empty(&data->ref_list)) + ref_node = list_first_entry(&data->ref_list, + struct fixed_file_ref_node, node); + spin_unlock_irqrestore(&data->lock, flags); + if (ref_node) + percpu_ref_kill(&ref_node->refs); + + percpu_ref_kill(&data->refs); + + /* wait for all refs nodes to complete */ wait_for_completion(&data->done); - io_ring_file_ref_flush(data); __io_sqe_files_unregister(ctx); nr_tables = DIV_ROUND_UP(ctx->nr_user_files, IORING_MAX_FILES_TABLE); for (i = 0; i < nr_tables; i++) kfree(data->table[i].files); kfree(data->table); - INIT_WORK(&data->ref_work, io_file_ref_exit_and_free); - queue_work(system_wq, &data->ref_work); + percpu_ref_exit(&data->refs); + kfree(data); ctx->file_data = NULL; ctx->nr_user_files = 0; return 0; @@ -6385,46 +6384,72 @@ static void io_ring_file_put(struct io_ring_ctx *ctx, struct file *file) } struct io_file_put { - struct llist_node llist; + struct list_head list; struct file *file; }; -static void io_ring_file_ref_flush(struct fixed_file_data *data) +static void io_file_put_work(struct work_struct *work) { + struct fixed_file_ref_node *ref_node; + struct fixed_file_data *file_data; + struct io_ring_ctx *ctx; struct io_file_put *pfile, *tmp; - struct llist_node *node; + unsigned 
long flags; - while ((node = llist_del_all(&data->put_llist)) != NULL) { - llist_for_each_entry_safe(pfile, tmp, node, llist) { - io_ring_file_put(data->ctx, pfile->file); - kfree(pfile); - } + ref_node = container_of(work, struct fixed_file_ref_node, work); + file_data = ref_node->file_data; + ctx = file_data->ctx; + + list_for_each_entry_safe(pfile, tmp, &ref_node->file_list, list) { + list_del_init(&pfile->list); + io_ring_file_put(ctx, pfile->file); + kfree(pfile); } -} -static void io_ring_file_ref_switch(struct work_struct *work) -{ - struct fixed_file_data *data; + spin_lock_irqsave(&file_data->lock, flags); + list_del_init(&ref_node->node); + spin_unlock_irqrestore(&file_data->lock, flags); - data = container_of(work, struct fixed_file_data, ref_work); - io_ring_file_ref_flush(data); - percpu_ref_switch_to_percpu(&data->refs); + percpu_ref_exit(&ref_node->refs); + kfree(ref_node); + percpu_ref_put(&file_data->refs); } static void io_file_data_ref_zero(struct percpu_ref *ref) { - struct fixed_file_data *data; + struct fixed_file_ref_node *ref_node; - data = container_of(ref, struct fixed_file_data, refs); + ref_node = container_of(ref, struct fixed_file_ref_node, refs); - /* - * We can't safely switch from inside this context, punt to wq. If - * the table ref is going away, the table is being unregistered. - * Don't queue up the async work for that case, the caller will - * handle it. 
- */ - if (!percpu_ref_is_dying(&data->refs)) - queue_work(system_wq, &data->ref_work); + queue_work(system_wq, &ref_node->work); +} + +static struct fixed_file_ref_node *alloc_fixed_file_ref_node( + struct io_ring_ctx *ctx) +{ + struct fixed_file_ref_node *ref_node; + + ref_node = kzalloc(sizeof(*ref_node), GFP_KERNEL); + if (!ref_node) + return ERR_PTR(-ENOMEM); + + if (percpu_ref_init(&ref_node->refs, io_file_data_ref_zero, + 0, GFP_KERNEL)) { + kfree(ref_node); + return ERR_PTR(-ENOMEM); + } + INIT_LIST_HEAD(&ref_node->node); + INIT_LIST_HEAD(&ref_node->file_list); + INIT_WORK(&ref_node->work, io_file_put_work); + ref_node->file_data = ctx->file_data; + return ref_node; + +} + +static void destroy_fixed_file_ref_node(struct fixed_file_ref_node *ref_node) +{ + percpu_ref_exit(&ref_node->refs); + kfree(ref_node); } static int io_sqe_files_register(struct io_ring_ctx *ctx, void __user *arg, @@ -6435,6 +6460,8 @@ static int io_sqe_files_register(struct io_ring_ctx *ctx, void __user *arg, struct file *file; int fd, ret = 0; unsigned i; + struct fixed_file_ref_node *ref_node; + unsigned long flags; if (ctx->file_data) return -EBUSY; @@ -6448,6 +6475,7 @@ static int io_sqe_files_register(struct io_ring_ctx *ctx, void __user *arg, return -ENOMEM; ctx->file_data->ctx = ctx; init_completion(&ctx->file_data->done); + INIT_LIST_HEAD(&ctx->file_data->ref_list); nr_tables = DIV_ROUND_UP(nr_args, IORING_MAX_FILES_TABLE); ctx->file_data->table = kcalloc(nr_tables, @@ -6459,15 +6487,13 @@ static int io_sqe_files_register(struct io_ring_ctx *ctx, void __user *arg, return -ENOMEM; } - if (percpu_ref_init(&ctx->file_data->refs, io_file_data_ref_zero, + if (percpu_ref_init(&ctx->file_data->refs, io_file_ref_kill, PERCPU_REF_ALLOW_REINIT, GFP_KERNEL)) { kfree(ctx->file_data->table); kfree(ctx->file_data); ctx->file_data = NULL; return -ENOMEM; } - ctx->file_data->put_llist.first = NULL; - INIT_WORK(&ctx->file_data->ref_work, io_ring_file_ref_switch); if 
(io_sqe_alloc_file_tables(ctx, nr_tables, nr_args)) { percpu_ref_exit(&ctx->file_data->refs); @@ -6530,9 +6556,22 @@ static int io_sqe_files_register(struct io_ring_ctx *ctx, void __user *arg, } ret = io_sqe_files_scm(ctx); - if (ret) + if (ret) { io_sqe_files_unregister(ctx); + return ret; + } + ref_node = alloc_fixed_file_ref_node(ctx); + if (IS_ERR(ref_node)) { + io_sqe_files_unregister(ctx); + return PTR_ERR(ref_node); + } + + ctx->file_data->cur_refs = &ref_node->refs; + spin_lock_irqsave(&ctx->file_data->lock, flags); + list_add(&ref_node->node, &ctx->file_data->ref_list); + spin_unlock_irqrestore(&ctx->file_data->lock, flags); + percpu_ref_get(&ctx->file_data->refs); return ret; } @@ -6579,30 +6618,21 @@ static int io_sqe_file_register(struct io_ring_ctx *ctx, struct file *file, #endif } -static void io_atomic_switch(struct percpu_ref *ref) -{ - struct fixed_file_data *data; - - /* - * Juggle reference to ensure we hit zero, if needed, so we can - * switch back to percpu mode - */ - data = container_of(ref, struct fixed_file_data, refs); - percpu_ref_put(&data->refs); - percpu_ref_get(&data->refs); -} - static int io_queue_file_removal(struct fixed_file_data *data, - struct file *file) + struct file *file) { struct io_file_put *pfile; + struct percpu_ref *refs = data->cur_refs; + struct fixed_file_ref_node *ref_node; pfile = kzalloc(sizeof(*pfile), GFP_KERNEL); if (!pfile) return -ENOMEM; + ref_node = container_of(refs, struct fixed_file_ref_node, refs); pfile->file = file; - llist_add(&pfile->llist, &data->put_llist); + list_add(&pfile->list, &ref_node->file_list); + return 0; } @@ -6611,17 +6641,23 @@ static int __io_sqe_files_update(struct io_ring_ctx *ctx, unsigned nr_args) { struct fixed_file_data *data = ctx->file_data; - bool ref_switch = false; + struct fixed_file_ref_node *ref_node; struct file *file; __s32 __user *fds; int fd, i, err; __u32 done; + unsigned long flags; + bool needs_switch = false; if (check_add_overflow(up->offset, nr_args, &done)) 
return -EOVERFLOW; if (done > ctx->nr_user_files) return -EINVAL; + ref_node = alloc_fixed_file_ref_node(ctx); + if (IS_ERR(ref_node)) + return PTR_ERR(ref_node); + done = 0; fds = u64_to_user_ptr(up->fds); while (nr_args) { @@ -6642,7 +6678,7 @@ static int __io_sqe_files_update(struct io_ring_ctx *ctx, if (err) break; table->files[index] = NULL; - ref_switch = true; + needs_switch = true; } if (fd != -1) { file = fget(fd); @@ -6673,11 +6709,19 @@ static int __io_sqe_files_update(struct io_ring_ctx *ctx, up->offset++; } - if (ref_switch) - percpu_ref_switch_to_atomic(&data->refs, io_atomic_switch); + if (needs_switch) { + percpu_ref_kill(data->cur_refs); + spin_lock_irqsave(&data->lock, flags); + list_add(&ref_node->node, &data->ref_list); + data->cur_refs = &ref_node->refs; + spin_unlock_irqrestore(&data->lock, flags); + percpu_ref_get(&ctx->file_data->refs); + } else + destroy_fixed_file_ref_node(ref_node); return done ? done : err; } + static int io_sqe_files_update(struct io_ring_ctx *ctx, void __user *arg, unsigned nr_args) { From c6425702f21e68d7c8c293b6bfaa5a389076efe5 Mon Sep 17 00:00:00 2001 From: "Darrick J. Wong" Date: Fri, 27 Mar 2020 08:49:44 -0700 Subject: [PATCH 095/280] xfs: ratelimit inode flush on buffered write ENOSPC A customer reported rcu stalls and softlockup warnings on a computer with many CPU cores and many many more IO threads trying to write to a filesystem that is totally out of space. Subsequent analysis pointed to the many many IO threads calling xfs_flush_inodes -> sync_inodes_sb, which causes a lot of wb_writeback_work to be queued. The writeback worker spends so much time trying to wake the many many threads waiting for writeback completion that it trips the softlockup detector, and (in this case) the system automatically reboots. 
In addition, they complain that the lengthy xfs_flush_inodes scan traps all of those threads in uninterruptible sleep, which hampers their ability to kill the program or do anything else to escape the situation. If there's thousands of threads trying to write to files on a full filesystem, each of those threads will start separate copies of the inode flush scan. This is kind of pointless since we only need one scan, so rate limit the inode flush. Signed-off-by: Darrick J. Wong Reviewed-by: Dave Chinner --- fs/xfs/xfs_mount.h | 1 + fs/xfs/xfs_super.c | 14 ++++++++++++++ 2 files changed, 15 insertions(+) diff --git a/fs/xfs/xfs_mount.h b/fs/xfs/xfs_mount.h index 88ab09ed29e7..50c43422fa17 100644 --- a/fs/xfs/xfs_mount.h +++ b/fs/xfs/xfs_mount.h @@ -167,6 +167,7 @@ typedef struct xfs_mount { struct xfs_kobj m_error_meta_kobj; struct xfs_error_cfg m_error_cfg[XFS_ERR_CLASS_MAX][XFS_ERR_ERRNO_MAX]; struct xstats m_stats; /* per-fs stats */ + struct ratelimit_state m_flush_inodes_ratelimit; struct workqueue_struct *m_buf_workqueue; struct workqueue_struct *m_unwritten_workqueue; diff --git a/fs/xfs/xfs_super.c b/fs/xfs/xfs_super.c index 68fea439d974..abf06bf9c3f3 100644 --- a/fs/xfs/xfs_super.c +++ b/fs/xfs/xfs_super.c @@ -528,6 +528,9 @@ xfs_flush_inodes( { struct super_block *sb = mp->m_super; + if (!__ratelimit(&mp->m_flush_inodes_ratelimit)) + return; + if (down_read_trylock(&sb->s_umount)) { sync_inodes_sb(sb); up_read(&sb->s_umount); @@ -1366,6 +1369,17 @@ xfs_fc_fill_super( if (error) goto out_free_names; + /* + * Cap the number of invocations of xfs_flush_inodes to 16 for every + * quarter of a second. The magic numbers here were determined by + * observation neither to cause stalls in writeback when there are a + * lot of IO threads and the fs is near ENOSPC, nor cause any fstest + * regressions. YMMV. 
+ */ + ratelimit_state_init(&mp->m_flush_inodes_ratelimit, HZ / 4, 16); + ratelimit_set_flags(&mp->m_flush_inodes_ratelimit, + RATELIMIT_MSG_ON_RELEASE); + error = xfs_init_mount_workqueues(mp); if (error) goto out_close_devices; From d8fcb6f1346c36316ccb20f887081299a61bbcc8 Mon Sep 17 00:00:00 2001 From: Kaixu Xia Date: Sun, 29 Mar 2020 09:45:19 -0700 Subject: [PATCH 096/280] xfs: remove redundant variable assignment in xfs_symlink() The variables 'udqp' and 'gdqp' have been initialized, so remove redundant variable assignment in xfs_symlink(). Signed-off-by: Kaixu Xia Reviewed-by: Chaitanya Kulkarni Reviewed-by: Dave Chinner Signed-off-by: Darrick J. Wong --- fs/xfs/xfs_symlink.c | 1 - 1 file changed, 1 deletion(-) diff --git a/fs/xfs/xfs_symlink.c b/fs/xfs/xfs_symlink.c index fa0fa3c70f1a..13fb4b919648 100644 --- a/fs/xfs/xfs_symlink.c +++ b/fs/xfs/xfs_symlink.c @@ -176,7 +176,6 @@ xfs_symlink( return -ENAMETOOLONG; ASSERT(pathlen > 0); - udqp = gdqp = NULL; prid = xfs_get_initial_prid(dp); /* From 25e5cb780e62bde432b401f312bb847edc78b432 Mon Sep 17 00:00:00 2001 From: Sagi Grimberg Date: Mon, 23 Mar 2020 15:06:30 -0700 Subject: [PATCH 097/280] nvme-tcp: fix possible crash in write_zeroes processing We cannot look at blk_rq_payload_bytes without first checking that the request has mappable physical segments (e.g. blk_rq_nr_phys_segments(rq) != 0); only then may we take the request payload bytes. This caused us to send a wrong sgl to the target or even dereference a non-existing buffer in case we actually got to the data send sequence (if it was in-capsule).
Reported-by: Tony Asleson Suggested-by: Chaitanya Kulkarni Signed-off-by: Sagi Grimberg Signed-off-by: Keith Busch Signed-off-by: Christoph Hellwig --- drivers/nvme/host/tcp.c | 13 +++++++------ 1 file changed, 7 insertions(+), 6 deletions(-) diff --git a/drivers/nvme/host/tcp.c b/drivers/nvme/host/tcp.c index 0ef14f0fad86..aa754ae3ca08 100644 --- a/drivers/nvme/host/tcp.c +++ b/drivers/nvme/host/tcp.c @@ -174,16 +174,14 @@ static inline bool nvme_tcp_async_req(struct nvme_tcp_request *req) static inline bool nvme_tcp_has_inline_data(struct nvme_tcp_request *req) { struct request *rq; - unsigned int bytes; if (unlikely(nvme_tcp_async_req(req))) return false; /* async events don't have a request */ rq = blk_mq_rq_from_pdu(req); - bytes = blk_rq_payload_bytes(rq); - return rq_data_dir(rq) == WRITE && bytes && - bytes <= nvme_tcp_inline_data_size(req->queue); + return rq_data_dir(rq) == WRITE && req->data_len && + req->data_len <= nvme_tcp_inline_data_size(req->queue); } static inline struct page *nvme_tcp_req_cur_page(struct nvme_tcp_request *req) @@ -2164,7 +2162,9 @@ static blk_status_t nvme_tcp_map_data(struct nvme_tcp_queue *queue, c->common.flags |= NVME_CMD_SGL_METABUF; - if (rq_data_dir(rq) == WRITE && req->data_len && + if (!blk_rq_nr_phys_segments(rq)) + nvme_tcp_set_sg_null(c); + else if (rq_data_dir(rq) == WRITE && req->data_len <= nvme_tcp_inline_data_size(queue)) nvme_tcp_set_sg_inline(queue, c, req->data_len); else @@ -2191,7 +2191,8 @@ static blk_status_t nvme_tcp_setup_cmd_pdu(struct nvme_ns *ns, req->data_sent = 0; req->pdu_len = 0; req->pdu_sent = 0; - req->data_len = blk_rq_payload_bytes(rq); + req->data_len = blk_rq_nr_phys_segments(rq) ? 
+ blk_rq_payload_bytes(rq) : 0; req->curr_bio = rq->bio; if (rq_data_dir(rq) == WRITE && From f86e5bf817a57c7e6538dafee2fc65a525bb9935 Mon Sep 17 00:00:00 2001 From: Sagi Grimberg Date: Mon, 23 Mar 2020 16:43:52 -0700 Subject: [PATCH 098/280] nvme-tcp: don't poll a non-live queue In error recovery we might be removing the queue so check we can actually poll before we do. Reported-by: Mark Wunderlich Tested-by: Mark Wunderlich Signed-off-by: Sagi Grimberg Signed-off-by: Keith Busch Signed-off-by: Christoph Hellwig --- drivers/nvme/host/tcp.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/nvme/host/tcp.c b/drivers/nvme/host/tcp.c index aa754ae3ca08..eb31c689d2cf 100644 --- a/drivers/nvme/host/tcp.c +++ b/drivers/nvme/host/tcp.c @@ -2299,6 +2299,9 @@ static int nvme_tcp_poll(struct blk_mq_hw_ctx *hctx) struct nvme_tcp_queue *queue = hctx->driver_data; struct sock *sk = queue->sock->sk; + if (!test_bit(NVME_TCP_Q_LIVE, &queue->flags)) + return 0; + if (sk_can_busy_loop(sk) && skb_queue_empty_lockless(&sk->sk_receive_queue)) sk_busy_loop(sk, true); nvme_tcp_try_recv(queue); From 7f2430cda819a9ecb1df5a0f3ef4f1c20db3f811 Mon Sep 17 00:00:00 2001 From: Stephan Gerhold Date: Mon, 30 Mar 2020 19:52:10 +0200 Subject: [PATCH 099/280] ASoC: qcom: q6asm-dai: Add SNDRV_PCM_INFO_BATCH flag At the moment, playing audio with PulseAudio with the qdsp6 driver results in distorted sound. It seems like its timer-based scheduling does not work properly with qdsp6 since setting tsched=0 in the PulseAudio configuration avoids the issue. Apparently this happens when the pointer() callback is not accurate enough. There is a SNDRV_PCM_INFO_BATCH flag that can be used to stop PulseAudio from using timer-based scheduling by default. 
According to https://www.alsa-project.org/pipermail/alsa-devel/2014-March/073816.html: The flag is being used in the sense explained in the previous audio meeting -- the data transfer granularity isn't fine enough but aligned to the period size (or less). q6asm-dai reports the position as a multiple of prtd->pcm_count = snd_pcm_lib_period_bytes(substream) so it is indeed just a multiple of the period size. Therefore adding the flag here seems appropriate and makes audio work out of the box. Fixes: 2a9e92d371db ("ASoC: qdsp6: q6asm: Add q6asm dai driver") Signed-off-by: Stephan Gerhold Reviewed-by: Srinivas Kandagatla Cc: Srinivas Kandagatla Link: https://lore.kernel.org/r/20200330175210.47518-1-stephan@gerhold.net Signed-off-by: Mark Brown --- sound/soc/qcom/qdsp6/q6asm-dai.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/sound/soc/qcom/qdsp6/q6asm-dai.c b/sound/soc/qcom/qdsp6/q6asm-dai.c index f6c7cddf08e8..125af00bba53 100644 --- a/sound/soc/qcom/qdsp6/q6asm-dai.c +++ b/sound/soc/qcom/qdsp6/q6asm-dai.c @@ -78,7 +78,7 @@ struct q6asm_dai_data { }; static const struct snd_pcm_hardware q6asm_dai_hardware_capture = { - .info = (SNDRV_PCM_INFO_MMAP | + .info = (SNDRV_PCM_INFO_MMAP | SNDRV_PCM_INFO_BATCH | SNDRV_PCM_INFO_BLOCK_TRANSFER | SNDRV_PCM_INFO_MMAP_VALID | SNDRV_PCM_INFO_INTERLEAVED | @@ -100,7 +100,7 @@ static const struct snd_pcm_hardware q6asm_dai_hardware_capture = { }; static struct snd_pcm_hardware q6asm_dai_hardware_playback = { - .info = (SNDRV_PCM_INFO_MMAP | + .info = (SNDRV_PCM_INFO_MMAP | SNDRV_PCM_INFO_BATCH | SNDRV_PCM_INFO_BLOCK_TRANSFER | SNDRV_PCM_INFO_MMAP_VALID | SNDRV_PCM_INFO_INTERLEAVED | From 3bbbb7728fc853d71dbce4073fef9f281fbfb4dd Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=EC=9D=B4=EA=B2=BD=ED=83=9D?= Date: Tue, 31 Mar 2020 16:55:16 +0900 Subject: [PATCH 100/280] ASoC: dapm: connect virtual mux with default value Since a virtual mixer has no backing registers to decide which path to connect, it will try to match with
initial state. This is to ensure that the default mixer choice will be correctly powered up during initialization. Invert flag is used to select initial state of the virtual switch. Since actual hardware can't be disconnected by virtual switch, connected is better choice as initial state in many cases. Signed-off-by: Gyeongtaek Lee Link: https://lore.kernel.org/r/01a301d60731$b724ea10$256ebe30$@samsung.com Signed-off-by: Mark Brown --- sound/soc/soc-dapm.c | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/sound/soc/soc-dapm.c b/sound/soc/soc-dapm.c index 04da7928c873..679ed60d850e 100644 --- a/sound/soc/soc-dapm.c +++ b/sound/soc/soc-dapm.c @@ -802,7 +802,13 @@ static void dapm_set_mixer_path_status(struct snd_soc_dapm_path *p, int i, val = max - val; p->connect = !!val; } else { - p->connect = 0; + /* since a virtual mixer has no backing registers to + * decide which path to connect, it will try to match + * with initial state. This is to ensure + * that the default mixer choice will be + * correctly powered up during initialization. + */ + p->connect = invert; } } From 13e60d3ba287d96eeaf1deaadba51f71578119a3 Mon Sep 17 00:00:00 2001 From: Wu Bo Date: Tue, 24 Mar 2020 15:58:50 +0800 Subject: [PATCH 101/280] scsi: iscsi: Report unbind session event when the target has been removed If the daemon is restarted or crashes while logging out of a session, the unbind session event sent by the kernel is not processed and is lost. When the daemon starts again, the session can't be unbound because the daemon is waiting for the event message. However, the kernel has already logged out and the event will not be resent. When iscsid restart is complete, logout session reports error: Logging out of session [sid: 6, target: iqn.xxxxx, portal: xx.xx.xx.xx,3260] iscsiadm: Could not logout of [sid: 6, target: iscsiadm -m node iqn.xxxxx, portal: xx.xx.xx.xx,3260]. 
iscsiadm: initiator reported error (9 - internal error) iscsiadm: Could not logout of all requested sessions Make sure the unbind event is emitted. [mkp: commit desc and applied by hand since patch was mangled] Link: https://lore.kernel.org/r/4eab1771-2cb3-8e79-b31c-923652340e99@huawei.com Reviewed-by: Lee Duncan Signed-off-by: Wu Bo Signed-off-by: Martin K. Petersen --- drivers/scsi/scsi_transport_iscsi.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/drivers/scsi/scsi_transport_iscsi.c b/drivers/scsi/scsi_transport_iscsi.c index 0ec1b31c75a9..b2a803c51288 100644 --- a/drivers/scsi/scsi_transport_iscsi.c +++ b/drivers/scsi/scsi_transport_iscsi.c @@ -2022,7 +2022,7 @@ static void __iscsi_unbind_session(struct work_struct *work) if (session->target_id == ISCSI_MAX_TARGET) { spin_unlock_irqrestore(&session->lock, flags); mutex_unlock(&ihost->mutex); - return; + goto unbind_session_exit; } target_id = session->target_id; @@ -2034,6 +2034,8 @@ static void __iscsi_unbind_session(struct work_struct *work) ida_simple_remove(&iscsi_sess_ida, target_id); scsi_remove_target(&session->dev); + +unbind_session_exit: iscsi_session_event(session, ISCSI_KEVENT_UNBIND_SESSION); ISCSI_DBG_TRANS_SESSION(session, "Completed target removal\n"); } From 1764fa2ab97ade8de7269eb34f2740c54e38fc4a Mon Sep 17 00:00:00 2001 From: Stanley Chu Date: Fri, 27 Mar 2020 17:58:35 +0800 Subject: [PATCH 102/280] scsi: ufs: set device as active power mode after resetting device Currently ufshcd driver assumes that bInitPowerMode parameter is not changed by any vendors thus device power mode can be set as "Active" during initialization. According to UFS JEDEC specification, device power mode shall be "Active" after HW Reset is triggered if the bInitPowerMode parameter in Device Descriptor is default value. By above description, we can set device power mode as "Active" after device reset is triggered by vendor's callback. 
With this change, the link startup performance can be improved in some cases by not setting link_startup_again as true in ufshcd_link_startup(). Link: https://lore.kernel.org/r/20200327095835.10293-1-stanley.chu@mediatek.com Reviewed-by: Can Guo Reviewed-by: Asutosh Das Signed-off-by: Stanley Chu Signed-off-by: Martin K. Petersen --- drivers/scsi/ufs/ufshcd.c | 13 ------------- drivers/scsi/ufs/ufshcd.h | 14 ++++++++++++++ 2 files changed, 14 insertions(+), 13 deletions(-) diff --git a/drivers/scsi/ufs/ufshcd.c b/drivers/scsi/ufs/ufshcd.c index 8d1c70ac44b8..ad61516fc64f 100644 --- a/drivers/scsi/ufs/ufshcd.c +++ b/drivers/scsi/ufs/ufshcd.c @@ -172,19 +172,6 @@ enum { #define ufshcd_clear_eh_in_progress(h) \ ((h)->eh_flags &= ~UFSHCD_EH_IN_PROGRESS) -#define ufshcd_set_ufs_dev_active(h) \ - ((h)->curr_dev_pwr_mode = UFS_ACTIVE_PWR_MODE) -#define ufshcd_set_ufs_dev_sleep(h) \ - ((h)->curr_dev_pwr_mode = UFS_SLEEP_PWR_MODE) -#define ufshcd_set_ufs_dev_poweroff(h) \ - ((h)->curr_dev_pwr_mode = UFS_POWERDOWN_PWR_MODE) -#define ufshcd_is_ufs_dev_active(h) \ - ((h)->curr_dev_pwr_mode == UFS_ACTIVE_PWR_MODE) -#define ufshcd_is_ufs_dev_sleep(h) \ - ((h)->curr_dev_pwr_mode == UFS_SLEEP_PWR_MODE) -#define ufshcd_is_ufs_dev_poweroff(h) \ - ((h)->curr_dev_pwr_mode == UFS_POWERDOWN_PWR_MODE) - struct ufs_pm_lvl_states ufs_pm_lvl_states[] = { {UFS_ACTIVE_PWR_MODE, UIC_LINK_ACTIVE_STATE}, {UFS_ACTIVE_PWR_MODE, UIC_LINK_HIBERN8_STATE}, diff --git a/drivers/scsi/ufs/ufshcd.h b/drivers/scsi/ufs/ufshcd.h index fafc781fead9..6ffc08ad85f6 100644 --- a/drivers/scsi/ufs/ufshcd.h +++ b/drivers/scsi/ufs/ufshcd.h @@ -130,6 +130,19 @@ enum uic_link_state { #define ufshcd_set_link_hibern8(hba) ((hba)->uic_link_state = \ UIC_LINK_HIBERN8_STATE) +#define ufshcd_set_ufs_dev_active(h) \ + ((h)->curr_dev_pwr_mode = UFS_ACTIVE_PWR_MODE) +#define ufshcd_set_ufs_dev_sleep(h) \ + ((h)->curr_dev_pwr_mode = UFS_SLEEP_PWR_MODE) +#define ufshcd_set_ufs_dev_poweroff(h) \ + ((h)->curr_dev_pwr_mode = 
UFS_POWERDOWN_PWR_MODE) +#define ufshcd_is_ufs_dev_active(h) \ + ((h)->curr_dev_pwr_mode == UFS_ACTIVE_PWR_MODE) +#define ufshcd_is_ufs_dev_sleep(h) \ + ((h)->curr_dev_pwr_mode == UFS_SLEEP_PWR_MODE) +#define ufshcd_is_ufs_dev_poweroff(h) \ + ((h)->curr_dev_pwr_mode == UFS_POWERDOWN_PWR_MODE) + /* * UFS Power management levels. * Each level is in increasing order of power savings. @@ -1084,6 +1097,7 @@ static inline void ufshcd_vops_device_reset(struct ufs_hba *hba) { if (hba->vops && hba->vops->device_reset) { hba->vops->device_reset(hba); + ufshcd_set_ufs_dev_active(hba); ufshcd_update_reg_hist(&hba->ufs_stats.dev_reset, 0); } } From cc41f11a21a51d6869d71e525a7264c748d7c0d7 Mon Sep 17 00:00:00 2001 From: Sreekanth Reddy Date: Fri, 27 Mar 2020 05:52:43 -0400 Subject: [PATCH 103/280] scsi: mpt3sas: Fix kernel panic observed on soft HBA unplug Generic protection fault type kernel panic is observed when user performs soft (ordered) HBA unplug operation while IOs are running on drives connected to HBA. When user performs ordered HBA removal operation, the kernel calls PCI device's .remove() call back function where driver is flushing out all the outstanding SCSI IO commands with DID_NO_CONNECT host byte and also unmaps sg buffers allocated for these IO commands. However, in the ordered HBA removal case (unlike of real HBA hot removal), HBA device is still alive and hence HBA hardware is performing the DMA operations to those buffers on the system memory which are already unmapped while flushing out the outstanding SCSI IO commands and this leads to kernel panic. Don't flush out the outstanding IOs from .remove() path in case of ordered removal since HBA will be still alive in this case and it can complete the outstanding IOs. Flush out the outstanding IOs only in case of 'physical HBA hot unplug' where there won't be any communication with the HBA. 
During shutdown also it is possible that HBA hardware can perform DMA operations on those outstanding IO buffers which are completed with DID_NO_CONNECT by the driver from .shutdown(). So same above fix is applied in shutdown path as well. It is safe to drop the outstanding commands when HBA is inaccessible such as when permanent PCI failure happens, when HBA is in non-operational state, or when someone does a real HBA hot unplug operation. Since driver knows that HBA is inaccessible during these cases, it is safe to drop the outstanding commands instead of waiting for SCSI error recovery to kick in and clear these outstanding commands. Link: https://lore.kernel.org/r/1585302763-23007-1-git-send-email-sreekanth.reddy@broadcom.com Fixes: c666d3be99c0 ("scsi: mpt3sas: wait for and flush running commands on shutdown/unload") Cc: stable@vger.kernel.org #v4.14.174+ Signed-off-by: Sreekanth Reddy Signed-off-by: Martin K. Petersen --- drivers/scsi/mpt3sas/mpt3sas_scsih.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/drivers/scsi/mpt3sas/mpt3sas_scsih.c b/drivers/scsi/mpt3sas/mpt3sas_scsih.c index 778d5e6ce385..04a40afe60e3 100644 --- a/drivers/scsi/mpt3sas/mpt3sas_scsih.c +++ b/drivers/scsi/mpt3sas/mpt3sas_scsih.c @@ -9908,8 +9908,8 @@ static void scsih_remove(struct pci_dev *pdev) ioc->remove_host = 1; - mpt3sas_wait_for_commands_to_complete(ioc); - _scsih_flush_running_cmds(ioc); + if (!pci_device_is_present(pdev)) + _scsih_flush_running_cmds(ioc); _scsih_fw_event_cleanup_queue(ioc); @@ -9992,8 +9992,8 @@ scsih_shutdown(struct pci_dev *pdev) ioc->remove_host = 1; - mpt3sas_wait_for_commands_to_complete(ioc); - _scsih_flush_running_cmds(ioc); + if (!pci_device_is_present(pdev)) + _scsih_flush_running_cmds(ioc); _scsih_fw_event_cleanup_queue(ioc); From 0d2b59515245b5a231f97bce66e601b110baa52a Mon Sep 17 00:00:00 2001 From: Alex Dewar Date: Fri, 27 Mar 2020 19:11:02 +0000 Subject: [PATCH 104/280] scsi: aic7xxx: Remove more FreeBSD-specific code 
Remove additional code for FreeBSD in aic7xxx_core.c, which is unneeded since commit cca6cb8ad7a8 ("scsi: aic7xxx: Fix build using bare-metal toolchain"). Link: https://lore.kernel.org/r/20200327191102.78554-1-alex.dewar@gmx.co.uk Suggested-by: Martin Petersen Signed-off-by: Alex Dewar Signed-off-by: Martin K. Petersen --- drivers/scsi/aic7xxx/aic7xxx_core.c | 23 ----------------------- 1 file changed, 23 deletions(-) diff --git a/drivers/scsi/aic7xxx/aic7xxx_core.c b/drivers/scsi/aic7xxx/aic7xxx_core.c index 4190a025381a..84fc499cb1e6 100644 --- a/drivers/scsi/aic7xxx/aic7xxx_core.c +++ b/drivers/scsi/aic7xxx/aic7xxx_core.c @@ -1834,21 +1834,6 @@ ahc_handle_scsiint(struct ahc_softc *ahc, u_int intstat) printerror = 0; } else if (ahc_sent_msg(ahc, AHCMSG_1B, MSG_BUS_DEV_RESET, TRUE)) { -#ifdef __FreeBSD__ - /* - * Don't mark the user's request for this BDR - * as completing with CAM_BDR_SENT. CAM3 - * specifies CAM_REQ_CMP. - */ - if (scb != NULL - && scb->io_ctx->ccb_h.func_code== XPT_RESET_DEV - && ahc_match_scb(ahc, scb, target, channel, - CAM_LUN_WILDCARD, - SCB_LIST_NULL, - ROLE_INITIATOR)) { - ahc_set_transaction_status(scb, CAM_REQ_CMP); - } -#endif ahc_compile_devinfo(&devinfo, initiator_role_id, target, @@ -4399,22 +4384,16 @@ ahc_alloc(void *platform_arg, char *name) struct ahc_softc *ahc; int i; -#ifndef __FreeBSD__ ahc = kmalloc(sizeof(*ahc), GFP_ATOMIC); if (!ahc) { printk("aic7xxx: cannot malloc softc!\n"); kfree(name); return NULL; } -#else - ahc = device_get_softc((device_t)platform_arg); -#endif memset(ahc, 0, sizeof(*ahc)); ahc->seep_config = kmalloc(sizeof(*ahc->seep_config), GFP_ATOMIC); if (ahc->seep_config == NULL) { -#ifndef __FreeBSD__ kfree(ahc); -#endif kfree(name); return (NULL); } @@ -4540,9 +4519,7 @@ ahc_free(struct ahc_softc *ahc) kfree(ahc->name); if (ahc->seep_config != NULL) kfree(ahc->seep_config); -#ifndef __FreeBSD__ kfree(ahc); -#endif return; } From 72655c0ebd1d941d80f47bf614b02d563a1e61ae Mon Sep 17 00:00:00 2001 From: Bart 
Van Assche Date: Sun, 29 Mar 2020 19:51:51 -0700 Subject: [PATCH 105/280] scsi: sr: Fix sr_block_release() This patch fixes the following two complaints: WARNING: CPU: 3 PID: 1326 at kernel/locking/mutex-debug.c:103 mutex_destroy+0x74/0x80 Modules linked in: scsi_debug sd_mod t10_pi brd scsi_transport_iscsi af_packet crct10dif_pclmul sg aesni_intel glue_helper virtio_balloon button crypto_simd cryptd intel_agp intel_gtt agpgart ip_tables x_tables ipv6 nf_defrag_ipv6 autofs4 ext4 crc16 mbcache jbd2 hid_generic usbhid hid sr_mod cdrom ata_generic pata_acpi virtio_blk virtio_net net_failover failover ata_piix xhci_pci ahci libahci xhci_hcd i2c_piix4 libata virtio_pci usbcore i2c_core virtio_ring scsi_mod usb_common virtio [last unloaded: scsi_debug] CPU: 3 PID: 1326 Comm: systemd-udevd Not tainted 5.6.0-rc1-dbg+ #1 Hardware name: Bochs Bochs, BIOS Bochs 01/01/2011 RIP: 0010:mutex_destroy+0x74/0x80 Call Trace: sr_kref_release+0xb9/0xd0 [sr_mod] scsi_cd_put+0x79/0x90 [sr_mod] sr_block_release+0x54/0x70 [sr_mod] __blkdev_put+0x2ce/0x3c0 blkdev_put+0x68/0x220 blkdev_close+0x4d/0x60 __fput+0x170/0x3b0 ____fput+0x12/0x20 task_work_run+0xa2/0xf0 exit_to_usermode_loop+0xeb/0xf0 do_syscall_64+0x2be/0x300 entry_SYSCALL_64_after_hwframe+0x49/0xbe RIP: 0033:0x7fa16d40aab7 BUG: KASAN: use-after-free in __mutex_unlock_slowpath+0x98/0x420 Read of size 8 at addr ffff8881c6e4f4b0 by task systemd-udevd/1326 CPU: 3 PID: 1326 Comm: systemd-udevd Tainted: G W 5.6.0-rc1-dbg+ #1 Hardware name: Bochs Bochs, BIOS Bochs 01/01/2011 Call Trace: dump_stack+0xa5/0xe6 print_address_description.constprop.0+0x46/0x60 __kasan_report.cold+0x7b/0x94 kasan_report+0x16/0x20 check_memory_region+0x140/0x1b0 __kasan_check_read+0x15/0x20 __mutex_unlock_slowpath+0x98/0x420 mutex_unlock+0x16/0x20 sr_block_release+0x5c/0x70 [sr_mod] __blkdev_put+0x2ce/0x3c0 hardirqs last enabled at (1875522): [] _raw_spin_unlock_irqrestore+0x56/0x70 blkdev_put+0x68/0x220 blkdev_close+0x4d/0x60 __fput+0x170/0x3b0 
____fput+0x12/0x20 task_work_run+0xa2/0xf0 exit_to_usermode_loop+0xeb/0xf0 do_syscall_64+0x2be/0x300 entry_SYSCALL_64_after_hwframe+0x49/0xbe RIP: 0033:0x7fa16d40aab7 Allocated by task 3201: save_stack+0x23/0x90 __kasan_kmalloc.constprop.0+0xcf/0xe0 kasan_kmalloc+0xd/0x10 kmem_cache_alloc_trace+0x161/0x3c0 sr_probe+0x12f/0xb60 [sr_mod] really_probe+0x183/0x5d0 driver_probe_device+0x13f/0x1a0 __device_attach_driver+0xe6/0x150 bus_for_each_drv+0x101/0x160 __device_attach+0x183/0x230 device_initial_probe+0x17/0x20 bus_probe_device+0x110/0x130 device_add+0xb7b/0xd40 scsi_sysfs_add_sdev+0xe8/0x360 [scsi_mod] scsi_probe_and_add_lun+0xdc4/0x14c0 [scsi_mod] __scsi_scan_target+0x12d/0x850 [scsi_mod] scsi_scan_channel+0xcd/0xe0 [scsi_mod] scsi_scan_host_selected+0x182/0x190 [scsi_mod] store_scan+0x1e9/0x200 [scsi_mod] dev_attr_store+0x42/0x60 sysfs_kf_write+0x8b/0xb0 kernfs_fop_write+0x158/0x250 __vfs_write+0x4c/0x90 vfs_write+0x145/0x2c0 ksys_write+0xd7/0x180 __x64_sys_write+0x47/0x50 do_syscall_64+0x6f/0x300 entry_SYSCALL_64_after_hwframe+0x49/0xbe Freed by task 1326: save_stack+0x23/0x90 __kasan_slab_free+0x13a/0x190 kasan_slab_free+0x12/0x20 kfree+0x109/0x410 sr_kref_release+0xc1/0xd0 [sr_mod] scsi_cd_put+0x79/0x90 [sr_mod] sr_block_release+0x54/0x70 [sr_mod] __blkdev_put+0x2ce/0x3c0 blkdev_put+0x68/0x220 blkdev_close+0x4d/0x60 __fput+0x170/0x3b0 ____fput+0x12/0x20 task_work_run+0xa2/0xf0 exit_to_usermode_loop+0xeb/0xf0 do_syscall_64+0x2be/0x300 entry_SYSCALL_64_after_hwframe+0x49/0xbe Link: https://lore.kernel.org/r/20200330025151.10535-1-bvanassche@acm.org Fixes: 51a858817dcd ("scsi: sr: get rid of sr global mutex") Cc: Merlijn Wajer Cc: Arnd Bergmann Cc: Acked-by: Merlijn Wajer Signed-off-by: Bart Van Assche Signed-off-by: Martin K. 
Petersen --- drivers/scsi/sr.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/drivers/scsi/sr.c b/drivers/scsi/sr.c index fe0e1c721a99..2483100dc144 100644 --- a/drivers/scsi/sr.c +++ b/drivers/scsi/sr.c @@ -550,10 +550,12 @@ static int sr_block_open(struct block_device *bdev, fmode_t mode) static void sr_block_release(struct gendisk *disk, fmode_t mode) { struct scsi_cd *cd = scsi_cd(disk); + mutex_lock(&cd->lock); cdrom_release(&cd->cdi, mode); - scsi_cd_put(cd); mutex_unlock(&cd->lock); + + scsi_cd_put(cd); } static int sr_block_ioctl(struct block_device *bdev, fmode_t mode, unsigned cmd, From 4e6c78d16a798bdc54a95528be670535701540f8 Mon Sep 17 00:00:00 2001 From: Hannes Reinecke Date: Tue, 31 Mar 2020 10:41:11 +0200 Subject: [PATCH 106/280] scsi: aacraid: do not overwrite retval in aac_reset_adapter() 'retval' got assigned a value twice, causing the original value to be lost. Link: https://lore.kernel.org/r/20200331084111.95039-1-hare@suse.de Fixes: 3d3ca53b1639 ("scsi: aacraid: use scsi_host_(block,unblock) to block I/O") Reported-by: Martin K. Petersen Signed-off-by: Hannes Reinecke Signed-off-by: Martin K. 
Petersen --- drivers/scsi/aacraid/commsup.c | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/drivers/scsi/aacraid/commsup.c b/drivers/scsi/aacraid/commsup.c index 4725e4c763cf..ddd73f6798af 100644 --- a/drivers/scsi/aacraid/commsup.c +++ b/drivers/scsi/aacraid/commsup.c @@ -1626,7 +1626,7 @@ static int _aac_reset_adapter(struct aac_dev *aac, int forced, u8 reset_type) int aac_reset_adapter(struct aac_dev *aac, int forced, u8 reset_type) { unsigned long flagv = 0; - int retval; + int retval, unblock_retval; struct Scsi_Host *host = aac->scsi_host_ptr; int bled; @@ -1656,8 +1656,9 @@ int aac_reset_adapter(struct aac_dev *aac, int forced, u8 reset_type) retval = _aac_reset_adapter(aac, bled, reset_type); spin_unlock_irqrestore(host->host_lock, flagv); - retval = scsi_host_unblock(host, SDEV_RUNNING); - + unblock_retval = scsi_host_unblock(host, SDEV_RUNNING); + if (!retval) + retval = unblock_retval; if ((forced < 2) && (retval == -ENODEV)) { /* Unwind aac_send_shutdown() IOP_RESET unsupported/disabled */ struct fib * fibctx = aac_fib_alloc(aac); From cec9cbac5244b017f2671e3770abfacc939d753d Mon Sep 17 00:00:00 2001 From: Joe Perches Date: Tue, 31 Mar 2020 16:21:48 +0200 Subject: [PATCH 107/280] scsi: zfcp: use fallthrough; Convert the various uses of fallthrough comments to fallthrough; Done via script Link: https://lore.kernel.org/lkml/b56602fcf79f849e733e7b521bb0e17895d390fa.1582230379.git.joe.com/ Signed-off-by: Joe Perches Reviewed-by: Fedor Loshakov Reviewed-by: Steffen Maier [bblock@linux.ibm.com: resolved merge conflict with recently upstream-sent patch "zfcp: expose fabric name as common fc_host sysfs attribute"] Link: https://lore.kernel.org/r/d14669a67a17392490d3184117941123765db1a4.1585663010.git.bblock@linux.ibm.com Signed-off-by: Benjamin Block Signed-off-by: Martin K. 
Petersen --- drivers/s390/scsi/zfcp_erp.c | 10 +++++----- drivers/s390/scsi/zfcp_fsf.c | 23 +++++++++++------------ 2 files changed, 16 insertions(+), 17 deletions(-) diff --git a/drivers/s390/scsi/zfcp_erp.c b/drivers/s390/scsi/zfcp_erp.c index 18a6751299f9..3d0bc000f500 100644 --- a/drivers/s390/scsi/zfcp_erp.c +++ b/drivers/s390/scsi/zfcp_erp.c @@ -178,12 +178,12 @@ static enum zfcp_erp_act_type zfcp_erp_required_act(enum zfcp_erp_act_type want, return 0; if (!(p_status & ZFCP_STATUS_COMMON_UNBLOCKED)) need = ZFCP_ERP_ACTION_REOPEN_PORT; - /* fall through */ + fallthrough; case ZFCP_ERP_ACTION_REOPEN_PORT_FORCED: p_status = atomic_read(&port->status); if (!(p_status & ZFCP_STATUS_COMMON_OPEN)) need = ZFCP_ERP_ACTION_REOPEN_PORT; - /* fall through */ + fallthrough; case ZFCP_ERP_ACTION_REOPEN_PORT: p_status = atomic_read(&port->status); if (p_status & ZFCP_STATUS_COMMON_ERP_INUSE) @@ -196,7 +196,7 @@ static enum zfcp_erp_act_type zfcp_erp_required_act(enum zfcp_erp_act_type want, return need; if (!(a_status & ZFCP_STATUS_COMMON_UNBLOCKED)) need = ZFCP_ERP_ACTION_REOPEN_ADAPTER; - /* fall through */ + fallthrough; case ZFCP_ERP_ACTION_REOPEN_ADAPTER: a_status = atomic_read(&adapter->status); if (a_status & ZFCP_STATUS_COMMON_ERP_INUSE) @@ -1086,7 +1086,7 @@ static enum zfcp_erp_act_result zfcp_erp_lun_strategy( if (atomic_read(&zfcp_sdev->status) & ZFCP_STATUS_COMMON_OPEN) return zfcp_erp_lun_strategy_close(erp_action); /* already closed */ - /* fall through */ + fallthrough; case ZFCP_ERP_STEP_LUN_CLOSING: if (atomic_read(&zfcp_sdev->status) & ZFCP_STATUS_COMMON_OPEN) return ZFCP_ERP_FAILED; @@ -1415,7 +1415,7 @@ static void zfcp_erp_action_cleanup(struct zfcp_erp_action *act, if (act->step != ZFCP_ERP_STEP_UNINITIALIZED) if (result == ZFCP_ERP_SUCCEEDED) zfcp_erp_try_rport_unblock(port); - /* fall through */ + fallthrough; case ZFCP_ERP_ACTION_REOPEN_PORT_FORCED: put_device(&port->dev); break; diff --git a/drivers/s390/scsi/zfcp_fsf.c 
b/drivers/s390/scsi/zfcp_fsf.c index 662ddbc74263..e3dd3411ae9c 100644 --- a/drivers/s390/scsi/zfcp_fsf.c +++ b/drivers/s390/scsi/zfcp_fsf.c @@ -564,7 +564,7 @@ static int zfcp_fsf_exchange_config_evaluate(struct zfcp_fsf_req *req) case FSF_TOPO_AL: fc_host_port_type(shost) = FC_PORTTYPE_NLPORT; fc_host_fabric_name(shost) = 0; - /* fall through */ + fallthrough; default: fc_host_fabric_name(shost) = 0; dev_err(&adapter->ccw_device->dev, @@ -1032,7 +1032,7 @@ static void zfcp_fsf_abort_fcp_command_handler(struct zfcp_fsf_req *req) switch (fsq->word[0]) { case FSF_SQ_INVOKE_LINK_TEST_PROCEDURE: zfcp_fc_test_link(zfcp_sdev->port); - /* fall through */ + fallthrough; case FSF_SQ_ULP_DEPENDENT_ERP_REQUIRED: req->status |= ZFCP_STATUS_FSFREQ_ERROR; break; @@ -1127,7 +1127,7 @@ static void zfcp_fsf_send_ct_handler(struct zfcp_fsf_req *req) break; case FSF_PORT_HANDLE_NOT_VALID: zfcp_erp_adapter_reopen(adapter, 0, "fsscth1"); - /* fall through */ + fallthrough; case FSF_GENERIC_COMMAND_REJECTED: case FSF_PAYLOAD_SIZE_MISMATCH: case FSF_REQUEST_SIZE_TOO_LARGE: @@ -1313,7 +1313,7 @@ static void zfcp_fsf_send_els_handler(struct zfcp_fsf_req *req) break; case FSF_SBAL_MISMATCH: /* should never occur, avoided in zfcp_fsf_send_els */ - /* fall through */ + fallthrough; default: req->status |= ZFCP_STATUS_FSFREQ_ERROR; break; @@ -1736,7 +1736,7 @@ static void zfcp_fsf_open_port_handler(struct zfcp_fsf_req *req) switch (header->fsf_status_qual.word[0]) { case FSF_SQ_INVOKE_LINK_TEST_PROCEDURE: /* no zfcp_fc_test_link() with failed open port */ - /* fall through */ + fallthrough; case FSF_SQ_ULP_DEPENDENT_ERP_REQUIRED: case FSF_SQ_NO_RETRY_POSSIBLE: req->status |= ZFCP_STATUS_FSFREQ_ERROR; @@ -1909,14 +1909,14 @@ static void zfcp_fsf_open_wka_port_handler(struct zfcp_fsf_req *req) case FSF_MAXIMUM_NUMBER_OF_PORTS_EXCEEDED: dev_warn(&req->adapter->ccw_device->dev, "Opening WKA port 0x%x failed\n", wka_port->d_id); - /* fall through */ + fallthrough; case 
FSF_ADAPTER_STATUS_AVAILABLE: req->status |= ZFCP_STATUS_FSFREQ_ERROR; wka_port->status = ZFCP_FC_WKA_PORT_OFFLINE; break; case FSF_GOOD: wka_port->handle = header->port_handle; - /* fall through */ + fallthrough; case FSF_PORT_ALREADY_OPEN: wka_port->status = ZFCP_FC_WKA_PORT_ONLINE; } @@ -2059,7 +2059,6 @@ static void zfcp_fsf_close_physical_port_handler(struct zfcp_fsf_req *req) case FSF_ADAPTER_STATUS_AVAILABLE: switch (header->fsf_status_qual.word[0]) { case FSF_SQ_INVOKE_LINK_TEST_PROCEDURE: - /* fall through */ case FSF_SQ_ULP_DEPENDENT_ERP_REQUIRED: req->status |= ZFCP_STATUS_FSFREQ_ERROR; break; @@ -2144,7 +2143,7 @@ static void zfcp_fsf_open_lun_handler(struct zfcp_fsf_req *req) case FSF_PORT_HANDLE_NOT_VALID: zfcp_erp_adapter_reopen(adapter, 0, "fsouh_1"); - /* fall through */ + fallthrough; case FSF_LUN_ALREADY_OPEN: break; case FSF_PORT_BOXED: @@ -2175,7 +2174,7 @@ static void zfcp_fsf_open_lun_handler(struct zfcp_fsf_req *req) (unsigned long long)zfcp_scsi_dev_lun(sdev), (unsigned long long)zfcp_sdev->port->wwpn); zfcp_erp_set_lun_status(sdev, ZFCP_STATUS_COMMON_ERP_FAILED); - /* fall through */ + fallthrough; case FSF_INVALID_COMMAND_OPTION: req->status |= ZFCP_STATUS_FSFREQ_ERROR; break; @@ -2183,7 +2182,7 @@ static void zfcp_fsf_open_lun_handler(struct zfcp_fsf_req *req) switch (header->fsf_status_qual.word[0]) { case FSF_SQ_INVOKE_LINK_TEST_PROCEDURE: zfcp_fc_test_link(zfcp_sdev->port); - /* fall through */ + fallthrough; case FSF_SQ_ULP_DEPENDENT_ERP_REQUIRED: req->status |= ZFCP_STATUS_FSFREQ_ERROR; break; @@ -2277,7 +2276,7 @@ static void zfcp_fsf_close_lun_handler(struct zfcp_fsf_req *req) switch (req->qtcb->header.fsf_status_qual.word[0]) { case FSF_SQ_INVOKE_LINK_TEST_PROCEDURE: zfcp_fc_test_link(zfcp_sdev->port); - /* fall through */ + fallthrough; case FSF_SQ_ULP_DEPENDENT_ERP_REQUIRED: req->status |= ZFCP_STATUS_FSFREQ_ERROR; break; From 60f537d5755221efdcdae1415a4c745164c31214 Mon Sep 17 00:00:00 2001 From: kbuild test robot Date: Sat, 
28 Mar 2020 19:07:28 +0800 Subject: [PATCH 108/280] scsi: bnx2fc: fix boolreturn.cocci warnings drivers/scsi/bnx2fc/bnx2fc_hwi.c:1019:9-10: WARNING: return of 0/1 in function 'bnx2fc_pending_work' with return type bool Return statements in functions returning bool should use true/false instead of 1/0. Generated by: scripts/coccinelle/misc/boolreturn.cocci Fixes: 77331115e220 ("scsi: bnx2fc: Process the RQE with CQE in interrupt context") CC: Javed Hasan Acked-by: Javed Hasan Signed-off-by: kbuild test robot Signed-off-by: Martin K. Petersen --- drivers/scsi/bnx2fc/bnx2fc_hwi.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/drivers/scsi/bnx2fc/bnx2fc_hwi.c b/drivers/scsi/bnx2fc/bnx2fc_hwi.c index eb41b0080f57..1f7c58b4c535 100644 --- a/drivers/scsi/bnx2fc/bnx2fc_hwi.c +++ b/drivers/scsi/bnx2fc/bnx2fc_hwi.c @@ -1016,7 +1016,7 @@ static bool bnx2fc_pending_work(struct bnx2fc_rport *tgt, unsigned int wqe) xid = wqe & FCOE_PEND_WQ_CQE_TASK_ID; if (xid >= hba->max_tasks) { pr_err(PFX "ERROR:xid out of range\n"); - return 0; + return false; } task_idx = xid / BNX2FC_TASKS_PER_PAGE; @@ -1059,14 +1059,14 @@ static bool bnx2fc_pending_work(struct bnx2fc_rport *tgt, unsigned int wqe) list_add_tail(&work->list, &fps->work_list); wake_up_process(fps->iothread); spin_unlock_bh(&fps->fp_work_lock); - return 1; + return true; } } spin_unlock_bh(&fps->fp_work_lock); bnx2fc_process_cq_compl(tgt, wqe, rq_data_buff, num_rq, task); - return 1; + return true; } int bnx2fc_process_new_cqes(struct bnx2fc_rport *tgt) From 39d06079a50fe2a651091b38e311e605de0788cb Mon Sep 17 00:00:00 2001 From: Sagi Grimberg Date: Tue, 31 Mar 2020 22:44:23 -0700 Subject: [PATCH 109/280] nvme-tcp: fix possible crash in recv error flow If the target misbehaves and sends us unexpected payload we need to make sure to fail the controller and stop processing the input stream. 
We clear the rd_enabled flag and stop the io_work, but we may still requeue it if we still have pending sends and then in the next invocation we will process the input stream as the check is only in the .data_ready upcall. To fix this we need to make sure not to self-requeue io_work upon a recv flow error. This fixes the crash: nvme nvme2: receive failed: -22 BUG: unable to handle page fault for address: ffffbeb5816c3b48 nvme_ns_head_make_request: 29 callbacks suppressed block nvme0n5: no usable path - requeuing I/O block nvme0n5: no usable path - requeuing I/O block nvme0n7: no usable path - requeuing I/O block nvme0n7: no usable path - requeuing I/O block nvme0n3: no usable path - requeuing I/O block nvme0n3: no usable path - requeuing I/O block nvme0n3: no usable path - requeuing I/O block nvme0n7: no usable path - requeuing I/O block nvme0n3: no usable path - requeuing I/O block nvme0n3: no usable path - requeuing I/O #PF: supervisor read access inkernel mode #PF: error_code(0x0000) - not-present page PGD 1039157067 P4D 1039157067 PUD 103915a067 PMD 102719f067 PTE 0 Oops: 0000 [#1] SMP PTI CPU: 8 PID: 411 Comm: kworker/8:1H Not tainted 5.3.0-40-generic #32~18.04.1-Ubuntu Hardware name: Supermicro Super Server/X10SRi-F, BIOS 2.0 12/17/2015 Workqueue: nvme_tcp_wq nvme_tcp_io_work [nvme_tcp] RIP: 0010:nvme_tcp_recv_skb+0x2ae/0xb50 [nvme_tcp] RSP: 0018:ffffbeb5806cfd10 EFLAGS: 00010246 RAX: ffffbeb5816c3b48 RBX: 00000000000003d0 RCX: 0000000000000008 RDX: 00000000000003d0 RSI: 0000000000000001 RDI: ffff9a3040684b40 RBP: ffffbeb5806cfd90 R08: 0000000000000000 R09: ffffffff946e6900 R10: ffffbeb5806cfce0 R11: 0000000000000001 R12: 0000000000000000 R13: ffff9a2ff86501c0 R14: 00000000000003d0 R15: ffff9a30b85f2798 FS: 0000000000000000(0000) GS:ffff9a30bf800000(0000) knlGS:0000000000000000 CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 CR2: ffffbeb5816c3b48 CR3: 000000088400a006 CR4: 00000000003626e0 DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000 
DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400 Call Trace: tcp_read_sock+0x8c/0x290 ? __release_sock+0x9d/0xe0 ? nvme_tcp_write_space+0xb0/0xb0 [nvme_tcp] nvme_tcp_io_work+0x4b4/0x830 [nvme_tcp] ? finish_task_switch+0x163/0x270 process_one_work+0x1fd/0x3f0 worker_thread+0x34/0x410 kthread+0x121/0x140 ? process_one_work+0x3f0/0x3f0 ? kthread_park+0xb0/0xb0 ret_from_fork+0x35/0x40 Reported-by: Roy Shterman Signed-off-by: Sagi Grimberg Signed-off-by: Christoph Hellwig --- drivers/nvme/host/tcp.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/nvme/host/tcp.c b/drivers/nvme/host/tcp.c index eb31c689d2cf..c15a92163c1f 100644 --- a/drivers/nvme/host/tcp.c +++ b/drivers/nvme/host/tcp.c @@ -1073,7 +1073,7 @@ static void nvme_tcp_io_work(struct work_struct *w) if (result > 0) pending = true; else if (unlikely(result < 0)) - break; + return; if (!pending) return; From 21fca8bdbb64df1297e8c65a746c4c9f4a689751 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=EC=9D=B4=EA=B2=BD=ED=83=9D?= Date: Wed, 1 Apr 2020 10:04:21 +0900 Subject: [PATCH 110/280] ASoC: dpcm: allow start or stop during pause for backend soc_compr_trigger_fe() allows start or stop after pause_push. In dpcm_be_dai_trigger(), however, only pause_release is allowed command after pause_push. So, start or stop after pause in compress offload is always returned as error if the compress offload is used with dpcm. To fix the problem, SND_SOC_DPCM_STATE_PAUSED should be allowed for start or stop command. 
Signed-off-by: Gyeongtaek Lee Reviewed-by: Vinod Koul Link: https://lore.kernel.org/r/004d01d607c1$7a3d5250$6eb7f6f0$@samsung.com Signed-off-by: Mark Brown --- sound/soc/soc-pcm.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/sound/soc/soc-pcm.c b/sound/soc/soc-pcm.c index e256d438ee68..289aebc15529 100644 --- a/sound/soc/soc-pcm.c +++ b/sound/soc/soc-pcm.c @@ -2324,7 +2324,8 @@ int dpcm_be_dai_trigger(struct snd_soc_pcm_runtime *fe, int stream, switch (cmd) { case SNDRV_PCM_TRIGGER_START: if ((be->dpcm[stream].state != SND_SOC_DPCM_STATE_PREPARE) && - (be->dpcm[stream].state != SND_SOC_DPCM_STATE_STOP)) + (be->dpcm[stream].state != SND_SOC_DPCM_STATE_STOP) && + (be->dpcm[stream].state != SND_SOC_DPCM_STATE_PAUSED)) continue; ret = dpcm_do_trigger(dpcm, be_substream, cmd); @@ -2354,7 +2355,8 @@ int dpcm_be_dai_trigger(struct snd_soc_pcm_runtime *fe, int stream, be->dpcm[stream].state = SND_SOC_DPCM_STATE_START; break; case SNDRV_PCM_TRIGGER_STOP: - if (be->dpcm[stream].state != SND_SOC_DPCM_STATE_START) + if ((be->dpcm[stream].state != SND_SOC_DPCM_STATE_START) && + (be->dpcm[stream].state != SND_SOC_DPCM_STATE_PAUSED)) continue; if (!snd_soc_dpcm_can_be_free_stop(fe, be, stream)) From 59564e117356d5bb6df6876c03d7d650361781c9 Mon Sep 17 00:00:00 2001 From: YueHaibing Date: Wed, 1 Apr 2020 17:10:55 +0800 Subject: [PATCH 111/280] ASoC: rt5682: Fix build error without CONFIG_I2C If I2C is n but SoundWire is m, building fails: sound/soc/codecs/rt5682.c:3716:1: warning: data definition has no type or storage class module_i2c_driver(rt5682_i2c_driver); ^~~~~~~~~~~~~~~~~ sound/soc/codecs/rt5682.c:3716:1: error: type defaults to 'int' in declaration of 'module_i2c_driver' [-Werror=implicit-int] sound/soc/codecs/rt5682.c:3716:1: warning: parameter names (without types) in function declaration Guard this use #ifdef CONFIG_I2C. 
Fixes: 5549ea647997 ("ASoC: rt5682: fix unmet dependencies") Signed-off-by: YueHaibing Link: https://lore.kernel.org/r/20200401091055.34112-1-yuehaibing@huawei.com Signed-off-by: Mark Brown --- sound/soc/codecs/rt5682.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/sound/soc/codecs/rt5682.c b/sound/soc/codecs/rt5682.c index c9268a230daa..d36f560ad7a8 100644 --- a/sound/soc/codecs/rt5682.c +++ b/sound/soc/codecs/rt5682.c @@ -3703,7 +3703,7 @@ static const struct acpi_device_id rt5682_acpi_match[] = { MODULE_DEVICE_TABLE(acpi, rt5682_acpi_match); #endif -static struct i2c_driver rt5682_i2c_driver = { +static struct i2c_driver __maybe_unused rt5682_i2c_driver = { .driver = { .name = "rt5682", .of_match_table = of_match_ptr(rt5682_of_match), @@ -3713,7 +3713,10 @@ static struct i2c_driver rt5682_i2c_driver = { .shutdown = rt5682_i2c_shutdown, .id_table = rt5682_i2c_id, }; + +#ifdef CONFIG_I2C module_i2c_driver(rt5682_i2c_driver); +#endif MODULE_DESCRIPTION("ASoC RT5682 driver"); MODULE_AUTHOR("Bard Liao "); From abca9e4a04fbe9c6df4d48ca7517e1611812af25 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=EC=9D=B4=EA=B2=BD=ED=83=9D?= Date: Wed, 1 Apr 2020 18:05:24 +0900 Subject: [PATCH 112/280] ASoC: topology: use name_prefix for new kcontrol Current topology doesn't add prefix of component to new kcontrol. 
Signed-off-by: Gyeongtaek Lee Link: https://lore.kernel.org/r/009b01d60804$ae25c2d0$0a714870$@samsung.com Signed-off-by: Mark Brown --- sound/soc/soc-topology.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sound/soc/soc-topology.c b/sound/soc/soc-topology.c index 1f81cd2d29cf..87f75edba3dc 100644 --- a/sound/soc/soc-topology.c +++ b/sound/soc/soc-topology.c @@ -362,7 +362,7 @@ static int soc_tplg_add_kcontrol(struct soc_tplg *tplg, struct snd_soc_component *comp = tplg->comp; return soc_tplg_add_dcontrol(comp->card->snd_card, - comp->dev, k, NULL, comp, kcontrol); + comp->dev, k, comp->name_prefix, comp, kcontrol); } /* remove a mixer kcontrol */ From 10bea96dcc13ad841d53bdcc9d8e731e9e0ad58f Mon Sep 17 00:00:00 2001 From: Hillf Danton Date: Wed, 1 Apr 2020 17:19:33 +0800 Subject: [PATCH 113/280] io_uring: add missing finish_wait() in io_sq_thread() Add it to pair with prepare_to_wait() in an attempt to avoid anything weird in the field. Fixes: b41e98524e42 ("io_uring: add per-task callback handler") Reported-by: syzbot+0c3370f235b74b3cfd97@syzkaller.appspotmail.com Signed-off-by: Hillf Danton Signed-off-by: Jens Axboe --- fs/io_uring.c | 1 + 1 file changed, 1 insertion(+) diff --git a/fs/io_uring.c b/fs/io_uring.c index 7b5087904640..10645077d6b4 100644 --- a/fs/io_uring.c +++ b/fs/io_uring.c @@ -5968,6 +5968,7 @@ static int io_sq_thread(void *data) } if (current->task_works) { task_work_run(); + finish_wait(&ctx->sqo_wait, &wait); continue; } if (signal_pending(current)) From d866dbf6178713e37d2fec2870af00b345684e1a Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Wed, 24 Jul 2019 10:37:22 -0700 Subject: [PATCH 114/280] blkcg: rename blkcg->cgwb_refcnt to ->online_pin and always use it blkcg->cgwb_refcnt is used to delay blkcg offlining so that blkgs don't get offlined while there are active cgwbs on them. However, it ends up making offlining unordered sometimes causing parents to be offlined before children. 
To fix it, we want child blkcgs to pin the parents' online states turning the refcnt into a more generic online pinning mechanism. In preparation, * blkcg->cgwb_refcnt -> blkcg->online_pin * blkcg_cgwb_get/put() -> blkcg_pin/unpin_online() * Take them out of CONFIG_CGROUP_WRITEBACK Signed-off-by: Tejun Heo Signed-off-by: Jens Axboe --- block/blk-cgroup.c | 6 +++--- include/linux/blk-cgroup.h | 39 +++++++++++++------------------------- mm/backing-dev.c | 6 +++--- 3 files changed, 19 insertions(+), 32 deletions(-) diff --git a/block/blk-cgroup.c b/block/blk-cgroup.c index c15a26096038..2acef6a64954 100644 --- a/block/blk-cgroup.c +++ b/block/blk-cgroup.c @@ -883,8 +883,8 @@ static void blkcg_css_offline(struct cgroup_subsys_state *css) /* this prevents anyone from attaching or migrating to this blkcg */ wb_blkcg_offline(blkcg); - /* put the base cgwb reference allowing step 2 to be triggered */ - blkcg_cgwb_put(blkcg); + /* put the base online pin allowing step 2 to be triggered */ + blkcg_unpin_online(blkcg); } /** @@ -983,11 +983,11 @@ blkcg_css_alloc(struct cgroup_subsys_state *parent_css) } spin_lock_init(&blkcg->lock); + refcount_set(&blkcg->online_pin, 1); INIT_RADIX_TREE(&blkcg->blkg_tree, GFP_NOWAIT | __GFP_NOWARN); INIT_HLIST_HEAD(&blkcg->blkg_list); #ifdef CONFIG_CGROUP_WRITEBACK INIT_LIST_HEAD(&blkcg->cgwb_list); - refcount_set(&blkcg->cgwb_refcnt, 1); #endif list_add_tail(&blkcg->all_blkcgs_node, &all_blkcgs); diff --git a/include/linux/blk-cgroup.h b/include/linux/blk-cgroup.h index e4a6949fd171..7fb7caa55a3d 100644 --- a/include/linux/blk-cgroup.h +++ b/include/linux/blk-cgroup.h @@ -46,6 +46,7 @@ struct blkcg_gq; struct blkcg { struct cgroup_subsys_state css; spinlock_t lock; + refcount_t online_pin; struct radix_tree_root blkg_tree; struct blkcg_gq __rcu *blkg_hint; @@ -56,7 +57,6 @@ struct blkcg { struct list_head all_blkcgs_node; #ifdef CONFIG_CGROUP_WRITEBACK struct list_head cgwb_list; - refcount_t cgwb_refcnt; #endif }; @@ -412,47 +412,34 @@
static inline struct blkcg *cpd_to_blkcg(struct blkcg_policy_data *cpd) extern void blkcg_destroy_blkgs(struct blkcg *blkcg); -#ifdef CONFIG_CGROUP_WRITEBACK - /** - * blkcg_cgwb_get - get a reference for blkcg->cgwb_list + * blkcg_pin_online - pin online state * @blkcg: blkcg of interest * - * This is used to track the number of active wb's related to a blkcg. + * While pinned, a blkcg is kept online. This is primarily used to + * impedance-match blkg and cgwb lifetimes so that blkg doesn't go offline + * while an associated cgwb is still active. */ -static inline void blkcg_cgwb_get(struct blkcg *blkcg) +static inline void blkcg_pin_online(struct blkcg *blkcg) { - refcount_inc(&blkcg->cgwb_refcnt); + refcount_inc(&blkcg->online_pin); } /** - * blkcg_cgwb_put - put a reference for @blkcg->cgwb_list + * blkcg_unpin_online - unpin online state * @blkcg: blkcg of interest * - * This is used to track the number of active wb's related to a blkcg. - * When this count goes to zero, all active wb has finished so the + * This is primarily used to impedance-match blkg and cgwb lifetimes so + * that blkg doesn't go offline while an associated cgwb is still active. + * When this count goes to zero, all active cgwbs have finished so the * blkcg can continue destruction by calling blkcg_destroy_blkgs(). - * This work may occur in cgwb_release_workfn() on the cgwb_release - * workqueue. 
*/ -static inline void blkcg_cgwb_put(struct blkcg *blkcg) +static inline void blkcg_unpin_online(struct blkcg *blkcg) { - if (refcount_dec_and_test(&blkcg->cgwb_refcnt)) + if (refcount_dec_and_test(&blkcg->online_pin)) blkcg_destroy_blkgs(blkcg); } -#else - -static inline void blkcg_cgwb_get(struct blkcg *blkcg) { } - -static inline void blkcg_cgwb_put(struct blkcg *blkcg) -{ - /* wb isn't being accounted, so trigger destruction right away */ - blkcg_destroy_blkgs(blkcg); -} - -#endif - /** * blkg_path - format cgroup path of blkg * @blkg: blkg of interest diff --git a/mm/backing-dev.c b/mm/backing-dev.c index 62f05f605fb5..c81b4f3a7268 100644 --- a/mm/backing-dev.c +++ b/mm/backing-dev.c @@ -491,8 +491,8 @@ static void cgwb_release_workfn(struct work_struct *work) css_put(wb->blkcg_css); mutex_unlock(&wb->bdi->cgwb_release_mutex); - /* triggers blkg destruction if cgwb_refcnt becomes zero */ - blkcg_cgwb_put(blkcg); + /* triggers blkg destruction if no online users left */ + blkcg_unpin_online(blkcg); fprop_local_destroy_percpu(&wb->memcg_completions); percpu_ref_exit(&wb->refcnt); @@ -592,7 +592,7 @@ static int cgwb_create(struct backing_dev_info *bdi, list_add_tail_rcu(&wb->bdi_node, &bdi->wb_list); list_add(&wb->memcg_node, memcg_cgwb_list); list_add(&wb->blkcg_node, blkcg_cgwb_list); - blkcg_cgwb_get(blkcg); + blkcg_pin_online(blkcg); css_get(memcg_css); css_get(blkcg_css); } From 4308a434e5e08c78676aa66bc626ef78cbef0883 Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Wed, 24 Jul 2019 10:37:55 -0700 Subject: [PATCH 115/280] blkcg: don't offline parent blkcg first blkcg->cgwb_refcnt is used to delay blkcg offlining so that blkgs don't get offlined while there are active cgwbs on them. However, it ends up making offlining unordered sometimes causing parents to be offlined before children. Let's fix this by making child blkcgs pin the parents' online states. 
Note that pin/unpin names are chosen over get/put intentionally because css uses get/put online for something different. Signed-off-by: Tejun Heo Signed-off-by: Jens Axboe --- block/blk-cgroup.c | 16 ++++++++++++++++ include/linux/blk-cgroup.h | 6 +++++- 2 files changed, 21 insertions(+), 1 deletion(-) diff --git a/block/blk-cgroup.c b/block/blk-cgroup.c index 2acef6a64954..c5dc833212e1 100644 --- a/block/blk-cgroup.c +++ b/block/blk-cgroup.c @@ -1006,6 +1006,21 @@ blkcg_css_alloc(struct cgroup_subsys_state *parent_css) return ret; } +static int blkcg_css_online(struct cgroup_subsys_state *css) +{ + struct blkcg *blkcg = css_to_blkcg(css); + struct blkcg *parent = blkcg_parent(blkcg); + + /* + * blkcg_pin_online() is used to delay blkcg offline so that blkgs + * don't go offline while cgwbs are still active on them. Pin the + * parent so that offline always happens towards the root. + */ + if (parent) + blkcg_pin_online(parent); + return 0; +} + /** * blkcg_init_queue - initialize blkcg part of request queue * @q: request_queue to initialize @@ -1199,6 +1214,7 @@ static void blkcg_exit(struct task_struct *tsk) struct cgroup_subsys io_cgrp_subsys = { .css_alloc = blkcg_css_alloc, + .css_online = blkcg_css_online, .css_offline = blkcg_css_offline, .css_free = blkcg_css_free, .can_attach = blkcg_can_attach, diff --git a/include/linux/blk-cgroup.h b/include/linux/blk-cgroup.h index 7fb7caa55a3d..35f8ffe92b70 100644 --- a/include/linux/blk-cgroup.h +++ b/include/linux/blk-cgroup.h @@ -436,8 +436,12 @@ static inline void blkcg_pin_online(struct blkcg *blkcg) */ static inline void blkcg_unpin_online(struct blkcg *blkcg) { - if (refcount_dec_and_test(&blkcg->online_pin)) + do { + if (!refcount_dec_and_test(&blkcg->online_pin)) + break; blkcg_destroy_blkgs(blkcg); + blkcg = blkcg_parent(blkcg); + } while (blkcg); } /** From 6a9df8185720bdf93a893d7478d43db7fbe15473 Mon Sep 17 00:00:00 2001 From: Can Guo Date: Tue, 11 Feb 2020 21:38:28 -0800 Subject: [PATCH 116/280] scsi: 
ufs: Use ufshcd_config_pwr_mode() when scaling gear When scaling gear, use ufshcd_config_pwr_mode() instead of ufshcd_change_power_mode() so that vops_pwr_change_notify(PRE_CHANGE) can be utilized to allow vendors use customized settings before changing power mode. Link: https://lore.kernel.org/r/1581485910-8307-2-git-send-email-cang@codeaurora.org Reviewed-by: Avri Altman Reviewed-by: Stanley Chu Signed-off-by: Can Guo Signed-off-by: Martin K. Petersen --- drivers/scsi/ufs/ufshcd.c | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/drivers/scsi/ufs/ufshcd.c b/drivers/scsi/ufs/ufshcd.c index ad61516fc64f..64e42efc8d0c 100644 --- a/drivers/scsi/ufs/ufshcd.c +++ b/drivers/scsi/ufs/ufshcd.c @@ -1078,8 +1078,7 @@ static int ufshcd_scale_gear(struct ufs_hba *hba, bool scale_up) } /* check if the power mode needs to be changed or not? */ - ret = ufshcd_change_power_mode(hba, &new_pwr_info); - + ret = ufshcd_config_pwr_mode(hba, &new_pwr_info); if (ret) dev_err(hba->dev, "%s: failed err %d, old gear: (tx %d rx %d), new gear: (tx %d rx %d)", __func__, ret, @@ -4116,8 +4115,6 @@ int ufshcd_config_pwr_mode(struct ufs_hba *hba, memcpy(&final_params, desired_pwr_mode, sizeof(final_params)); ret = ufshcd_change_power_mode(hba, &final_params); - if (!ret) - ufshcd_print_pwr_info(hba); return ret; } @@ -7131,6 +7128,7 @@ static int ufshcd_probe_hba(struct ufs_hba *hba, bool async) __func__, ret); goto out; } + ufshcd_print_pwr_info(hba); } /* From 1b55940b9bcc64acb7336224b0e49203ff7987c6 Mon Sep 17 00:00:00 2001 From: Nikhil Kshirsagar Date: Wed, 1 Apr 2020 09:25:00 +0530 Subject: [PATCH 117/280] scsi: core: Add DID_ALLOC_FAILURE and DID_MEDIUM_ERROR to hostbyte_table Since DID_ALLOC_FAILURE and DID_MEDIUM_ERROR are missing from the hostbyte_table, scsi debug logging prints their numeric values only. Adding them to the hostbyte_table to allow the scsi debug log to print those as strings. 
Link: https://lore.kernel.org/r/CAMNNMLFtQOHsjWUMs+q_+z9XqQYZmR34ewoB-5LrCpzGp1Ppkw@mail.gmail.com Reviewed-by: Christoph Hellwig Signed-off-by: Nikhil Kshirsagar Signed-off-by: Martin K. Petersen --- drivers/scsi/constants.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/scsi/constants.c b/drivers/scsi/constants.c index d4c2a2e4c5d4..84d73f57292b 100644 --- a/drivers/scsi/constants.c +++ b/drivers/scsi/constants.c @@ -404,7 +404,7 @@ static const char * const hostbyte_table[]={ "DID_ABORT", "DID_PARITY", "DID_ERROR", "DID_RESET", "DID_BAD_INTR", "DID_PASSTHROUGH", "DID_SOFT_ERROR", "DID_IMM_RETRY", "DID_REQUEUE", "DID_TRANSPORT_DISRUPTED", "DID_TRANSPORT_FAILFAST", "DID_TARGET_FAILURE", -"DID_NEXUS_FAILURE" }; +"DID_NEXUS_FAILURE", "DID_ALLOC_FAILURE", "DID_MEDIUM_ERROR" }; static const char * const driverbyte_table[]={ "DRIVER_OK", "DRIVER_BUSY", "DRIVER_SOFT", "DRIVER_MEDIA", "DRIVER_ERROR", From 74e4d20e2f43cf09a35543d960ac8f7a1ffcbbb5 Mon Sep 17 00:00:00 2001 From: Sagi Grimberg Date: Wed, 1 Apr 2020 22:44:43 -0700 Subject: [PATCH 118/280] nvme: inherit stable pages constraint in the mpath stack device If the backing device requires stable pages, we need to set it on the stack mpath device as well. This applies to rdma/fc transports when doing data integrity and tcp transport calculating digests.
Signed-off-by: Sagi Grimberg Signed-off-by: Christoph Hellwig --- drivers/nvme/host/core.c | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/drivers/nvme/host/core.c b/drivers/nvme/host/core.c index 2db8563aeb2d..91c1bd659947 100644 --- a/drivers/nvme/host/core.c +++ b/drivers/nvme/host/core.c @@ -1897,6 +1897,13 @@ static void __nvme_revalidate_disk(struct gendisk *disk, struct nvme_id_ns *id) if (ns->head->disk) { nvme_update_disk_info(ns->head->disk, ns, id); blk_queue_stack_limits(ns->head->disk->queue, ns->queue); + if (bdi_cap_stable_pages_required(ns->queue->backing_dev_info)) { + struct backing_dev_info *info = + ns->head->disk->queue->backing_dev_info; + + info->capabilities |= BDI_CAP_STABLE_WRITES; + } + revalidate_disk(ns->head->disk); } #endif From f0e656e4f253120eb871a53ffab7664530c1d9f4 Mon Sep 17 00:00:00 2001 From: Sagi Grimberg Date: Wed, 1 Apr 2020 16:16:27 -0700 Subject: [PATCH 119/280] nvmet: fix NULL dereference when removing a referral When item release is called, the parent is already null. We need the parent to pass to nvmet_referral_disable so hook it up to ->disconnect_notify. 
Reported-by: Tony Asleson Signed-off-by: Sagi Grimberg Reviewed-by: Chaitanya Kulkarni Signed-off-by: Christoph Hellwig --- drivers/nvme/target/configfs.c | 10 +++++++++- 1 file changed, 9 insertions(+), 1 deletion(-) diff --git a/drivers/nvme/target/configfs.c b/drivers/nvme/target/configfs.c index 7aa10788b7c8..58cabd7b6fc5 100644 --- a/drivers/nvme/target/configfs.c +++ b/drivers/nvme/target/configfs.c @@ -1098,12 +1098,19 @@ static struct configfs_attribute *nvmet_referral_attrs[] = { NULL, }; -static void nvmet_referral_release(struct config_item *item) +static void nvmet_referral_notify(struct config_group *group, + struct config_item *item) { struct nvmet_port *parent = to_nvmet_port(item->ci_parent->ci_parent); struct nvmet_port *port = to_nvmet_port(item); nvmet_referral_disable(parent, port); +} + +static void nvmet_referral_release(struct config_item *item) +{ + struct nvmet_port *port = to_nvmet_port(item); + kfree(port); } @@ -1134,6 +1141,7 @@ static struct config_group *nvmet_referral_make( static struct configfs_group_operations nvmet_referral_group_ops = { .make_group = nvmet_referral_make, + .disconnect_notify = nvmet_referral_notify, }; static const struct config_item_type nvmet_referrals_type = { From d9fdd0adf932c8d615cfe52bbc689c373a95377f Mon Sep 17 00:00:00 2001 From: Brian Foster Date: Thu, 2 Apr 2020 08:18:57 -0700 Subject: [PATCH 120/280] xfs: fix inode number overflow in ifree cluster helper Qian Cai reports seemingly random buffer read verifier errors during filesystem writeback. This was isolated to a recent patch that factored out some inode cluster freeing code and happened to cast an unsigned inode number type to a signed value. If the inode number value overflows, we can skip marking in-core inodes associated with the underlying buffer stale at the time the physical inodes are freed. If such an inode happens to be dirty, xfsaild will eventually attempt to write it back over non-inode blocks. 
The invalidation of the underlying inode buffer causes writeback to read the buffer from disk. This fails the read verifier (preventing eventual corruption) if the buffer no longer looks like an inode cluster. Analysis by Dave Chinner. Fix up the helper to use the proper type for inode number values. Fixes: 5806165a6663 ("xfs: factor inode lookup from xfs_ifree_cluster") Reported-by: Qian Cai Signed-off-by: Brian Foster Reviewed-by: Christoph Hellwig Reviewed-by: Darrick J. Wong Signed-off-by: Darrick J. Wong --- fs/xfs/xfs_inode.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fs/xfs/xfs_inode.c b/fs/xfs/xfs_inode.c index 0cac0d37e3ae..ae86c870da92 100644 --- a/fs/xfs/xfs_inode.c +++ b/fs/xfs/xfs_inode.c @@ -2511,7 +2511,7 @@ static struct xfs_inode * xfs_ifree_get_one_inode( struct xfs_perag *pag, struct xfs_inode *free_ip, - int inum) + xfs_ino_t inum) { struct xfs_mount *mp = pag->pag_mount; struct xfs_inode *ip; From ca707b3f00b4f31a6e1eb37e8ae99f15f2bb1fe5 Mon Sep 17 00:00:00 2001 From: Hans de Goede Date: Thu, 2 Apr 2020 19:43:11 +0200 Subject: [PATCH 121/280] ALSA: hda/realtek - Add quirk for Lenovo Carbon X1 8th gen The audio setup on the Lenovo Carbon X1 8th gen is the same as that on the Lenovo Carbon X1 7th gen, as such it needs the same ALC285_FIXUP_THINKPAD_HEADSET_JACK quirk. This fixes volume control of the speaker not working among other things. 
BugLink: https://bugzilla.redhat.com/show_bug.cgi?id=1820196 Cc: stable@vger.kernel.org Suggested-by: Jaroslav Kysela Signed-off-by: Hans de Goede Reviewed-by: Jaroslav Kysela Link: https://lore.kernel.org/r/20200402174311.238614-1-hdegoede@redhat.com Signed-off-by: Takashi Iwai --- sound/pci/hda/patch_realtek.c | 1 + 1 file changed, 1 insertion(+) diff --git a/sound/pci/hda/patch_realtek.c b/sound/pci/hda/patch_realtek.c index f66a48154a57..a32833c65db8 100644 --- a/sound/pci/hda/patch_realtek.c +++ b/sound/pci/hda/patch_realtek.c @@ -7325,6 +7325,7 @@ static const struct snd_pci_quirk alc269_fixup_tbl[] = { SND_PCI_QUIRK(0x17aa, 0x225d, "Thinkpad T480", ALC269_FIXUP_LIMIT_INT_MIC_BOOST), SND_PCI_QUIRK(0x17aa, 0x2292, "Thinkpad X1 Yoga 7th", ALC285_FIXUP_THINKPAD_HEADSET_JACK), SND_PCI_QUIRK(0x17aa, 0x2293, "Thinkpad X1 Carbon 7th", ALC285_FIXUP_THINKPAD_HEADSET_JACK), + SND_PCI_QUIRK(0x17aa, 0x22be, "Thinkpad X1 Carbon 8th", ALC285_FIXUP_THINKPAD_HEADSET_JACK), SND_PCI_QUIRK(0x17aa, 0x30bb, "ThinkCentre AIO", ALC233_FIXUP_LENOVO_LINE2_MIC_HOTKEY), SND_PCI_QUIRK(0x17aa, 0x30e2, "ThinkCentre AIO", ALC233_FIXUP_LENOVO_LINE2_MIC_HOTKEY), SND_PCI_QUIRK(0x17aa, 0x310c, "ThinkCentre Station", ALC294_FIXUP_LENOVO_MIC_LOCATION), From dbdd24eaac4ecc482e2d433175c0e82b9f107bef Mon Sep 17 00:00:00 2001 From: Takashi Iwai Date: Fri, 20 Mar 2020 09:44:29 +0100 Subject: [PATCH 122/280] edd: Use scnprintf() for avoiding potential buffer overflow Since snprintf() returns the would-be-output size instead of the actual output size, the succeeding calls may go beyond the given buffer limit. Fix it by replacing with scnprintf(). 
Link: https://lore.kernel.org/r/20200320084429.1803-1-tiwai@suse.de Signed-off-by: Takashi Iwai --- drivers/firmware/edd.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/drivers/firmware/edd.c b/drivers/firmware/edd.c index 29906e39ab4b..14d0970a7198 100644 --- a/drivers/firmware/edd.c +++ b/drivers/firmware/edd.c @@ -341,7 +341,7 @@ edd_show_legacy_max_cylinder(struct edd_device *edev, char *buf) if (!info || !buf) return -EINVAL; - p += snprintf(p, left, "%u\n", info->legacy_max_cylinder); + p += scnprintf(p, left, "%u\n", info->legacy_max_cylinder); return (p - buf); } @@ -356,7 +356,7 @@ edd_show_legacy_max_head(struct edd_device *edev, char *buf) if (!info || !buf) return -EINVAL; - p += snprintf(p, left, "%u\n", info->legacy_max_head); + p += scnprintf(p, left, "%u\n", info->legacy_max_head); return (p - buf); } @@ -371,7 +371,7 @@ edd_show_legacy_sectors_per_track(struct edd_device *edev, char *buf) if (!info || !buf) return -EINVAL; - p += snprintf(p, left, "%u\n", info->legacy_sectors_per_track); + p += scnprintf(p, left, "%u\n", info->legacy_sectors_per_track); return (p - buf); } From ae769d3556644888c964635179ef192995f40793 Mon Sep 17 00:00:00 2001 From: Takashi Iwai Date: Fri, 3 Apr 2020 09:25:15 +0200 Subject: [PATCH 123/280] ALSA: pcm: oss: Fix regression by buffer overflow fix The recent fix for the OOB access in PCM OSS plugins (commit f2ecf903ef06: "ALSA: pcm: oss: Avoid plugin buffer overflow") caused a regression on OSS applications. The patch introduced the size check in client and slave size calculations to limit to each plugin's buffer size, but I overlooked that some code paths call those without allocating the buffer but just for estimation. This patch fixes the bug by skipping the size check for those code paths while keeping checking in the actual transfer calls. 
Fixes: f2ecf903ef06 ("ALSA: pcm: oss: Avoid plugin buffer overflow") Tested-and-reported-by: Jari Ruusu Cc: Link: https://lore.kernel.org/r/20200403072515.25539-1-tiwai@suse.de Signed-off-by: Takashi Iwai --- sound/core/oss/pcm_plugin.c | 32 ++++++++++++++++++++++++-------- 1 file changed, 24 insertions(+), 8 deletions(-) diff --git a/sound/core/oss/pcm_plugin.c b/sound/core/oss/pcm_plugin.c index 752d078908e9..50c35ecc8953 100644 --- a/sound/core/oss/pcm_plugin.c +++ b/sound/core/oss/pcm_plugin.c @@ -196,7 +196,9 @@ int snd_pcm_plugin_free(struct snd_pcm_plugin *plugin) return 0; } -snd_pcm_sframes_t snd_pcm_plug_client_size(struct snd_pcm_substream *plug, snd_pcm_uframes_t drv_frames) +static snd_pcm_sframes_t plug_client_size(struct snd_pcm_substream *plug, + snd_pcm_uframes_t drv_frames, + bool check_size) { struct snd_pcm_plugin *plugin, *plugin_prev, *plugin_next; int stream; @@ -209,7 +211,7 @@ snd_pcm_sframes_t snd_pcm_plug_client_size(struct snd_pcm_substream *plug, snd_p if (stream == SNDRV_PCM_STREAM_PLAYBACK) { plugin = snd_pcm_plug_last(plug); while (plugin && drv_frames > 0) { - if (drv_frames > plugin->buf_frames) + if (check_size && drv_frames > plugin->buf_frames) drv_frames = plugin->buf_frames; plugin_prev = plugin->prev; if (plugin->src_frames) @@ -222,7 +224,7 @@ snd_pcm_sframes_t snd_pcm_plug_client_size(struct snd_pcm_substream *plug, snd_p plugin_next = plugin->next; if (plugin->dst_frames) drv_frames = plugin->dst_frames(plugin, drv_frames); - if (drv_frames > plugin->buf_frames) + if (check_size && drv_frames > plugin->buf_frames) drv_frames = plugin->buf_frames; plugin = plugin_next; } @@ -231,7 +233,9 @@ snd_pcm_sframes_t snd_pcm_plug_client_size(struct snd_pcm_substream *plug, snd_p return drv_frames; } -snd_pcm_sframes_t snd_pcm_plug_slave_size(struct snd_pcm_substream *plug, snd_pcm_uframes_t clt_frames) +static snd_pcm_sframes_t plug_slave_size(struct snd_pcm_substream *plug, + snd_pcm_uframes_t clt_frames, + bool check_size) { 
struct snd_pcm_plugin *plugin, *plugin_prev, *plugin_next; snd_pcm_sframes_t frames; @@ -252,14 +256,14 @@ snd_pcm_sframes_t snd_pcm_plug_slave_size(struct snd_pcm_substream *plug, snd_pc if (frames < 0) return frames; } - if (frames > plugin->buf_frames) + if (check_size && frames > plugin->buf_frames) frames = plugin->buf_frames; plugin = plugin_next; } } else if (stream == SNDRV_PCM_STREAM_CAPTURE) { plugin = snd_pcm_plug_last(plug); while (plugin) { - if (frames > plugin->buf_frames) + if (check_size && frames > plugin->buf_frames) frames = plugin->buf_frames; plugin_prev = plugin->prev; if (plugin->src_frames) { @@ -274,6 +278,18 @@ snd_pcm_sframes_t snd_pcm_plug_slave_size(struct snd_pcm_substream *plug, snd_pc return frames; } +snd_pcm_sframes_t snd_pcm_plug_client_size(struct snd_pcm_substream *plug, + snd_pcm_uframes_t drv_frames) +{ + return plug_client_size(plug, drv_frames, false); +} + +snd_pcm_sframes_t snd_pcm_plug_slave_size(struct snd_pcm_substream *plug, + snd_pcm_uframes_t clt_frames) +{ + return plug_slave_size(plug, clt_frames, false); +} + static int snd_pcm_plug_formats(const struct snd_mask *mask, snd_pcm_format_t format) { @@ -630,7 +646,7 @@ snd_pcm_sframes_t snd_pcm_plug_write_transfer(struct snd_pcm_substream *plug, st src_channels = dst_channels; plugin = next; } - return snd_pcm_plug_client_size(plug, frames); + return plug_client_size(plug, frames, true); } snd_pcm_sframes_t snd_pcm_plug_read_transfer(struct snd_pcm_substream *plug, struct snd_pcm_plugin_channel *dst_channels_final, snd_pcm_uframes_t size) @@ -640,7 +656,7 @@ snd_pcm_sframes_t snd_pcm_plug_read_transfer(struct snd_pcm_substream *plug, str snd_pcm_sframes_t frames = size; int err; - frames = snd_pcm_plug_slave_size(plug, frames); + frames = plug_slave_size(plug, frames, true); if (frames < 0) return frames; From ff7e06a55676931ab32db54f69189482a948c5ec Mon Sep 17 00:00:00 2001 From: Takashi Iwai Date: Fri, 3 Apr 2020 09:38:18 +0200 Subject: [PATCH 124/280] ALSA: pcm: 
oss: Fix regression by buffer overflow fix (again) [ This is essentially the same fix as commit ae769d355664, but it's adapted to the latest code for 5.7; hence it contains no Fixes or other tags to avoid backport confusion -- tiwai ] The recent fix for the OOB access in PCM OSS plugins (commit f2ecf903ef06: "ALSA: pcm: oss: Avoid plugin buffer overflow") caused a regression on OSS applications. The patch introduced the size check in client and slave size calculations to limit to each plugin's buffer size, but I overlooked that some code paths call those without allocating the buffer but just for estimation. This patch fixes the bug by skipping the size check for those code paths while keeping checking in the actual transfer calls. Link: https://lore.kernel.org/r/20200403073818.27943-1-tiwai@suse.de Signed-off-by: Takashi Iwai --- sound/core/oss/pcm_plugin.c | 22 ++++++++++++---------- 1 file changed, 12 insertions(+), 10 deletions(-) diff --git a/sound/core/oss/pcm_plugin.c b/sound/core/oss/pcm_plugin.c index fbda4ebf38b3..59d62f05658f 100644 --- a/sound/core/oss/pcm_plugin.c +++ b/sound/core/oss/pcm_plugin.c @@ -197,7 +197,8 @@ int snd_pcm_plugin_free(struct snd_pcm_plugin *plugin) } static snd_pcm_sframes_t calc_dst_frames(struct snd_pcm_substream *plug, - snd_pcm_sframes_t frames) + snd_pcm_sframes_t frames, + bool check_size) { struct snd_pcm_plugin *plugin, *plugin_next; @@ -209,7 +210,7 @@ static snd_pcm_sframes_t calc_dst_frames(struct snd_pcm_substream *plug, if (frames < 0) return frames; } - if (frames > plugin->buf_frames) + if (check_size && frames > plugin->buf_frames) frames = plugin->buf_frames; plugin = plugin_next; } @@ -217,13 +218,14 @@ static snd_pcm_sframes_t calc_dst_frames(struct snd_pcm_substream *plug, } static snd_pcm_sframes_t calc_src_frames(struct snd_pcm_substream *plug, - snd_pcm_sframes_t frames) + snd_pcm_sframes_t frames, + bool check_size) { struct snd_pcm_plugin *plugin, *plugin_prev; plugin = snd_pcm_plug_last(plug); while
(plugin && frames > 0) { - if (frames > plugin->buf_frames) + if (check_size && frames > plugin->buf_frames) frames = plugin->buf_frames; plugin_prev = plugin->prev; if (plugin->src_frames) { @@ -242,9 +244,9 @@ snd_pcm_sframes_t snd_pcm_plug_client_size(struct snd_pcm_substream *plug, snd_p return -ENXIO; switch (snd_pcm_plug_stream(plug)) { case SNDRV_PCM_STREAM_PLAYBACK: - return calc_src_frames(plug, drv_frames); + return calc_src_frames(plug, drv_frames, false); case SNDRV_PCM_STREAM_CAPTURE: - return calc_dst_frames(plug, drv_frames); + return calc_dst_frames(plug, drv_frames, false); default: snd_BUG(); return -EINVAL; @@ -257,9 +259,9 @@ snd_pcm_sframes_t snd_pcm_plug_slave_size(struct snd_pcm_substream *plug, snd_pc return -ENXIO; switch (snd_pcm_plug_stream(plug)) { case SNDRV_PCM_STREAM_PLAYBACK: - return calc_dst_frames(plug, clt_frames); + return calc_dst_frames(plug, clt_frames, false); case SNDRV_PCM_STREAM_CAPTURE: - return calc_src_frames(plug, clt_frames); + return calc_src_frames(plug, clt_frames, false); default: snd_BUG(); return -EINVAL; @@ -622,7 +624,7 @@ snd_pcm_sframes_t snd_pcm_plug_write_transfer(struct snd_pcm_substream *plug, st src_channels = dst_channels; plugin = next; } - return snd_pcm_plug_client_size(plug, frames); + return calc_src_frames(plug, frames, true); } snd_pcm_sframes_t snd_pcm_plug_read_transfer(struct snd_pcm_substream *plug, struct snd_pcm_plugin_channel *dst_channels_final, snd_pcm_uframes_t size) @@ -632,7 +634,7 @@ snd_pcm_sframes_t snd_pcm_plug_read_transfer(struct snd_pcm_substream *plug, str snd_pcm_sframes_t frames = size; int err; - frames = snd_pcm_plug_slave_size(plug, frames); + frames = calc_src_frames(plug, frames, true); if (frames < 0) return frames; From 73d8c94084341e2895169a0462dbc18167f01683 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Franti=C5=A1ek=20Ku=C4=8Dera?= Date: Wed, 1 Apr 2020 11:59:07 +0200 Subject: [PATCH 125/280] ALSA: usb-audio: Add Pioneer DJ DJM-250MK2 quirk MIME-Version: 1.0 
Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Pioneer DJ DJM-250MK2 is a mixer that acts like a USB sound card. The MIDI controller part is standard but the PCM part is "vendor specific". Output is enabled by this quirk: 8 channels, 48 000 Hz, S24_3LE. Input is not working. Signed-off-by: František Kučera Link: https://lore.kernel.org/r/20200401095907.3387-1-konference@frantovo.cz Signed-off-by: Takashi Iwai --- sound/usb/quirks-table.h | 42 ++++++++++++++++++++++++++++++++++++++++ 1 file changed, 42 insertions(+) diff --git a/sound/usb/quirks-table.h b/sound/usb/quirks-table.h index 1c8719292eee..e009d584e7d0 100644 --- a/sound/usb/quirks-table.h +++ b/sound/usb/quirks-table.h @@ -3592,5 +3592,47 @@ AU0828_DEVICE(0x2040, 0x7270, "Hauppauge", "HVR-950Q"), } } }, +{ + /* + * Pioneer DJ DJM-250MK2 + * PCM is 8 channels out @ 48 fixed (endpoints 0x01). + * The output from computer to the mixer is usable. + * + * The input (phono or line to computer) is not working. + * It should be at endpoint 0x82 and probably also 8 channels, + * but it seems that it works only with Pioneer proprietary software. + * Even on officially supported OS, the Audacity was unable to record + * and Mixxx to recognize the control vinyls. 
+ */ + USB_DEVICE_VENDOR_SPEC(0x2b73, 0x0017), + .driver_info = (unsigned long) &(const struct snd_usb_audio_quirk) { + .ifnum = QUIRK_ANY_INTERFACE, + .type = QUIRK_COMPOSITE, + .data = (const struct snd_usb_audio_quirk[]) { + { + .ifnum = 0, + .type = QUIRK_AUDIO_FIXED_ENDPOINT, + .data = &(const struct audioformat) { + .formats = SNDRV_PCM_FMTBIT_S24_3LE, + .channels = 8, // outputs + .iface = 0, + .altsetting = 1, + .altset_idx = 1, + .endpoint = 0x01, + .ep_attr = USB_ENDPOINT_XFER_ISOC| + USB_ENDPOINT_SYNC_ASYNC, + .rates = SNDRV_PCM_RATE_48000, + .rate_min = 48000, + .rate_max = 48000, + .nr_rates = 1, + .rate_table = (unsigned int[]) { 48000 } + } + }, + { + .ifnum = -1 + } + } + } +}, #undef USB_DEVICE_VENDOR_SPEC From 32830a0534700f86366f371b150b17f0f0d140d7 Mon Sep 17 00:00:00 2001 From: Wen Yang Date: Fri, 3 Apr 2020 17:04:08 +0800 Subject: [PATCH 126/280] ipmi: fix hung processes in __get_guid() The wait_event() function is used to detect command completion. When send_guid_cmd() returns an error, smi_send() has not been called to send data. Therefore, wait_event() should not be used on the error path, otherwise it will cause the following warning: [ 1361.588808] systemd-udevd D 0 1501 1436 0x00000004 [ 1361.588813] ffff883f4b1298c0 0000000000000000 ffff883f4b188000 ffff887f7e3d9f40 [ 1361.677952] ffff887f64bd4280 ffffc90037297a68 ffffffff8173ca3b ffffc90000000010 [ 1361.767077] 00ffc90037297ad0 ffff887f7e3d9f40 0000000000000286 ffff883f4b188000 [ 1361.856199] Call Trace: [ 1361.885578] [] ? __schedule+0x23b/0x780 [ 1361.951406] [] schedule+0x36/0x80 [ 1362.010979] [] get_guid+0x118/0x150 [ipmi_msghandler] [ 1362.091281] [] ? prepare_to_wait_event+0x100/0x100 [ 1362.168533] [] ipmi_register_smi+0x405/0x940 [ipmi_msghandler] [ 1362.258337] [] try_smi_init+0x529/0x950 [ipmi_si] [ 1362.334521] [] ? std_irq_setup+0xd0/0xd0 [ipmi_si] [ 1362.411701] [] init_ipmi_si+0x492/0x9e0 [ipmi_si] [ 1362.487917] [] ? 
ipmi_pci_probe+0x280/0x280 [ipmi_si] [ 1362.568219] [] do_one_initcall+0x50/0x180 [ 1362.636109] [] ? kmem_cache_alloc_trace+0x142/0x190 [ 1362.714330] [] do_init_module+0x5f/0x200 [ 1362.781208] [] load_module+0x1898/0x1de0 [ 1362.848069] [] ? __symbol_put+0x60/0x60 [ 1362.913886] [] ? security_kernel_post_read_file+0x6b/0x80 [ 1362.998514] [] SYSC_finit_module+0xe5/0x120 [ 1363.068463] [] ? SYSC_finit_module+0xe5/0x120 [ 1363.140513] [] SyS_finit_module+0xe/0x10 [ 1363.207364] [] do_syscall_64+0x74/0x180 Fixes: 50c812b2b951 ("[PATCH] ipmi: add full sysfs support") Signed-off-by: Wen Yang Cc: Corey Minyard Cc: Arnd Bergmann Cc: Greg Kroah-Hartman Cc: openipmi-developer@lists.sourceforge.net Cc: linux-kernel@vger.kernel.org Cc: stable@vger.kernel.org # 2.6.17- Message-Id: <20200403090408.58745-1-wenyang@linux.alibaba.com> Signed-off-by: Corey Minyard --- drivers/char/ipmi/ipmi_msghandler.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/char/ipmi/ipmi_msghandler.c b/drivers/char/ipmi/ipmi_msghandler.c index 64ba16dcb681..c48d8f086382 100644 --- a/drivers/char/ipmi/ipmi_msghandler.c +++ b/drivers/char/ipmi/ipmi_msghandler.c @@ -3193,8 +3193,8 @@ static void __get_guid(struct ipmi_smi *intf) if (rv) /* Send failed, no GUID available. */ bmc->dyn_guid_set = 0; - - wait_event(intf->waitq, bmc->dyn_guid_set != 2); + else + wait_event(intf->waitq, bmc->dyn_guid_set != 2); /* dyn_guid_set makes the guid data available. */ smp_rmb(); From 782da920e3c328bc753d4bc433bf71a49d7b272d Mon Sep 17 00:00:00 2001 From: Andrew Jeffery Date: Mon, 16 Dec 2019 12:57:40 +1030 Subject: [PATCH 127/280] dt-bindings: ipmi: aspeed: Introduce a v2 binding for KCS The v2 binding utilises reg and renames some of the v1 properties. 
Signed-off-by: Andrew Jeffery Message-Id: <8aec8994bbe1186d257b0a712e13cf914c5ebe35.1576462051.git-series.andrew@aj.id.au> Reviewed-by: Rob Herring Signed-off-by: Corey Minyard --- .../bindings/ipmi/aspeed-kcs-bmc.txt | 20 +++++++++++++------ 1 file changed, 14 insertions(+), 6 deletions(-) diff --git a/Documentation/devicetree/bindings/ipmi/aspeed-kcs-bmc.txt b/Documentation/devicetree/bindings/ipmi/aspeed-kcs-bmc.txt index d98a9bf45d6c..193e71ca96b0 100644 --- a/Documentation/devicetree/bindings/ipmi/aspeed-kcs-bmc.txt +++ b/Documentation/devicetree/bindings/ipmi/aspeed-kcs-bmc.txt @@ -1,9 +1,10 @@ -* Aspeed KCS (Keyboard Controller Style) IPMI interface +# Aspeed KCS (Keyboard Controller Style) IPMI interface The Aspeed SOCs (AST2400 and AST2500) are commonly used as BMCs (Baseboard Management Controllers) and the KCS interface can be used to perform in-band IPMI communication with their host. +## v1 Required properties: - compatible : should be one of "aspeed,ast2400-kcs-bmc" @@ -12,14 +13,21 @@ Required properties: - kcs_chan : The LPC channel number in the controller - kcs_addr : The host CPU IO map address +## v2 +Required properties: +- compatible : should be one of + "aspeed,ast2400-kcs-bmc-v2" + "aspeed,ast2500-kcs-bmc-v2" +- reg : The address and size of the IDR, ODR and STR registers +- interrupts : interrupt generated by the controller +- aspeed,lpc-io-reg : The host CPU LPC IO address for the device Example: - kcs3: kcs3@0 { - compatible = "aspeed,ast2500-kcs-bmc"; - reg = <0x0 0x80>; + kcs3: kcs@24 { + compatible = "aspeed,ast2500-kcs-bmc-v2"; + reg = <0x24 0x1>, <0x30 0x1>, <0x3c 0x1>; + aspeed,lpc-reg = <0xca2>; interrupts = <8>; - kcs_chan = <3>; - kcs_addr = <0xCA2>; status = "okay"; }; From af6432c76a13ef510bb3f1ef5917ed80c808192b Mon Sep 17 00:00:00 2001 From: Andrew Jeffery Date: Mon, 16 Dec 2019 12:57:41 +1030 Subject: [PATCH 128/280] ipmi: kcs: Finish configuring ASPEED KCS device before enable The interrupts were configured after the 
channel was enabled. Configure them beforehand so they will work. Signed-off-by: Andrew Jeffery Reviewed-by: Joel Stanley Reviewed-by: Haiyue Wang Message-Id: Signed-off-by: Corey Minyard --- drivers/char/ipmi/kcs_bmc_aspeed.c | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/drivers/char/ipmi/kcs_bmc_aspeed.c b/drivers/char/ipmi/kcs_bmc_aspeed.c index 3c955946e647..e3dd09022589 100644 --- a/drivers/char/ipmi/kcs_bmc_aspeed.c +++ b/drivers/char/ipmi/kcs_bmc_aspeed.c @@ -268,13 +268,14 @@ static int aspeed_kcs_probe(struct platform_device *pdev) kcs_bmc->io_inputb = aspeed_kcs_inb; kcs_bmc->io_outputb = aspeed_kcs_outb; + rc = aspeed_kcs_config_irq(kcs_bmc, pdev); + if (rc) + return rc; + dev_set_drvdata(dev, kcs_bmc); aspeed_kcs_set_address(kcs_bmc, addr); aspeed_kcs_enable_channel(kcs_bmc, true); - rc = aspeed_kcs_config_irq(kcs_bmc, pdev); - if (rc) - return rc; rc = misc_register(&kcs_bmc->miscdev); if (rc) { From 09f5f680707e4758ad948c362859f9598ca1275c Mon Sep 17 00:00:00 2001 From: Andrew Jeffery Date: Mon, 16 Dec 2019 12:57:42 +1030 Subject: [PATCH 129/280] ipmi: kcs: aspeed: Implement v2 bindings The v2 bindings allow us to extract the resources from the devicetree. The table in the driver is retained to derive the channel index, which removes the need for kcs_chan property from the v1 bindings. The v2 bindings allow us to reduce the number of warnings generated by the existing devicetree nodes. 
Signed-off-by: Andrew Jeffery Reviewed-by: Joel Stanley Reviewed-by: Haiyue Wang Message-Id: <01ef3787e9ddaa9d87cfd55a2ac793053b5a69de.1576462051.git-series.andrew@aj.id.au> Signed-off-by: Corey Minyard --- drivers/char/ipmi/kcs_bmc_aspeed.c | 154 +++++++++++++++++++++++------ 1 file changed, 126 insertions(+), 28 deletions(-) diff --git a/drivers/char/ipmi/kcs_bmc_aspeed.c b/drivers/char/ipmi/kcs_bmc_aspeed.c index e3dd09022589..9422d55a0476 100644 --- a/drivers/char/ipmi/kcs_bmc_aspeed.c +++ b/drivers/char/ipmi/kcs_bmc_aspeed.c @@ -12,6 +12,7 @@ #include #include #include +#include #include #include #include @@ -233,38 +234,133 @@ static const struct kcs_ioreg ast_kcs_bmc_ioregs[KCS_CHANNEL_MAX] = { { .idr = LPC_IDR4, .odr = LPC_ODR4, .str = LPC_STR4 }, }; +static struct kcs_bmc *aspeed_kcs_probe_of_v1(struct platform_device *pdev) +{ + struct aspeed_kcs_bmc *priv; + struct device_node *np; + struct kcs_bmc *kcs; + u32 channel; + u32 slave; + int rc; + + np = pdev->dev.of_node; + + rc = of_property_read_u32(np, "kcs_chan", &channel); + if ((rc != 0) || (channel == 0 || channel > KCS_CHANNEL_MAX)) { + dev_err(&pdev->dev, "no valid 'kcs_chan' configured\n"); + return ERR_PTR(-EINVAL); + } + + kcs = kcs_bmc_alloc(&pdev->dev, sizeof(struct aspeed_kcs_bmc), channel); + if (!kcs) + return ERR_PTR(-ENOMEM); + + priv = kcs_bmc_priv(kcs); + priv->map = syscon_node_to_regmap(pdev->dev.parent->of_node); + if (IS_ERR(priv->map)) { + dev_err(&pdev->dev, "Couldn't get regmap\n"); + return ERR_PTR(-ENODEV); + } + + rc = of_property_read_u32(np, "kcs_addr", &slave); + if (rc) { + dev_err(&pdev->dev, "no valid 'kcs_addr' configured\n"); + return ERR_PTR(-EINVAL); + } + + kcs->ioreg = ast_kcs_bmc_ioregs[channel - 1]; + aspeed_kcs_set_address(kcs, slave); + + return 0; +} + +static int aspeed_kcs_calculate_channel(const struct kcs_ioreg *regs) +{ + int i; + + for (i = 0; i < ARRAY_SIZE(ast_kcs_bmc_ioregs); i++) { + if (!memcmp(&ast_kcs_bmc_ioregs[i], regs, sizeof(*regs))) + return 
i + 1; + } + + return -EINVAL; +} + +static struct kcs_bmc *aspeed_kcs_probe_of_v2(struct platform_device *pdev) +{ + struct aspeed_kcs_bmc *priv; + struct device_node *np; + struct kcs_ioreg ioreg; + struct kcs_bmc *kcs; + const __be32 *reg; + int channel; + u32 slave; + int rc; + + np = pdev->dev.of_node; + + /* Don't translate addresses, we want offsets for the regmaps */ + reg = of_get_address(np, 0, NULL, NULL); + if (!reg) + return ERR_PTR(-EINVAL); + ioreg.idr = be32_to_cpup(reg); + + reg = of_get_address(np, 1, NULL, NULL); + if (!reg) + return ERR_PTR(-EINVAL); + ioreg.odr = be32_to_cpup(reg); + + reg = of_get_address(np, 2, NULL, NULL); + if (!reg) + return ERR_PTR(-EINVAL); + ioreg.str = be32_to_cpup(reg); + + channel = aspeed_kcs_calculate_channel(&ioreg); + if (channel < 0) + return ERR_PTR(channel); + + kcs = kcs_bmc_alloc(&pdev->dev, sizeof(struct aspeed_kcs_bmc), channel); + if (!kcs) + return ERR_PTR(-ENOMEM); + + kcs->ioreg = ioreg; + + priv = kcs_bmc_priv(kcs); + priv->map = syscon_node_to_regmap(pdev->dev.parent->of_node); + if (IS_ERR(priv->map)) { + dev_err(&pdev->dev, "Couldn't get regmap\n"); + return ERR_PTR(-ENODEV); + } + + rc = of_property_read_u32(np, "aspeed,lpc-io-reg", &slave); + if (rc) + return ERR_PTR(rc); + + aspeed_kcs_set_address(kcs, slave); + + return kcs; +} + static int aspeed_kcs_probe(struct platform_device *pdev) { struct device *dev = &pdev->dev; - struct aspeed_kcs_bmc *priv; struct kcs_bmc *kcs_bmc; - u32 chan, addr; + struct device_node *np; int rc; - rc = of_property_read_u32(dev->of_node, "kcs_chan", &chan); - if ((rc != 0) || (chan == 0 || chan > KCS_CHANNEL_MAX)) { - dev_err(dev, "no valid 'kcs_chan' configured\n"); - return -ENODEV; - } + np = pdev->dev.of_node; + if (of_device_is_compatible(np, "aspeed,ast2400-kcs-bmc") || + of_device_is_compatible(np, "aspeed,ast2500-kcs-bmc")) + kcs_bmc = aspeed_kcs_probe_of_v1(pdev); + else if (of_device_is_compatible(np, "aspeed,ast2400-kcs-bmc-v2") || + 
of_device_is_compatible(np, "aspeed,ast2500-kcs-bmc-v2")) + kcs_bmc = aspeed_kcs_probe_of_v2(pdev); + else + return -EINVAL; - rc = of_property_read_u32(dev->of_node, "kcs_addr", &addr); - if (rc) { - dev_err(dev, "no valid 'kcs_addr' configured\n"); - return -ENODEV; - } + if (IS_ERR(kcs_bmc)) + return PTR_ERR(kcs_bmc); - kcs_bmc = kcs_bmc_alloc(dev, sizeof(*priv), chan); - if (!kcs_bmc) - return -ENOMEM; - - priv = kcs_bmc_priv(kcs_bmc); - priv->map = syscon_node_to_regmap(dev->parent->of_node); - if (IS_ERR(priv->map)) { - dev_err(dev, "Couldn't get regmap\n"); - return -ENODEV; - } - - kcs_bmc->ioreg = ast_kcs_bmc_ioregs[chan - 1]; kcs_bmc->io_inputb = aspeed_kcs_inb; kcs_bmc->io_outputb = aspeed_kcs_outb; @@ -274,7 +370,6 @@ static int aspeed_kcs_probe(struct platform_device *pdev) dev_set_drvdata(dev, kcs_bmc); - aspeed_kcs_set_address(kcs_bmc, addr); aspeed_kcs_enable_channel(kcs_bmc, true); rc = misc_register(&kcs_bmc->miscdev); @@ -283,9 +378,10 @@ static int aspeed_kcs_probe(struct platform_device *pdev) return rc; } - pr_info("channel=%u addr=0x%x idr=0x%x odr=0x%x str=0x%x\n", - chan, addr, - kcs_bmc->ioreg.idr, kcs_bmc->ioreg.odr, kcs_bmc->ioreg.str); + dev_dbg(&pdev->dev, + "Probed KCS device %d (IDR=0x%x, ODR=0x%x, STR=0x%x)\n", + kcs_bmc->channel, kcs_bmc->ioreg.idr, kcs_bmc->ioreg.odr, + kcs_bmc->ioreg.str); return 0; } @@ -302,6 +398,8 @@ static int aspeed_kcs_remove(struct platform_device *pdev) static const struct of_device_id ast_kcs_bmc_match[] = { { .compatible = "aspeed,ast2400-kcs-bmc" }, { .compatible = "aspeed,ast2500-kcs-bmc" }, + { .compatible = "aspeed,ast2400-kcs-bmc-v2" }, + { .compatible = "aspeed,ast2500-kcs-bmc-v2" }, { } }; MODULE_DEVICE_TABLE(of, ast_kcs_bmc_match); From 904f353d0e508fb4b3a3f902a02b0a028cda33a6 Mon Sep 17 00:00:00 2001 From: Hans de Goede Date: Thu, 2 Apr 2020 20:49:48 +0200 Subject: [PATCH 130/280] ASoC: SOF: Turn "firmware boot complete" message into a dbg message Using a Canon Lake machine with the SOF driver 
causes dmesg to fill up with a ton of these messages: [ 275.902194] sof-audio-pci 0000:00:1f.3: firmware boot complete [ 351.529358] sof-audio-pci 0000:00:1f.3: firmware boot complete [ 560.049047] sof-audio-pci 0000:00:1f.3: firmware boot complete etc. Since the DSP is powered down when not in use, this happens every time e.g. a notification plays, polluting dmesg. Turn this message into a debug message, matching what the code already does for the "booting DSP firmware" message. Signed-off-by: Hans de Goede Acked-by: Pierre-Louis Bossart Link: https://lore.kernel.org/r/20200402184948.3014-2-hdegoede@redhat.com Signed-off-by: Mark Brown --- sound/soc/sof/loader.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sound/soc/sof/loader.c b/sound/soc/sof/loader.c index 1f2e0be812bd..64af08293daa 100644 --- a/sound/soc/sof/loader.c +++ b/sound/soc/sof/loader.c @@ -597,7 +597,7 @@ int snd_sof_run_firmware(struct snd_sof_dev *sdev) } if (sdev->fw_state == SOF_FW_BOOT_COMPLETE) - dev_info(sdev->dev, "firmware boot complete\n"); + dev_dbg(sdev->dev, "firmware boot complete\n"); else return -EIO; /* FW boots but fw_ready op failed */ From 81630dc042af998b9f58cd8e2c29dab9777ea176 Mon Sep 17 00:00:00 2001 From: Hans de Goede Date: Thu, 2 Apr 2020 20:53:57 +0200 Subject: [PATCH 131/280] ASoC: Intel: atom: Take the drv->lock mutex before calling sst_send_slot_map() sst_send_slot_map() uses sst_fill_and_send_cmd_unlocked() because in some places it is called with the drv->lock mutex already held. So it must always be called with the mutex locked. This commit adds missing locking in the sst_set_be_modules() code-path.
Fixes: 24c8d14192cc ("ASoC: Intel: mrfld: add DSP core controls") Signed-off-by: Hans de Goede Acked-by: Pierre-Louis Bossart Link: https://lore.kernel.org/r/20200402185359.3424-1-hdegoede@redhat.com Signed-off-by: Mark Brown --- sound/soc/intel/atom/sst-atom-controls.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/sound/soc/intel/atom/sst-atom-controls.c b/sound/soc/intel/atom/sst-atom-controls.c index f883c9340eee..df8f7994d3b7 100644 --- a/sound/soc/intel/atom/sst-atom-controls.c +++ b/sound/soc/intel/atom/sst-atom-controls.c @@ -966,7 +966,9 @@ static int sst_set_be_modules(struct snd_soc_dapm_widget *w, dev_dbg(c->dev, "Enter: widget=%s\n", w->name); if (SND_SOC_DAPM_EVENT_ON(event)) { + mutex_lock(&drv->lock); ret = sst_send_slot_map(drv); + mutex_unlock(&drv->lock); if (ret) return ret; ret = sst_send_pipe_module_params(w, k); From 0bb2be2d1b78f18ae68633b89ad49d84e0cb9bf6 Mon Sep 17 00:00:00 2001 From: Hans de Goede Date: Thu, 2 Apr 2020 20:53:58 +0200 Subject: [PATCH 132/280] ASoC: Intel: atom: Check drv->lock is locked in sst_fill_and_send_cmd_unlocked sst_fill_and_send_cmd_unlocked must be called with the drv->lock mutex locked already. In the past there have been cases where this was not the case, add a WARN_ON to check for drv->lock being locked. 
Signed-off-by: Hans de Goede Acked-by: Pierre-Louis Bossart Link: https://lore.kernel.org/r/20200402185359.3424-2-hdegoede@redhat.com Signed-off-by: Mark Brown --- sound/soc/intel/atom/sst-atom-controls.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/sound/soc/intel/atom/sst-atom-controls.c b/sound/soc/intel/atom/sst-atom-controls.c index df8f7994d3b7..69f3af4524ab 100644 --- a/sound/soc/intel/atom/sst-atom-controls.c +++ b/sound/soc/intel/atom/sst-atom-controls.c @@ -50,6 +50,8 @@ static int sst_fill_and_send_cmd_unlocked(struct sst_data *drv, { int ret = 0; + WARN_ON(!mutex_is_locked(&drv->lock)); + ret = sst_fill_byte_control(drv, ipc_msg, block, task_id, pipe_id, len, cmd_data); if (ret < 0) From c515291d312760ff0ad1d4431f0fb29be5d0ef45 Mon Sep 17 00:00:00 2001 From: Hans de Goede Date: Thu, 2 Apr 2020 20:53:59 +0200 Subject: [PATCH 133/280] ASoC: Intel: atom: Fix uninitialized variable compiler warning GCC 10 gives a "variable might be used uninitialized" warning for the block variable in sst_prepare_and_post_msg(). This is a false-positive warning, but let's fix it anyway.
Signed-off-by: Hans de Goede Acked-by: Pierre-Louis Bossart Link: https://lore.kernel.org/r/20200402185359.3424-3-hdegoede@redhat.com Signed-off-by: Mark Brown --- sound/soc/intel/atom/sst/sst_pvt.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sound/soc/intel/atom/sst/sst_pvt.c b/sound/soc/intel/atom/sst/sst_pvt.c index 13db2854db3e..053c27707147 100644 --- a/sound/soc/intel/atom/sst/sst_pvt.c +++ b/sound/soc/intel/atom/sst/sst_pvt.c @@ -223,9 +223,9 @@ int sst_prepare_and_post_msg(struct intel_sst_drv *sst, size_t mbox_data_len, const void *mbox_data, void **data, bool large, bool fill_dsp, bool sync, bool response) { + struct sst_block *block = NULL; struct ipc_post *msg = NULL; struct ipc_dsp_hdr dsp_hdr; - struct sst_block *block; int ret = 0, pvt_id; pvt_id = sst_assign_pvt_id(sst); From a6ba632d2c249a4390289727c07b8b55eb02a41d Mon Sep 17 00:00:00 2001 From: Jens Axboe Date: Fri, 3 Apr 2020 11:10:14 -0600 Subject: [PATCH 134/280] io_uring: retry poll if we got woken with non-matching mask If we get woken and the poll doesn't match our mask, re-add the task to the poll waitqueue and try again instead of completing the request with a mask of 0. 
Reported-by: Dan Melnic Signed-off-by: Jens Axboe --- fs/io_uring.c | 12 ++++++++++++ 1 file changed, 12 insertions(+) diff --git a/fs/io_uring.c b/fs/io_uring.c index 10645077d6b4..8ad4a151994d 100644 --- a/fs/io_uring.c +++ b/fs/io_uring.c @@ -4412,8 +4412,20 @@ static void io_poll_complete(struct io_kiocb *req, __poll_t mask, int error) static void io_poll_task_handler(struct io_kiocb *req, struct io_kiocb **nxt) { struct io_ring_ctx *ctx = req->ctx; + struct io_poll_iocb *poll = &req->poll; + + if (!req->result && !READ_ONCE(poll->canceled)) { + struct poll_table_struct pt = { ._key = poll->events }; + + req->result = vfs_poll(req->file, &pt) & poll->events; + } spin_lock_irq(&ctx->completion_lock); + if (!req->result && !READ_ONCE(poll->canceled)) { + add_wait_queue(poll->head, &poll->wait); + spin_unlock_irq(&ctx->completion_lock); + return; + } hash_del(&req->hash_node); io_poll_complete(req, req->result, 0); req->flags |= REQ_F_COMP_LOCKED; From 3537b6a7c65434d0d2cc0c9862e69be11c367fdc Mon Sep 17 00:00:00 2001 From: Jens Axboe Date: Fri, 3 Apr 2020 11:19:06 -0600 Subject: [PATCH 135/280] io_uring: grab task reference for poll requests We can have a task exit if it's not the owner of the ring. Be safe and grab an actual reference to it, to avoid a potential use-after-free. 
Reported-by: Dan Melnic Signed-off-by: Jens Axboe --- fs/io_uring.c | 19 +++++++------------ 1 file changed, 7 insertions(+), 12 deletions(-) diff --git a/fs/io_uring.c b/fs/io_uring.c index 8ad4a151994d..b343525a4d2e 100644 --- a/fs/io_uring.c +++ b/fs/io_uring.c @@ -615,10 +615,8 @@ struct io_kiocb { struct list_head list; unsigned int flags; refcount_t refs; - union { - struct task_struct *task; - unsigned long fsize; - }; + struct task_struct *task; + unsigned long fsize; u64 user_data; u32 result; u32 sequence; @@ -1336,6 +1334,7 @@ static struct io_kiocb *io_get_req(struct io_ring_ctx *ctx, req->flags = 0; /* one is dropped after submission, the other at completion */ refcount_set(&req->refs, 2); + req->task = NULL; req->result = 0; INIT_IO_WORK(&req->work, io_wq_submit_work); return req; @@ -1372,6 +1371,8 @@ static void __io_req_aux_free(struct io_kiocb *req) kfree(req->io); if (req->file) io_put_file(req, req->file, (req->flags & REQ_F_FIXED_FILE)); + if (req->task) + put_task_struct(req->task); io_req_work_drop_env(req); } @@ -4256,10 +4257,7 @@ static bool io_arm_poll_handler(struct io_kiocb *req) req->flags |= REQ_F_POLLED; memcpy(&apoll->work, &req->work, sizeof(req->work)); - /* - * Don't need a reference here, as we're adding it to the task - * task_works list. If the task exits, the list is pruned. - */ + get_task_struct(current); req->task = current; req->apoll = apoll; INIT_HLIST_NODE(&req->hash_node); @@ -4482,10 +4480,7 @@ static int io_poll_add_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe events = READ_ONCE(sqe->poll_events); poll->events = demangle_poll(events) | EPOLLERR | EPOLLHUP; - /* - * Don't need a reference here, as we're adding it to the task - * task_works list. If the task exits, the list is pruned. 
- */ + get_task_struct(current); req->task = current; return 0; } From aa96bf8a9ee33457b7e3ea43e97dfa1e3a15ab20 Mon Sep 17 00:00:00 2001 From: Jens Axboe Date: Fri, 3 Apr 2020 11:26:26 -0600 Subject: [PATCH 136/280] io_uring: use io-wq manager as backup task if task is exiting If the original task is (or has) exited, then the task work will not get queued properly. Allow for using the io-wq manager task to queue this work for execution, and ensure that the io-wq manager notices and runs this work if woken up (or exiting). Reported-by: Dan Melnic Signed-off-by: Jens Axboe --- fs/io-wq.c | 12 ++++++++++++ fs/io-wq.h | 2 ++ fs/io_uring.c | 13 +++++++++---- 3 files changed, 23 insertions(+), 4 deletions(-) diff --git a/fs/io-wq.c b/fs/io-wq.c index cc5cf2209fb0..4023c9846860 100644 --- a/fs/io-wq.c +++ b/fs/io-wq.c @@ -17,6 +17,7 @@ #include #include #include +#include #include "io-wq.h" @@ -716,6 +717,9 @@ static int io_wq_manager(void *data) complete(&wq->done); while (!kthread_should_stop()) { + if (current->task_works) + task_work_run(); + for_each_node(node) { struct io_wqe *wqe = wq->wqes[node]; bool fork_worker[2] = { false, false }; @@ -738,6 +742,9 @@ static int io_wq_manager(void *data) schedule_timeout(HZ); } + if (current->task_works) + task_work_run(); + return 0; err: set_bit(IO_WQ_BIT_ERROR, &wq->state); @@ -1124,3 +1131,8 @@ void io_wq_destroy(struct io_wq *wq) if (refcount_dec_and_test(&wq->use_refs)) __io_wq_destroy(wq); } + +struct task_struct *io_wq_get_task(struct io_wq *wq) +{ + return wq->manager; +} diff --git a/fs/io-wq.h b/fs/io-wq.h index 3ee7356d6be5..5ba12de7572f 100644 --- a/fs/io-wq.h +++ b/fs/io-wq.h @@ -136,6 +136,8 @@ typedef bool (work_cancel_fn)(struct io_wq_work *, void *); enum io_wq_cancel io_wq_cancel_cb(struct io_wq *wq, work_cancel_fn *cancel, void *data); +struct task_struct *io_wq_get_task(struct io_wq *wq); + #if defined(CONFIG_IO_WQ) extern void io_wq_worker_sleeping(struct task_struct *); extern void 
io_wq_worker_running(struct task_struct *); diff --git a/fs/io_uring.c b/fs/io_uring.c index b343525a4d2e..2460c3333f70 100644 --- a/fs/io_uring.c +++ b/fs/io_uring.c @@ -4120,6 +4120,7 @@ static int __io_async_wake(struct io_kiocb *req, struct io_poll_iocb *poll, __poll_t mask, task_work_func_t func) { struct task_struct *tsk; + int ret; /* for instances that support it check for an event match first: */ if (mask && !(mask & poll->events)) @@ -4133,11 +4134,15 @@ static int __io_async_wake(struct io_kiocb *req, struct io_poll_iocb *poll, req->result = mask; init_task_work(&req->task_work, func); /* - * If this fails, then the task is exiting. If that is the case, then - * the exit check will ultimately cancel these work items. Hence we - * don't need to check here and handle it specifically. + * If this fails, then the task is exiting. Punt to one of the io-wq + * threads to ensure the work gets run, we can't always rely on exit + * cancelation taking care of this. */ - task_work_add(tsk, &req->task_work, true); + ret = task_work_add(tsk, &req->task_work, true); + if (unlikely(ret)) { + tsk = io_wq_get_task(req->ctx->io_wq); + task_work_add(tsk, &req->task_work, true); + } wake_up_process(tsk); return 1; } From 562bf7705885855ed8ed5483895ddff509acae7c Mon Sep 17 00:00:00 2001 From: Jules Irenge Date: Fri, 3 Apr 2020 17:05:05 +0100 Subject: [PATCH 137/280] ipmi: Add missing annotation for ipmi_ssif_lock_cond() and ipmi_ssif_unlock_cond() Sparse reports a warning at ipmi_ssif_unlock_cond() and ipmi_ssif_lock_cond() warning: context imbalance in ipmi_ssif_lock_cond() - wrong count at exit warning: context imbalance in ipmi_ssif_unlock_cond() - unexpected unlock The root cause is the missing annotation at ipmi_ssif_unlock_cond() and ipmi_ssif_lock_cond() Add the missing __acquires(&ssif_info->lock) Add the missing __releases(&ssif_info->lock) Signed-off-by: Jules Irenge Message-Id: <20200403160505.2832-6-jbi.octave@gmail.com> Signed-off-by: Corey Minyard ---
drivers/char/ipmi/ipmi_ssif.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/char/ipmi/ipmi_ssif.c b/drivers/char/ipmi/ipmi_ssif.c index 8ac390c2b514..b7145f370d3b 100644 --- a/drivers/char/ipmi/ipmi_ssif.c +++ b/drivers/char/ipmi/ipmi_ssif.c @@ -313,6 +313,7 @@ static int start_send(struct ssif_info *ssif_info, static unsigned long *ipmi_ssif_lock_cond(struct ssif_info *ssif_info, unsigned long *flags) + __acquires(&ssif_info->lock) { spin_lock_irqsave(&ssif_info->lock, *flags); return flags; @@ -320,6 +321,7 @@ static unsigned long *ipmi_ssif_lock_cond(struct ssif_info *ssif_info, static void ipmi_ssif_unlock_cond(struct ssif_info *ssif_info, unsigned long *flags) + __releases(&ssif_info->lock) { spin_unlock_irqrestore(&ssif_info->lock, *flags); } From 437fb760d046340d0dee3b4307e1cf4578fd8ca8 Mon Sep 17 00:00:00 2001 From: Colin Ian King Date: Thu, 2 Apr 2020 12:08:57 +0100 Subject: [PATCH 138/280] pwm: meson: Remove redundant assignment to variable fin_freq MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The variable fin_freq is being initialized with a value that is never read and it is being updated later with a new value. The initialization is redundant and can be removed. 
Addresses-Coverity: ("Unused value") Signed-off-by: Colin Ian King Acked-by: Uwe Kleine-König Signed-off-by: Thierry Reding --- drivers/pwm/pwm-meson.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/pwm/pwm-meson.c b/drivers/pwm/pwm-meson.c index 8cf9129caa39..bd0d7336b898 100644 --- a/drivers/pwm/pwm-meson.c +++ b/drivers/pwm/pwm-meson.c @@ -163,7 +163,7 @@ static int meson_pwm_calc(struct meson_pwm *meson, struct pwm_device *pwm, { struct meson_pwm_channel *channel = pwm_get_chip_data(pwm); unsigned int duty, period, pre_div, cnt, duty_cnt; - unsigned long fin_freq = -1; + unsigned long fin_freq; duty = state->duty_cycle; period = state->period; From 374c1104eb72b9570a52360274c3edbbd0e89ed0 Mon Sep 17 00:00:00 2001 From: Jason Yan Date: Thu, 2 Apr 2020 14:57:18 +0800 Subject: [PATCH 139/280] pwm: Make pwm_apply_state_debug() static MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Fix the following gcc warning: drivers/pwm/core.c:467:6: warning: symbol 'pwm_apply_state_debug' was not declared. Should it be static? 
Reported-by: Hulk Robot Signed-off-by: Jason Yan Acked-by: Uwe Kleine-König Signed-off-by: Thierry Reding --- drivers/pwm/core.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/pwm/core.c b/drivers/pwm/core.c index e9b9283cff28..9973c442b455 100644 --- a/drivers/pwm/core.c +++ b/drivers/pwm/core.c @@ -464,8 +464,8 @@ void pwm_free(struct pwm_device *pwm) } EXPORT_SYMBOL_GPL(pwm_free); -void pwm_apply_state_debug(struct pwm_device *pwm, - const struct pwm_state *state) +static void pwm_apply_state_debug(struct pwm_device *pwm, + const struct pwm_state *state) { struct pwm_state *last = &pwm->last; struct pwm_chip *chip = pwm->chip; From 9cc5f232a4b6a0ef6e9b57876d61b88f61bdd7c2 Mon Sep 17 00:00:00 2001 From: Sven Van Asbroeck Date: Wed, 1 Apr 2020 19:01:06 +0200 Subject: [PATCH 140/280] pwm: pca9685: Fix PWM/GPIO inter-operation MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This driver allows pwms to be requested as gpios via gpiolib. Obviously, it should not be allowed to request a GPIO when its corresponding PWM is already requested (and vice versa). So it requires some exclusion code. Given that the PWM and GPIO cores are not synchronized with respect to each other, this exclusion code will also require proper synchronization. Such a mechanism was in place, but was inadvertently removed by Uwe's clean-up in commit e926b12c611c ("pwm: Clear chip_data in pwm_put()"). Upon revisiting the synchronization mechanism, we found that theoretically, it could allow two threads to successfully request conflicting PWMs/GPIOs. Replace with a bitmap which tracks PWMs in-use, plus a mutex. As long as PWM and GPIO's respective request/free functions modify the in-use bitmap while holding the mutex, proper synchronization will be guaranteed. 
Reported-by: YueHaibing Fixes: e926b12c611c ("pwm: Clear chip_data in pwm_put()") Cc: Mika Westerberg Cc: Uwe Kleine-König Cc: YueHaibing Link: https://lkml.org/lkml/2019/5/31/963 Signed-off-by: Sven Van Asbroeck Reviewed-by: Mika Westerberg [cg: Tested on an i.MX6Q board with two NXP PCA9685 chips] Tested-by: Clemens Gruber Reviewed-by: Sven Van Asbroeck # cg's rebase Link: https://lore.kernel.org/lkml/20200330160238.GD2817345@ulmo/ Signed-off-by: Thierry Reding --- drivers/pwm/pwm-pca9685.c | 93 ++++++++++++++++++++++----------------- 1 file changed, 52 insertions(+), 41 deletions(-) diff --git a/drivers/pwm/pwm-pca9685.c b/drivers/pwm/pwm-pca9685.c index 20bdc59a0cbb..76cd22bd6614 100644 --- a/drivers/pwm/pwm-pca9685.c +++ b/drivers/pwm/pwm-pca9685.c @@ -20,6 +20,7 @@ #include #include #include +#include /* * Because the PCA9685 has only one prescaler per chip, changing the period of @@ -73,6 +74,7 @@ struct pca9685 { #if IS_ENABLED(CONFIG_GPIOLIB) struct mutex lock; struct gpio_chip gpio; + DECLARE_BITMAP(pwms_inuse, PCA9685_MAXCHAN + 1); #endif }; @@ -82,53 +84,53 @@ static inline struct pca9685 *to_pca(struct pwm_chip *chip) } #if IS_ENABLED(CONFIG_GPIOLIB) +static bool pca9685_pwm_test_and_set_inuse(struct pca9685 *pca, int pwm_idx) +{ + bool is_inuse; + + mutex_lock(&pca->lock); + if (pwm_idx >= PCA9685_MAXCHAN) { + /* + * "all LEDs" channel: + * pretend already in use if any of the PWMs are requested + */ + if (!bitmap_empty(pca->pwms_inuse, PCA9685_MAXCHAN)) { + is_inuse = true; + goto out; + } + } else { + /* + * regular channel: + * pretend already in use if the "all LEDs" channel is requested + */ + if (test_bit(PCA9685_MAXCHAN, pca->pwms_inuse)) { + is_inuse = true; + goto out; + } + } + is_inuse = test_and_set_bit(pwm_idx, pca->pwms_inuse); +out: + mutex_unlock(&pca->lock); + return is_inuse; +} + +static void pca9685_pwm_clear_inuse(struct pca9685 *pca, int pwm_idx) +{ + mutex_lock(&pca->lock); + clear_bit(pwm_idx, pca->pwms_inuse); + 
mutex_unlock(&pca->lock); +} + static int pca9685_pwm_gpio_request(struct gpio_chip *gpio, unsigned int offset) { struct pca9685 *pca = gpiochip_get_data(gpio); - struct pwm_device *pwm; - mutex_lock(&pca->lock); - - pwm = &pca->chip.pwms[offset]; - - if (pwm->flags & (PWMF_REQUESTED | PWMF_EXPORTED)) { - mutex_unlock(&pca->lock); + if (pca9685_pwm_test_and_set_inuse(pca, offset)) return -EBUSY; - } - - pwm_set_chip_data(pwm, (void *)1); - - mutex_unlock(&pca->lock); pm_runtime_get_sync(pca->chip.dev); return 0; } -static bool pca9685_pwm_is_gpio(struct pca9685 *pca, struct pwm_device *pwm) -{ - bool is_gpio = false; - - mutex_lock(&pca->lock); - - if (pwm->hwpwm >= PCA9685_MAXCHAN) { - unsigned int i; - - /* - * Check if any of the GPIOs are requested and in that case - * prevent using the "all LEDs" channel. - */ - for (i = 0; i < pca->gpio.ngpio; i++) - if (gpiochip_is_requested(&pca->gpio, i)) { - is_gpio = true; - break; - } - } else if (pwm_get_chip_data(pwm)) { - is_gpio = true; - } - - mutex_unlock(&pca->lock); - return is_gpio; -} - static int pca9685_pwm_gpio_get(struct gpio_chip *gpio, unsigned int offset) { struct pca9685 *pca = gpiochip_get_data(gpio); @@ -161,6 +163,7 @@ static void pca9685_pwm_gpio_free(struct gpio_chip *gpio, unsigned int offset) pca9685_pwm_gpio_set(gpio, offset, 0); pm_runtime_put(pca->chip.dev); + pca9685_pwm_clear_inuse(pca, offset); } static int pca9685_pwm_gpio_get_direction(struct gpio_chip *chip, @@ -212,12 +215,17 @@ static int pca9685_pwm_gpio_probe(struct pca9685 *pca) return devm_gpiochip_add_data(dev, &pca->gpio, pca); } #else -static inline bool pca9685_pwm_is_gpio(struct pca9685 *pca, - struct pwm_device *pwm) +static inline bool pca9685_pwm_test_and_set_inuse(struct pca9685 *pca, + int pwm_idx) { return false; } +static inline void +pca9685_pwm_clear_inuse(struct pca9685 *pca, int pwm_idx) +{ +} + static inline int pca9685_pwm_gpio_probe(struct pca9685 *pca) { return 0; @@ -399,7 +407,7 @@ static int 
pca9685_pwm_request(struct pwm_chip *chip, struct pwm_device *pwm) { struct pca9685 *pca = to_pca(chip); - if (pca9685_pwm_is_gpio(pca, pwm)) + if (pca9685_pwm_test_and_set_inuse(pca, pwm->hwpwm)) return -EBUSY; pm_runtime_get_sync(chip->dev); @@ -408,8 +416,11 @@ static int pca9685_pwm_request(struct pwm_chip *chip, struct pwm_device *pwm) static void pca9685_pwm_free(struct pwm_chip *chip, struct pwm_device *pwm) { + struct pca9685 *pca = to_pca(chip); + pca9685_pwm_disable(chip, pwm); pm_runtime_put(chip->dev); + pca9685_pwm_clear_inuse(pca, pwm->hwpwm); } static const struct pwm_ops pca9685_pwm_ops = { From 8cd55087dc45b2e1a73ed2a197cbf405f32deb08 Mon Sep 17 00:00:00 2001 From: Evan Green Date: Fri, 3 Apr 2020 16:43:03 +0200 Subject: [PATCH 141/280] loop: Report EOPNOTSUPP properly Properly plumb out EOPNOTSUPP from loop driver operations, which may get returned when for instance a discard operation is attempted but not supported by the underlying block device. Before this change, everything was reported in the log as an I/O error, which is scary and not helpful in debugging. Signed-off-by: Evan Green Reviewed-by: Gwendal Grignou Reviewed-by: Bart Van Assche Signed-off-by: Andrzej Pietrasiewicz Reviewed-by: Christoph Hellwig Signed-off-by: Jens Axboe --- drivers/block/loop.c | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/drivers/block/loop.c b/drivers/block/loop.c index a42c49e04954..04cbe951862d 100644 --- a/drivers/block/loop.c +++ b/drivers/block/loop.c @@ -463,7 +463,7 @@ static void lo_complete_rq(struct request *rq) if (!cmd->use_aio || cmd->ret < 0 || cmd->ret == blk_rq_bytes(rq) || req_op(rq) != REQ_OP_READ) { if (cmd->ret < 0) - ret = BLK_STS_IOERR; + ret = errno_to_blk_status(cmd->ret); goto end_io; } @@ -1955,7 +1955,10 @@ static void loop_handle_cmd(struct loop_cmd *cmd) failed: /* complete non-aio request */ if (!cmd->use_aio || ret) { - cmd->ret = ret ? 
-EIO : 0; + if (ret == -EOPNOTSUPP) + cmd->ret = ret; + else + cmd->ret = ret ? -EIO : 0; blk_mq_complete_request(rq); } } From c52abf563049e787c1341cdf15c7dbe1bfbc951b Mon Sep 17 00:00:00 2001 From: Evan Green Date: Fri, 3 Apr 2020 16:43:04 +0200 Subject: [PATCH 142/280] loop: Better discard support for block devices If the backing device for a loop device is itself a block device, then mirror the "write zeroes" capabilities of the underlying block device into the loop device. Copy this capability into both max_write_zeroes_sectors and max_discard_sectors of the loop device. The reason for this is that REQ_OP_DISCARD on a loop device translates into blkdev_issue_zeroout(), rather than blkdev_issue_discard(). This presents a consistent interface for loop devices (that discarded data is zeroed), regardless of the backing device type of the loop device. There should be no behavior change for loop devices backed by regular files. This change fixes blktest block/003, and removes an extraneous error print in block/013 when testing on a loop device backed by a block device that does not support discard. Signed-off-by: Evan Green Reviewed-by: Gwendal Grignou Reviewed-by: Chaitanya Kulkarni [used updated version of Evan's comment in loop_config_discard()] [moved backingq to local scope, removed redundant braces] Signed-off-by: Andrzej Pietrasiewicz Reviewed-by: Christoph Hellwig Signed-off-by: Jens Axboe --- drivers/block/loop.c | 42 +++++++++++++++++++++++++++++++----------- 1 file changed, 31 insertions(+), 11 deletions(-) diff --git a/drivers/block/loop.c b/drivers/block/loop.c index 04cbe951862d..da693e6a834e 100644 --- a/drivers/block/loop.c +++ b/drivers/block/loop.c @@ -429,11 +429,12 @@ static int lo_fallocate(struct loop_device *lo, struct request *rq, loff_t pos, * information. 
*/ struct file *file = lo->lo_backing_file; + struct request_queue *q = lo->lo_queue; int ret; mode |= FALLOC_FL_KEEP_SIZE; - if ((!file->f_op->fallocate) || lo->lo_encrypt_key_size) { + if (!blk_queue_discard(q)) { ret = -EOPNOTSUPP; goto out; } @@ -867,28 +868,47 @@ static void loop_config_discard(struct loop_device *lo) struct inode *inode = file->f_mapping->host; struct request_queue *q = lo->lo_queue; + /* + * If the backing device is a block device, mirror its zeroing + * capability. Set the discard sectors to the block device's zeroing + * capabilities because loop discards result in blkdev_issue_zeroout(), + * not blkdev_issue_discard(). This maintains consistent behavior with + * file-backed loop devices: discarded regions read back as zero. + */ + if (S_ISBLK(inode->i_mode) && !lo->lo_encrypt_key_size) { + struct request_queue *backingq; + + backingq = bdev_get_queue(inode->i_bdev); + blk_queue_max_discard_sectors(q, + backingq->limits.max_write_zeroes_sectors); + + blk_queue_max_write_zeroes_sectors(q, + backingq->limits.max_write_zeroes_sectors); + /* * We use punch hole to reclaim the free space used by the * image a.k.a. discard. However we do not support discard if * encryption is enabled, because it may give an attacker * useful information. 
*/ - if ((!file->f_op->fallocate) || - lo->lo_encrypt_key_size) { + } else if (!file->f_op->fallocate || lo->lo_encrypt_key_size) { q->limits.discard_granularity = 0; q->limits.discard_alignment = 0; blk_queue_max_discard_sectors(q, 0); blk_queue_max_write_zeroes_sectors(q, 0); - blk_queue_flag_clear(QUEUE_FLAG_DISCARD, q); - return; + + } else { + q->limits.discard_granularity = inode->i_sb->s_blocksize; + q->limits.discard_alignment = 0; + + blk_queue_max_discard_sectors(q, UINT_MAX >> 9); + blk_queue_max_write_zeroes_sectors(q, UINT_MAX >> 9); } - q->limits.discard_granularity = inode->i_sb->s_blocksize; - q->limits.discard_alignment = 0; - - blk_queue_max_discard_sectors(q, UINT_MAX >> 9); - blk_queue_max_write_zeroes_sectors(q, UINT_MAX >> 9); - blk_queue_flag_set(QUEUE_FLAG_DISCARD, q); + if (q->limits.max_write_zeroes_sectors) + blk_queue_flag_set(QUEUE_FLAG_DISCARD, q); + else + blk_queue_flag_clear(QUEUE_FLAG_DISCARD, q); } static void loop_unprepare_queue(struct loop_device *lo) From c336e992cb1cb1db9ee608dfb30342ae781057ab Mon Sep 17 00:00:00 2001 From: Jens Axboe Date: Fri, 3 Apr 2020 13:54:26 -0600 Subject: [PATCH 143/280] io_uring: remove bogus RLIMIT_NOFILE check in file registration We already checked this limit when the file was opened, and we keep it open in the file table. Hence when we added unix_inflight to the count we want to register, we're doubly accounting these files. This results in -EMFILE for file registration, if we're at half the limit. 
Cc: stable@vger.kernel.org # v5.1+ Signed-off-by: Jens Axboe --- fs/io_uring.c | 7 ------- 1 file changed, 7 deletions(-) diff --git a/fs/io_uring.c b/fs/io_uring.c index 2460c3333f70..ce76157c2f95 100644 --- a/fs/io_uring.c +++ b/fs/io_uring.c @@ -6216,13 +6216,6 @@ static int __io_sqe_files_scm(struct io_ring_ctx *ctx, int nr, int offset) struct sk_buff *skb; int i, nr_files; - if (!capable(CAP_SYS_RESOURCE) && !capable(CAP_SYS_ADMIN)) { - unsigned long inflight = ctx->user->unix_inflight + nr; - - if (inflight > task_rlimit(current, RLIMIT_NOFILE)) - return -EMFILE; - } - fpl = kzalloc(sizeof(*fpl), GFP_KERNEL); if (!fpl) return -ENOMEM; From 581f981034890dfd27be7e98946e8f0461f3967a Mon Sep 17 00:00:00 2001 From: Bijan Mottahedeh Date: Fri, 3 Apr 2020 13:51:33 -0700 Subject: [PATCH 144/280] io_uring: process requests completed with -EAGAIN on poll list A request that completes with an -EAGAIN result after it has been added to the poll list, will not be removed from that list in io_do_iopoll() because the f_op->iopoll() will not succeed for that request. Maintain a retryable local list similar to the done list, and explicitly reissue requests with an -EAGAIN result. 
Signed-off-by: Bijan Mottahedeh Signed-off-by: Jens Axboe --- fs/io_uring.c | 29 ++++++++++++++++++++++++++--- 1 file changed, 26 insertions(+), 3 deletions(-) diff --git a/fs/io_uring.c b/fs/io_uring.c index ce76157c2f95..78ae8e8ed5bf 100644 --- a/fs/io_uring.c +++ b/fs/io_uring.c @@ -1744,11 +1744,24 @@ static void io_iopoll_complete(struct io_ring_ctx *ctx, unsigned int *nr_events, io_free_req_many(ctx, &rb); } +static void io_iopoll_queue(struct list_head *again) +{ + struct io_kiocb *req; + + do { + req = list_first_entry(again, struct io_kiocb, list); + list_del(&req->list); + refcount_inc(&req->refs); + io_queue_async_work(req); + } while (!list_empty(again)); +} + static int io_do_iopoll(struct io_ring_ctx *ctx, unsigned int *nr_events, long min) { struct io_kiocb *req, *tmp; LIST_HEAD(done); + LIST_HEAD(again); bool spin; int ret; @@ -1763,9 +1776,9 @@ static int io_do_iopoll(struct io_ring_ctx *ctx, unsigned int *nr_events, struct kiocb *kiocb = &req->rw.kiocb; /* - * Move completed entries to our local list. If we find a - * request that requires polling, break out and complete - * the done list first, if we have entries there. + * Move completed and retryable entries to our local lists. + * If we find a request that requires polling, break out + * and complete those lists first, if we have entries there. 
*/ if (req->flags & REQ_F_IOPOLL_COMPLETED) { list_move_tail(&req->list, &done); @@ -1774,6 +1787,13 @@ static int io_do_iopoll(struct io_ring_ctx *ctx, unsigned int *nr_events, if (!list_empty(&done)) break; + if (req->result == -EAGAIN) { + list_move_tail(&req->list, &again); + continue; + } + if (!list_empty(&again)) + break; + ret = kiocb->ki_filp->f_op->iopoll(kiocb, spin); if (ret < 0) break; @@ -1786,6 +1806,9 @@ static int io_do_iopoll(struct io_ring_ctx *ctx, unsigned int *nr_events, if (!list_empty(&done)) io_iopoll_complete(ctx, nr_events, &done); + if (!list_empty(&again)) + io_iopoll_queue(&again); + return ret; } From 022ac4c9c55be35a2d1f71019a931324c51b0dab Mon Sep 17 00:00:00 2001 From: Yuxian Dai Date: Wed, 1 Apr 2020 19:26:26 +0800 Subject: [PATCH 145/280] drm/amdgpu/powerplay: using the FCLK DPM table to set the MCLK 1.Using the FCLK DPM table to set the MCLK for DPM states consist of three entities: FCLK UCLK MEMCLK All these three clk change together, MEMCLK from FCLK, so use the fclk frequency. 
2.we should show the current working clock frequency from clock table metric Signed-off-by: Yuxian Dai Reviewed-by: Alex Deucher Reviewed-by: Huang Rui Reviewed-by: Kevin Wang Signed-off-by: Alex Deucher Cc: stable@vger.kernel.org --- drivers/gpu/drm/amd/powerplay/renoir_ppt.c | 6 ++++++ drivers/gpu/drm/amd/powerplay/renoir_ppt.h | 2 +- 2 files changed, 7 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/amd/powerplay/renoir_ppt.c b/drivers/gpu/drm/amd/powerplay/renoir_ppt.c index 7bf52ecba01d..c6b39a7026a8 100644 --- a/drivers/gpu/drm/amd/powerplay/renoir_ppt.c +++ b/drivers/gpu/drm/amd/powerplay/renoir_ppt.c @@ -239,6 +239,7 @@ static int renoir_print_clk_levels(struct smu_context *smu, uint32_t cur_value = 0, value = 0, count = 0, min = 0, max = 0; DpmClocks_t *clk_table = smu->smu_table.clocks_table; SmuMetrics_t metrics; + bool cur_value_match_level = false; if (!clk_table || clk_type >= SMU_CLK_COUNT) return -EINVAL; @@ -297,8 +298,13 @@ static int renoir_print_clk_levels(struct smu_context *smu, GET_DPM_CUR_FREQ(clk_table, clk_type, i, value); size += sprintf(buf + size, "%d: %uMhz %s\n", i, value, cur_value == value ? 
"*" : ""); + if (cur_value == value) + cur_value_match_level = true; } + if (!cur_value_match_level) + size += sprintf(buf + size, " %uMhz *\n", cur_value); + return size; } diff --git a/drivers/gpu/drm/amd/powerplay/renoir_ppt.h b/drivers/gpu/drm/amd/powerplay/renoir_ppt.h index 2a390ddd37dd..89cd6da118a3 100644 --- a/drivers/gpu/drm/amd/powerplay/renoir_ppt.h +++ b/drivers/gpu/drm/amd/powerplay/renoir_ppt.h @@ -37,7 +37,7 @@ extern void renoir_set_ppt_funcs(struct smu_context *smu); freq = table->SocClocks[dpm_level].Freq; \ break; \ case SMU_MCLK: \ - freq = table->MemClocks[dpm_level].Freq; \ + freq = table->FClocks[dpm_level].Freq; \ break; \ case SMU_DCEFCLK: \ freq = table->DcfClocks[dpm_level].Freq; \ From 36a5a5816103ea96a24ac93baff8c8dc248aae1e Mon Sep 17 00:00:00 2001 From: Shirish S Date: Thu, 2 Apr 2020 14:40:11 +0530 Subject: [PATCH 146/280] drm/amd/display: re-order asic declarations Fixes build error of: "use of undeclared identifier 'RENOIR_A0'" To fix the same, this patch re-orders the ASIC declarations accordingly. 
Fixes: 41ef3dcd86443fa ("drm/amd/display: Fix RV2 Variant Detection") Signed-off-by: Shirish S Reviewed-by: Zhan Liu Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/display/include/dal_asic_id.h | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/amd/display/include/dal_asic_id.h b/drivers/gpu/drm/amd/display/include/dal_asic_id.h index 8a87d0ed90ae..2359e88d6029 100644 --- a/drivers/gpu/drm/amd/display/include/dal_asic_id.h +++ b/drivers/gpu/drm/amd/display/include/dal_asic_id.h @@ -136,6 +136,7 @@ #define RAVEN2_A0 0x81 #define RAVEN1_F0 0xF0 #define RAVEN_UNKNOWN 0xFF +#define RENOIR_A0 0x91 #ifndef ASICREV_IS_RAVEN #define ASICREV_IS_RAVEN(eChipRev) ((eChipRev >= RAVEN_A0) && eChipRev < RAVEN_UNKNOWN) #endif @@ -171,8 +172,6 @@ enum { #define ASICREV_IS_NAVI10_P(eChipRev) (eChipRev < NV_NAVI12_P_A0) #define ASICREV_IS_NAVI12_P(eChipRev) ((eChipRev >= NV_NAVI12_P_A0) && (eChipRev < NV_NAVI14_M_A0)) #define ASICREV_IS_NAVI14_M(eChipRev) ((eChipRev >= NV_NAVI14_M_A0) && (eChipRev < NV_UNKNOWN)) -#define RENOIR_A0 0x91 -#define DEVICE_ID_RENOIR_1636 0x1636 // Renoir #define ASICREV_IS_RENOIR(eChipRev) ((eChipRev >= RENOIR_A0) && (eChipRev < RAVEN1_F0)) /* @@ -183,6 +182,9 @@ enum { #define DEVICE_ID_TEMASH_9839 0x9839 #define DEVICE_ID_TEMASH_983D 0x983D +/* RENOIR */ +#define DEVICE_ID_RENOIR_1636 0x1636 + /* Asic Family IDs for different asic family. */ #define FAMILY_CI 120 /* Sea Islands: Hawaii (P), Bonaire (M) */ #define FAMILY_KV 125 /* Fusion => Kaveri: Spectre, Spooky; Kabini: Kalindi */ From 4ee2bb22ddb53a2eafc675690d0d67452029ca37 Mon Sep 17 00:00:00 2001 From: Prike Liang Date: Fri, 3 Apr 2020 12:26:15 +0800 Subject: [PATCH 147/280] drm/amd/powerplay: implement the is_dpm_running() As the pmfw hasn't exported the interface of SMU feature mask to APU SKU so just force on all the features to driver inquired interface at early initial stage. 
Signed-off-by: Prike Liang Reviewed-by: Huang Rui Signed-off-by: Alex Deucher Cc: stable@vger.kernel.org --- drivers/gpu/drm/amd/powerplay/renoir_ppt.c | 12 ++++++++++++ 1 file changed, 12 insertions(+) diff --git a/drivers/gpu/drm/amd/powerplay/renoir_ppt.c b/drivers/gpu/drm/amd/powerplay/renoir_ppt.c index c6b39a7026a8..ff73a735b888 100644 --- a/drivers/gpu/drm/amd/powerplay/renoir_ppt.c +++ b/drivers/gpu/drm/amd/powerplay/renoir_ppt.c @@ -893,6 +893,17 @@ static int renoir_read_sensor(struct smu_context *smu, return ret; } +static bool renoir_is_dpm_running(struct smu_context *smu) +{ + /* + * Util now, the pmfw hasn't exported the interface of SMU + * feature mask to APU SKU so just force on all the feature + * at early initial stage. + */ + return true; + +} + static const struct pptable_funcs renoir_ppt_funcs = { .get_smu_msg_index = renoir_get_smu_msg_index, .get_smu_clk_index = renoir_get_smu_clk_index, @@ -933,6 +944,7 @@ static const struct pptable_funcs renoir_ppt_funcs = { .mode2_reset = smu_v12_0_mode2_reset, .set_soft_freq_limited_range = smu_v12_0_set_soft_freq_limited_range, .set_driver_table_location = smu_v12_0_set_driver_table_location, + .is_dpm_running = renoir_is_dpm_running, }; void renoir_set_ppt_funcs(struct smu_context *smu) From c5207876232649ca5e5ddd6f966d2da75ffded8f Mon Sep 17 00:00:00 2001 From: Tiecheng Zhou Date: Thu, 2 Apr 2020 16:49:36 +0800 Subject: [PATCH 148/280] drm/amd/powerplay: avoid using pm_en before it is initialized hwmgr->pm_en is initialized at hwmgr_hw_init. during amdgpu_device_init, there is amdgpu_asic_reset that calls to pp_get_asic_baco_capability, while hwmgr->pm_en has not yet been initialized. so avoid using pm_en in pp_get_asic_baco_capability. 
Reviewed-by: Emily Deng Signed-off-by: Tiecheng Zhou Signed-off-by: Yintian Tao Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/powerplay/amd_powerplay.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/amd/powerplay/amd_powerplay.c b/drivers/gpu/drm/amd/powerplay/amd_powerplay.c index c195575366a3..2a12614a12c2 100644 --- a/drivers/gpu/drm/amd/powerplay/amd_powerplay.c +++ b/drivers/gpu/drm/amd/powerplay/amd_powerplay.c @@ -1452,7 +1452,8 @@ static int pp_get_asic_baco_state(void *handle, int *state) if (!hwmgr) return -EINVAL; - if (!hwmgr->pm_en || !hwmgr->hwmgr_func->get_asic_baco_state) + if (!(hwmgr->not_vf && amdgpu_dpm) || + !hwmgr->hwmgr_func->get_asic_baco_state) return 0; mutex_lock(&hwmgr->smu_lock); From b74fb888f4927e2079be576ce6dcdbf0c420f1f8 Mon Sep 17 00:00:00 2001 From: Likun Gao Date: Fri, 3 Apr 2020 18:02:42 +0800 Subject: [PATCH 149/280] drm/amdgpu: change SH MEM alignment mode for gfx10 Change SH_MEM_CONFIG Alignment mode to Automatic, as: 1)OGL fn_amd_compute_shader will fail with unaligned mode. 2)The default alignment mode is defined as automatic in the gfx10 specification. 
Signed-off-by: Likun Gao Reviewed-by: Hawking Zhang Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c index f6e3f59efa2f..b9664b46de83 100644 --- a/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c @@ -279,7 +279,7 @@ static const struct soc15_reg_golden golden_settings_gc_10_1_2_nv12[] = #define DEFAULT_SH_MEM_CONFIG \ ((SH_MEM_ADDRESS_MODE_64 << SH_MEM_CONFIG__ADDRESS_MODE__SHIFT) | \ - (SH_MEM_ALIGNMENT_MODE_UNALIGNED << SH_MEM_CONFIG__ALIGNMENT_MODE__SHIFT) | \ + (SH_MEM_ALIGNMENT_MODE_DWORD << SH_MEM_CONFIG__ALIGNMENT_MODE__SHIFT) | \ (SH_MEM_RETRY_MODE_ALL << SH_MEM_CONFIG__RETRY_MODE__SHIFT) | \ (3 << SH_MEM_CONFIG__INITIAL_INST_PREFETCH__SHIFT)) From 5932d260a8d85a103bd6c504fbb85ff58b156bf9 Mon Sep 17 00:00:00 2001 From: Aaron Ma Date: Fri, 3 Apr 2020 22:34:19 +0800 Subject: [PATCH 150/280] drm/amdgpu: Fix oops when pp_funcs is unset in ACPI event On ARCTURUS and RENOIR, powerplay is not supported yet. When plug in or unplug power jack, ACPI event will issue. Then kernel NULL pointer BUG will be triggered. Check for NULL pointers before calling. 
Signed-off-by: Aaron Ma Signed-off-by: Alex Deucher Cc: stable@vger.kernel.org --- drivers/gpu/drm/amd/amdgpu/amdgpu_pm.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_pm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_pm.c index f197f1be0969..abe94a55ecad 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_pm.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_pm.c @@ -89,7 +89,8 @@ void amdgpu_pm_acpi_event_handler(struct amdgpu_device *adev) adev->pm.ac_power = true; else adev->pm.ac_power = false; - if (adev->powerplay.pp_funcs->enable_bapm) + if (adev->powerplay.pp_funcs && + adev->powerplay.pp_funcs->enable_bapm) amdgpu_dpm_enable_bapm(adev, adev->pm.ac_power); mutex_unlock(&adev->pm.mutex); From a032e4f6d60d0aca4f6570d2ad33105a2b9ba385 Mon Sep 17 00:00:00 2001 From: Sagi Grimberg Date: Thu, 2 Apr 2020 08:48:53 -0700 Subject: [PATCH 151/280] nvmet-rdma: fix bonding failover possible NULL deref RDMA_CM_EVENT_ADDR_CHANGE event occur in the case of bonding failover on normal as well as on listening cm_ids. Hence this event will immediately trigger a NULL dereference trying to disconnect a queue for a cm_id that actually belongs to the port. 
To fix this we provide a different handler for the listener cm_ids that will defer a work to disable+(re)enable the port which essentially destroys and setups another listener cm_id Reported-by: Alex Lyakas Signed-off-by: Sagi Grimberg Reviewed-by: Max Gurtovoy Tested-by: Alex Lyakas Signed-off-by: Christoph Hellwig --- drivers/nvme/target/rdma.c | 175 +++++++++++++++++++++++++------------ 1 file changed, 119 insertions(+), 56 deletions(-) diff --git a/drivers/nvme/target/rdma.c b/drivers/nvme/target/rdma.c index 9e1b8c61f54e..f78201421978 100644 --- a/drivers/nvme/target/rdma.c +++ b/drivers/nvme/target/rdma.c @@ -105,6 +105,13 @@ struct nvmet_rdma_queue { struct list_head queue_list; }; +struct nvmet_rdma_port { + struct nvmet_port *nport; + struct sockaddr_storage addr; + struct rdma_cm_id *cm_id; + struct delayed_work repair_work; +}; + struct nvmet_rdma_device { struct ib_device *device; struct ib_pd *pd; @@ -917,7 +924,8 @@ static void nvmet_rdma_free_dev(struct kref *ref) static struct nvmet_rdma_device * nvmet_rdma_find_get_device(struct rdma_cm_id *cm_id) { - struct nvmet_port *port = cm_id->context; + struct nvmet_rdma_port *port = cm_id->context; + struct nvmet_port *nport = port->nport; struct nvmet_rdma_device *ndev; int inline_page_count; int inline_sge_count; @@ -934,17 +942,17 @@ nvmet_rdma_find_get_device(struct rdma_cm_id *cm_id) if (!ndev) goto out_err; - inline_page_count = num_pages(port->inline_data_size); + inline_page_count = num_pages(nport->inline_data_size); inline_sge_count = max(cm_id->device->attrs.max_sge_rd, cm_id->device->attrs.max_recv_sge) - 1; if (inline_page_count > inline_sge_count) { pr_warn("inline_data_size %d cannot be supported by device %s. 
Reducing to %lu.\n", - port->inline_data_size, cm_id->device->name, + nport->inline_data_size, cm_id->device->name, inline_sge_count * PAGE_SIZE); - port->inline_data_size = inline_sge_count * PAGE_SIZE; + nport->inline_data_size = inline_sge_count * PAGE_SIZE; inline_page_count = inline_sge_count; } - ndev->inline_data_size = port->inline_data_size; + ndev->inline_data_size = nport->inline_data_size; ndev->inline_page_count = inline_page_count; ndev->device = cm_id->device; kref_init(&ndev->ref); @@ -1272,6 +1280,7 @@ static int nvmet_rdma_cm_accept(struct rdma_cm_id *cm_id, static int nvmet_rdma_queue_connect(struct rdma_cm_id *cm_id, struct rdma_cm_event *event) { + struct nvmet_rdma_port *port = cm_id->context; struct nvmet_rdma_device *ndev; struct nvmet_rdma_queue *queue; int ret = -EINVAL; @@ -1287,7 +1296,7 @@ static int nvmet_rdma_queue_connect(struct rdma_cm_id *cm_id, ret = -ENOMEM; goto put_device; } - queue->port = cm_id->context; + queue->port = port->nport; if (queue->host_qid == 0) { /* Let inflight controller teardown complete */ @@ -1412,7 +1421,7 @@ static void nvmet_rdma_queue_connect_fail(struct rdma_cm_id *cm_id, static int nvmet_rdma_device_removal(struct rdma_cm_id *cm_id, struct nvmet_rdma_queue *queue) { - struct nvmet_port *port; + struct nvmet_rdma_port *port; if (queue) { /* @@ -1431,7 +1440,7 @@ static int nvmet_rdma_device_removal(struct rdma_cm_id *cm_id, * cm_id destroy. use atomic xchg to make sure * we don't compete with remove_port. 
*/ - if (xchg(&port->priv, NULL) != cm_id) + if (xchg(&port->cm_id, NULL) != cm_id) return 0; /* @@ -1462,6 +1471,13 @@ static int nvmet_rdma_cm_handler(struct rdma_cm_id *cm_id, nvmet_rdma_queue_established(queue); break; case RDMA_CM_EVENT_ADDR_CHANGE: + if (!queue) { + struct nvmet_rdma_port *port = cm_id->context; + + schedule_delayed_work(&port->repair_work, 0); + break; + } + /* FALLTHROUGH */ case RDMA_CM_EVENT_DISCONNECTED: case RDMA_CM_EVENT_TIMEWAIT_EXIT: nvmet_rdma_queue_disconnect(queue); @@ -1504,43 +1520,20 @@ static void nvmet_rdma_delete_ctrl(struct nvmet_ctrl *ctrl) mutex_unlock(&nvmet_rdma_queue_mutex); } -static int nvmet_rdma_add_port(struct nvmet_port *port) +static void nvmet_rdma_disable_port(struct nvmet_rdma_port *port) { + struct rdma_cm_id *cm_id = xchg(&port->cm_id, NULL); + + if (cm_id) + rdma_destroy_id(cm_id); +} + +static int nvmet_rdma_enable_port(struct nvmet_rdma_port *port) +{ + struct sockaddr *addr = (struct sockaddr *)&port->addr; struct rdma_cm_id *cm_id; - struct sockaddr_storage addr = { }; - __kernel_sa_family_t af; int ret; - switch (port->disc_addr.adrfam) { - case NVMF_ADDR_FAMILY_IP4: - af = AF_INET; - break; - case NVMF_ADDR_FAMILY_IP6: - af = AF_INET6; - break; - default: - pr_err("address family %d not supported\n", - port->disc_addr.adrfam); - return -EINVAL; - } - - if (port->inline_data_size < 0) { - port->inline_data_size = NVMET_RDMA_DEFAULT_INLINE_DATA_SIZE; - } else if (port->inline_data_size > NVMET_RDMA_MAX_INLINE_DATA_SIZE) { - pr_warn("inline_data_size %u is too large, reducing to %u\n", - port->inline_data_size, - NVMET_RDMA_MAX_INLINE_DATA_SIZE); - port->inline_data_size = NVMET_RDMA_MAX_INLINE_DATA_SIZE; - } - - ret = inet_pton_with_scope(&init_net, af, port->disc_addr.traddr, - port->disc_addr.trsvcid, &addr); - if (ret) { - pr_err("malformed ip/port passed: %s:%s\n", - port->disc_addr.traddr, port->disc_addr.trsvcid); - return ret; - } - cm_id = rdma_create_id(&init_net, nvmet_rdma_cm_handler, port, 
RDMA_PS_TCP, IB_QPT_RC); if (IS_ERR(cm_id)) { @@ -1558,23 +1551,19 @@ static int nvmet_rdma_add_port(struct nvmet_port *port) goto out_destroy_id; } - ret = rdma_bind_addr(cm_id, (struct sockaddr *)&addr); + ret = rdma_bind_addr(cm_id, addr); if (ret) { - pr_err("binding CM ID to %pISpcs failed (%d)\n", - (struct sockaddr *)&addr, ret); + pr_err("binding CM ID to %pISpcs failed (%d)\n", addr, ret); goto out_destroy_id; } ret = rdma_listen(cm_id, 128); if (ret) { - pr_err("listening to %pISpcs failed (%d)\n", - (struct sockaddr *)&addr, ret); + pr_err("listening to %pISpcs failed (%d)\n", addr, ret); goto out_destroy_id; } - pr_info("enabling port %d (%pISpcs)\n", - le16_to_cpu(port->disc_addr.portid), (struct sockaddr *)&addr); - port->priv = cm_id; + port->cm_id = cm_id; return 0; out_destroy_id: @@ -1582,18 +1571,92 @@ static int nvmet_rdma_add_port(struct nvmet_port *port) return ret; } -static void nvmet_rdma_remove_port(struct nvmet_port *port) +static void nvmet_rdma_repair_port_work(struct work_struct *w) { - struct rdma_cm_id *cm_id = xchg(&port->priv, NULL); + struct nvmet_rdma_port *port = container_of(to_delayed_work(w), + struct nvmet_rdma_port, repair_work); + int ret; - if (cm_id) - rdma_destroy_id(cm_id); + nvmet_rdma_disable_port(port); + ret = nvmet_rdma_enable_port(port); + if (ret) + schedule_delayed_work(&port->repair_work, 5 * HZ); +} + +static int nvmet_rdma_add_port(struct nvmet_port *nport) +{ + struct nvmet_rdma_port *port; + __kernel_sa_family_t af; + int ret; + + port = kzalloc(sizeof(*port), GFP_KERNEL); + if (!port) + return -ENOMEM; + + nport->priv = port; + port->nport = nport; + INIT_DELAYED_WORK(&port->repair_work, nvmet_rdma_repair_port_work); + + switch (nport->disc_addr.adrfam) { + case NVMF_ADDR_FAMILY_IP4: + af = AF_INET; + break; + case NVMF_ADDR_FAMILY_IP6: + af = AF_INET6; + break; + default: + pr_err("address family %d not supported\n", + nport->disc_addr.adrfam); + ret = -EINVAL; + goto out_free_port; + } + + if 
(nport->inline_data_size < 0) { + nport->inline_data_size = NVMET_RDMA_DEFAULT_INLINE_DATA_SIZE; + } else if (nport->inline_data_size > NVMET_RDMA_MAX_INLINE_DATA_SIZE) { + pr_warn("inline_data_size %u is too large, reducing to %u\n", + nport->inline_data_size, + NVMET_RDMA_MAX_INLINE_DATA_SIZE); + nport->inline_data_size = NVMET_RDMA_MAX_INLINE_DATA_SIZE; + } + + ret = inet_pton_with_scope(&init_net, af, nport->disc_addr.traddr, + nport->disc_addr.trsvcid, &port->addr); + if (ret) { + pr_err("malformed ip/port passed: %s:%s\n", + nport->disc_addr.traddr, nport->disc_addr.trsvcid); + goto out_free_port; + } + + ret = nvmet_rdma_enable_port(port); + if (ret) + goto out_free_port; + + pr_info("enabling port %d (%pISpcs)\n", + le16_to_cpu(nport->disc_addr.portid), + (struct sockaddr *)&port->addr); + + return 0; + +out_free_port: + kfree(port); + return ret; +} + +static void nvmet_rdma_remove_port(struct nvmet_port *nport) +{ + struct nvmet_rdma_port *port = nport->priv; + + cancel_delayed_work_sync(&port->repair_work); + nvmet_rdma_disable_port(port); + kfree(port); } static void nvmet_rdma_disc_port_addr(struct nvmet_req *req, - struct nvmet_port *port, char *traddr) + struct nvmet_port *nport, char *traddr) { - struct rdma_cm_id *cm_id = port->priv; + struct nvmet_rdma_port *port = nport->priv; + struct rdma_cm_id *cm_id = port->cm_id; if (inet_addr_is_any((struct sockaddr *)&cm_id->route.addr.src_addr)) { struct nvmet_rdma_rsp *rsp = @@ -1603,7 +1666,7 @@ static void nvmet_rdma_disc_port_addr(struct nvmet_req *req, sprintf(traddr, "%pISc", addr); } else { - memcpy(traddr, port->disc_addr.traddr, NVMF_TRADDR_SIZE); + memcpy(traddr, nport->disc_addr.traddr, NVMF_TRADDR_SIZE); } } From 657f1975e9d9c880fa13030e88ba6cc84964f1db Mon Sep 17 00:00:00 2001 From: Sagi Grimberg Date: Thu, 2 Apr 2020 09:34:54 -0700 Subject: [PATCH 152/280] nvme: fix deadlock caused by ANA update wrong locking The deadlock combines 4 flows in parallel: - ns scanning (triggered from reconnect) 
- request timeout - ANA update (triggered from reconnect) - I/O coming into the mpath device (1) ns scanning triggers disk revalidation -> update disk info -> freeze queue -> but blocked, due to (2) (2) timeout handler references the q_usage_counter -> but blocks in the transport .timeout() handler, due to (3) (3) the transport timeout handler (indirectly) calls nvme_stop_queue() -> which takes the (down_read) namespaces_rwsem -> but blocks, due to (4) (4) ANA update takes the (down_write) namespaces_rwsem -> calls nvme_mpath_set_live() -> which synchronizes the ns_head srcu (see commit 504db087aacc) -> but blocks, due to (5) (5) I/O came into nvme_mpath_make_request -> took srcu_read_lock -> direct_make_request -> blk_queue_enter -> but blocked, due to (1) ==> the request queue is under freeze -> deadlock. The fix is making ANA update take a read lock as the namespaces list is not manipulated, it is just the ns and ns->head that are being updated (which is protected with the ns->head lock). 
Fixes: 0d0b660f214dc ("nvme: add ANA support") Signed-off-by: Sagi Grimberg Reviewed-by: Keith Busch Reviewed-by: Hannes Reinecke Signed-off-by: Christoph Hellwig --- drivers/nvme/host/multipath.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/nvme/host/multipath.c b/drivers/nvme/host/multipath.c index 61bf87592570..54603bd3e02d 100644 --- a/drivers/nvme/host/multipath.c +++ b/drivers/nvme/host/multipath.c @@ -510,7 +510,7 @@ static int nvme_update_ana_state(struct nvme_ctrl *ctrl, if (!nr_nsids) return 0; - down_write(&ctrl->namespaces_rwsem); + down_read(&ctrl->namespaces_rwsem); list_for_each_entry(ns, &ctrl->namespaces, list) { unsigned nsid = le32_to_cpu(desc->nsids[n]); @@ -521,7 +521,7 @@ static int nvme_update_ana_state(struct nvme_ctrl *ctrl, if (++n == nr_nsids) break; } - up_write(&ctrl->namespaces_rwsem); + up_read(&ctrl->namespaces_rwsem); return 0; } From 8c5c660529209a0e324c1c1a35ce3f83d67a2aa5 Mon Sep 17 00:00:00 2001 From: James Smart Date: Fri, 3 Apr 2020 07:33:20 -0700 Subject: [PATCH 153/280] nvme-fc: Revert "add module to ops template to allow module references" The original patch was to resolve the lldd being able to be unloaded while being used to talk to the boot device of the system. However, the end result of the original patch is that any driver unload while a nvme controller is live via the lldd is now being prohibited. Given the module reference, the module teardown routine can't be called, thus there's no way, other than manual actions to terminate the controllers. 
Fixes: 863fbae929c7 ("nvme_fc: add module to ops template to allow module references") Cc: # v5.4+ Signed-off-by: James Smart Reviewed-by: Himanshu Madhani Signed-off-by: Christoph Hellwig --- drivers/nvme/host/fc.c | 14 ++------------ drivers/nvme/target/fcloop.c | 1 - drivers/scsi/lpfc/lpfc_nvme.c | 2 -- drivers/scsi/qla2xxx/qla_nvme.c | 1 - include/linux/nvme-fc-driver.h | 4 ---- 5 files changed, 2 insertions(+), 20 deletions(-) diff --git a/drivers/nvme/host/fc.c b/drivers/nvme/host/fc.c index a8bf2fb1287b..7dfc4a2ecf1e 100644 --- a/drivers/nvme/host/fc.c +++ b/drivers/nvme/host/fc.c @@ -342,8 +342,7 @@ nvme_fc_register_localport(struct nvme_fc_port_info *pinfo, !template->ls_req || !template->fcp_io || !template->ls_abort || !template->fcp_abort || !template->max_hw_queues || !template->max_sgl_segments || - !template->max_dif_sgl_segments || !template->dma_boundary || - !template->module) { + !template->max_dif_sgl_segments || !template->dma_boundary) { ret = -EINVAL; goto out_reghost_failed; } @@ -2016,7 +2015,6 @@ nvme_fc_ctrl_free(struct kref *ref) { struct nvme_fc_ctrl *ctrl = container_of(ref, struct nvme_fc_ctrl, ref); - struct nvme_fc_lport *lport = ctrl->lport; unsigned long flags; if (ctrl->ctrl.tagset) { @@ -2043,7 +2041,6 @@ nvme_fc_ctrl_free(struct kref *ref) if (ctrl->ctrl.opts) nvmf_free_options(ctrl->ctrl.opts); kfree(ctrl); - module_put(lport->ops->module); } static void @@ -3074,15 +3071,10 @@ nvme_fc_init_ctrl(struct device *dev, struct nvmf_ctrl_options *opts, goto out_fail; } - if (!try_module_get(lport->ops->module)) { - ret = -EUNATCH; - goto out_free_ctrl; - } - idx = ida_simple_get(&nvme_fc_ctrl_cnt, 0, 0, GFP_KERNEL); if (idx < 0) { ret = -ENOSPC; - goto out_mod_put; + goto out_free_ctrl; } ctrl->ctrl.opts = opts; @@ -3232,8 +3224,6 @@ nvme_fc_init_ctrl(struct device *dev, struct nvmf_ctrl_options *opts, out_free_ida: put_device(ctrl->dev); ida_simple_remove(&nvme_fc_ctrl_cnt, ctrl->cnum); -out_mod_put: - 
module_put(lport->ops->module); out_free_ctrl: kfree(ctrl); out_fail: diff --git a/drivers/nvme/target/fcloop.c b/drivers/nvme/target/fcloop.c index 9861fcea39f6..f69ce66e2d44 100644 --- a/drivers/nvme/target/fcloop.c +++ b/drivers/nvme/target/fcloop.c @@ -875,7 +875,6 @@ fcloop_targetport_delete(struct nvmet_fc_target_port *targetport) #define FCLOOP_DMABOUND_4G 0xFFFFFFFF static struct nvme_fc_port_template fctemplate = { - .module = THIS_MODULE, .localport_delete = fcloop_localport_delete, .remoteport_delete = fcloop_remoteport_delete, .create_queue = fcloop_create_queue, diff --git a/drivers/scsi/lpfc/lpfc_nvme.c b/drivers/scsi/lpfc/lpfc_nvme.c index f6c8963c915d..db4a04a207ec 100644 --- a/drivers/scsi/lpfc/lpfc_nvme.c +++ b/drivers/scsi/lpfc/lpfc_nvme.c @@ -1985,8 +1985,6 @@ lpfc_nvme_fcp_abort(struct nvme_fc_local_port *pnvme_lport, /* Declare and initialization an instance of the FC NVME template. */ static struct nvme_fc_port_template lpfc_nvme_template = { - .module = THIS_MODULE, - /* initiator-based functions */ .localport_delete = lpfc_nvme_localport_delete, .remoteport_delete = lpfc_nvme_remoteport_delete, diff --git a/drivers/scsi/qla2xxx/qla_nvme.c b/drivers/scsi/qla2xxx/qla_nvme.c index bfcd02fdf2b8..941aa53363f5 100644 --- a/drivers/scsi/qla2xxx/qla_nvme.c +++ b/drivers/scsi/qla2xxx/qla_nvme.c @@ -610,7 +610,6 @@ static void qla_nvme_remoteport_delete(struct nvme_fc_remote_port *rport) } static struct nvme_fc_port_template qla_nvme_fc_transport = { - .module = THIS_MODULE, .localport_delete = qla_nvme_localport_delete, .remoteport_delete = qla_nvme_remoteport_delete, .create_queue = qla_nvme_alloc_queue, diff --git a/include/linux/nvme-fc-driver.h b/include/linux/nvme-fc-driver.h index 6d0d70f3219c..10f81629b9ce 100644 --- a/include/linux/nvme-fc-driver.h +++ b/include/linux/nvme-fc-driver.h @@ -270,8 +270,6 @@ struct nvme_fc_remote_port { * * Host/Initiator Transport Entrypoints/Parameters: * - * @module: The LLDD module using the interface - * * 
@localport_delete: The LLDD initiates deletion of a localport via * nvme_fc_deregister_localport(). However, the teardown is * asynchronous. This routine is called upon the completion of the @@ -385,8 +383,6 @@ struct nvme_fc_remote_port { * Value is Mandatory. Allowed to be zero. */ struct nvme_fc_port_template { - struct module *module; - /* initiator-based functions */ void (*localport_delete)(struct nvme_fc_local_port *); void (*remoteport_delete)(struct nvme_fc_remote_port *); From fd60e0683e8e9107e09cd2e4798f3e27e85d2705 Mon Sep 17 00:00:00 2001 From: Emmanuel Pescosta Date: Sat, 4 Apr 2020 17:38:43 +0200 Subject: [PATCH 154/280] ALSA: usb-audio: Add registration quirk for Kingston HyperX Cloud Alpha S Similar to the Kingston HyperX AMP, the Kingston HyperX Cloud Alpha S (0951:16ed) uses two interfaces, but only the second interface contains the capture stream. This patch delays the registration until the second interface appears. Signed-off-by: Emmanuel Pescosta Link: https://lore.kernel.org/r/20200404153843.9288-1-emmanuelpescosta099@gmail.com Signed-off-by: Takashi Iwai --- sound/usb/quirks.c | 1 + 1 file changed, 1 insertion(+) diff --git a/sound/usb/quirks.c b/sound/usb/quirks.c index 86f192a3043d..a8ece1701068 100644 --- a/sound/usb/quirks.c +++ b/sound/usb/quirks.c @@ -1827,6 +1827,7 @@ struct registration_quirk { static const struct registration_quirk registration_quirks[] = { REG_QUIRK_ENTRY(0x0951, 0x16d8, 2), /* Kingston HyperX AMP */ + REG_QUIRK_ENTRY(0x0951, 0x16ed, 2), /* Kingston HyperX Cloud Alpha S */ { 0 } /* terminator */ }; From c0f83d164fb8f3a2b7bc379a6c1e27d1123a9eab Mon Sep 17 00:00:00 2001 From: Marek Szyprowski Date: Fri, 27 Mar 2020 17:21:26 +0100 Subject: [PATCH 155/280] drm/prime: fix extracting of the DMA addresses from a scatterlist Scatterlist elements contain both pages and DMA addresses, but one should not assume 1:1 relation between them. 
The sg->length is the size of the physical memory chunk described by the sg->page, while sg_dma_len(sg) is the size of the DMA (IO virtual) chunk described by the sg_dma_address(sg). The proper way of extracting both: pages and DMA addresses of the whole buffer described by a scatterlist is to iterate independently over the sg->pages/sg->length and sg_dma_address(sg)/sg_dma_len(sg) entries. Fixes: 42e67b479eab ("drm/prime: use dma length macro when mapping sg") Signed-off-by: Marek Szyprowski Reviewed-by: Alex Deucher Signed-off-by: Alex Deucher Link: https://patchwork.freedesktop.org/patch/msgid/20200327162126.29705-1-m.szyprowski@samsung.com Cc: stable@vger.kernel.org --- drivers/gpu/drm/drm_prime.c | 37 +++++++++++++++++++++++++------------ 1 file changed, 25 insertions(+), 12 deletions(-) diff --git a/drivers/gpu/drm/drm_prime.c b/drivers/gpu/drm/drm_prime.c index 1de2cde2277c..282774e469ac 100644 --- a/drivers/gpu/drm/drm_prime.c +++ b/drivers/gpu/drm/drm_prime.c @@ -962,27 +962,40 @@ int drm_prime_sg_to_page_addr_arrays(struct sg_table *sgt, struct page **pages, unsigned count; struct scatterlist *sg; struct page *page; - u32 len, index; + u32 page_len, page_index; dma_addr_t addr; + u32 dma_len, dma_index; - index = 0; + /* + * Scatterlist elements contains both pages and DMA addresses, but + * one shoud not assume 1:1 relation between them. The sg->length is + * the size of the physical memory chunk described by the sg->page, + * while sg_dma_len(sg) is the size of the DMA (IO virtual) chunk + * described by the sg_dma_address(sg). 
+ */ + page_index = 0; + dma_index = 0; for_each_sg(sgt->sgl, sg, sgt->nents, count) { - len = sg_dma_len(sg); + page_len = sg->length; page = sg_page(sg); + dma_len = sg_dma_len(sg); addr = sg_dma_address(sg); - while (len > 0) { - if (WARN_ON(index >= max_entries)) + while (pages && page_len > 0) { + if (WARN_ON(page_index >= max_entries)) return -1; - if (pages) - pages[index] = page; - if (addrs) - addrs[index] = addr; - + pages[page_index] = page; page++; + page_len -= PAGE_SIZE; + page_index++; + } + while (addrs && dma_len > 0) { + if (WARN_ON(dma_index >= max_entries)) + return -1; + addrs[dma_index] = addr; addr += PAGE_SIZE; - len -= PAGE_SIZE; - index++; + dma_len -= PAGE_SIZE; + dma_index++; } } return 0; From 48bdd849e967f1c573d2b2bc24308e24a83f39c2 Mon Sep 17 00:00:00 2001 From: Pavel Begunkov Date: Mon, 6 Apr 2020 00:08:52 +0300 Subject: [PATCH 156/280] io_uring: fix ctx refcounting in io_submit_sqes() If io_get_req() fails, it drops a ref. Then, while keeping @submitted unmodified, io_submit_sqes() breaks the loop and puts @nr - @submitted refs. For each submitted req a ref is dropped in io_put_req() and friends. So, for @nr taken refs there will be (@nr - @submitted + @submitted + 1) dropped. Remove ctx refcounting from io_get_req(), that at the same time makes it clearer. 
Fixes: 2b85edfc0c90 ("io_uring: batch getting pcpu references") Cc: stable@vger.kernel.org # v5.6 Signed-off-by: Pavel Begunkov Signed-off-by: Jens Axboe --- fs/io_uring.c | 1 - 1 file changed, 1 deletion(-) diff --git a/fs/io_uring.c b/fs/io_uring.c index 78ae8e8ed5bf..79bd22289d73 100644 --- a/fs/io_uring.c +++ b/fs/io_uring.c @@ -1342,7 +1342,6 @@ static struct io_kiocb *io_get_req(struct io_ring_ctx *ctx, req = io_get_fallback_req(ctx); if (req) goto got_it; - percpu_ref_put(&ctx->refs); return NULL; } From 143a3a735dfb1d6eecd7ef14183f823ff4b06027 Mon Sep 17 00:00:00 2001 From: Joe Perches Date: Tue, 10 Mar 2020 13:47:30 -0700 Subject: [PATCH 157/280] s390/mm: use fallthrough; Convert the various uses of fallthrough comments to fallthrough; Done via script Link: https://lore.kernel.org/lkml/b56602fcf79f849e733e7b521bb0e17895d390fa.1582230379.git.joe.com/ Signed-off-by: Joe Perches Signed-off-by: Heiko Carstens Signed-off-by: Vasily Gorbik --- arch/s390/mm/fault.c | 2 -- 1 file changed, 2 deletions(-) diff --git a/arch/s390/mm/fault.c b/arch/s390/mm/fault.c index d56f67745e3e..1bf091bab918 100644 --- a/arch/s390/mm/fault.c +++ b/arch/s390/mm/fault.c @@ -852,9 +852,7 @@ void do_secure_storage_access(struct pt_regs *regs) BUG(); break; case VDSO_FAULT: - /* fallthrough */ case GMAP_FAULT: - /* fallthrough */ default: do_fault_error(regs, VM_READ | VM_WRITE, VM_FAULT_BADMAP); WARN_ON_ONCE(1); From 3db1db93e34325e14bb29f8f1d904020c409bea6 Mon Sep 17 00:00:00 2001 From: Julian Wiedmann Date: Fri, 20 Mar 2020 14:00:00 +0100 Subject: [PATCH 158/280] s390/qdio: cleanly split alloc and establish All that qdio_allocate() actually uses from the init_data is the cdev, and the number of Input and Output Queues. Have the driver pass those as parameters, and defer the init_data processing into qdio_establish(). This includes writing per-device(!) trace entries, and most of the sanity checks. 
Signed-off-by: Julian Wiedmann Reviewed-by: Benjamin Block Signed-off-by: Vasily Gorbik --- arch/s390/include/asm/qdio.h | 3 +- drivers/s390/cio/qdio_debug.c | 16 +-------- drivers/s390/cio/qdio_debug.h | 3 +- drivers/s390/cio/qdio_main.c | 56 ++++++++++++++++++++++--------- drivers/s390/net/qeth_core_main.c | 3 +- drivers/s390/scsi/zfcp_qdio.c | 4 +-- 6 files changed, 47 insertions(+), 38 deletions(-) diff --git a/arch/s390/include/asm/qdio.h b/arch/s390/include/asm/qdio.h index e577f8533009..8a0f4d014680 100644 --- a/arch/s390/include/asm/qdio.h +++ b/arch/s390/include/asm/qdio.h @@ -408,7 +408,8 @@ int qdio_alloc_buffers(struct qdio_buffer **buf, unsigned int count); void qdio_free_buffers(struct qdio_buffer **buf, unsigned int count); void qdio_reset_buffers(struct qdio_buffer **buf, unsigned int count); -extern int qdio_allocate(struct qdio_initialize *); +extern int qdio_allocate(struct ccw_device *cdev, unsigned int no_input_qs, + unsigned int no_output_qs); extern int qdio_establish(struct qdio_initialize *); extern int qdio_activate(struct ccw_device *); extern void qdio_release_aob(struct qaob *); diff --git a/drivers/s390/cio/qdio_debug.c b/drivers/s390/cio/qdio_debug.c index 5a3d9ee90a7f..286b044fb027 100644 --- a/drivers/s390/cio/qdio_debug.c +++ b/drivers/s390/cio/qdio_debug.c @@ -58,25 +58,11 @@ static void qdio_clear_dbf_list(void) mutex_unlock(&qdio_dbf_list_mutex); } -int qdio_allocate_dbf(struct qdio_initialize *init_data, - struct qdio_irq *irq_ptr) +int qdio_allocate_dbf(struct qdio_irq *irq_ptr) { char text[QDIO_DBF_NAME_LEN]; struct qdio_dbf_entry *new_entry; - DBF_EVENT("qfmt:%1d", init_data->q_format); - DBF_HEX(init_data->adapter_name, 8); - DBF_EVENT("qpff%4x", init_data->qib_param_field_format); - DBF_HEX(&init_data->qib_param_field, sizeof(void *)); - DBF_HEX(&init_data->input_slib_elements, sizeof(void *)); - DBF_HEX(&init_data->output_slib_elements, sizeof(void *)); - DBF_EVENT("niq:%1d noq:%1d", init_data->no_input_qs, - 
init_data->no_output_qs); - DBF_HEX(&init_data->input_handler, sizeof(void *)); - DBF_HEX(&init_data->output_handler, sizeof(void *)); - DBF_HEX(&init_data->int_parm, sizeof(long)); - DBF_HEX(&init_data->input_sbal_addr_array, sizeof(void *)); - DBF_HEX(&init_data->output_sbal_addr_array, sizeof(void *)); DBF_EVENT("irq:%8lx", (unsigned long)irq_ptr); /* allocate trace view for the interface */ diff --git a/drivers/s390/cio/qdio_debug.h b/drivers/s390/cio/qdio_debug.h index 122450ba6b90..0dfba085f360 100644 --- a/drivers/s390/cio/qdio_debug.h +++ b/drivers/s390/cio/qdio_debug.h @@ -64,8 +64,7 @@ static inline void DBF_DEV_HEX(struct qdio_irq *dev, void *addr, debug_event(dev->debug_area, level, addr, len); } -int qdio_allocate_dbf(struct qdio_initialize *init_data, - struct qdio_irq *irq_ptr); +int qdio_allocate_dbf(struct qdio_irq *irq_ptr); void qdio_setup_debug_entries(struct qdio_irq *irq_ptr); void qdio_shutdown_debug_entries(struct qdio_irq *irq_ptr); int qdio_debug_init(void); diff --git a/drivers/s390/cio/qdio_main.c b/drivers/s390/cio/qdio_main.c index c890848064fe..bf617b455428 100644 --- a/drivers/s390/cio/qdio_main.c +++ b/drivers/s390/cio/qdio_main.c @@ -1220,27 +1220,21 @@ EXPORT_SYMBOL_GPL(qdio_free); /** * qdio_allocate - allocate qdio queues and associated data - * @init_data: initialization data + * @cdev: associated ccw device + * @no_input_qs: allocate this number of Input Queues + * @no_output_qs: allocate this number of Output Queues */ -int qdio_allocate(struct qdio_initialize *init_data) +int qdio_allocate(struct ccw_device *cdev, unsigned int no_input_qs, + unsigned int no_output_qs) { - struct ccw_device *cdev = init_data->cdev; struct subchannel_id schid; struct qdio_irq *irq_ptr; ccw_device_get_schid(cdev, &schid); DBF_EVENT("qallocate:%4x", schid.sch_no); - if ((init_data->no_input_qs && !init_data->input_handler) || - (init_data->no_output_qs && !init_data->output_handler)) - return -EINVAL; - - if ((init_data->no_input_qs > 
QDIO_MAX_QUEUES_PER_IRQ) || - (init_data->no_output_qs > QDIO_MAX_QUEUES_PER_IRQ)) - return -EINVAL; - - if ((!init_data->input_sbal_addr_array) || - (!init_data->output_sbal_addr_array)) + if (no_input_qs > QDIO_MAX_QUEUES_PER_IRQ || + no_output_qs > QDIO_MAX_QUEUES_PER_IRQ) return -EINVAL; /* irq_ptr must be in GFP_DMA since it contains ccw1.cda */ @@ -1250,9 +1244,12 @@ int qdio_allocate(struct qdio_initialize *init_data) irq_ptr->cdev = cdev; mutex_init(&irq_ptr->setup_mutex); - if (qdio_allocate_dbf(init_data, irq_ptr)) + if (qdio_allocate_dbf(irq_ptr)) goto out_rel; + DBF_DEV_EVENT(DBF_ERR, irq_ptr, "alloc niq:%1u noq:%1u", no_input_qs, + no_output_qs); + /* * Allocate a page for the chsc calls in qdio_establish. * Must be pre-allocated since a zfcp recovery will call @@ -1268,8 +1265,7 @@ int qdio_allocate(struct qdio_initialize *init_data) if (!irq_ptr->qdr) goto out_rel; - if (qdio_allocate_qs(irq_ptr, init_data->no_input_qs, - init_data->no_output_qs)) + if (qdio_allocate_qs(irq_ptr, no_input_qs, no_output_qs)) goto out_rel; INIT_LIST_HEAD(&irq_ptr->entry); @@ -1305,6 +1301,25 @@ static void qdio_detect_hsicq(struct qdio_irq *irq_ptr) DBF_EVENT("use_cq:%d", use_cq); } +static void qdio_trace_init_data(struct qdio_irq *irq, + struct qdio_initialize *data) +{ + DBF_DEV_EVENT(DBF_ERR, irq, "qfmt:%1u", data->q_format); + DBF_DEV_HEX(irq, data->adapter_name, 8, DBF_ERR); + DBF_DEV_EVENT(DBF_ERR, irq, "qpff%4x", data->qib_param_field_format); + DBF_DEV_HEX(irq, &data->qib_param_field, sizeof(void *), DBF_ERR); + DBF_DEV_HEX(irq, &data->input_slib_elements, sizeof(void *), DBF_ERR); + DBF_DEV_HEX(irq, &data->output_slib_elements, sizeof(void *), DBF_ERR); + DBF_DEV_EVENT(DBF_ERR, irq, "niq:%1u noq:%1u", data->no_input_qs, + data->no_output_qs); + DBF_DEV_HEX(irq, &data->input_handler, sizeof(void *), DBF_ERR); + DBF_DEV_HEX(irq, &data->output_handler, sizeof(void *), DBF_ERR); + DBF_DEV_HEX(irq, &data->int_parm, sizeof(long), DBF_ERR); + DBF_DEV_HEX(irq, 
&data->input_sbal_addr_array, sizeof(void *), DBF_ERR); + DBF_DEV_HEX(irq, &data->output_sbal_addr_array, sizeof(void *), + DBF_ERR); +} + /** * qdio_establish - establish queues on a qdio subchannel * @init_data: initialization data @@ -1322,7 +1337,16 @@ int qdio_establish(struct qdio_initialize *init_data) if (!irq_ptr) return -ENODEV; + if ((init_data->no_input_qs && !init_data->input_handler) || + (init_data->no_output_qs && !init_data->output_handler)) + return -EINVAL; + + if (!init_data->input_sbal_addr_array || + !init_data->output_sbal_addr_array) + return -EINVAL; + mutex_lock(&irq_ptr->setup_mutex); + qdio_trace_init_data(irq_ptr, init_data); qdio_setup_irq(irq_ptr, init_data); rc = qdio_establish_thinint(irq_ptr); diff --git a/drivers/s390/net/qeth_core_main.c b/drivers/s390/net/qeth_core_main.c index 24fd17b347fe..19b2773fc2d3 100644 --- a/drivers/s390/net/qeth_core_main.c +++ b/drivers/s390/net/qeth_core_main.c @@ -4893,7 +4893,8 @@ static int qeth_qdio_establish(struct qeth_card *card) if (atomic_cmpxchg(&card->qdio.state, QETH_QDIO_ALLOCATED, QETH_QDIO_ESTABLISHED) == QETH_QDIO_ALLOCATED) { - rc = qdio_allocate(&init_data); + rc = qdio_allocate(CARD_DDEV(card), init_data.no_input_qs, + init_data.no_output_qs); if (rc) { atomic_set(&card->qdio.state, QETH_QDIO_ALLOCATED); goto out; diff --git a/drivers/s390/scsi/zfcp_qdio.c b/drivers/s390/scsi/zfcp_qdio.c index f0d6296e673b..1f693bb82312 100644 --- a/drivers/s390/scsi/zfcp_qdio.c +++ b/drivers/s390/scsi/zfcp_qdio.c @@ -308,7 +308,6 @@ static void zfcp_qdio_setup_init_data(struct qdio_initialize *id, */ static int zfcp_qdio_allocate(struct zfcp_qdio *qdio) { - struct qdio_initialize init_data; int ret; ret = qdio_alloc_buffers(qdio->req_q, QDIO_MAX_BUFFERS_PER_Q); @@ -319,10 +318,9 @@ static int zfcp_qdio_allocate(struct zfcp_qdio *qdio) if (ret) goto free_req_q; - zfcp_qdio_setup_init_data(&init_data, qdio); init_waitqueue_head(&qdio->req_q_wq); - ret = qdio_allocate(&init_data); + ret = 
qdio_allocate(qdio->adapter->ccw_device, 1, 1); if (ret) goto free_res_q; From ad96401cdb147efd486761db8159d5e3489c9ca8 Mon Sep 17 00:00:00 2001 From: Julian Wiedmann Date: Fri, 20 Mar 2020 14:00:00 +0100 Subject: [PATCH 159/280] zfcp: inline zfcp_qdio_setup_init_data() In preparation for a subsequent patch, move the setup of init_data into the only caller. Signed-off-by: Julian Wiedmann Reviewed-by: Benjamin Block Signed-off-by: Vasily Gorbik --- drivers/s390/scsi/zfcp_qdio.c | 42 ++++++++++++++--------------------- 1 file changed, 17 insertions(+), 25 deletions(-) diff --git a/drivers/s390/scsi/zfcp_qdio.c b/drivers/s390/scsi/zfcp_qdio.c index 1f693bb82312..c9172047b194 100644 --- a/drivers/s390/scsi/zfcp_qdio.c +++ b/drivers/s390/scsi/zfcp_qdio.c @@ -277,29 +277,6 @@ int zfcp_qdio_send(struct zfcp_qdio *qdio, struct zfcp_qdio_req *q_req) return 0; } - -static void zfcp_qdio_setup_init_data(struct qdio_initialize *id, - struct zfcp_qdio *qdio) -{ - memset(id, 0, sizeof(*id)); - id->cdev = qdio->adapter->ccw_device; - id->q_format = QDIO_ZFCP_QFMT; - memcpy(id->adapter_name, dev_name(&id->cdev->dev), 8); - ASCEBC(id->adapter_name, 8); - id->qib_rflags = QIB_RFLAGS_ENABLE_DATA_DIV; - if (enable_multibuffer) - id->qdr_ac |= QDR_AC_MULTI_BUFFER_ENABLE; - id->no_input_qs = 1; - id->no_output_qs = 1; - id->input_handler = zfcp_qdio_int_resp; - id->output_handler = zfcp_qdio_int_req; - id->int_parm = (unsigned long) qdio; - id->input_sbal_addr_array = qdio->res_q; - id->output_sbal_addr_array = qdio->req_q; - id->scan_threshold = - QDIO_MAX_BUFFERS_PER_Q - ZFCP_QDIO_MAX_SBALS_PER_REQ * 2; -} - /** * zfcp_qdio_allocate - allocate queue memory and initialize QDIO data * @qdio: pointer to struct zfcp_qdio @@ -373,7 +350,7 @@ void zfcp_qdio_close(struct zfcp_qdio *qdio) int zfcp_qdio_open(struct zfcp_qdio *qdio) { struct qdio_buffer_element *sbale; - struct qdio_initialize init_data; + struct qdio_initialize init_data = {0}; struct zfcp_adapter *adapter = qdio->adapter; 
struct ccw_device *cdev = adapter->ccw_device; struct qdio_ssqd_desc ssqd; @@ -385,7 +362,22 @@ int zfcp_qdio_open(struct zfcp_qdio *qdio) atomic_andnot(ZFCP_STATUS_ADAPTER_SIOSL_ISSUED, &qdio->adapter->status); - zfcp_qdio_setup_init_data(&init_data, qdio); + init_data.cdev = cdev; + init_data.q_format = QDIO_ZFCP_QFMT; + memcpy(init_data.adapter_name, dev_name(&cdev->dev), 8); + ASCEBC(init_data.adapter_name, 8); + init_data.qib_rflags = QIB_RFLAGS_ENABLE_DATA_DIV; + if (enable_multibuffer) + init_data.qdr_ac |= QDR_AC_MULTI_BUFFER_ENABLE; + init_data.no_input_qs = 1; + init_data.no_output_qs = 1; + init_data.input_handler = zfcp_qdio_int_resp; + init_data.output_handler = zfcp_qdio_int_req; + init_data.int_parm = (unsigned long) qdio; + init_data.input_sbal_addr_array = qdio->res_q; + init_data.output_sbal_addr_array = qdio->req_q; + init_data.scan_threshold = + QDIO_MAX_BUFFERS_PER_Q - ZFCP_QDIO_MAX_SBALS_PER_REQ * 2; if (qdio_establish(&init_data)) goto failed_establish; From d8564e19da8c8b31c7fc160942f95b113f554211 Mon Sep 17 00:00:00 2001 From: Julian Wiedmann Date: Fri, 20 Mar 2020 14:00:00 +0100 Subject: [PATCH 160/280] s390/qdio: allow for non-contiguous SBAL array in init_data Upper-layer drivers allocate their SBALs by calling qdio_alloc_buffers() for each individual queue. But when later passing the SBAL addresses to qdio_establish(), they need to be in a single array of pointers. So if the driver uses multiple Input or Output queues, it needs to allocate a temporary array just to present all its SBAL pointers in this layout. This patch slightly changes the format of the QDIO initialization data, so that drivers can pass a per-queue array where each element points to a queue's SBAL array. zfcp doesn't use multiple queues, so the impact there is trivial. For qeth this brings a nice reduction in complexity, and removes a page-sized allocation. 
Signed-off-by: Julian Wiedmann Reviewed-by: Benjamin Block Signed-off-by: Vasily Gorbik --- arch/s390/include/asm/qdio.h | 8 ++--- drivers/s390/cio/qdio_setup.c | 10 +++--- drivers/s390/net/qeth_core.h | 5 +-- drivers/s390/net/qeth_core_main.c | 59 ++++++------------------------- drivers/s390/scsi/zfcp_qdio.c | 6 ++-- 5 files changed, 25 insertions(+), 63 deletions(-) diff --git a/arch/s390/include/asm/qdio.h b/arch/s390/include/asm/qdio.h index 8a0f4d014680..2b6292ed0fb6 100644 --- a/arch/s390/include/asm/qdio.h +++ b/arch/s390/include/asm/qdio.h @@ -341,8 +341,8 @@ typedef void qdio_handler_t(struct ccw_device *, unsigned int, int, * @irq_poll: Data IRQ polling handler (NULL when not supported) * @scan_threshold: # of in-use buffers that triggers scan on output queue * @int_parm: interruption parameter - * @input_sbal_addr_array: address of no_input_qs * 128 pointers - * @output_sbal_addr_array: address of no_output_qs * 128 pointers + * @input_sbal_addr_array: per-queue array, each element points to 128 SBALs + * @output_sbal_addr_array: per-queue array, each element points to 128 SBALs * @output_sbal_state_array: no_output_qs * 128 state info (for CQ or NULL) */ struct qdio_initialize { @@ -362,8 +362,8 @@ struct qdio_initialize { void (*irq_poll)(struct ccw_device *cdev, unsigned long data); unsigned int scan_threshold; unsigned long int_parm; - struct qdio_buffer **input_sbal_addr_array; - struct qdio_buffer **output_sbal_addr_array; + struct qdio_buffer ***input_sbal_addr_array; + struct qdio_buffer ***output_sbal_addr_array; struct qdio_outbuf_state *output_sbal_state_array; }; diff --git a/drivers/s390/cio/qdio_setup.c b/drivers/s390/cio/qdio_setup.c index bbbefc9f9e04..3083edd61f0c 100644 --- a/drivers/s390/cio/qdio_setup.c +++ b/drivers/s390/cio/qdio_setup.c @@ -213,8 +213,6 @@ static void setup_queues(struct qdio_irq *irq_ptr, struct qdio_initialize *qdio_init) { struct qdio_q *q; - struct qdio_buffer **input_sbal_array = 
qdio_init->input_sbal_addr_array; - struct qdio_buffer **output_sbal_array = qdio_init->output_sbal_addr_array; struct qdio_outbuf_state *output_sbal_state_array = qdio_init->output_sbal_state_array; int i; @@ -225,8 +223,8 @@ static void setup_queues(struct qdio_irq *irq_ptr, q->is_input_q = 1; - setup_storage_lists(q, irq_ptr, input_sbal_array, i); - input_sbal_array += QDIO_MAX_BUFFERS_PER_Q; + setup_storage_lists(q, irq_ptr, + qdio_init->input_sbal_addr_array[i], i); if (is_thinint_irq(irq_ptr)) { tasklet_init(&q->tasklet, tiqdio_inbound_processing, @@ -245,8 +243,8 @@ static void setup_queues(struct qdio_irq *irq_ptr, output_sbal_state_array += QDIO_MAX_BUFFERS_PER_Q; q->is_input_q = 0; - setup_storage_lists(q, irq_ptr, output_sbal_array, i); - output_sbal_array += QDIO_MAX_BUFFERS_PER_Q; + setup_storage_lists(q, irq_ptr, + qdio_init->output_sbal_addr_array[i], i); tasklet_init(&q->tasklet, qdio_outbound_processing, (unsigned long) q); diff --git a/drivers/s390/net/qeth_core.h b/drivers/s390/net/qeth_core.h index acda230323d5..e0b26310ecab 100644 --- a/drivers/s390/net/qeth_core.h +++ b/drivers/s390/net/qeth_core.h @@ -181,11 +181,12 @@ struct qeth_vnicc_info { /*****************************************************************************/ /* QDIO queue and buffer handling */ /*****************************************************************************/ -#define QETH_MAX_QUEUES 4 +#define QETH_MAX_OUT_QUEUES 4 #define QETH_IQD_MIN_TXQ 2 /* One for ucast, one for mcast. 
*/ #define QETH_IQD_MCAST_TXQ 0 #define QETH_IQD_MIN_UCAST_TXQ 1 +#define QETH_MAX_IN_QUEUES 2 #define QETH_RX_COPYBREAK (PAGE_SIZE >> 1) #define QETH_IN_BUF_SIZE_DEFAULT 65536 #define QETH_IN_BUF_COUNT_DEFAULT 64 @@ -539,7 +540,7 @@ struct qeth_qdio_info { /* output */ int no_out_queues; - struct qeth_qdio_out_q *out_qs[QETH_MAX_QUEUES]; + struct qeth_qdio_out_q *out_qs[QETH_MAX_OUT_QUEUES]; struct qdio_outbuf_state *out_bufstates; /* priority queueing */ diff --git a/drivers/s390/net/qeth_core_main.c b/drivers/s390/net/qeth_core_main.c index 19b2773fc2d3..4f90f11942b1 100644 --- a/drivers/s390/net/qeth_core_main.c +++ b/drivers/s390/net/qeth_core_main.c @@ -4812,28 +4812,13 @@ static void qeth_determine_capabilities(struct qeth_card *card) return; } -static void qeth_qdio_establish_cq(struct qeth_card *card, - struct qdio_buffer **in_sbal_ptrs) -{ - int i; - - if (card->options.cq == QETH_CQ_ENABLED) { - int offset = QDIO_MAX_BUFFERS_PER_Q * - (card->qdio.no_in_queues - 1); - - for (i = 0; i < QDIO_MAX_BUFFERS_PER_Q; i++) - in_sbal_ptrs[offset + i] = - card->qdio.c_q->bufs[i].buffer; - } -} - static int qeth_qdio_establish(struct qeth_card *card) { + struct qdio_buffer **out_sbal_ptrs[QETH_MAX_OUT_QUEUES]; + struct qdio_buffer **in_sbal_ptrs[QETH_MAX_IN_QUEUES]; struct qdio_initialize init_data; char *qib_param_field; - struct qdio_buffer **in_sbal_ptrs; - struct qdio_buffer **out_sbal_ptrs; - int i, j, k; + unsigned int i; int rc = 0; QETH_CARD_TEXT(card, 2, "qdioest"); @@ -4847,32 +4832,12 @@ static int qeth_qdio_establish(struct qeth_card *card) qeth_create_qib_param_field(card, qib_param_field); qeth_create_qib_param_field_blkt(card, qib_param_field); - in_sbal_ptrs = kcalloc(card->qdio.no_in_queues * QDIO_MAX_BUFFERS_PER_Q, - sizeof(void *), - GFP_KERNEL); - if (!in_sbal_ptrs) { - rc = -ENOMEM; - goto out_free_qib_param; - } + in_sbal_ptrs[0] = card->qdio.in_q->qdio_bufs; + if (card->options.cq == QETH_CQ_ENABLED) + in_sbal_ptrs[1] = 
card->qdio.c_q->qdio_bufs; - for (i = 0; i < QDIO_MAX_BUFFERS_PER_Q; i++) - in_sbal_ptrs[i] = card->qdio.in_q->bufs[i].buffer; - - qeth_qdio_establish_cq(card, in_sbal_ptrs); - - out_sbal_ptrs = - kcalloc(card->qdio.no_out_queues * QDIO_MAX_BUFFERS_PER_Q, - sizeof(void *), - GFP_KERNEL); - if (!out_sbal_ptrs) { - rc = -ENOMEM; - goto out_free_in_sbals; - } - - for (i = 0, k = 0; i < card->qdio.no_out_queues; ++i) - for (j = 0; j < QDIO_MAX_BUFFERS_PER_Q; j++, k++) - out_sbal_ptrs[k] = - card->qdio.out_qs[i]->bufs[j]->buffer; + for (i = 0; i < card->qdio.no_out_queues; i++) + out_sbal_ptrs[i] = card->qdio.out_qs[i]->qdio_bufs; memset(&init_data, 0, sizeof(struct qdio_initialize)); init_data.cdev = CARD_DDEV(card); @@ -4917,10 +4882,6 @@ static int qeth_qdio_establish(struct qeth_card *card) break; } out: - kfree(out_sbal_ptrs); -out_free_in_sbals: - kfree(in_sbal_ptrs); -out_free_qib_param: kfree(qib_param_field); out_free_nothing: return rc; @@ -5986,7 +5947,7 @@ static struct net_device *qeth_alloc_netdev(struct qeth_card *card) switch (card->info.type) { case QETH_CARD_TYPE_IQD: dev = alloc_netdev_mqs(sizeof(*priv), "hsi%d", NET_NAME_UNKNOWN, - ether_setup, QETH_MAX_QUEUES, 1); + ether_setup, QETH_MAX_OUT_QUEUES, 1); break; case QETH_CARD_TYPE_OSM: dev = alloc_etherdev(sizeof(*priv)); @@ -5996,7 +5957,7 @@ static struct net_device *qeth_alloc_netdev(struct qeth_card *card) ether_setup); break; default: - dev = alloc_etherdev_mqs(sizeof(*priv), QETH_MAX_QUEUES, 1); + dev = alloc_etherdev_mqs(sizeof(*priv), QETH_MAX_OUT_QUEUES, 1); } if (!dev) diff --git a/drivers/s390/scsi/zfcp_qdio.c b/drivers/s390/scsi/zfcp_qdio.c index c9172047b194..531e2a985d66 100644 --- a/drivers/s390/scsi/zfcp_qdio.c +++ b/drivers/s390/scsi/zfcp_qdio.c @@ -349,6 +349,8 @@ void zfcp_qdio_close(struct zfcp_qdio *qdio) */ int zfcp_qdio_open(struct zfcp_qdio *qdio) { + struct qdio_buffer **input_sbals[1] = {qdio->res_q}; + struct qdio_buffer **output_sbals[1] = {qdio->req_q}; struct 
qdio_buffer_element *sbale; struct qdio_initialize init_data = {0}; struct zfcp_adapter *adapter = qdio->adapter; @@ -374,8 +376,8 @@ int zfcp_qdio_open(struct zfcp_qdio *qdio) init_data.input_handler = zfcp_qdio_int_resp; init_data.output_handler = zfcp_qdio_int_req; init_data.int_parm = (unsigned long) qdio; - init_data.input_sbal_addr_array = qdio->res_q; - init_data.output_sbal_addr_array = qdio->req_q; + init_data.input_sbal_addr_array = input_sbals; + init_data.output_sbal_addr_array = output_sbals; init_data.scan_threshold = QDIO_MAX_BUFFERS_PER_Q - ZFCP_QDIO_MAX_SBALS_PER_REQ * 2; From 1da1092dbf61a0c0aab02048232f5e9fcab15861 Mon Sep 17 00:00:00 2001 From: Julian Wiedmann Date: Fri, 20 Mar 2020 14:00:00 +0100 Subject: [PATCH 161/280] s390/qdio: remove cdev from init_data It's no longer needed. Signed-off-by: Julian Wiedmann Reviewed-by: Benjamin Block Signed-off-by: Vasily Gorbik --- arch/s390/include/asm/qdio.h | 5 ++--- drivers/s390/cio/qdio_main.c | 5 +++-- drivers/s390/net/qeth_core_main.c | 3 +-- drivers/s390/scsi/zfcp_qdio.c | 5 ++--- 4 files changed, 8 insertions(+), 10 deletions(-) diff --git a/arch/s390/include/asm/qdio.h b/arch/s390/include/asm/qdio.h index 2b6292ed0fb6..86a3796e9be8 100644 --- a/arch/s390/include/asm/qdio.h +++ b/arch/s390/include/asm/qdio.h @@ -325,7 +325,6 @@ typedef void qdio_handler_t(struct ccw_device *, unsigned int, int, /** * struct qdio_initialize - qdio initialization data - * @cdev: associated ccw device * @q_format: queue format * @qdr_ac: feature flags to set * @adapter_name: name for the adapter @@ -346,7 +345,6 @@ typedef void qdio_handler_t(struct ccw_device *, unsigned int, int, * @output_sbal_state_array: no_output_qs * 128 state info (for CQ or NULL) */ struct qdio_initialize { - struct ccw_device *cdev; unsigned char q_format; unsigned char qdr_ac; unsigned char adapter_name[8]; @@ -410,7 +408,8 @@ void qdio_reset_buffers(struct qdio_buffer **buf, unsigned int count); extern int qdio_allocate(struct ccw_device 
*cdev, unsigned int no_input_qs, unsigned int no_output_qs); -extern int qdio_establish(struct qdio_initialize *); +extern int qdio_establish(struct ccw_device *cdev, + struct qdio_initialize *init_data); extern int qdio_activate(struct ccw_device *); extern void qdio_release_aob(struct qaob *); extern int do_QDIO(struct ccw_device *, unsigned int, int, unsigned int, diff --git a/drivers/s390/cio/qdio_main.c b/drivers/s390/cio/qdio_main.c index bf617b455428..9d6e51bcd072 100644 --- a/drivers/s390/cio/qdio_main.c +++ b/drivers/s390/cio/qdio_main.c @@ -1322,11 +1322,12 @@ static void qdio_trace_init_data(struct qdio_irq *irq, /** * qdio_establish - establish queues on a qdio subchannel + * @cdev: associated ccw device * @init_data: initialization data */ -int qdio_establish(struct qdio_initialize *init_data) +int qdio_establish(struct ccw_device *cdev, + struct qdio_initialize *init_data) { - struct ccw_device *cdev = init_data->cdev; struct qdio_irq *irq_ptr = cdev->private->qdio_data; struct subchannel_id schid; int rc; diff --git a/drivers/s390/net/qeth_core_main.c b/drivers/s390/net/qeth_core_main.c index 4f90f11942b1..f7689461c242 100644 --- a/drivers/s390/net/qeth_core_main.c +++ b/drivers/s390/net/qeth_core_main.c @@ -4840,7 +4840,6 @@ static int qeth_qdio_establish(struct qeth_card *card) out_sbal_ptrs[i] = card->qdio.out_qs[i]->qdio_bufs; memset(&init_data, 0, sizeof(struct qdio_initialize)); - init_data.cdev = CARD_DDEV(card); init_data.q_format = IS_IQD(card) ? 
QDIO_IQDIO_QFMT : QDIO_QETH_QFMT; init_data.qib_param_field_format = 0; @@ -4864,7 +4863,7 @@ static int qeth_qdio_establish(struct qeth_card *card) atomic_set(&card->qdio.state, QETH_QDIO_ALLOCATED); goto out; } - rc = qdio_establish(&init_data); + rc = qdio_establish(CARD_DDEV(card), &init_data); if (rc) { atomic_set(&card->qdio.state, QETH_QDIO_ALLOCATED); qdio_free(CARD_DDEV(card)); diff --git a/drivers/s390/scsi/zfcp_qdio.c b/drivers/s390/scsi/zfcp_qdio.c index 531e2a985d66..26702b56a7ab 100644 --- a/drivers/s390/scsi/zfcp_qdio.c +++ b/drivers/s390/scsi/zfcp_qdio.c @@ -364,7 +364,6 @@ int zfcp_qdio_open(struct zfcp_qdio *qdio) atomic_andnot(ZFCP_STATUS_ADAPTER_SIOSL_ISSUED, &qdio->adapter->status); - init_data.cdev = cdev; init_data.q_format = QDIO_ZFCP_QFMT; memcpy(init_data.adapter_name, dev_name(&cdev->dev), 8); ASCEBC(init_data.adapter_name, 8); @@ -381,10 +380,10 @@ int zfcp_qdio_open(struct zfcp_qdio *qdio) init_data.scan_threshold = QDIO_MAX_BUFFERS_PER_Q - ZFCP_QDIO_MAX_SBALS_PER_REQ * 2; - if (qdio_establish(&init_data)) + if (qdio_establish(cdev, &init_data)) goto failed_establish; - if (qdio_get_ssqd_desc(init_data.cdev, &ssqd)) + if (qdio_get_ssqd_desc(cdev, &ssqd)) goto failed_qdio; if (ssqd.qdioac2 & CHSC_AC2_DATA_DIV_ENABLED) From a8a4ee2740bb6224271f06742884ec375c42cb9e Mon Sep 17 00:00:00 2001 From: Julian Wiedmann Date: Fri, 20 Mar 2020 14:00:00 +0100 Subject: [PATCH 162/280] s390/qdio: inline shared_ind() This is just prep work for a subsequent patch, no functional change. For the non-polling path we can pull the code chunk in front of the for-loop, since it only evaluates to true for a 1-queue configuration. 
Signed-off-by: Julian Wiedmann Reviewed-by: Benjamin Block Signed-off-by: Vasily Gorbik --- drivers/s390/cio/qdio_thinint.c | 19 ++++++++----------- 1 file changed, 8 insertions(+), 11 deletions(-) diff --git a/drivers/s390/cio/qdio_thinint.c b/drivers/s390/cio/qdio_thinint.c index ea09df7209f0..c78c8dd18a89 100644 --- a/drivers/s390/cio/qdio_thinint.c +++ b/drivers/s390/cio/qdio_thinint.c @@ -92,17 +92,12 @@ static inline int references_shared_dsci(struct qdio_irq *irq_ptr) return irq_ptr->dsci == &q_indicators[TIQDIO_SHARED_IND].ind; } -static inline int shared_ind(struct qdio_irq *irq_ptr) -{ - return references_shared_dsci(irq_ptr) || - has_multiple_inq_on_dsci(irq_ptr); -} - void clear_nonshared_ind(struct qdio_irq *irq_ptr) { if (!is_thinint_irq(irq_ptr)) return; - if (shared_ind(irq_ptr)) + if (references_shared_dsci(irq_ptr) || + has_multiple_inq_on_dsci(irq_ptr)) return; xchg(irq_ptr->dsci, 0); } @@ -111,7 +106,8 @@ int test_nonshared_ind(struct qdio_irq *irq_ptr) { if (!is_thinint_irq(irq_ptr)) return 0; - if (shared_ind(irq_ptr)) + if (references_shared_dsci(irq_ptr) || + has_multiple_inq_on_dsci(irq_ptr)) return 0; if (*irq_ptr->dsci) return 1; @@ -144,10 +140,11 @@ static inline void tiqdio_call_inq_handlers(struct qdio_irq *irq) return; } - for_each_input_queue(irq, q, i) { - if (!shared_ind(irq)) - xchg(irq->dsci, 0); + if (!references_shared_dsci(irq) && + !has_multiple_inq_on_dsci(irq)) + xchg(irq->dsci, 0); + for_each_input_queue(irq, q, i) { /* * Call inbound processing but not directly * since that could starve other thinint queues. From 9c159bbc14ba196d590dc1a2fe7931ccfe73db98 Mon Sep 17 00:00:00 2001 From: Julian Wiedmann Date: Fri, 20 Mar 2020 14:00:00 +0100 Subject: [PATCH 163/280] s390/qdio: clear DSCI early for polling drivers Polling drivers in a configuration with 1 Input Queue currently keep their DSCI armed all the way through the poll cycle, until qdio_start_irq() clears it. 
_Any_ intermittent QDIO interrupt delivered to tiqdio_thinint_handler() will thus cause 1) the 'adapter_int' statistic to be incremented, 2) a call to tiqdio_call_inq_handlers() for this device, and then 3) the 'int_discarded' statistics to be incremented. This causes overhead & complexity in the IRQ path, along with ambiguity in the statistics. On the other hand the device should be in IRQ avoidance mode during a poll cycle, so there won't be a lot of DSCI ping-pong that this micro-optimization could prevent. So align the DSCI handling with what we already do for devices with multiple Input Queues: clear it right away while processing the IRQ. For the non-polling path this means that we no longer need to handle the 1-queue case separately. Signed-off-by: Julian Wiedmann Reviewed-by: Benjamin Block Signed-off-by: Vasily Gorbik --- drivers/s390/cio/qdio.h | 1 - drivers/s390/cio/qdio_main.c | 2 -- drivers/s390/cio/qdio_thinint.c | 25 ++----------------------- 3 files changed, 2 insertions(+), 26 deletions(-) diff --git a/drivers/s390/cio/qdio.h b/drivers/s390/cio/qdio.h index b0beafc43d37..b8453b594679 100644 --- a/drivers/s390/cio/qdio.h +++ b/drivers/s390/cio/qdio.h @@ -374,7 +374,6 @@ int tiqdio_allocate_memory(void); void tiqdio_free_memory(void); int tiqdio_register_thinints(void); void tiqdio_unregister_thinints(void); -void clear_nonshared_ind(struct qdio_irq *); int test_nonshared_ind(struct qdio_irq *); /* prototypes for setup */ diff --git a/drivers/s390/cio/qdio_main.c b/drivers/s390/cio/qdio_main.c index 9d6e51bcd072..bcc3ab14e72d 100644 --- a/drivers/s390/cio/qdio_main.c +++ b/drivers/s390/cio/qdio_main.c @@ -1643,8 +1643,6 @@ int qdio_start_irq(struct ccw_device *cdev) if (!irq_ptr) return -ENODEV; - clear_nonshared_ind(irq_ptr); - for_each_input_queue(irq_ptr, q, i) qdio_stop_polling(q); diff --git a/drivers/s390/cio/qdio_thinint.c b/drivers/s390/cio/qdio_thinint.c index c78c8dd18a89..ae50373617cd 100644 --- a/drivers/s390/cio/qdio_thinint.c +++ 
b/drivers/s390/cio/qdio_thinint.c @@ -82,32 +82,16 @@ void tiqdio_remove_device(struct qdio_irq *irq_ptr) INIT_LIST_HEAD(&irq_ptr->entry); } -static inline int has_multiple_inq_on_dsci(struct qdio_irq *irq_ptr) -{ - return irq_ptr->nr_input_qs > 1; -} - static inline int references_shared_dsci(struct qdio_irq *irq_ptr) { return irq_ptr->dsci == &q_indicators[TIQDIO_SHARED_IND].ind; } -void clear_nonshared_ind(struct qdio_irq *irq_ptr) -{ - if (!is_thinint_irq(irq_ptr)) - return; - if (references_shared_dsci(irq_ptr) || - has_multiple_inq_on_dsci(irq_ptr)) - return; - xchg(irq_ptr->dsci, 0); -} - int test_nonshared_ind(struct qdio_irq *irq_ptr) { if (!is_thinint_irq(irq_ptr)) return 0; - if (references_shared_dsci(irq_ptr) || - has_multiple_inq_on_dsci(irq_ptr)) + if (references_shared_dsci(irq_ptr)) return 0; if (*irq_ptr->dsci) return 1; @@ -127,8 +111,7 @@ static inline void tiqdio_call_inq_handlers(struct qdio_irq *irq) struct qdio_q *q; int i; - if (!references_shared_dsci(irq) && - has_multiple_inq_on_dsci(irq)) + if (!references_shared_dsci(irq)) xchg(irq->dsci, 0); if (irq->irq_poll) { @@ -140,10 +123,6 @@ static inline void tiqdio_call_inq_handlers(struct qdio_irq *irq) return; } - if (!references_shared_dsci(irq) && - !has_multiple_inq_on_dsci(irq)) - xchg(irq->dsci, 0); - for_each_input_queue(irq, q, i) { /* * Call inbound processing but not directly From 05ce3e53f375295c2940390b2b429e506e07655c Mon Sep 17 00:00:00 2001 From: Cornelia Huck Date: Fri, 27 Mar 2020 13:45:02 +0100 Subject: [PATCH 164/280] s390/cio: avoid duplicated 'ADD' uevents The common I/O layer delays the ADD uevent for subchannels and delegates generating this uevent to the individual subchannel drivers. The io_subchannel driver will do so when the associated ccw_device has been registered -- but unconditionally, so more ADD uevents will be generated if a subchannel has been unbound from the io_subchannel driver and later rebound. 
To fix this, only generate the ADD event if uevents were still suppressed for the device. Fixes: fa1a8c23eb7d ("s390: cio: Delay uevents for subchannels") Message-Id: <20200327124503.9794-2-cohuck@redhat.com> Reported-by: Boris Fiuczynski Reviewed-by: Peter Oberparleiter Reviewed-by: Boris Fiuczynski Signed-off-by: Cornelia Huck Signed-off-by: Vasily Gorbik --- drivers/s390/cio/device.c | 13 +++++++++---- 1 file changed, 9 insertions(+), 4 deletions(-) diff --git a/drivers/s390/cio/device.c b/drivers/s390/cio/device.c index 50007cb9be5b..b29fe8d50baf 100644 --- a/drivers/s390/cio/device.c +++ b/drivers/s390/cio/device.c @@ -849,8 +849,10 @@ static void io_subchannel_register(struct ccw_device *cdev) * Now we know this subchannel will stay, we can throw * our delayed uevent. */ - dev_set_uevent_suppress(&sch->dev, 0); - kobject_uevent(&sch->dev.kobj, KOBJ_ADD); + if (dev_get_uevent_suppress(&sch->dev)) { + dev_set_uevent_suppress(&sch->dev, 0); + kobject_uevent(&sch->dev.kobj, KOBJ_ADD); + } /* make it known to the system */ ret = ccw_device_add(cdev); if (ret) { @@ -1058,8 +1060,11 @@ static int io_subchannel_probe(struct subchannel *sch) * Throw the delayed uevent for the subchannel, register * the ccw_device and exit. */ - dev_set_uevent_suppress(&sch->dev, 0); - kobject_uevent(&sch->dev.kobj, KOBJ_ADD); + if (dev_get_uevent_suppress(&sch->dev)) { + /* should always be the case for the console */ + dev_set_uevent_suppress(&sch->dev, 0); + kobject_uevent(&sch->dev.kobj, KOBJ_ADD); + } cdev = sch_get_cdev(sch); rc = ccw_device_add(cdev); if (rc) { From 2bc55eaeb88d30accfc1b6ac2708d4e4b81ca260 Mon Sep 17 00:00:00 2001 From: Cornelia Huck Date: Fri, 27 Mar 2020 13:45:03 +0100 Subject: [PATCH 165/280] s390/cio: generate delayed uevent for vfio-ccw subchannels The common I/O layer delays the ADD uevent for subchannels and delegates generating this uevent to the individual subchannel drivers. 
The vfio-ccw I/O subchannel driver, however, did not do that, and will not generate an ADD uevent for subchannels that had not been bound to a different driver (or none at all, which also triggers the uevent). Generate the ADD uevent at the end of the probe function if uevents were still suppressed for the device. Message-Id: <20200327124503.9794-3-cohuck@redhat.com> Fixes: 63f1934d562d ("vfio: ccw: basic implementation for vfio_ccw driver") Reviewed-by: Eric Farman Signed-off-by: Cornelia Huck Signed-off-by: Vasily Gorbik --- drivers/s390/cio/vfio_ccw_drv.c | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/drivers/s390/cio/vfio_ccw_drv.c b/drivers/s390/cio/vfio_ccw_drv.c index e401a3d0aa57..339a6bc0339b 100644 --- a/drivers/s390/cio/vfio_ccw_drv.c +++ b/drivers/s390/cio/vfio_ccw_drv.c @@ -167,6 +167,11 @@ static int vfio_ccw_sch_probe(struct subchannel *sch) if (ret) goto out_disable; + if (dev_get_uevent_suppress(&sch->dev)) { + dev_set_uevent_suppress(&sch->dev, 0); + kobject_uevent(&sch->dev.kobj, KOBJ_ADD); + } + VFIO_CCW_MSG_EVENT(4, "bound to subchannel %x.%x.%04x\n", sch->schid.cssid, sch->schid.ssid, sch->schid.sch_no); From b513b0d4563ab70a0a25e7a0ed94afaf88ac28bf Mon Sep 17 00:00:00 2001 From: Jiri Slaby Date: Thu, 19 Mar 2020 11:04:21 +0100 Subject: [PATCH 166/280] drm/virtio: fix OOB in virtio_gpu_object_create After commit f651c8b05542 ("drm/virtio: factor out the sg_table from virtio_gpu_object"), virtio_gpu_create_object allocates too small space to fit everything in. It is because it allocates struct virtio_gpu_object, but should allocate a newly added struct virtio_gpu_object_shmem which has 2 more members. So fix that by using correct type in virtio_gpu_create_object. 
Signed-off-by: Jiri Slaby Link: http://patchwork.freedesktop.org/patch/msgid/20200319100421.16267-1-jslaby@suse.cz Fixes: f651c8b05542 ("drm/virtio: factor out the sg_table from virtio_gpu_object") Cc: Gurchetan Singh Cc: Gerd Hoffmann Signed-off-by: Gerd Hoffmann (cherry picked from commit 0666a8d7f6a4530440e59f2d22ed4091f4d3818c) --- drivers/gpu/drm/virtio/virtgpu_object.c | 14 ++++++++------ 1 file changed, 8 insertions(+), 6 deletions(-) diff --git a/drivers/gpu/drm/virtio/virtgpu_object.c b/drivers/gpu/drm/virtio/virtgpu_object.c index 2bfb13d1932e..d9039bb7c5e3 100644 --- a/drivers/gpu/drm/virtio/virtgpu_object.c +++ b/drivers/gpu/drm/virtio/virtgpu_object.c @@ -123,15 +123,17 @@ bool virtio_gpu_is_shmem(struct virtio_gpu_object *bo) struct drm_gem_object *virtio_gpu_create_object(struct drm_device *dev, size_t size) { - struct virtio_gpu_object *bo; + struct virtio_gpu_object_shmem *shmem; + struct drm_gem_shmem_object *dshmem; - bo = kzalloc(sizeof(*bo), GFP_KERNEL); - if (!bo) + shmem = kzalloc(sizeof(*shmem), GFP_KERNEL); + if (!shmem) return NULL; - bo->base.base.funcs = &virtio_gpu_shmem_funcs; - bo->base.map_cached = true; - return &bo->base.base; + dshmem = &shmem->base.base; + dshmem->base.funcs = &virtio_gpu_shmem_funcs; + dshmem->map_cached = true; + return &dshmem->base; } static int virtio_gpu_object_shmem_init(struct virtio_gpu_device *vgdev, From 7506baeed8d05fc164254c64af14cfed2ac14446 Mon Sep 17 00:00:00 2001 From: Julia Lawall Date: Sun, 5 Apr 2020 17:03:48 +0200 Subject: [PATCH 167/280] ASoC: stm32: sai: Add missing cleanup The commit 0d6defc7e0e4 ("ASoC: stm32: sai: manage rebind issue") converts some function calls to their non-devm equivalents. The appropriate cleanup code was added to the remove function, but not to the probe function. Add a call to snd_dmaengine_pcm_unregister to compensate for the call to snd_dmaengine_pcm_register in case of subsequent failure. 
Fixes: commit 0d6defc7e0e4 ("ASoC: stm32: sai: manage rebind issue") Signed-off-by: Julia Lawall Acked-by: Olivier Moysan Link: https://lore.kernel.org/r/1586099028-5104-1-git-send-email-Julia.Lawall@inria.fr Signed-off-by: Mark Brown --- sound/soc/stm/stm32_sai_sub.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/sound/soc/stm/stm32_sai_sub.c b/sound/soc/stm/stm32_sai_sub.c index 2bd280c01c33..0d0c9afd8791 100644 --- a/sound/soc/stm/stm32_sai_sub.c +++ b/sound/soc/stm/stm32_sai_sub.c @@ -1556,8 +1556,10 @@ static int stm32_sai_sub_probe(struct platform_device *pdev) ret = snd_soc_register_component(&pdev->dev, &stm32_component, &sai->cpu_dai_drv, 1); - if (ret) + if (ret) { + snd_dmaengine_pcm_unregister(&pdev->dev); return ret; + } if (STM_SAI_PROTOCOL_IS_SPDIF(sai)) conf = &stm32_sai_pcm_config_spdif; From c8b78f24c1247b7bd0882885c672d9dec5800bc6 Mon Sep 17 00:00:00 2001 From: Hans de Goede Date: Sun, 5 Apr 2020 15:37:26 +0200 Subject: [PATCH 168/280] ASoC: Intel: bytcr_rt5640: Add quirk for MPMAN MPWIN895CL tablet The MPMAN MPWIN895CL tablet almost fully works with our default settings. The only problem is that it has only 1 speaker so any sounds only playing on the right channel get lost. Add a quirk for this model using the default settings + MONO_SPEAKER.
Signed-off-by: Hans de Goede Acked-by: Pierre-Louis Bossart Link: https://lore.kernel.org/r/20200405133726.24154-1-hdegoede@redhat.com Signed-off-by: Mark Brown --- sound/soc/intel/boards/bytcr_rt5640.c | 11 +++++++++++ 1 file changed, 11 insertions(+) diff --git a/sound/soc/intel/boards/bytcr_rt5640.c b/sound/soc/intel/boards/bytcr_rt5640.c index 33fb8ea4e5cb..08f4ae964b02 100644 --- a/sound/soc/intel/boards/bytcr_rt5640.c +++ b/sound/soc/intel/boards/bytcr_rt5640.c @@ -591,6 +591,17 @@ static const struct dmi_system_id byt_rt5640_quirk_table[] = { BYT_RT5640_SSP0_AIF1 | BYT_RT5640_MCLK_EN), }, + { + /* MPMAN MPWIN895CL */ + .matches = { + DMI_EXACT_MATCH(DMI_SYS_VENDOR, "MPMAN"), + DMI_EXACT_MATCH(DMI_PRODUCT_NAME, "MPWIN8900CL"), + }, + .driver_data = (void *)(BYTCR_INPUT_DEFAULTS | + BYT_RT5640_MONO_SPEAKER | + BYT_RT5640_SSP0_AIF1 | + BYT_RT5640_MCLK_EN), + }, { /* MSI S100 tablet */ .matches = { DMI_EXACT_MATCH(DMI_SYS_VENDOR, "Micro-Star International Co., Ltd."), From d7e0481c0d0f0d2e73d4f1e675cb89d0308d0f7c Mon Sep 17 00:00:00 2001 From: "Rafael J. Wysocki" Date: Mon, 6 Apr 2020 17:40:27 +0200 Subject: [PATCH 169/280] ACPI: EC: Fix up fast path check in acpi_ec_add() The fast path check in acpi_ec_add() is not incorrect, because in fact acpi_device_hid(device) can be equal to ACPI_ECDT_HID only if boot_ec is not NULL, but it may confuse static checkers, so change it to explicitly check boot_ec upfront and use the slow path if that pointer is NULL. Link: https://lore.kernel.org/linux-acpi/20200406144217.GA68494@mwanda/ Fixes: 3d9b8dd8320d ("ACPI: EC: Use fast path in acpi_ec_add() for DSDT boot EC") Signed-off-by: Rafael J. 
Wysocki --- drivers/acpi/ec.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/acpi/ec.c b/drivers/acpi/ec.c index 4816df520f72..b4c0152e92aa 100644 --- a/drivers/acpi/ec.c +++ b/drivers/acpi/ec.c @@ -1589,8 +1589,8 @@ static int acpi_ec_add(struct acpi_device *device) strcpy(acpi_device_name(device), ACPI_EC_DEVICE_NAME); strcpy(acpi_device_class(device), ACPI_EC_CLASS); - if ((boot_ec && boot_ec->handle == device->handle) || - !strcmp(acpi_device_hid(device), ACPI_ECDT_HID)) { + if (boot_ec && (boot_ec->handle == device->handle || + !strcmp(acpi_device_hid(device), ACPI_ECDT_HID))) { /* Fast path: this device corresponds to the boot EC. */ ec = boot_ec; } else { From 54fbdd1035e3a4e4f4082c335b095426cdefd092 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Fri, 3 Apr 2020 11:45:37 -0700 Subject: [PATCH 170/280] xfs: factor out a new xfs_log_force_inode helper Create a new helper to force the log up to the last LSN touching an inode. Signed-off-by: Christoph Hellwig Reviewed-by: Brian Foster Reviewed-by: Darrick J. Wong Signed-off-by: Darrick J. 
Wong --- fs/xfs/xfs_export.c | 14 +------------- fs/xfs/xfs_file.c | 12 +----------- fs/xfs/xfs_inode.c | 19 +++++++++++++++++++ fs/xfs/xfs_inode.h | 1 + 4 files changed, 22 insertions(+), 24 deletions(-) diff --git a/fs/xfs/xfs_export.c b/fs/xfs/xfs_export.c index f1372f9046e3..5a4b0119143a 100644 --- a/fs/xfs/xfs_export.c +++ b/fs/xfs/xfs_export.c @@ -15,7 +15,6 @@ #include "xfs_trans.h" #include "xfs_inode_item.h" #include "xfs_icache.h" -#include "xfs_log.h" #include "xfs_pnfs.h" /* @@ -221,18 +220,7 @@ STATIC int xfs_fs_nfs_commit_metadata( struct inode *inode) { - struct xfs_inode *ip = XFS_I(inode); - struct xfs_mount *mp = ip->i_mount; - xfs_lsn_t lsn = 0; - - xfs_ilock(ip, XFS_ILOCK_SHARED); - if (xfs_ipincount(ip)) - lsn = ip->i_itemp->ili_last_lsn; - xfs_iunlock(ip, XFS_ILOCK_SHARED); - - if (!lsn) - return 0; - return xfs_log_force_lsn(mp, lsn, XFS_LOG_SYNC, NULL); + return xfs_log_force_inode(XFS_I(inode)); } const struct export_operations xfs_export_operations = { diff --git a/fs/xfs/xfs_file.c b/fs/xfs/xfs_file.c index b8a4a3f29b36..68e1cbb3cfcc 100644 --- a/fs/xfs/xfs_file.c +++ b/fs/xfs/xfs_file.c @@ -80,19 +80,9 @@ xfs_dir_fsync( int datasync) { struct xfs_inode *ip = XFS_I(file->f_mapping->host); - struct xfs_mount *mp = ip->i_mount; - xfs_lsn_t lsn = 0; trace_xfs_dir_fsync(ip); - - xfs_ilock(ip, XFS_ILOCK_SHARED); - if (xfs_ipincount(ip)) - lsn = ip->i_itemp->ili_last_lsn; - xfs_iunlock(ip, XFS_ILOCK_SHARED); - - if (!lsn) - return 0; - return xfs_log_force_lsn(mp, lsn, XFS_LOG_SYNC, NULL); + return xfs_log_force_inode(ip); } STATIC int diff --git a/fs/xfs/xfs_inode.c b/fs/xfs/xfs_inode.c index ae86c870da92..d1772786af29 100644 --- a/fs/xfs/xfs_inode.c +++ b/fs/xfs/xfs_inode.c @@ -3945,3 +3945,22 @@ xfs_irele( trace_xfs_irele(ip, _RET_IP_); iput(VFS_I(ip)); } + +/* + * Ensure all commited transactions touching the inode are written to the log. 
+ */ +int +xfs_log_force_inode( + struct xfs_inode *ip) +{ + xfs_lsn_t lsn = 0; + + xfs_ilock(ip, XFS_ILOCK_SHARED); + if (xfs_ipincount(ip)) + lsn = ip->i_itemp->ili_last_lsn; + xfs_iunlock(ip, XFS_ILOCK_SHARED); + + if (!lsn) + return 0; + return xfs_log_force_lsn(ip->i_mount, lsn, XFS_LOG_SYNC, NULL); +} diff --git a/fs/xfs/xfs_inode.h b/fs/xfs/xfs_inode.h index 492e53992fa9..c6a63f6764a6 100644 --- a/fs/xfs/xfs_inode.h +++ b/fs/xfs/xfs_inode.h @@ -426,6 +426,7 @@ int xfs_itruncate_extents_flags(struct xfs_trans **, struct xfs_inode *, int, xfs_fsize_t, int); void xfs_iext_realloc(xfs_inode_t *, int, int); +int xfs_log_force_inode(struct xfs_inode *ip); void xfs_iunpin_wait(xfs_inode_t *); #define xfs_ipincount(ip) ((unsigned int) atomic_read(&ip->i_pincount)) From 5833112df7e9a306af9af09c60127b92ed723962 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Fri, 3 Apr 2020 11:45:37 -0700 Subject: [PATCH 171/280] xfs: reflink should force the log out if mounted with wsync Reflink should force the log out to disk if the filesystem was mounted with wsync, the same as most other operations in xfs. [Note: XFS_MOUNT_WSYNC is set when the admin mounts the filesystem with either the 'wsync' or 'sync' mount options, which effectively means that we're classifying reflink/dedupe as IO operations and making them synchronous when required.] Fixes: 3fc9f5e409319 ("xfs: remove xfs_reflink_remap_range") Signed-off-by: Christoph Hellwig Reviewed-by: Brian Foster [darrick: add more to the changelog] Reviewed-by: Darrick J. Wong Signed-off-by: Darrick J. 
Wong --- fs/xfs/xfs_file.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/fs/xfs/xfs_file.c b/fs/xfs/xfs_file.c index 68e1cbb3cfcc..4b8bdecc3863 100644 --- a/fs/xfs/xfs_file.c +++ b/fs/xfs/xfs_file.c @@ -1059,7 +1059,11 @@ xfs_file_remap_range( ret = xfs_reflink_update_dest(dest, pos_out + len, cowextsize, remap_flags); + if (ret) + goto out_unlock; + if (mp->m_flags & XFS_MOUNT_WSYNC) + xfs_log_force_inode(dest); out_unlock: xfs_reflink_remap_unlock(file_in, file_out); if (ret) From 4146575eb0f0cb41e6d909234b654064d1bed183 Mon Sep 17 00:00:00 2001 From: Hans de Goede Date: Thu, 2 Apr 2020 20:52:57 +0200 Subject: [PATCH 172/280] ASoC: rt5645: Add platform-data for Medion E1239T The Medion E1239T uses the default jack-detect mode 3, but instead of using an analog microphone it is using a DMIC on dmic-data-pin 1, like other models following Intel's Braswell reference design. This commit adds a DMI quirk pointing to the intel_braswell_platform_data for this model. Signed-off-by: Hans de Goede Reviewed-by: Pierre-Louis Bossart Link: https://lore.kernel.org/r/20200402185257.3355-1-hdegoede@redhat.com Signed-off-by: Mark Brown --- sound/soc/codecs/rt5645.c | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/sound/soc/codecs/rt5645.c b/sound/soc/codecs/rt5645.c index 92d67010aeed..6ba1849a77b0 100644 --- a/sound/soc/codecs/rt5645.c +++ b/sound/soc/codecs/rt5645.c @@ -3758,6 +3758,14 @@ static const struct dmi_system_id dmi_platform_data[] = { }, .driver_data = (void *)&kahlee_platform_data, }, + { + .ident = "Medion E1239T", + .matches = { + DMI_EXACT_MATCH(DMI_SYS_VENDOR, "MEDION"), + DMI_MATCH(DMI_PRODUCT_NAME, "E1239T MD60568"), + }, + .driver_data = (void *)&intel_braswell_platform_data, + }, { } }; From 6e8a36c13382b7165d23928caee8d91c1b301142 Mon Sep 17 00:00:00 2001 From: Imre Deak Date: Mon, 30 Mar 2020 18:22:44 +0300 Subject: [PATCH 173/280] drm/i915/icl+: Don't enable DDI IO power on a TypeC port in TBT mode MIME-Version: 1.0 Content-Type:
text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The DDI IO power well must not be enabled for a TypeC port in TBT mode, ensure this during driver loading/system resume. This gets rid of error messages like [drm] *ERROR* power well DDI E TC2 IO state mismatch (refcount 1/enabled 0) and avoids leaking the power ref when disabling the output. Cc: # v5.4+ Signed-off-by: Imre Deak Reviewed-by: José Roberto de Souza Link: https://patchwork.freedesktop.org/patch/msgid/20200330152244.11316-1-imre.deak@intel.com (cherry picked from commit f77a2db27f26c3ccba0681f7e89fef083718f07f) Signed-off-by: Rodrigo Vivi --- drivers/gpu/drm/i915/display/intel_ddi.c | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/i915/display/intel_ddi.c b/drivers/gpu/drm/i915/display/intel_ddi.c index 73d0f4648c06..5202fdec8e0a 100644 --- a/drivers/gpu/drm/i915/display/intel_ddi.c +++ b/drivers/gpu/drm/i915/display/intel_ddi.c @@ -1869,7 +1869,11 @@ static void intel_ddi_get_power_domains(struct intel_encoder *encoder, return; dig_port = enc_to_dig_port(encoder); - intel_display_power_get(dev_priv, dig_port->ddi_io_power_domain); + + if (!intel_phy_is_tc(dev_priv, phy) || + dig_port->tc_mode != TC_PORT_TBT_ALT) + intel_display_power_get(dev_priv, + dig_port->ddi_io_power_domain); /* * AUX power is only needed for (e)DP mode, and for HDMI mode on TC From 8262b49209c410d9df063e5ddc3168e210b28422 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jos=C3=A9=20Roberto=20de=20Souza?= Date: Mon, 30 Mar 2020 14:00:42 -0700 Subject: [PATCH 174/280] drm/i915/dp: Return the right vswing tables MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit DDI ports have its encoders initialized with INTEL_OUTPUT_DDI type and later eDP ports that have the type changed to INTEL_OUTPUT_EDP. But for all other DDI ports it can drive HDMI or DP depending on what user connects to the ports. 
ehl_get_combo_buf_trans() and tgl_get_combo_buf_trans() were checking for INTEL_OUTPUT_DP, which was never true, causing the wrong vswing tables to be used. So replace the INTEL_OUTPUT_DP checks with the valid output types that these functions receive as parameters. HDMI cases will be handled correctly, as they do not use encoder->type; instead they call the functions with INTEL_OUTPUT_HDMI as the type parameter, and HDMI doesn't have retraining. v2: changed INTEL_OUTPUT_DDI to INTEL_OUTPUT_EDP and INTEL_OUTPUT_HDMI Fixes: bd3cf6f7ce20 ("drm/i915/dp/tgl+: Update combo phy vswing tables") Cc: Clinton A Taylor Cc: Matt Roper Cc: Ville Syrjälä Reviewed-by: Ville Syrjälä Signed-off-by: José Roberto de Souza Link: https://patchwork.freedesktop.org/patch/msgid/20200330210044.130510-1-jose.souza@intel.com (cherry picked from commit 70988115ac69ecc249aa0f8e8265e8daf87bc28c) Signed-off-by: Rodrigo Vivi --- drivers/gpu/drm/i915/display/intel_ddi.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/i915/display/intel_ddi.c b/drivers/gpu/drm/i915/display/intel_ddi.c index 5202fdec8e0a..2c617c98db3a 100644 --- a/drivers/gpu/drm/i915/display/intel_ddi.c +++ b/drivers/gpu/drm/i915/display/intel_ddi.c @@ -947,7 +947,8 @@ static const struct cnl_ddi_buf_trans * ehl_get_combo_buf_trans(struct drm_i915_private *dev_priv, int type, int rate, int *n_entries) { - if (type == INTEL_OUTPUT_DP && rate > 270000) { + if (type != INTEL_OUTPUT_HDMI && type != INTEL_OUTPUT_EDP && + rate > 270000) { *n_entries = ARRAY_SIZE(ehl_combo_phy_ddi_translations_hbr2_hbr3); return ehl_combo_phy_ddi_translations_hbr2_hbr3; } @@ -959,7 +960,7 @@ static const struct cnl_ddi_buf_trans * tgl_get_combo_buf_trans(struct drm_i915_private *dev_priv, int type, int rate, int *n_entries) { - if (type != INTEL_OUTPUT_DP) { + if (type == INTEL_OUTPUT_HDMI || type == INTEL_OUTPUT_EDP) { return icl_get_combo_buf_trans(dev_priv, type, rate, n_entries); } else if (rate > 270000) { *n_entries =
ARRAY_SIZE(tgl_combo_phy_ddi_translations_dp_hbr2); From 0b72a251bf92ca2378530fa1f9b35a71830ab51c Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Tue, 31 Mar 2020 16:23:48 +0100 Subject: [PATCH 175/280] drm/i915/gt: Fill all the unused space in the GGTT When we allocate space in the GGTT we may have to allocate a larger region than will be populated by the object to accommodate fencing. Make sure that this space beyond the end of the buffer points safely into scratch space, in case the HW tries to access it anyway (e.g. fenced access to the last tile row). v2: Preemptively / conservatively guard gen6 ggtt as well. Reported-by: Imre Deak References: https://gitlab.freedesktop.org/drm/intel/-/issues/1554 Signed-off-by: Chris Wilson Cc: Matthew Auld Cc: Imre Deak Cc: stable@vger.kernel.org Reviewed-by: Matthew Auld Reviewed-by: Imre Deak Link: https://patchwork.freedesktop.org/patch/msgid/20200331152348.26946-1-chris@chris-wilson.co.uk (cherry picked from commit 4d6c18590870fbac1e65dde5e01e621c8e0ca096) Signed-off-by: Rodrigo Vivi --- drivers/gpu/drm/i915/gt/intel_ggtt.c | 37 ++++++++++++++++++++-------- 1 file changed, 27 insertions(+), 10 deletions(-) diff --git a/drivers/gpu/drm/i915/gt/intel_ggtt.c b/drivers/gpu/drm/i915/gt/intel_ggtt.c index aed498a0d032..4c5a209cb669 100644 --- a/drivers/gpu/drm/i915/gt/intel_ggtt.c +++ b/drivers/gpu/drm/i915/gt/intel_ggtt.c @@ -191,10 +191,11 @@ static void gen8_ggtt_insert_entries(struct i915_address_space *vm, enum i915_cache_level level, u32 flags) { - struct i915_ggtt *ggtt = i915_vm_to_ggtt(vm); - struct sgt_iter sgt_iter; - gen8_pte_t __iomem *gtt_entries; const gen8_pte_t pte_encode = gen8_ggtt_pte_encode(0, level, 0); + struct i915_ggtt *ggtt = i915_vm_to_ggtt(vm); + gen8_pte_t __iomem *gte; + gen8_pte_t __iomem *end; + struct sgt_iter iter; dma_addr_t addr; /* @@ -202,10 +203,17 @@ static void gen8_ggtt_insert_entries(struct i915_address_space *vm, * not to allow the user to override access to a read only page. 
*/ - gtt_entries = (gen8_pte_t __iomem *)ggtt->gsm; - gtt_entries += vma->node.start / I915_GTT_PAGE_SIZE; - for_each_sgt_daddr(addr, sgt_iter, vma->pages) - gen8_set_pte(gtt_entries++, pte_encode | addr); + gte = (gen8_pte_t __iomem *)ggtt->gsm; + gte += vma->node.start / I915_GTT_PAGE_SIZE; + end = gte + vma->node.size / I915_GTT_PAGE_SIZE; + + for_each_sgt_daddr(addr, iter, vma->pages) + gen8_set_pte(gte++, pte_encode | addr); + GEM_BUG_ON(gte > end); + + /* Fill the allocated but "unused" space beyond the end of the buffer */ + while (gte < end) + gen8_set_pte(gte++, vm->scratch[0].encode); /* * We want to flush the TLBs only after we're certain all the PTE @@ -241,13 +249,22 @@ static void gen6_ggtt_insert_entries(struct i915_address_space *vm, u32 flags) { struct i915_ggtt *ggtt = i915_vm_to_ggtt(vm); - gen6_pte_t __iomem *entries = (gen6_pte_t __iomem *)ggtt->gsm; - unsigned int i = vma->node.start / I915_GTT_PAGE_SIZE; + gen6_pte_t __iomem *gte; + gen6_pte_t __iomem *end; struct sgt_iter iter; dma_addr_t addr; + gte = (gen6_pte_t __iomem *)ggtt->gsm; + gte += vma->node.start / I915_GTT_PAGE_SIZE; + end = gte + vma->node.size / I915_GTT_PAGE_SIZE; + for_each_sgt_daddr(addr, iter, vma->pages) - iowrite32(vm->pte_encode(addr, level, flags), &entries[i++]); + iowrite32(vm->pte_encode(addr, level, flags), gte++); + GEM_BUG_ON(gte > end); + + /* Fill the allocated but "unused" space beyond the end of the buffer */ + while (gte < end) + iowrite32(vm->scratch[0].encode, gte++); /* * We want to flush the TLBs only after we're certain all the PTE From 721017cf4bd8ea5e5d6295f90ea6d30bab577851 Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Tue, 31 Mar 2020 17:21:50 +0100 Subject: [PATCH 176/280] drm/i915/gem: Ignore readonly failures when updating relocs If the user passes in a readonly reloc[], by the time we notice we have already committed to modifying the execobjects, or have indeed done so already. 
Reporting the failure just compounds the issue as we have no second pass to fall back to anymore. "Be damned if you do, and damned if you don't." Testcase: igt/gem_exec_reloc/readonly Fixes: 7dc8f1143778 ("drm/i915/gem: Drop relocation slowpath") References: fddcd00a49e9 ("drm/i915: Force the slow path after a user-write error") Signed-off-by: Chris Wilson Cc: Matthew Auld Cc: Joonas Lahtinen Reviewed-by: Andi Shyti Link: https://patchwork.freedesktop.org/patch/msgid/20200331162150.3635-1-chris@chris-wilson.co.uk (cherry picked from commit 97a37c919f6262fe75afc4a4eb838093bf18b032) Signed-off-by: Rodrigo Vivi --- drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c b/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c index 36d069504836..cb2c0a64f8b9 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c +++ b/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c @@ -1477,10 +1477,8 @@ static int eb_relocate_vma(struct i915_execbuffer *eb, struct eb_vma *ev) * can read from this userspace address. */ offset = gen8_canonical_addr(offset & ~UPDATE); - if (unlikely(__put_user(offset, &urelocs[r-stack].presumed_offset))) { - remain = -EFAULT; - goto out; - } + __put_user(offset, + &urelocs[r - stack].presumed_offset); } } while (r++, --count); urelocs += ARRAY_SIZE(stack); From 1aaea8476d9f014667d2cb24819f9bcaf3ebb7a4 Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Mon, 6 Apr 2020 12:48:21 +0100 Subject: [PATCH 177/280] drm/i915/gem: Flush all the reloc_gpu batch __i915_gem_object_flush_map() takes a byte range, so feed it the written bytes and do not mistake the u32 index as bytes! 
Fixes: a679f58d0510 ("drm/i915: Flush pages on acquisition") Signed-off-by: Chris Wilson Cc: Matthew Auld Cc: # v5.2+ Reviewed-by: Matthew Auld Link: https://patchwork.freedesktop.org/patch/msgid/20200406114821.10949-1-chris@chris-wilson.co.uk (cherry picked from commit 30c88a47f1abd5744908d3681f54dcf823fe2a12) Signed-off-by: Rodrigo Vivi --- drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c b/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c index cb2c0a64f8b9..b7440f06c5e2 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c +++ b/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c @@ -896,11 +896,13 @@ static inline struct i915_ggtt *cache_to_ggtt(struct reloc_cache *cache) static void reloc_gpu_flush(struct reloc_cache *cache) { - GEM_BUG_ON(cache->rq_size >= cache->rq->batch->obj->base.size / sizeof(u32)); + struct drm_i915_gem_object *obj = cache->rq->batch->obj; + + GEM_BUG_ON(cache->rq_size >= obj->base.size / sizeof(u32)); cache->rq_cmd[cache->rq_size] = MI_BATCH_BUFFER_END; - __i915_gem_object_flush_map(cache->rq->batch->obj, 0, cache->rq_size); - i915_gem_object_unpin_map(cache->rq->batch->obj); + __i915_gem_object_flush_map(obj, 0, sizeof(u32) * (cache->rq_size + 1)); + i915_gem_object_unpin_map(obj); intel_gt_chipset_flush(cache->rq->engine->gt); From 88a77559cc06f4a80e956dbf2da2a33c5f18c0af Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Mon, 6 Apr 2020 13:58:34 +0200 Subject: [PATCH 178/280] PM / sleep: move SNAPSHOT_SET_SWAP_AREA handling into a helper Move the handling of the SNAPSHOT_SET_SWAP_AREA ioctl from the main ioctl helper into a helper function. Signed-off-by: Christoph Hellwig Signed-off-by: Rafael J. 
Wysocki --- kernel/power/user.c | 57 +++++++++++++++++++++++---------------------- 1 file changed, 29 insertions(+), 28 deletions(-) diff --git a/kernel/power/user.c b/kernel/power/user.c index ef90eb1fb86e..0cb555f526e4 100644 --- a/kernel/power/user.c +++ b/kernel/power/user.c @@ -196,6 +196,34 @@ static ssize_t snapshot_write(struct file *filp, const char __user *buf, return res; } +static int snapshot_set_swap_area(struct snapshot_data *data, + void __user *argp) +{ + struct resume_swap_area swap_area; + sector_t offset; + dev_t swdev; + + if (swsusp_swap_in_use()) + return -EPERM; + if (copy_from_user(&swap_area, argp, sizeof(swap_area))) + return -EFAULT; + + /* + * User space encodes device types as two-byte values, + * so we need to recode them + */ + swdev = new_decode_dev(swap_area.dev); + if (!swdev) { + data->swap = -1; + return -EINVAL; + } + offset = swap_area.offset; + data->swap = swap_type_of(swdev, offset, NULL); + if (data->swap < 0) + return -ENODEV; + return 0; +} + static long snapshot_ioctl(struct file *filp, unsigned int cmd, unsigned long arg) { @@ -351,34 +379,7 @@ static long snapshot_ioctl(struct file *filp, unsigned int cmd, break; case SNAPSHOT_SET_SWAP_AREA: - if (swsusp_swap_in_use()) { - error = -EPERM; - } else { - struct resume_swap_area swap_area; - dev_t swdev; - - error = copy_from_user(&swap_area, (void __user *)arg, - sizeof(struct resume_swap_area)); - if (error) { - error = -EFAULT; - break; - } - - /* - * User space encodes device types as two-byte values, - * so we need to recode them - */ - swdev = new_decode_dev(swap_area.dev); - if (swdev) { - offset = swap_area.offset; - data->swap = swap_type_of(swdev, offset, NULL); - if (data->swap < 0) - error = -ENODEV; - } else { - data->swap = -1; - error = -EINVAL; - } - } + error = snapshot_set_swap_area(data, (void __user *)arg); break; default: From 0f5c4c6e0e9874952e2950465a8859782437b465 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Mon, 6 Apr 2020 13:58:35 +0200 
Subject: [PATCH 179/280] PM / sleep: handle the compat case in snapshot_set_swap_area() Use in_compat_syscall to copy directly from the 32-bit ABI structure. Signed-off-by: Christoph Hellwig Signed-off-by: Rafael J. Wysocki --- kernel/power/user.c | 54 ++++++++++++++++++--------------------------- 1 file changed, 22 insertions(+), 32 deletions(-) diff --git a/kernel/power/user.c b/kernel/power/user.c index 0cb555f526e4..7959449765d9 100644 --- a/kernel/power/user.c +++ b/kernel/power/user.c @@ -196,28 +196,44 @@ static ssize_t snapshot_write(struct file *filp, const char __user *buf, return res; } +struct compat_resume_swap_area { + compat_loff_t offset; + u32 dev; +} __packed; + static int snapshot_set_swap_area(struct snapshot_data *data, void __user *argp) { - struct resume_swap_area swap_area; sector_t offset; dev_t swdev; if (swsusp_swap_in_use()) return -EPERM; - if (copy_from_user(&swap_area, argp, sizeof(swap_area))) - return -EFAULT; + + if (in_compat_syscall()) { + struct compat_resume_swap_area swap_area; + + if (copy_from_user(&swap_area, argp, sizeof(swap_area))) + return -EFAULT; + swdev = new_decode_dev(swap_area.dev); + offset = swap_area.offset; + } else { + struct resume_swap_area swap_area; + + if (copy_from_user(&swap_area, argp, sizeof(swap_area))) + return -EFAULT; + swdev = new_decode_dev(swap_area.dev); + offset = swap_area.offset; + } /* * User space encodes device types as two-byte values, * so we need to recode them */ - swdev = new_decode_dev(swap_area.dev); if (!swdev) { data->swap = -1; return -EINVAL; } - offset = swap_area.offset; data->swap = swap_type_of(swdev, offset, NULL); if (data->swap < 0) return -ENODEV; @@ -394,12 +410,6 @@ static long snapshot_ioctl(struct file *filp, unsigned int cmd, } #ifdef CONFIG_COMPAT - -struct compat_resume_swap_area { - compat_loff_t offset; - u32 dev; -} __packed; - static long snapshot_compat_ioctl(struct file *file, unsigned int cmd, unsigned long arg) { @@ -410,33 +420,13 @@ 
snapshot_compat_ioctl(struct file *file, unsigned int cmd, unsigned long arg) case SNAPSHOT_AVAIL_SWAP_SIZE: case SNAPSHOT_ALLOC_SWAP_PAGE: case SNAPSHOT_CREATE_IMAGE: + case SNAPSHOT_SET_SWAP_AREA: return snapshot_ioctl(file, cmd, (unsigned long) compat_ptr(arg)); - - case SNAPSHOT_SET_SWAP_AREA: { - struct compat_resume_swap_area __user *u_swap_area = - compat_ptr(arg); - struct resume_swap_area swap_area; - mm_segment_t old_fs; - int err; - - err = get_user(swap_area.offset, &u_swap_area->offset); - err |= get_user(swap_area.dev, &u_swap_area->dev); - if (err) - return -EFAULT; - old_fs = get_fs(); - set_fs(KERNEL_DS); - err = snapshot_ioctl(file, SNAPSHOT_SET_SWAP_AREA, - (unsigned long) &swap_area); - set_fs(old_fs); - return err; - } - default: return snapshot_ioctl(file, cmd, arg); } } - #endif /* CONFIG_COMPAT */ static const struct file_operations snapshot_fops = { From 536167d47ade11a5540b4b1a7125308b1641da7a Mon Sep 17 00:00:00 2001 From: Keith Busch Date: Tue, 7 Apr 2020 03:13:48 +0900 Subject: [PATCH 180/280] blk-mq: don't commit_rqs() if none were queued Unburden the drivers from checking if a call to commit_rqs() is valid by not calling it when there are no requests to commit. Signed-off-by: Keith Busch Signed-off-by: Jens Axboe --- block/blk-mq.c | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/block/blk-mq.c b/block/blk-mq.c index f6291ceedee4..8e56884fd2e9 100644 --- a/block/blk-mq.c +++ b/block/blk-mq.c @@ -1289,7 +1289,7 @@ bool blk_mq_dispatch_rq_list(struct request_queue *q, struct list_head *list, * the driver there was more coming, but that turned out to * be a lie. 
*/ - if (q->mq_ops->commit_rqs) + if (q->mq_ops->commit_rqs && queued) q->mq_ops->commit_rqs(hctx); spin_lock(&hctx->lock); @@ -1911,6 +1911,8 @@ blk_status_t blk_mq_request_issue_directly(struct request *rq, bool last) void blk_mq_try_issue_list_directly(struct blk_mq_hw_ctx *hctx, struct list_head *list) { + int queued = 0; + while (!list_empty(list)) { blk_status_t ret; struct request *rq = list_first_entry(list, struct request, @@ -1926,7 +1928,8 @@ void blk_mq_try_issue_list_directly(struct blk_mq_hw_ctx *hctx, break; } blk_mq_end_request(rq, ret); - } + } else + queued++; } /* @@ -1934,7 +1937,7 @@ void blk_mq_try_issue_list_directly(struct blk_mq_hw_ctx *hctx, * the driver there was more coming, but that turned out to * be a lie. */ - if (!list_empty(list) && hctx->queue->mq_ops->commit_rqs) + if (!list_empty(list) && hctx->queue->mq_ops->commit_rqs && queued) hctx->queue->mq_ops->commit_rqs(hctx); } From 431e76c3edd76d84a0ed1eb81a286b2ddecc5ee4 Mon Sep 17 00:00:00 2001 From: Kailang Yang Date: Tue, 7 Apr 2020 14:40:20 +0800 Subject: [PATCH 181/280] ALSA: hda/realtek - Add supported new mute Led for HP HP notebooks support a new mute LED. The hardware pins were not enough to meet the old LED rule. JD2 controls the playback mute LED. GPO3 controls the capture mute LED. (ALC285 doesn't control GPO3 via verb command.) These two pins can only be controlled through COEF registers.
[ corrected typos by tiwai ] Signed-off-by: Kailang Yang Link: https://lore.kernel.org/r/6741211598ba499687362ff2aa30626b@realtek.com Signed-off-by: Takashi Iwai --- sound/pci/hda/patch_realtek.c | 81 +++++++++++++++++++++++++++++++++++ 1 file changed, 81 insertions(+) diff --git a/sound/pci/hda/patch_realtek.c b/sound/pci/hda/patch_realtek.c index a32833c65db8..cebfdeba659e 100644 --- a/sound/pci/hda/patch_realtek.c +++ b/sound/pci/hda/patch_realtek.c @@ -86,6 +86,14 @@ struct alc_spec { unsigned int gpio_mute_led_mask; unsigned int gpio_mic_led_mask; + unsigned int mute_led_coef_idx; + unsigned int mute_led_coefbit_mask; + unsigned int mute_led_coefbit_on; + unsigned int mute_led_coefbit_off; + unsigned int mic_led_coef_idx; + unsigned int mic_led_coefbit_mask; + unsigned int mic_led_coefbit_on; + unsigned int mic_led_coefbit_off; hda_nid_t headset_mic_pin; hda_nid_t headphone_mic_pin; @@ -4178,6 +4186,73 @@ static void alc280_fixup_hp_gpio4(struct hda_codec *codec, } } +/* update mute-LED according to the speaker mute state via COEF bit */ +static void alc_fixup_mute_led_coefbit_hook(void *private_data, int enabled) +{ + struct hda_codec *codec = private_data; + struct alc_spec *spec = codec->spec; + + if (spec->mute_led_polarity) + enabled = !enabled; + + /* temporarily power up/down for setting COEF bit */ + enabled ? 
alc_update_coef_idx(codec, spec->mute_led_coef_idx, + spec->mute_led_coefbit_mask, spec->mute_led_coefbit_off) : + alc_update_coef_idx(codec, spec->mute_led_coef_idx, + spec->mute_led_coefbit_mask, spec->mute_led_coefbit_on); +} + +static void alc285_fixup_hp_mute_led_coefbit(struct hda_codec *codec, + const struct hda_fixup *fix, + int action) +{ + struct alc_spec *spec = codec->spec; + + if (action == HDA_FIXUP_ACT_PRE_PROBE) { + spec->mute_led_polarity = 0; + spec->mute_led_coef_idx = 0x0b; + spec->mute_led_coefbit_mask = 1<<3; + spec->mute_led_coefbit_on = 1<<3; + spec->mute_led_coefbit_off = 0; + spec->gen.vmaster_mute.hook = alc_fixup_mute_led_coefbit_hook; + spec->gen.vmaster_mute_enum = 1; + } +} + +/* turn on/off mic-mute LED per capture hook by coef bit */ +static void alc_hp_cap_micmute_update(struct hda_codec *codec) +{ + struct alc_spec *spec = codec->spec; + + if (spec->gen.micmute_led.led_value) + alc_update_coef_idx(codec, spec->mic_led_coef_idx, + spec->mic_led_coefbit_mask, spec->mic_led_coefbit_on); + else + alc_update_coef_idx(codec, spec->mic_led_coef_idx, + spec->mic_led_coefbit_mask, spec->mic_led_coefbit_off); +} + +static void alc285_fixup_hp_coef_micmute_led(struct hda_codec *codec, + const struct hda_fixup *fix, int action) +{ + struct alc_spec *spec = codec->spec; + + if (action == HDA_FIXUP_ACT_PRE_PROBE) { + spec->mic_led_coef_idx = 0x19; + spec->mic_led_coefbit_mask = 1<<13; + spec->mic_led_coefbit_on = 1<<13; + spec->mic_led_coefbit_off = 0; + snd_hda_gen_add_micmute_led(codec, alc_hp_cap_micmute_update); + } +} + +static void alc285_fixup_hp_mute_led(struct hda_codec *codec, + const struct hda_fixup *fix, int action) +{ + alc285_fixup_hp_mute_led_coefbit(codec, fix, action); + alc285_fixup_hp_coef_micmute_led(codec, fix, action); +} + #if IS_REACHABLE(CONFIG_INPUT) static void gpio2_mic_hotkey_event(struct hda_codec *codec, struct hda_jack_callback *event) @@ -5964,6 +6039,7 @@ enum { ALC285_FIXUP_THINKPAD_HEADSET_JACK, 
ALC294_FIXUP_ASUS_HPE, ALC285_FIXUP_HP_GPIO_LED, + ALC285_FIXUP_HP_MUTE_LED, }; static const struct hda_fixup alc269_fixups[] = { @@ -7089,6 +7165,10 @@ static const struct hda_fixup alc269_fixups[] = { .type = HDA_FIXUP_FUNC, .v.func = alc285_fixup_hp_gpio_led, }, + [ALC285_FIXUP_HP_MUTE_LED] = { + .type = HDA_FIXUP_FUNC, + .v.func = alc285_fixup_hp_mute_led, + }, }; static const struct snd_pci_quirk alc269_fixup_tbl[] = { @@ -7234,6 +7314,7 @@ static const struct snd_pci_quirk alc269_fixup_tbl[] = { SND_PCI_QUIRK(0x103c, 0x8497, "HP Envy x360", ALC269_FIXUP_HP_MUTE_LED_MIC3), SND_PCI_QUIRK(0x103c, 0x84e7, "HP Pavilion 15", ALC269_FIXUP_HP_MUTE_LED_MIC3), SND_PCI_QUIRK(0x103c, 0x8736, "HP", ALC285_FIXUP_HP_GPIO_LED), + SND_PCI_QUIRK(0x103c, 0x877a, "HP", ALC285_FIXUP_HP_MUTE_LED), SND_PCI_QUIRK(0x1043, 0x103e, "ASUS X540SA", ALC256_FIXUP_ASUS_MIC), SND_PCI_QUIRK(0x1043, 0x103f, "ASUS TX300", ALC282_FIXUP_ASUS_TX300), SND_PCI_QUIRK(0x1043, 0x106d, "Asus K53BE", ALC269_FIXUP_LIMIT_INT_MIC_BOOST), From 24164f434dc9c23cd34fca1e36acea9d0581bdde Mon Sep 17 00:00:00 2001 From: Kailang Yang Date: Tue, 7 Apr 2020 14:52:42 +0800 Subject: [PATCH 182/280] ALSA: hda/realtek - Add HP new mute led supported for ALC236 HP new platform has new mute led feature. COEF index 0x34 bit 5 to control playback mute led. COEF index 0x35 bit 2 and bit 3 to control Mic mute led. 
[ corrected typos by tiwai ] Signed-off-by: Kailang Yang Link: https://lore.kernel.org/r/6741211598ba499687362ff2aa30626b@realtek.com Signed-off-by: Takashi Iwai --- sound/pci/hda/patch_realtek.c | 44 +++++++++++++++++++++++++++++++++++ 1 file changed, 44 insertions(+) diff --git a/sound/pci/hda/patch_realtek.c b/sound/pci/hda/patch_realtek.c index cebfdeba659e..019239190f6d 100644 --- a/sound/pci/hda/patch_realtek.c +++ b/sound/pci/hda/patch_realtek.c @@ -4219,6 +4219,23 @@ static void alc285_fixup_hp_mute_led_coefbit(struct hda_codec *codec, } } +static void alc236_fixup_hp_mute_led_coefbit(struct hda_codec *codec, + const struct hda_fixup *fix, + int action) +{ + struct alc_spec *spec = codec->spec; + + if (action == HDA_FIXUP_ACT_PRE_PROBE) { + spec->mute_led_polarity = 0; + spec->mute_led_coef_idx = 0x34; + spec->mute_led_coefbit_mask = 1<<5; + spec->mute_led_coefbit_on = 0; + spec->mute_led_coefbit_off = 1<<5; + spec->gen.vmaster_mute.hook = alc_fixup_mute_led_coefbit_hook; + spec->gen.vmaster_mute_enum = 1; + } +} + /* turn on/off mic-mute LED per capture hook by coef bit */ static void alc_hp_cap_micmute_update(struct hda_codec *codec) { @@ -4246,6 +4263,20 @@ static void alc285_fixup_hp_coef_micmute_led(struct hda_codec *codec, } } +static void alc236_fixup_hp_coef_micmute_led(struct hda_codec *codec, + const struct hda_fixup *fix, int action) +{ + struct alc_spec *spec = codec->spec; + + if (action == HDA_FIXUP_ACT_PRE_PROBE) { + spec->mic_led_coef_idx = 0x35; + spec->mic_led_coefbit_mask = 3<<2; + spec->mic_led_coefbit_on = 2<<2; + spec->mic_led_coefbit_off = 1<<2; + snd_hda_gen_add_micmute_led(codec, alc_hp_cap_micmute_update); + } +} + static void alc285_fixup_hp_mute_led(struct hda_codec *codec, const struct hda_fixup *fix, int action) { @@ -4253,6 +4284,13 @@ static void alc285_fixup_hp_mute_led(struct hda_codec *codec, alc285_fixup_hp_coef_micmute_led(codec, fix, action); } +static void alc236_fixup_hp_mute_led(struct hda_codec *codec, + const 
struct hda_fixup *fix, int action) +{ + alc236_fixup_hp_mute_led_coefbit(codec, fix, action); + alc236_fixup_hp_coef_micmute_led(codec, fix, action); +} + #if IS_REACHABLE(CONFIG_INPUT) static void gpio2_mic_hotkey_event(struct hda_codec *codec, struct hda_jack_callback *event) @@ -6040,6 +6078,7 @@ enum { ALC294_FIXUP_ASUS_HPE, ALC285_FIXUP_HP_GPIO_LED, ALC285_FIXUP_HP_MUTE_LED, + ALC236_FIXUP_HP_MUTE_LED, }; static const struct hda_fixup alc269_fixups[] = { @@ -7169,6 +7208,10 @@ static const struct hda_fixup alc269_fixups[] = { .type = HDA_FIXUP_FUNC, .v.func = alc285_fixup_hp_mute_led, }, + [ALC236_FIXUP_HP_MUTE_LED] = { + .type = HDA_FIXUP_FUNC, + .v.func = alc236_fixup_hp_mute_led, + }, }; static const struct snd_pci_quirk alc269_fixup_tbl[] = { @@ -7315,6 +7358,7 @@ static const struct snd_pci_quirk alc269_fixup_tbl[] = { SND_PCI_QUIRK(0x103c, 0x84e7, "HP Pavilion 15", ALC269_FIXUP_HP_MUTE_LED_MIC3), SND_PCI_QUIRK(0x103c, 0x8736, "HP", ALC285_FIXUP_HP_GPIO_LED), SND_PCI_QUIRK(0x103c, 0x877a, "HP", ALC285_FIXUP_HP_MUTE_LED), + SND_PCI_QUIRK(0x103c, 0x877d, "HP", ALC236_FIXUP_HP_MUTE_LED), SND_PCI_QUIRK(0x1043, 0x103e, "ASUS X540SA", ALC256_FIXUP_ASUS_MIC), SND_PCI_QUIRK(0x1043, 0x103f, "ASUS TX300", ALC282_FIXUP_ASUS_TX300), SND_PCI_QUIRK(0x1043, 0x106d, "Asus K53BE", ALC269_FIXUP_LIMIT_INT_MIC_BOOST), From 0102e4efda76d0721bc744dd80258eb6cfa25fc3 Mon Sep 17 00:00:00 2001 From: Yan Yankovskyi Date: Mon, 23 Mar 2020 18:15:11 +0200 Subject: [PATCH 183/280] xen: Use evtchn_type_t as a type for event channels Make event channel functions pass event channel port using evtchn_port_t type. It eliminates signed <-> unsigned conversion. 
Signed-off-by: Yan Yankovskyi Reviewed-by: Boris Ostrovsky Link: https://lore.kernel.org/r/20200323152343.GA28422@kbp1-lhp-F74019 Signed-off-by: Juergen Gross --- drivers/xen/events/events_2l.c | 16 ++--- drivers/xen/events/events_base.c | 93 ++++++++++++++------------- drivers/xen/events/events_fifo.c | 22 +++---- drivers/xen/events/events_internal.h | 30 ++++----- drivers/xen/evtchn.c | 13 ++-- drivers/xen/gntdev-common.h | 3 +- drivers/xen/gntdev.c | 2 +- drivers/xen/pvcalls-back.c | 5 +- drivers/xen/pvcalls-front.c | 15 +++-- drivers/xen/xen-pciback/xenbus.c | 7 +- drivers/xen/xen-scsiback.c | 3 +- drivers/xen/xenbus/xenbus_client.c | 6 +- include/xen/events.h | 22 +++---- include/xen/interface/event_channel.h | 2 +- include/xen/xenbus.h | 5 +- 15 files changed, 128 insertions(+), 116 deletions(-) diff --git a/drivers/xen/events/events_2l.c b/drivers/xen/events/events_2l.c index 8edef51c92e5..64df919a2111 100644 --- a/drivers/xen/events/events_2l.c +++ b/drivers/xen/events/events_2l.c @@ -53,37 +53,37 @@ static void evtchn_2l_bind_to_cpu(struct irq_info *info, unsigned cpu) set_bit(info->evtchn, BM(per_cpu(cpu_evtchn_mask, cpu))); } -static void evtchn_2l_clear_pending(unsigned port) +static void evtchn_2l_clear_pending(evtchn_port_t port) { struct shared_info *s = HYPERVISOR_shared_info; sync_clear_bit(port, BM(&s->evtchn_pending[0])); } -static void evtchn_2l_set_pending(unsigned port) +static void evtchn_2l_set_pending(evtchn_port_t port) { struct shared_info *s = HYPERVISOR_shared_info; sync_set_bit(port, BM(&s->evtchn_pending[0])); } -static bool evtchn_2l_is_pending(unsigned port) +static bool evtchn_2l_is_pending(evtchn_port_t port) { struct shared_info *s = HYPERVISOR_shared_info; return sync_test_bit(port, BM(&s->evtchn_pending[0])); } -static bool evtchn_2l_test_and_set_mask(unsigned port) +static bool evtchn_2l_test_and_set_mask(evtchn_port_t port) { struct shared_info *s = HYPERVISOR_shared_info; return sync_test_and_set_bit(port, 
BM(&s->evtchn_mask[0])); } -static void evtchn_2l_mask(unsigned port) +static void evtchn_2l_mask(evtchn_port_t port) { struct shared_info *s = HYPERVISOR_shared_info; sync_set_bit(port, BM(&s->evtchn_mask[0])); } -static void evtchn_2l_unmask(unsigned port) +static void evtchn_2l_unmask(evtchn_port_t port) { struct shared_info *s = HYPERVISOR_shared_info; unsigned int cpu = get_cpu(); @@ -173,7 +173,7 @@ static void evtchn_2l_handle_events(unsigned cpu) /* Timer interrupt has highest priority. */ irq = irq_from_virq(cpu, VIRQ_TIMER); if (irq != -1) { - unsigned int evtchn = evtchn_from_irq(irq); + evtchn_port_t evtchn = evtchn_from_irq(irq); word_idx = evtchn / BITS_PER_LONG; bit_idx = evtchn % BITS_PER_LONG; if (active_evtchns(cpu, s, word_idx) & (1ULL << bit_idx)) @@ -228,7 +228,7 @@ static void evtchn_2l_handle_events(unsigned cpu) do { xen_ulong_t bits; - int port; + evtchn_port_t port; bits = MASK_LSBS(pending_bits, bit_idx); diff --git a/drivers/xen/events/events_base.c b/drivers/xen/events/events_base.c index 499eff7d3f65..3a791c8485d0 100644 --- a/drivers/xen/events/events_base.c +++ b/drivers/xen/events/events_base.c @@ -116,7 +116,7 @@ static void clear_evtchn_to_irq_all(void) } } -static int set_evtchn_to_irq(unsigned evtchn, unsigned irq) +static int set_evtchn_to_irq(evtchn_port_t evtchn, unsigned int irq) { unsigned row; unsigned col; @@ -143,7 +143,7 @@ static int set_evtchn_to_irq(unsigned evtchn, unsigned irq) return 0; } -int get_evtchn_to_irq(unsigned evtchn) +int get_evtchn_to_irq(evtchn_port_t evtchn) { if (evtchn >= xen_evtchn_max_channels()) return -1; @@ -162,7 +162,7 @@ struct irq_info *info_for_irq(unsigned irq) static int xen_irq_info_common_setup(struct irq_info *info, unsigned irq, enum xen_irq_type type, - unsigned evtchn, + evtchn_port_t evtchn, unsigned short cpu) { int ret; @@ -184,7 +184,7 @@ static int xen_irq_info_common_setup(struct irq_info *info, } static int xen_irq_info_evtchn_setup(unsigned irq, - unsigned evtchn) + 
evtchn_port_t evtchn) { struct irq_info *info = info_for_irq(irq); @@ -193,7 +193,7 @@ static int xen_irq_info_evtchn_setup(unsigned irq, static int xen_irq_info_ipi_setup(unsigned cpu, unsigned irq, - unsigned evtchn, + evtchn_port_t evtchn, enum ipi_vector ipi) { struct irq_info *info = info_for_irq(irq); @@ -207,7 +207,7 @@ static int xen_irq_info_ipi_setup(unsigned cpu, static int xen_irq_info_virq_setup(unsigned cpu, unsigned irq, - unsigned evtchn, + evtchn_port_t evtchn, unsigned virq) { struct irq_info *info = info_for_irq(irq); @@ -220,7 +220,7 @@ static int xen_irq_info_virq_setup(unsigned cpu, } static int xen_irq_info_pirq_setup(unsigned irq, - unsigned evtchn, + evtchn_port_t evtchn, unsigned pirq, unsigned gsi, uint16_t domid, @@ -245,7 +245,7 @@ static void xen_irq_info_cleanup(struct irq_info *info) /* * Accessors for packed IRQ information. */ -unsigned int evtchn_from_irq(unsigned irq) +evtchn_port_t evtchn_from_irq(unsigned irq) { if (WARN(irq >= nr_irqs, "Invalid irq %d!\n", irq)) return 0; @@ -253,7 +253,7 @@ unsigned int evtchn_from_irq(unsigned irq) return info_for_irq(irq)->evtchn; } -unsigned irq_from_evtchn(unsigned int evtchn) +unsigned int irq_from_evtchn(evtchn_port_t evtchn) { return get_evtchn_to_irq(evtchn); } @@ -304,7 +304,7 @@ unsigned cpu_from_irq(unsigned irq) return info_for_irq(irq)->cpu; } -unsigned int cpu_from_evtchn(unsigned int evtchn) +unsigned int cpu_from_evtchn(evtchn_port_t evtchn) { int irq = get_evtchn_to_irq(evtchn); unsigned ret = 0; @@ -330,9 +330,9 @@ static bool pirq_needs_eoi_flag(unsigned irq) return info->u.pirq.flags & PIRQ_NEEDS_EOI; } -static void bind_evtchn_to_cpu(unsigned int chn, unsigned int cpu) +static void bind_evtchn_to_cpu(evtchn_port_t evtchn, unsigned int cpu) { - int irq = get_evtchn_to_irq(chn); + int irq = get_evtchn_to_irq(evtchn); struct irq_info *info = info_for_irq(irq); BUG_ON(irq == -1); @@ -354,7 +354,7 @@ static void bind_evtchn_to_cpu(unsigned int chn, unsigned int cpu) */ void 
notify_remote_via_irq(int irq) { - int evtchn = evtchn_from_irq(irq); + evtchn_port_t evtchn = evtchn_from_irq(irq); if (VALID_EVTCHN(evtchn)) notify_remote_via_evtchn(evtchn); @@ -445,7 +445,7 @@ static void xen_free_irq(unsigned irq) irq_free_desc(irq); } -static void xen_evtchn_close(unsigned int port) +static void xen_evtchn_close(evtchn_port_t port) { struct evtchn_close close; @@ -472,7 +472,7 @@ static void pirq_query_unmask(int irq) static void eoi_pirq(struct irq_data *data) { - int evtchn = evtchn_from_irq(data->irq); + evtchn_port_t evtchn = evtchn_from_irq(data->irq); struct physdev_eoi eoi = { .irq = pirq_from_irq(data->irq) }; int rc = 0; @@ -508,7 +508,7 @@ static unsigned int __startup_pirq(unsigned int irq) { struct evtchn_bind_pirq bind_pirq; struct irq_info *info = info_for_irq(irq); - int evtchn = evtchn_from_irq(irq); + evtchn_port_t evtchn = evtchn_from_irq(irq); int rc; BUG_ON(info->type != IRQT_PIRQ); @@ -561,7 +561,7 @@ static void shutdown_pirq(struct irq_data *data) { unsigned int irq = data->irq; struct irq_info *info = info_for_irq(irq); - unsigned evtchn = evtchn_from_irq(irq); + evtchn_port_t evtchn = evtchn_from_irq(irq); BUG_ON(info->type != IRQT_PIRQ); @@ -601,7 +601,7 @@ EXPORT_SYMBOL_GPL(xen_irq_from_gsi); static void __unbind_from_irq(unsigned int irq) { - int evtchn = evtchn_from_irq(irq); + evtchn_port_t evtchn = evtchn_from_irq(irq); struct irq_info *info = irq_get_handler_data(irq); if (info->refcnt > 0) { @@ -827,7 +827,7 @@ int xen_pirq_from_irq(unsigned irq) } EXPORT_SYMBOL_GPL(xen_pirq_from_irq); -int bind_evtchn_to_irq(unsigned int evtchn) +int bind_evtchn_to_irq(evtchn_port_t evtchn) { int irq; int ret; @@ -870,8 +870,8 @@ EXPORT_SYMBOL_GPL(bind_evtchn_to_irq); static int bind_ipi_to_irq(unsigned int ipi, unsigned int cpu) { struct evtchn_bind_ipi bind_ipi; - int evtchn, irq; - int ret; + evtchn_port_t evtchn; + int ret, irq; mutex_lock(&irq_mapping_update_lock); @@ -909,7 +909,7 @@ static int bind_ipi_to_irq(unsigned 
int ipi, unsigned int cpu) } int bind_interdomain_evtchn_to_irq(unsigned int remote_domain, - unsigned int remote_port) + evtchn_port_t remote_port) { struct evtchn_bind_interdomain bind_interdomain; int err; @@ -924,10 +924,11 @@ int bind_interdomain_evtchn_to_irq(unsigned int remote_domain, } EXPORT_SYMBOL_GPL(bind_interdomain_evtchn_to_irq); -static int find_virq(unsigned int virq, unsigned int cpu) +static int find_virq(unsigned int virq, unsigned int cpu, evtchn_port_t *evtchn) { struct evtchn_status status; - int port, rc = -ENOENT; + evtchn_port_t port; + int rc = -ENOENT; memset(&status, 0, sizeof(status)); for (port = 0; port < xen_evtchn_max_channels(); port++) { @@ -939,7 +940,7 @@ static int find_virq(unsigned int virq, unsigned int cpu) if (status.status != EVTCHNSTAT_virq) continue; if (status.u.virq == virq && status.vcpu == xen_vcpu_nr(cpu)) { - rc = port; + *evtchn = port; break; } } @@ -962,7 +963,8 @@ EXPORT_SYMBOL_GPL(xen_evtchn_nr_channels); int bind_virq_to_irq(unsigned int virq, unsigned int cpu, bool percpu) { struct evtchn_bind_virq bind_virq; - int evtchn, irq, ret; + evtchn_port_t evtchn = 0; + int irq, ret; mutex_lock(&irq_mapping_update_lock); @@ -988,9 +990,8 @@ int bind_virq_to_irq(unsigned int virq, unsigned int cpu, bool percpu) evtchn = bind_virq.port; else { if (ret == -EEXIST) - ret = find_virq(virq, cpu); + ret = find_virq(virq, cpu, &evtchn); BUG_ON(ret < 0); - evtchn = ret; } ret = xen_irq_info_virq_setup(cpu, irq, evtchn, virq); @@ -1019,7 +1020,7 @@ static void unbind_from_irq(unsigned int irq) mutex_unlock(&irq_mapping_update_lock); } -int bind_evtchn_to_irqhandler(unsigned int evtchn, +int bind_evtchn_to_irqhandler(evtchn_port_t evtchn, irq_handler_t handler, unsigned long irqflags, const char *devname, void *dev_id) @@ -1040,7 +1041,7 @@ int bind_evtchn_to_irqhandler(unsigned int evtchn, EXPORT_SYMBOL_GPL(bind_evtchn_to_irqhandler); int bind_interdomain_evtchn_to_irqhandler(unsigned int remote_domain, - unsigned int 
remote_port, + evtchn_port_t remote_port, irq_handler_t handler, unsigned long irqflags, const char *devname, @@ -1132,7 +1133,7 @@ int xen_set_irq_priority(unsigned irq, unsigned priority) } EXPORT_SYMBOL_GPL(xen_set_irq_priority); -int evtchn_make_refcounted(unsigned int evtchn) +int evtchn_make_refcounted(evtchn_port_t evtchn) { int irq = get_evtchn_to_irq(evtchn); struct irq_info *info; @@ -1153,7 +1154,7 @@ int evtchn_make_refcounted(unsigned int evtchn) } EXPORT_SYMBOL_GPL(evtchn_make_refcounted); -int evtchn_get(unsigned int evtchn) +int evtchn_get(evtchn_port_t evtchn) { int irq; struct irq_info *info; @@ -1186,7 +1187,7 @@ int evtchn_get(unsigned int evtchn) } EXPORT_SYMBOL_GPL(evtchn_get); -void evtchn_put(unsigned int evtchn) +void evtchn_put(evtchn_port_t evtchn) { int irq = get_evtchn_to_irq(evtchn); if (WARN_ON(irq == -1)) @@ -1252,7 +1253,7 @@ void xen_hvm_evtchn_do_upcall(void) EXPORT_SYMBOL_GPL(xen_hvm_evtchn_do_upcall); /* Rebind a new event channel to an existing irq. 
*/ -void rebind_evtchn_irq(int evtchn, int irq) +void rebind_evtchn_irq(evtchn_port_t evtchn, int irq) { struct irq_info *info = info_for_irq(irq); @@ -1284,7 +1285,7 @@ void rebind_evtchn_irq(int evtchn, int irq) } /* Rebind an evtchn so that it gets delivered to a specific cpu */ -static int xen_rebind_evtchn_to_cpu(int evtchn, unsigned int tcpu) +static int xen_rebind_evtchn_to_cpu(evtchn_port_t evtchn, unsigned int tcpu) { struct evtchn_bind_vcpu bind_vcpu; int masked; @@ -1342,7 +1343,7 @@ EXPORT_SYMBOL_GPL(xen_set_affinity_evtchn); static void enable_dynirq(struct irq_data *data) { - int evtchn = evtchn_from_irq(data->irq); + evtchn_port_t evtchn = evtchn_from_irq(data->irq); if (VALID_EVTCHN(evtchn)) unmask_evtchn(evtchn); @@ -1350,7 +1351,7 @@ static void enable_dynirq(struct irq_data *data) static void disable_dynirq(struct irq_data *data) { - int evtchn = evtchn_from_irq(data->irq); + evtchn_port_t evtchn = evtchn_from_irq(data->irq); if (VALID_EVTCHN(evtchn)) mask_evtchn(evtchn); @@ -1358,7 +1359,7 @@ static void disable_dynirq(struct irq_data *data) static void ack_dynirq(struct irq_data *data) { - int evtchn = evtchn_from_irq(data->irq); + evtchn_port_t evtchn = evtchn_from_irq(data->irq); if (!VALID_EVTCHN(evtchn)) return; @@ -1385,7 +1386,7 @@ static void mask_ack_dynirq(struct irq_data *data) static int retrigger_dynirq(struct irq_data *data) { - unsigned int evtchn = evtchn_from_irq(data->irq); + evtchn_port_t evtchn = evtchn_from_irq(data->irq); int masked; if (!VALID_EVTCHN(evtchn)) @@ -1440,7 +1441,8 @@ static void restore_pirqs(void) static void restore_cpu_virqs(unsigned int cpu) { struct evtchn_bind_virq bind_virq; - int virq, irq, evtchn; + evtchn_port_t evtchn; + int virq, irq; for (virq = 0; virq < NR_VIRQS; virq++) { if ((irq = per_cpu(virq_to_irq, cpu)[virq]) == -1) @@ -1465,7 +1467,8 @@ static void restore_cpu_virqs(unsigned int cpu) static void restore_cpu_ipis(unsigned int cpu) { struct evtchn_bind_ipi bind_ipi; - int ipi, irq, 
evtchn; + evtchn_port_t evtchn; + int ipi, irq; for (ipi = 0; ipi < XEN_NR_IPIS; ipi++) { if ((irq = per_cpu(ipi_to_irq, cpu)[ipi]) == -1) @@ -1489,7 +1492,7 @@ static void restore_cpu_ipis(unsigned int cpu) /* Clear an irq's pending state, in preparation for polling on it */ void xen_clear_irq_pending(int irq) { - int evtchn = evtchn_from_irq(irq); + evtchn_port_t evtchn = evtchn_from_irq(irq); if (VALID_EVTCHN(evtchn)) clear_evtchn(evtchn); @@ -1497,7 +1500,7 @@ void xen_clear_irq_pending(int irq) EXPORT_SYMBOL(xen_clear_irq_pending); void xen_set_irq_pending(int irq) { - int evtchn = evtchn_from_irq(irq); + evtchn_port_t evtchn = evtchn_from_irq(irq); if (VALID_EVTCHN(evtchn)) set_evtchn(evtchn); @@ -1505,7 +1508,7 @@ void xen_set_irq_pending(int irq) bool xen_test_irq_pending(int irq) { - int evtchn = evtchn_from_irq(irq); + evtchn_port_t evtchn = evtchn_from_irq(irq); bool ret = false; if (VALID_EVTCHN(evtchn)) @@ -1667,7 +1670,7 @@ module_param(fifo_events, bool, 0); void __init xen_init_IRQ(void) { int ret = -EINVAL; - unsigned int evtchn; + evtchn_port_t evtchn; if (fifo_events) ret = xen_evtchn_fifo_init(); diff --git a/drivers/xen/events/events_fifo.c b/drivers/xen/events/events_fifo.c index 76b318e88382..c60ee0450173 100644 --- a/drivers/xen/events/events_fifo.c +++ b/drivers/xen/events/events_fifo.c @@ -82,7 +82,7 @@ static unsigned event_array_pages __read_mostly; #endif -static inline event_word_t *event_word_from_port(unsigned port) +static inline event_word_t *event_word_from_port(evtchn_port_t port) { unsigned i = port / EVENT_WORDS_PER_PAGE; @@ -140,7 +140,7 @@ static void init_array_page(event_word_t *array_page) static int evtchn_fifo_setup(struct irq_info *info) { - unsigned port = info->evtchn; + evtchn_port_t port = info->evtchn; unsigned new_array_pages; int ret; @@ -191,37 +191,37 @@ static void evtchn_fifo_bind_to_cpu(struct irq_info *info, unsigned cpu) /* no-op */ } -static void evtchn_fifo_clear_pending(unsigned port) +static void 
evtchn_fifo_clear_pending(evtchn_port_t port) { event_word_t *word = event_word_from_port(port); sync_clear_bit(EVTCHN_FIFO_BIT(PENDING, word), BM(word)); } -static void evtchn_fifo_set_pending(unsigned port) +static void evtchn_fifo_set_pending(evtchn_port_t port) { event_word_t *word = event_word_from_port(port); sync_set_bit(EVTCHN_FIFO_BIT(PENDING, word), BM(word)); } -static bool evtchn_fifo_is_pending(unsigned port) +static bool evtchn_fifo_is_pending(evtchn_port_t port) { event_word_t *word = event_word_from_port(port); return sync_test_bit(EVTCHN_FIFO_BIT(PENDING, word), BM(word)); } -static bool evtchn_fifo_test_and_set_mask(unsigned port) +static bool evtchn_fifo_test_and_set_mask(evtchn_port_t port) { event_word_t *word = event_word_from_port(port); return sync_test_and_set_bit(EVTCHN_FIFO_BIT(MASKED, word), BM(word)); } -static void evtchn_fifo_mask(unsigned port) +static void evtchn_fifo_mask(evtchn_port_t port) { event_word_t *word = event_word_from_port(port); sync_set_bit(EVTCHN_FIFO_BIT(MASKED, word), BM(word)); } -static bool evtchn_fifo_is_masked(unsigned port) +static bool evtchn_fifo_is_masked(evtchn_port_t port) { event_word_t *word = event_word_from_port(port); return sync_test_bit(EVTCHN_FIFO_BIT(MASKED, word), BM(word)); @@ -242,7 +242,7 @@ static void clear_masked(volatile event_word_t *word) } while (w != old); } -static void evtchn_fifo_unmask(unsigned port) +static void evtchn_fifo_unmask(evtchn_port_t port) { event_word_t *word = event_word_from_port(port); @@ -270,7 +270,7 @@ static uint32_t clear_linked(volatile event_word_t *word) return w & EVTCHN_FIFO_LINK_MASK; } -static void handle_irq_for_port(unsigned port) +static void handle_irq_for_port(evtchn_port_t port) { int irq; @@ -286,7 +286,7 @@ static void consume_one_event(unsigned cpu, { struct evtchn_fifo_queue *q = &per_cpu(cpu_queue, cpu); uint32_t head; - unsigned port; + evtchn_port_t port; event_word_t *word; head = q->head[priority]; diff --git 
a/drivers/xen/events/events_internal.h b/drivers/xen/events/events_internal.h index 82938cff6c7a..10684feb094e 100644 --- a/drivers/xen/events/events_internal.h +++ b/drivers/xen/events/events_internal.h @@ -33,7 +33,7 @@ struct irq_info { int refcnt; enum xen_irq_type type; /* type */ unsigned irq; - unsigned int evtchn; /* event channel */ + evtchn_port_t evtchn; /* event channel */ unsigned short cpu; /* cpu bound */ union { @@ -60,12 +60,12 @@ struct evtchn_ops { int (*setup)(struct irq_info *info); void (*bind_to_cpu)(struct irq_info *info, unsigned cpu); - void (*clear_pending)(unsigned port); - void (*set_pending)(unsigned port); - bool (*is_pending)(unsigned port); - bool (*test_and_set_mask)(unsigned port); - void (*mask)(unsigned port); - void (*unmask)(unsigned port); + void (*clear_pending)(evtchn_port_t port); + void (*set_pending)(evtchn_port_t port); + bool (*is_pending)(evtchn_port_t port); + bool (*test_and_set_mask)(evtchn_port_t port); + void (*mask)(evtchn_port_t port); + void (*unmask)(evtchn_port_t port); void (*handle_events)(unsigned cpu); void (*resume)(void); @@ -74,11 +74,11 @@ struct evtchn_ops { extern const struct evtchn_ops *evtchn_ops; extern int **evtchn_to_irq; -int get_evtchn_to_irq(unsigned int evtchn); +int get_evtchn_to_irq(evtchn_port_t evtchn); struct irq_info *info_for_irq(unsigned irq); unsigned cpu_from_irq(unsigned irq); -unsigned cpu_from_evtchn(unsigned int evtchn); +unsigned int cpu_from_evtchn(evtchn_port_t evtchn); static inline unsigned xen_evtchn_max_channels(void) { @@ -102,32 +102,32 @@ static inline void xen_evtchn_port_bind_to_cpu(struct irq_info *info, evtchn_ops->bind_to_cpu(info, cpu); } -static inline void clear_evtchn(unsigned port) +static inline void clear_evtchn(evtchn_port_t port) { evtchn_ops->clear_pending(port); } -static inline void set_evtchn(unsigned port) +static inline void set_evtchn(evtchn_port_t port) { evtchn_ops->set_pending(port); } -static inline bool test_evtchn(unsigned port) +static 
inline bool test_evtchn(evtchn_port_t port) { return evtchn_ops->is_pending(port); } -static inline bool test_and_set_mask(unsigned port) +static inline bool test_and_set_mask(evtchn_port_t port) { return evtchn_ops->test_and_set_mask(port); } -static inline void mask_evtchn(unsigned port) +static inline void mask_evtchn(evtchn_port_t port) { return evtchn_ops->mask(port); } -static inline void unmask_evtchn(unsigned port) +static inline void unmask_evtchn(evtchn_port_t port) { return evtchn_ops->unmask(port); } diff --git a/drivers/xen/evtchn.c b/drivers/xen/evtchn.c index 052b55a14ebc..6e0b1dd5573c 100644 --- a/drivers/xen/evtchn.c +++ b/drivers/xen/evtchn.c @@ -83,7 +83,7 @@ struct per_user_data { struct user_evtchn { struct rb_node node; struct per_user_data *user; - unsigned port; + evtchn_port_t port; bool enabled; }; @@ -138,7 +138,8 @@ static void del_evtchn(struct per_user_data *u, struct user_evtchn *evtchn) kfree(evtchn); } -static struct user_evtchn *find_evtchn(struct per_user_data *u, unsigned port) +static struct user_evtchn *find_evtchn(struct per_user_data *u, + evtchn_port_t port) { struct rb_node *node = u->evtchns.rb_node; @@ -163,7 +164,7 @@ static irqreturn_t evtchn_interrupt(int irq, void *data) struct per_user_data *u = evtchn->user; WARN(!evtchn->enabled, - "Interrupt for port %d, but apparently not enabled; per-user %p\n", + "Interrupt for port %u, but apparently not enabled; per-user %p\n", evtchn->port, u); disable_irq_nosync(irq); @@ -286,7 +287,7 @@ static ssize_t evtchn_write(struct file *file, const char __user *buf, mutex_lock(&u->bind_mutex); for (i = 0; i < (count/sizeof(evtchn_port_t)); i++) { - unsigned port = kbuf[i]; + evtchn_port_t port = kbuf[i]; struct user_evtchn *evtchn; evtchn = find_evtchn(u, port); @@ -361,7 +362,7 @@ static int evtchn_resize_ring(struct per_user_data *u) return 0; } -static int evtchn_bind_to_user(struct per_user_data *u, int port) +static int evtchn_bind_to_user(struct per_user_data *u, evtchn_port_t 
port) { struct user_evtchn *evtchn; struct evtchn_close close; @@ -423,7 +424,7 @@ static void evtchn_unbind_from_user(struct per_user_data *u, static DEFINE_PER_CPU(int, bind_last_selected_cpu); -static void evtchn_bind_interdom_next_vcpu(int evtchn) +static void evtchn_bind_interdom_next_vcpu(evtchn_port_t evtchn) { unsigned int selected_cpu, irq; struct irq_desc *desc; diff --git a/drivers/xen/gntdev-common.h b/drivers/xen/gntdev-common.h index 9a3960ecff6c..20d7d059dadb 100644 --- a/drivers/xen/gntdev-common.h +++ b/drivers/xen/gntdev-common.h @@ -15,6 +15,7 @@ #include #include #include +#include struct gntdev_dmabuf_priv; @@ -38,7 +39,7 @@ struct gntdev_unmap_notify { int flags; /* Address relative to the start of the gntdev_grant_map. */ int addr; - int event; + evtchn_port_t event; }; struct gntdev_grant_map { diff --git a/drivers/xen/gntdev.c b/drivers/xen/gntdev.c index 0258415ca0b2..50651e566564 100644 --- a/drivers/xen/gntdev.c +++ b/drivers/xen/gntdev.c @@ -652,7 +652,7 @@ static long gntdev_ioctl_notify(struct gntdev_priv *priv, void __user *u) struct gntdev_grant_map *map; int rc; int out_flags; - unsigned int out_event; + evtchn_port_t out_event; if (copy_from_user(&op, u, sizeof(op))) return -EFAULT; diff --git a/drivers/xen/pvcalls-back.c b/drivers/xen/pvcalls-back.c index c57c71b7d53d..cf4ce3e9358d 100644 --- a/drivers/xen/pvcalls-back.c +++ b/drivers/xen/pvcalls-back.c @@ -300,7 +300,7 @@ static struct sock_mapping *pvcalls_new_active_socket( struct pvcalls_fedata *fedata, uint64_t id, grant_ref_t ref, - uint32_t evtchn, + evtchn_port_t evtchn, struct socket *sock) { int ret; @@ -905,7 +905,8 @@ static irqreturn_t pvcalls_back_conn_event(int irq, void *sock_map) static int backend_connect(struct xenbus_device *dev) { - int err, evtchn; + int err; + evtchn_port_t evtchn; grant_ref_t ring_ref; struct pvcalls_fedata *fedata = NULL; diff --git a/drivers/xen/pvcalls-front.c b/drivers/xen/pvcalls-front.c index 57592a6b5c9e..b43b5595e988 100644 --- 
a/drivers/xen/pvcalls-front.c +++ b/drivers/xen/pvcalls-front.c @@ -368,12 +368,12 @@ static int alloc_active_ring(struct sock_mapping *map) return -ENOMEM; } -static int create_active(struct sock_mapping *map, int *evtchn) +static int create_active(struct sock_mapping *map, evtchn_port_t *evtchn) { void *bytes; int ret = -ENOMEM, irq = -1, i; - *evtchn = -1; + *evtchn = 0; init_waitqueue_head(&map->active.inflight_conn_req); bytes = map->active.data.in; @@ -404,7 +404,7 @@ static int create_active(struct sock_mapping *map, int *evtchn) return 0; out_error: - if (*evtchn >= 0) + if (*evtchn > 0) xenbus_free_evtchn(pvcalls_front_dev, *evtchn); return ret; } @@ -415,7 +415,8 @@ int pvcalls_front_connect(struct socket *sock, struct sockaddr *addr, struct pvcalls_bedata *bedata; struct sock_mapping *map = NULL; struct xen_pvcalls_request *req; - int notify, req_id, ret, evtchn; + int notify, req_id, ret; + evtchn_port_t evtchn; if (addr->sa_family != AF_INET || sock->type != SOCK_STREAM) return -EOPNOTSUPP; @@ -765,7 +766,8 @@ int pvcalls_front_accept(struct socket *sock, struct socket *newsock, int flags) struct sock_mapping *map; struct sock_mapping *map2 = NULL; struct xen_pvcalls_request *req; - int notify, req_id, ret, evtchn, nonblock; + int notify, req_id, ret, nonblock; + evtchn_port_t evtchn; map = pvcalls_enter_sock(sock); if (IS_ERR(map)) @@ -1125,7 +1127,8 @@ static int pvcalls_front_remove(struct xenbus_device *dev) static int pvcalls_front_probe(struct xenbus_device *dev, const struct xenbus_device_id *id) { - int ret = -ENOMEM, evtchn, i; + int ret = -ENOMEM, i; + evtchn_port_t evtchn; unsigned int max_page_order, function_calls, len; char *versions; grant_ref_t gref_head = 0; diff --git a/drivers/xen/xen-pciback/xenbus.c b/drivers/xen/xen-pciback/xenbus.c index 833b2d2c4318..f2115587855f 100644 --- a/drivers/xen/xen-pciback/xenbus.c +++ b/drivers/xen/xen-pciback/xenbus.c @@ -105,13 +105,13 @@ static void free_pdev(struct xen_pcibk_device *pdev) } static 
int xen_pcibk_do_attach(struct xen_pcibk_device *pdev, int gnt_ref, - int remote_evtchn) + evtchn_port_t remote_evtchn) { int err = 0; void *vaddr; dev_dbg(&pdev->xdev->dev, - "Attaching to frontend resources - gnt_ref=%d evtchn=%d\n", + "Attaching to frontend resources - gnt_ref=%d evtchn=%u\n", gnt_ref, remote_evtchn); err = xenbus_map_ring_valloc(pdev->xdev, &gnt_ref, 1, &vaddr); @@ -142,7 +142,8 @@ static int xen_pcibk_do_attach(struct xen_pcibk_device *pdev, int gnt_ref, static int xen_pcibk_attach(struct xen_pcibk_device *pdev) { int err = 0; - int gnt_ref, remote_evtchn; + int gnt_ref; + evtchn_port_t remote_evtchn; char *magic = NULL; diff --git a/drivers/xen/xen-scsiback.c b/drivers/xen/xen-scsiback.c index ba0942e481bc..75c0a2e9a6db 100644 --- a/drivers/xen/xen-scsiback.c +++ b/drivers/xen/xen-scsiback.c @@ -854,7 +854,8 @@ static int scsiback_init_sring(struct vscsibk_info *info, grant_ref_t ring_ref, static int scsiback_map(struct vscsibk_info *info) { struct xenbus_device *dev = info->dev; - unsigned int ring_ref, evtchn; + unsigned int ring_ref; + evtchn_port_t evtchn; int err; err = xenbus_gather(XBT_NIL, dev->otherend, diff --git a/drivers/xen/xenbus/xenbus_client.c b/drivers/xen/xenbus/xenbus_client.c index 31eb822ac313..385843256865 100644 --- a/drivers/xen/xenbus/xenbus_client.c +++ b/drivers/xen/xenbus/xenbus_client.c @@ -391,7 +391,7 @@ EXPORT_SYMBOL_GPL(xenbus_grant_ring); * error, the device will switch to XenbusStateClosing, and the error will be * saved in the store. */ -int xenbus_alloc_evtchn(struct xenbus_device *dev, int *port) +int xenbus_alloc_evtchn(struct xenbus_device *dev, evtchn_port_t *port) { struct evtchn_alloc_unbound alloc_unbound; int err; @@ -414,7 +414,7 @@ EXPORT_SYMBOL_GPL(xenbus_alloc_evtchn); /** * Free an existing event channel. Returns 0 on success or -errno on error. 
*/ -int xenbus_free_evtchn(struct xenbus_device *dev, int port) +int xenbus_free_evtchn(struct xenbus_device *dev, evtchn_port_t port) { struct evtchn_close close; int err; @@ -423,7 +423,7 @@ int xenbus_free_evtchn(struct xenbus_device *dev, int port) err = HYPERVISOR_event_channel_op(EVTCHNOP_close, &close); if (err) - xenbus_dev_error(dev, err, "freeing event channel %d", port); + xenbus_dev_error(dev, err, "freeing event channel %u", port); return err; } diff --git a/include/xen/events.h b/include/xen/events.h index c0e6a0598397..12b0dcb6a120 100644 --- a/include/xen/events.h +++ b/include/xen/events.h @@ -14,8 +14,8 @@ unsigned xen_evtchn_nr_channels(void); -int bind_evtchn_to_irq(unsigned int evtchn); -int bind_evtchn_to_irqhandler(unsigned int evtchn, +int bind_evtchn_to_irq(evtchn_port_t evtchn); +int bind_evtchn_to_irqhandler(evtchn_port_t evtchn, irq_handler_t handler, unsigned long irqflags, const char *devname, void *dev_id); @@ -31,9 +31,9 @@ int bind_ipi_to_irqhandler(enum ipi_vector ipi, const char *devname, void *dev_id); int bind_interdomain_evtchn_to_irq(unsigned int remote_domain, - unsigned int remote_port); + evtchn_port_t remote_port); int bind_interdomain_evtchn_to_irqhandler(unsigned int remote_domain, - unsigned int remote_port, + evtchn_port_t remote_port, irq_handler_t handler, unsigned long irqflags, const char *devname, @@ -54,15 +54,15 @@ int xen_set_irq_priority(unsigned irq, unsigned priority); /* * Allow extra references to event channels exposed to userspace by evtchn */ -int evtchn_make_refcounted(unsigned int evtchn); -int evtchn_get(unsigned int evtchn); -void evtchn_put(unsigned int evtchn); +int evtchn_make_refcounted(evtchn_port_t evtchn); +int evtchn_get(evtchn_port_t evtchn); +void evtchn_put(evtchn_port_t evtchn); void xen_send_IPI_one(unsigned int cpu, enum ipi_vector vector); -void rebind_evtchn_irq(int evtchn, int irq); +void rebind_evtchn_irq(evtchn_port_t evtchn, int irq); int xen_set_affinity_evtchn(struct irq_desc 
*desc, unsigned int tcpu); -static inline void notify_remote_via_evtchn(int port) +static inline void notify_remote_via_evtchn(evtchn_port_t port) { struct evtchn_send send = { .port = port }; (void)HYPERVISOR_event_channel_op(EVTCHNOP_send, &send); @@ -86,9 +86,9 @@ void xen_poll_irq(int irq); void xen_poll_irq_timeout(int irq, u64 timeout); /* Determine the IRQ which is bound to an event channel */ -unsigned irq_from_evtchn(unsigned int evtchn); +unsigned int irq_from_evtchn(evtchn_port_t evtchn); int irq_from_virq(unsigned int cpu, unsigned int virq); -unsigned int evtchn_from_irq(unsigned irq); +evtchn_port_t evtchn_from_irq(unsigned irq); #ifdef CONFIG_XEN_PVHVM /* Xen HVM evtchn vector callback */ diff --git a/include/xen/interface/event_channel.h b/include/xen/interface/event_channel.h index 45650c9a06d5..cf80e338fbb0 100644 --- a/include/xen/interface/event_channel.h +++ b/include/xen/interface/event_channel.h @@ -220,7 +220,7 @@ struct evtchn_expand_array { #define EVTCHNOP_set_priority 13 struct evtchn_set_priority { /* IN parameters. 
*/ - uint32_t port; + evtchn_port_t port; uint32_t priority; }; diff --git a/include/xen/xenbus.h b/include/xen/xenbus.h index 8c0d1edc121c..5a8315e6d8a6 100644 --- a/include/xen/xenbus.h +++ b/include/xen/xenbus.h @@ -47,6 +47,7 @@ #include #include #include +#include #define XENBUS_MAX_RING_GRANT_ORDER 4 #define XENBUS_MAX_RING_GRANTS (1U << XENBUS_MAX_RING_GRANT_ORDER) @@ -212,8 +213,8 @@ int xenbus_map_ring_valloc(struct xenbus_device *dev, grant_ref_t *gnt_refs, int xenbus_unmap_ring_vfree(struct xenbus_device *dev, void *vaddr); -int xenbus_alloc_evtchn(struct xenbus_device *dev, int *port); -int xenbus_free_evtchn(struct xenbus_device *dev, int port); +int xenbus_alloc_evtchn(struct xenbus_device *dev, evtchn_port_t *port); +int xenbus_free_evtchn(struct xenbus_device *dev, evtchn_port_t port); enum xenbus_state xenbus_read_driver_state(const char *path); From 3a169c0be75b59dd85d159493634870cdec6d3c4 Mon Sep 17 00:00:00 2001 From: Juergen Gross Date: Fri, 3 Apr 2020 11:00:34 +0200 Subject: [PATCH 184/280] xen/blkfront: fix memory allocation flags in blkfront_setup_indirect() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Commit 1d5c76e664333 ("xen-blkfront: switch kcalloc to kvcalloc for large array allocation") didn't fix the issue it was meant to, as the flags for allocating the memory are GFP_NOIO, which will lead to the memory allocation falling back to kmalloc(). So instead of GFP_NOIO use GFP_KERNEL and do all the memory allocation in blkfront_setup_indirect() in a memalloc_noio_{save,restore} section.
Fixes: 1d5c76e664333 ("xen-blkfront: switch kcalloc to kvcalloc for large array allocation") Cc: stable@vger.kernel.org Signed-off-by: Juergen Gross Reviewed-by: Boris Ostrovsky Acked-by: Roger Pau Monné Link: https://lore.kernel.org/r/20200403090034.8753-1-jgross@suse.com Signed-off-by: Juergen Gross --- drivers/block/xen-blkfront.c | 17 ++++++++++++----- 1 file changed, 12 insertions(+), 5 deletions(-) diff --git a/drivers/block/xen-blkfront.c b/drivers/block/xen-blkfront.c index 9df516a56bb2..b32877e0b384 100644 --- a/drivers/block/xen-blkfront.c +++ b/drivers/block/xen-blkfront.c @@ -47,6 +47,7 @@ #include #include #include +#include #include #include @@ -2189,10 +2190,12 @@ static void blkfront_setup_discard(struct blkfront_info *info) static int blkfront_setup_indirect(struct blkfront_ring_info *rinfo) { - unsigned int psegs, grants; + unsigned int psegs, grants, memflags; int err, i; struct blkfront_info *info = rinfo->dev_info; + memflags = memalloc_noio_save(); + if (info->max_indirect_segments == 0) { if (!HAS_EXTRA_REQ) grants = BLKIF_MAX_SEGMENTS_PER_REQUEST; @@ -2224,7 +2227,7 @@ static int blkfront_setup_indirect(struct blkfront_ring_info *rinfo) BUG_ON(!list_empty(&rinfo->indirect_pages)); for (i = 0; i < num; i++) { - struct page *indirect_page = alloc_page(GFP_NOIO); + struct page *indirect_page = alloc_page(GFP_KERNEL); if (!indirect_page) goto out_of_memory; list_add(&indirect_page->lru, &rinfo->indirect_pages); @@ -2235,15 +2238,15 @@ static int blkfront_setup_indirect(struct blkfront_ring_info *rinfo) rinfo->shadow[i].grants_used = kvcalloc(grants, sizeof(rinfo->shadow[i].grants_used[0]), - GFP_NOIO); + GFP_KERNEL); rinfo->shadow[i].sg = kvcalloc(psegs, sizeof(rinfo->shadow[i].sg[0]), - GFP_NOIO); + GFP_KERNEL); if (info->max_indirect_segments) rinfo->shadow[i].indirect_grants = kvcalloc(INDIRECT_GREFS(grants), sizeof(rinfo->shadow[i].indirect_grants[0]), - GFP_NOIO); + GFP_KERNEL); if ((rinfo->shadow[i].grants_used == NULL) || 
(rinfo->shadow[i].sg == NULL) || (info->max_indirect_segments && @@ -2252,6 +2255,7 @@ static int blkfront_setup_indirect(struct blkfront_ring_info *rinfo) sg_init_table(rinfo->shadow[i].sg, psegs); } + memalloc_noio_restore(memflags); return 0; @@ -2271,6 +2275,9 @@ static int blkfront_setup_indirect(struct blkfront_ring_info *rinfo) __free_page(indirect_page); } } + + memalloc_noio_restore(memflags); + return -ENOMEM; } From e96387677c2a4a35a0c8b16cd517696a04050062 Mon Sep 17 00:00:00 2001 From: Dan Carpenter Date: Tue, 7 Apr 2020 15:21:49 +0300 Subject: [PATCH 185/280] ipmi: kcs: Fix aspeed_kcs_probe_of_v1() This needs to return the newly allocated struct but instead it returns zero which leads to an immediate Oops in the caller. Fixes: 09f5f680707e ("ipmi: kcs: aspeed: Implement v2 bindings") Signed-off-by: Dan Carpenter Message-Id: <20200407122149.GA100026@mwanda> Signed-off-by: Corey Minyard --- drivers/char/ipmi/kcs_bmc_aspeed.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/char/ipmi/kcs_bmc_aspeed.c b/drivers/char/ipmi/kcs_bmc_aspeed.c index 9422d55a0476..a140203c079b 100644 --- a/drivers/char/ipmi/kcs_bmc_aspeed.c +++ b/drivers/char/ipmi/kcs_bmc_aspeed.c @@ -271,7 +271,7 @@ static struct kcs_bmc *aspeed_kcs_probe_of_v1(struct platform_device *pdev) kcs->ioreg = ast_kcs_bmc_ioregs[channel - 1]; aspeed_kcs_set_address(kcs, slave); - return 0; + return kcs; } static int aspeed_kcs_calculate_channel(const struct kcs_ioreg *regs) From ccfc531695f3a4aada042f6bdb33ac6be24e1aec Mon Sep 17 00:00:00 2001 From: Mike Willard Date: Wed, 1 Apr 2020 20:54:54 +0000 Subject: [PATCH 186/280] ASoC: cs4270: pull reset GPIO low then high Pull the RST line low then high when initializing the driver, in order to force a reset of the chip. Previously, the line was not pulled low, which could result in the chip registers not resetting to their default values on boot. 
Signed-off-by: Mike Willard Cc: stable@vger.kernel.org Link: https://lore.kernel.org/r/20200401205454.79792-1-mwillard@izotope.com Signed-off-by: Mark Brown --- sound/soc/codecs/cs4270.c | 40 ++++++++++++++++++++++++++++++++++----- 1 file changed, 35 insertions(+), 5 deletions(-) diff --git a/sound/soc/codecs/cs4270.c b/sound/soc/codecs/cs4270.c index 5f25b9f872bd..8a02791e44ad 100644 --- a/sound/soc/codecs/cs4270.c +++ b/sound/soc/codecs/cs4270.c @@ -137,6 +137,9 @@ struct cs4270_private { /* power domain regulators */ struct regulator_bulk_data supplies[ARRAY_SIZE(supply_names)]; + + /* reset gpio */ + struct gpio_desc *reset_gpio; }; static const struct snd_soc_dapm_widget cs4270_dapm_widgets[] = { @@ -648,6 +651,22 @@ static const struct regmap_config cs4270_regmap = { .volatile_reg = cs4270_reg_is_volatile, }; +/** + * cs4270_i2c_remove - deinitialize the I2C interface of the CS4270 + * @i2c_client: the I2C client object + * + * This function puts the chip into low power mode when the i2c device + * is removed. 
+ */ +static int cs4270_i2c_remove(struct i2c_client *i2c_client) +{ + struct cs4270_private *cs4270 = i2c_get_clientdata(i2c_client); + + gpiod_set_value_cansleep(cs4270->reset_gpio, 0); + + return 0; +} + /** * cs4270_i2c_probe - initialize the I2C interface of the CS4270 * @i2c_client: the I2C client object @@ -660,7 +679,6 @@ static int cs4270_i2c_probe(struct i2c_client *i2c_client, const struct i2c_device_id *id) { struct cs4270_private *cs4270; - struct gpio_desc *reset_gpiod; unsigned int val; int ret, i; @@ -679,10 +697,21 @@ static int cs4270_i2c_probe(struct i2c_client *i2c_client, if (ret < 0) return ret; - reset_gpiod = devm_gpiod_get_optional(&i2c_client->dev, "reset", - GPIOD_OUT_HIGH); - if (PTR_ERR(reset_gpiod) == -EPROBE_DEFER) - return -EPROBE_DEFER; + /* reset the device */ + cs4270->reset_gpio = devm_gpiod_get_optional(&i2c_client->dev, "reset", + GPIOD_OUT_LOW); + if (IS_ERR(cs4270->reset_gpio)) { + dev_dbg(&i2c_client->dev, "Error getting CS4270 reset GPIO\n"); + return PTR_ERR(cs4270->reset_gpio); + } + + if (cs4270->reset_gpio) { + dev_dbg(&i2c_client->dev, "Found reset GPIO\n"); + gpiod_set_value_cansleep(cs4270->reset_gpio, 1); + } + + /* Sleep 500ns before i2c communications */ + ndelay(500); cs4270->regmap = devm_regmap_init_i2c(i2c_client, &cs4270_regmap); if (IS_ERR(cs4270->regmap)) @@ -735,6 +764,7 @@ static struct i2c_driver cs4270_i2c_driver = { }, .id_table = cs4270_id, .probe = cs4270_i2c_probe, + .remove = cs4270_i2c_remove, }; module_i2c_driver(cs4270_i2c_driver); From 0ad3f0b384d58f3bd1f4fb87d0af5b8f6866f41a Mon Sep 17 00:00:00 2001 From: Takashi Iwai Date: Tue, 7 Apr 2020 10:44:01 +0200 Subject: [PATCH 187/280] ALSA: hda: Fix potential access overflow in beep helper The beep control helper function blindly stores the values in two stereo channels no matter whether the actual control is mono or stereo. 
This is practically harmless, but it annoys the recently introduced sanity check, resulting in an error when the checker is enabled. This patch corrects the behavior to store only on the defined array member. Fixes: 0401e8548eac ("ALSA: hda - Move beep helper functions to hda_beep.c") BugLink: https://bugzilla.kernel.org/show_bug.cgi?id=207139 Reviewed-by: Jaroslav Kysela Cc: Link: https://lore.kernel.org/r/20200407084402.25589-2-tiwai@suse.de Signed-off-by: Takashi Iwai --- sound/pci/hda/hda_beep.c | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/sound/pci/hda/hda_beep.c b/sound/pci/hda/hda_beep.c index f5fd62ed4df5..841523f6b88d 100644 --- a/sound/pci/hda/hda_beep.c +++ b/sound/pci/hda/hda_beep.c @@ -290,8 +290,12 @@ int snd_hda_mixer_amp_switch_get_beep(struct snd_kcontrol *kcontrol, { struct hda_codec *codec = snd_kcontrol_chip(kcontrol); struct hda_beep *beep = codec->beep; + int chs = get_amp_channels(kcontrol); + if (beep && (!beep->enabled || !ctl_has_mute(kcontrol))) { - ucontrol->value.integer.value[0] = + if (chs & 1) + ucontrol->value.integer.value[0] = beep->enabled; + if (chs & 2) ucontrol->value.integer.value[1] = beep->enabled; return 0; } From c47914c00be346bc5b48c48de7b0da5c2d1a296c Mon Sep 17 00:00:00 2001 From: Takashi Iwai Date: Tue, 7 Apr 2020 10:44:02 +0200 Subject: [PATCH 188/280] ALSA: ice1724: Fix invalid access for enumerated ctl items The access to Analog Capture Source control value implemented in prodigy_hifi.c is wrong, as caught by the recently introduced sanity check; it should be accessing value.enumerated.item[] instead of value.integer.value[]. This patch corrects the wrong access pattern. 
Fixes: 6b8d6e5518e2 ("[ALSA] ICE1724: Added support for Audiotrak Prodigy 7.1 HiFi & HD2, Hercules Fortissimo IV") BugLink: https://bugzilla.kernel.org/show_bug.cgi?id=207139 Reviewed-by: Jaroslav Kysela Cc: Link: https://lore.kernel.org/r/20200407084402.25589-3-tiwai@suse.de Signed-off-by: Takashi Iwai --- sound/pci/ice1712/prodigy_hifi.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/sound/pci/ice1712/prodigy_hifi.c b/sound/pci/ice1712/prodigy_hifi.c index 91f83cef0e56..9aa12a67d370 100644 --- a/sound/pci/ice1712/prodigy_hifi.c +++ b/sound/pci/ice1712/prodigy_hifi.c @@ -536,7 +536,7 @@ static int wm_adc_mux_enum_get(struct snd_kcontrol *kcontrol, struct snd_ice1712 *ice = snd_kcontrol_chip(kcontrol); mutex_lock(&ice->gpio_mutex); - ucontrol->value.integer.value[0] = wm_get(ice, WM_ADC_MUX) & 0x1f; + ucontrol->value.enumerated.item[0] = wm_get(ice, WM_ADC_MUX) & 0x1f; mutex_unlock(&ice->gpio_mutex); return 0; } @@ -550,7 +550,7 @@ static int wm_adc_mux_enum_put(struct snd_kcontrol *kcontrol, mutex_lock(&ice->gpio_mutex); oval = wm_get(ice, WM_ADC_MUX); - nval = (oval & 0xe0) | ucontrol->value.integer.value[0]; + nval = (oval & 0xe0) | ucontrol->value.enumerated.item[0]; if (nval != oval) { wm_put(ice, WM_ADC_MUX, nval); change = 1; From 211fea18a7bb9b8d51cb5d2b9cbe5583af256609 Mon Sep 17 00:00:00 2001 From: Colin Ian King Date: Mon, 6 Apr 2020 23:54:39 +0100 Subject: [PATCH 189/280] io_uring: remove redundant variable pointer nxt and io_wq_assign_next call An earlier commit "io_uring: remove @nxt from handlers" removed the setting of pointer nxt and now it is always null, hence the non-null check and call to io_wq_assign_next is redundant and can be removed. 
Addresses-Coverity: ("'Constant' variable guard") Reviewed-by: Chaitanya Kulkarni Signed-off-by: Colin Ian King Signed-off-by: Jens Axboe --- fs/io_uring.c | 3 --- 1 file changed, 3 deletions(-) diff --git a/fs/io_uring.c b/fs/io_uring.c index 79bd22289d73..20662bbc0507 100644 --- a/fs/io_uring.c +++ b/fs/io_uring.c @@ -3509,14 +3509,11 @@ static void __io_sync_file_range(struct io_kiocb *req) static void io_sync_file_range_finish(struct io_wq_work **workptr) { struct io_kiocb *req = container_of(*workptr, struct io_kiocb, work); - struct io_kiocb *nxt = NULL; if (io_req_cancelled(req)) return; __io_sync_file_range(req); io_put_req(req); /* put submission ref */ - if (nxt) - io_wq_assign_next(workptr, nxt); } static int io_sync_file_range(struct io_kiocb *req, bool force_nonblock) From f7fe9346869a12efe3af3cc9be2e45a1b6ff8761 Mon Sep 17 00:00:00 2001 From: Xiaoguang Wang Date: Tue, 7 Apr 2020 20:02:31 +0800 Subject: [PATCH 190/280] io_uring: initialize fixed_file_data lock syzbot reports below warning: INFO: trying to register non-static key. the code is fine but needs lockdep annotation. turning off the locking correctness validator. 
CPU: 1 PID: 7099 Comm: syz-executor897 Not tainted 5.6.0-next-20200406-syzkaller #0 Hardware name: Google Google Compute Engine/Google Compute Engine, BIOS Google 01/01/2011 Call Trace: __dump_stack lib/dump_stack.c:77 [inline] dump_stack+0x188/0x20d lib/dump_stack.c:118 assign_lock_key kernel/locking/lockdep.c:913 [inline] register_lock_class+0x1664/0x1760 kernel/locking/lockdep.c:1225 __lock_acquire+0x104/0x4e00 kernel/locking/lockdep.c:4223 lock_acquire+0x1f2/0x8f0 kernel/locking/lockdep.c:4923 __raw_spin_lock_irqsave include/linux/spinlock_api_smp.h:110 [inline] _raw_spin_lock_irqsave+0x8c/0xbf kernel/locking/spinlock.c:159 io_sqe_files_register fs/io_uring.c:6599 [inline] __io_uring_register+0x1fe8/0x2f00 fs/io_uring.c:8001 __do_sys_io_uring_register fs/io_uring.c:8081 [inline] __se_sys_io_uring_register fs/io_uring.c:8063 [inline] __x64_sys_io_uring_register+0x192/0x560 fs/io_uring.c:8063 do_syscall_64+0xf6/0x7d0 arch/x86/entry/common.c:295 entry_SYSCALL_64_after_hwframe+0x49/0xb3 RIP: 0033:0x440289 Code: 18 89 d0 c3 66 2e 0f 1f 84 00 00 00 00 00 0f 1f 00 48 89 f8 48 89 f7 48 89 d6 48 89 ca 4d 89 c2 4d 89 c8 4c 8b 4c 24 08 0f 05 <48> 3d 01 f0 ff ff 0f 83 fb 13 fc ff c3 66 2e 0f 1f 84 00 00 00 00 RSP: 002b:00007ffff1bbf558 EFLAGS: 00000246 ORIG_RAX: 00000000000001ab RAX: ffffffffffffffda RBX: 00000000004002c8 RCX: 0000000000440289 RDX: 0000000020000280 RSI: 0000000000000002 RDI: 0000000000000003 RBP: 00000000006ca018 R08: 0000000000000000 R09: 00000000004002c8 R10: 0000000000000001 R11: 0000000000000246 R12: 0000000000401b10 R13: 0000000000401ba0 R14: 0000000000000000 R15: 0000000000000000 Initialize struct fixed_file_data's lock to fix this issue. 
Reported-by: syzbot+e6eeca4a035da76b3065@syzkaller.appspotmail.com Fixes: 055895537302 ("io_uring: refactor file register/unregister/update handling") Signed-off-by: Xiaoguang Wang Signed-off-by: Jens Axboe --- fs/io_uring.c | 1 + 1 file changed, 1 insertion(+) diff --git a/fs/io_uring.c b/fs/io_uring.c index 20662bbc0507..773f55c49cd8 100644 --- a/fs/io_uring.c +++ b/fs/io_uring.c @@ -6501,6 +6501,7 @@ static int io_sqe_files_register(struct io_ring_ctx *ctx, void __user *arg, ctx->file_data->ctx = ctx; init_completion(&ctx->file_data->done); INIT_LIST_HEAD(&ctx->file_data->ref_list); + spin_lock_init(&ctx->file_data->lock); nr_tables = DIV_ROUND_UP(nr_args, IORING_MAX_FILES_TABLE); ctx->file_data->table = kcalloc(nr_tables, From 21f9024355e58772ec5d7fc3534aa5e29d72a8b6 Mon Sep 17 00:00:00 2001 From: Israel Rukshin Date: Tue, 7 Apr 2020 11:02:28 +0000 Subject: [PATCH 191/280] nvmet-rdma: fix double free of rdma queue In case rdma accept fails at nvmet_rdma_queue_connect(), release work is scheduled. Later on, a new RDMA CM event may arrive since we didn't destroy the cm-id and call nvmet_rdma_queue_connect_fail(), which schedule another release work. This will cause calling nvmet_rdma_free_queue twice. To fix this we implicitly destroy the cm_id with non-zero ret code, which guarantees that new rdma_cm events will not arrive afterwards. Also add a qp pointer to nvmet_rdma_queue structure, so we can use it when the cm_id pointer is NULL or was destroyed. 
Signed-off-by: Israel Rukshin Suggested-by: Sagi Grimberg Reviewed-by: Max Gurtovoy Reviewed-by: Sagi Grimberg Signed-off-by: Christoph Hellwig --- drivers/nvme/target/rdma.c | 30 ++++++++++++++++++------------ 1 file changed, 18 insertions(+), 12 deletions(-) diff --git a/drivers/nvme/target/rdma.c b/drivers/nvme/target/rdma.c index f78201421978..ab867f32fb0d 100644 --- a/drivers/nvme/target/rdma.c +++ b/drivers/nvme/target/rdma.c @@ -78,6 +78,7 @@ enum nvmet_rdma_queue_state { struct nvmet_rdma_queue { struct rdma_cm_id *cm_id; + struct ib_qp *qp; struct nvmet_port *port; struct ib_cq *cq; atomic_t sq_wr_avail; @@ -474,7 +475,7 @@ static int nvmet_rdma_post_recv(struct nvmet_rdma_device *ndev, if (ndev->srq) ret = ib_post_srq_recv(ndev->srq, &cmd->wr, NULL); else - ret = ib_post_recv(cmd->queue->cm_id->qp, &cmd->wr, NULL); + ret = ib_post_recv(cmd->queue->qp, &cmd->wr, NULL); if (unlikely(ret)) pr_err("post_recv cmd failed\n"); @@ -513,7 +514,7 @@ static void nvmet_rdma_release_rsp(struct nvmet_rdma_rsp *rsp) atomic_add(1 + rsp->n_rdma, &queue->sq_wr_avail); if (rsp->n_rdma) { - rdma_rw_ctx_destroy(&rsp->rw, queue->cm_id->qp, + rdma_rw_ctx_destroy(&rsp->rw, queue->qp, queue->cm_id->port_num, rsp->req.sg, rsp->req.sg_cnt, nvmet_data_dir(&rsp->req)); } @@ -597,7 +598,7 @@ static void nvmet_rdma_read_data_done(struct ib_cq *cq, struct ib_wc *wc) WARN_ON(rsp->n_rdma <= 0); atomic_add(rsp->n_rdma, &queue->sq_wr_avail); - rdma_rw_ctx_destroy(&rsp->rw, queue->cm_id->qp, + rdma_rw_ctx_destroy(&rsp->rw, queue->qp, queue->cm_id->port_num, rsp->req.sg, rsp->req.sg_cnt, nvmet_data_dir(&rsp->req)); rsp->n_rdma = 0; @@ -752,7 +753,7 @@ static bool nvmet_rdma_execute_command(struct nvmet_rdma_rsp *rsp) } if (nvmet_rdma_need_data_in(rsp)) { - if (rdma_rw_ctx_post(&rsp->rw, queue->cm_id->qp, + if (rdma_rw_ctx_post(&rsp->rw, queue->qp, queue->cm_id->port_num, &rsp->read_cqe, NULL)) nvmet_req_complete(&rsp->req, NVME_SC_DATA_XFER_ERROR); } else { @@ -1038,6 +1039,7 @@ static int 
nvmet_rdma_create_queue_ib(struct nvmet_rdma_queue *queue) pr_err("failed to create_qp ret= %d\n", ret); goto err_destroy_cq; } + queue->qp = queue->cm_id->qp; atomic_set(&queue->sq_wr_avail, qp_attr.cap.max_send_wr); @@ -1066,11 +1068,10 @@ static int nvmet_rdma_create_queue_ib(struct nvmet_rdma_queue *queue) static void nvmet_rdma_destroy_queue_ib(struct nvmet_rdma_queue *queue) { - struct ib_qp *qp = queue->cm_id->qp; - - ib_drain_qp(qp); - rdma_destroy_id(queue->cm_id); - ib_destroy_qp(qp); + ib_drain_qp(queue->qp); + if (queue->cm_id) + rdma_destroy_id(queue->cm_id); + ib_destroy_qp(queue->qp); ib_free_cq(queue->cq); } @@ -1305,9 +1306,12 @@ static int nvmet_rdma_queue_connect(struct rdma_cm_id *cm_id, ret = nvmet_rdma_cm_accept(cm_id, queue, &event->param.conn); if (ret) { - schedule_work(&queue->release_work); - /* Destroying rdma_cm id is not needed here */ - return 0; + /* + * Don't destroy the cm_id in free path, as we implicitly + * destroy the cm_id here with non-zero ret code. + */ + queue->cm_id = NULL; + goto free_queue; } mutex_lock(&nvmet_rdma_queue_mutex); @@ -1316,6 +1320,8 @@ static int nvmet_rdma_queue_connect(struct rdma_cm_id *cm_id, return 0; +free_queue: + nvmet_rdma_free_queue(queue); put_device: kref_put(&ndev->ref, nvmet_rdma_free_dev); From dec7880579d392aac4fff1bda8240e768415d6a9 Mon Sep 17 00:00:00 2001 From: Chengming Gui Date: Fri, 3 Apr 2020 11:32:15 +0800 Subject: [PATCH 192/280] drm/amd/amdgpu: Correct gfx10's CG sequence Incorrect CG sequence will cause gfx timeouts if we keep switching power profile mode (entering a profile mode such as PEAK will disable CG, exiting profile mode with EXIT will enable CG) when running a Vulkan test case (case used for test: vkexample).
Signed-off-by: Chengming Gui Reviewed-by: Kenneth Feng Acked-by: Evan Quan Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c | 23 +++++++++++++++-------- 1 file changed, 15 insertions(+), 8 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c index b9664b46de83..d78059fd2c72 100644 --- a/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c @@ -4104,6 +4104,12 @@ static void gfx_v10_0_update_medium_grain_clock_gating(struct amdgpu_device *ade /* It is disabled by HW by default */ if (enable && (adev->cg_flags & AMD_CG_SUPPORT_GFX_MGCG)) { + /* 0 - Disable some blocks' MGCG */ + WREG32_SOC15(GC, 0, mmGRBM_GFX_INDEX, 0xe0000000); + WREG32_SOC15(GC, 0, mmCGTT_WD_CLK_CTRL, 0xff000000); + WREG32_SOC15(GC, 0, mmCGTT_VGT_CLK_CTRL, 0xff000000); + WREG32_SOC15(GC, 0, mmCGTT_IA_CLK_CTRL, 0xff000000); + /* 1 - RLC_CGTT_MGCG_OVERRIDE */ def = data = RREG32_SOC15(GC, 0, mmRLC_CGTT_MGCG_OVERRIDE); data &= ~(RLC_CGTT_MGCG_OVERRIDE__GRBM_CGTT_SCLK_OVERRIDE_MASK | @@ -4143,19 +4149,20 @@ static void gfx_v10_0_update_medium_grain_clock_gating(struct amdgpu_device *ade if (def != data) WREG32_SOC15(GC, 0, mmRLC_CGTT_MGCG_OVERRIDE, data); - /* 2 - disable MGLS in RLC */ + /* 2 - disable MGLS in CP */ + data = RREG32_SOC15(GC, 0, mmCP_MEM_SLP_CNTL); + if (data & CP_MEM_SLP_CNTL__CP_MEM_LS_EN_MASK) { + data &= ~CP_MEM_SLP_CNTL__CP_MEM_LS_EN_MASK; + WREG32_SOC15(GC, 0, mmCP_MEM_SLP_CNTL, data); + } + + /* 3 - disable MGLS in RLC */ data = RREG32_SOC15(GC, 0, mmRLC_MEM_SLP_CNTL); if (data & RLC_MEM_SLP_CNTL__RLC_MEM_LS_EN_MASK) { data &= ~RLC_MEM_SLP_CNTL__RLC_MEM_LS_EN_MASK; WREG32_SOC15(GC, 0, mmRLC_MEM_SLP_CNTL, data); } - /* 3 - disable MGLS in CP */ - data = RREG32_SOC15(GC, 0, mmCP_MEM_SLP_CNTL); - if (data & CP_MEM_SLP_CNTL__CP_MEM_LS_EN_MASK) { - data &= ~CP_MEM_SLP_CNTL__CP_MEM_LS_EN_MASK; - WREG32_SOC15(GC, 0, mmCP_MEM_SLP_CNTL, data); - } } } @@ -4266,7 +4273,7 @@ static int 
gfx_v10_0_update_gfx_clock_gating(struct amdgpu_device *adev, /* === CGCG /CGLS for GFX 3D Only === */ gfx_v10_0_update_3d_clock_gating(adev, enable); /* === MGCG + MGLS === */ - gfx_v10_0_update_medium_grain_clock_gating(adev, enable); + /* gfx_v10_0_update_medium_grain_clock_gating(adev, enable); */ } if (adev->cg_flags & From 0b9ebd7eebb7419d18030f0364a9392ffbf1d793 Mon Sep 17 00:00:00 2001 From: John Clements Date: Tue, 7 Apr 2020 15:08:15 +0800 Subject: [PATCH 193/280] drm/amdgpu: resolve mGPU RAS query instability upon receiving uncorrectable error, query every GPU node for ras errors Reviewed-by: Hawking Zhang Signed-off-by: John Clements Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c | 20 +++++++++++++++----- 1 file changed, 15 insertions(+), 5 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c index 3c32a94d2424..ab379b44679c 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c @@ -1424,12 +1424,22 @@ static void amdgpu_ras_do_recovery(struct work_struct *work) { struct amdgpu_ras *ras = container_of(work, struct amdgpu_ras, recovery_work); + struct amdgpu_device *remote_adev = NULL; + struct amdgpu_device *adev = ras->adev; + struct list_head device_list, *device_list_handle = NULL; + struct amdgpu_hive_info *hive = amdgpu_get_xgmi_hive(adev, false); - /* - * Query and print non zero error counter per IP block for - * awareness before recovering GPU. 
- */ - amdgpu_ras_log_on_err_counter(ras->adev); + /* Build list of devices to query RAS related errors */ + if (hive && adev->gmc.xgmi.num_physical_nodes > 1) { + device_list_handle = &hive->device_list; + } else { + list_add_tail(&adev->gmc.xgmi.head, &device_list); + device_list_handle = &device_list; + } + + list_for_each_entry(remote_adev, device_list_handle, gmc.xgmi.head) { + amdgpu_ras_log_on_err_counter(remote_adev); + } if (amdgpu_device_should_recover_gpu(ras->adev)) amdgpu_device_gpu_recover(ras->adev, 0); From 2b961e6a9529c3f205e003432e4ccc3bea1ae4b3 Mon Sep 17 00:00:00 2001 From: John Clements Date: Tue, 7 Apr 2020 16:54:33 +0800 Subject: [PATCH 194/280] drm/amdgpu: update RAS related dmesg print prefix RAS error related dmesg print with pci device info Reviewed-by: Hawking Zhang Signed-off-by: John Clements Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/gfx_v9_4.c | 6 ++++-- drivers/gpu/drm/amd/amdgpu/mmhub_v9_4.c | 13 ++++++++----- 2 files changed, 12 insertions(+), 7 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v9_4.c b/drivers/gpu/drm/amd/amdgpu/gfx_v9_4.c index cceb46faf212..dce945ef21a5 100644 --- a/drivers/gpu/drm/amd/amdgpu/gfx_v9_4.c +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v9_4.c @@ -710,14 +710,16 @@ static int gfx_v9_4_query_utc_edc_status(struct amdgpu_device *adev, sec_count = REG_GET_FIELD(data, VML2_MEM_ECC_CNTL, SEC_COUNT); if (sec_count) { - DRM_INFO("Instance[%d]: SubBlock %s, SEC %d\n", i, + dev_info(adev->dev, + "Instance[%d]: SubBlock %s, SEC %d\n", i, vml2_mems[i], sec_count); err_data->ce_count += sec_count; } ded_count = REG_GET_FIELD(data, VML2_MEM_ECC_CNTL, DED_COUNT); if (ded_count) { - DRM_INFO("Instance[%d]: SubBlock %s, DED %d\n", i, + dev_info(adev->dev, + "Instance[%d]: SubBlock %s, DED %d\n", i, vml2_mems[i], ded_count); err_data->ue_count += ded_count; } diff --git a/drivers/gpu/drm/amd/amdgpu/mmhub_v9_4.c b/drivers/gpu/drm/amd/amdgpu/mmhub_v9_4.c index 0d413fabd015..c0e3efcb09bf 100644 --- 
a/drivers/gpu/drm/amd/amdgpu/mmhub_v9_4.c +++ b/drivers/gpu/drm/amd/amdgpu/mmhub_v9_4.c @@ -1539,8 +1539,11 @@ static const struct soc15_reg_entry mmhub_v9_4_edc_cnt_regs[] = { { SOC15_REG_ENTRY(MMHUB, 0, mmMMEA7_EDC_CNT3), 0, 0, 0 }, }; -static int mmhub_v9_4_get_ras_error_count(const struct soc15_reg_entry *reg, - uint32_t value, uint32_t *sec_count, uint32_t *ded_count) +static int mmhub_v9_4_get_ras_error_count(struct amdgpu_device *adev, + const struct soc15_reg_entry *reg, + uint32_t value, + uint32_t *sec_count, + uint32_t *ded_count) { uint32_t i; uint32_t sec_cnt, ded_cnt; @@ -1553,7 +1556,7 @@ static int mmhub_v9_4_get_ras_error_count(const struct soc15_reg_entry *reg, mmhub_v9_4_ras_fields[i].sec_count_mask) >> mmhub_v9_4_ras_fields[i].sec_count_shift; if (sec_cnt) { - DRM_INFO("MMHUB SubBlock %s, SEC %d\n", + dev_info(adev->dev, "MMHUB SubBlock %s, SEC %d\n", mmhub_v9_4_ras_fields[i].name, sec_cnt); *sec_count += sec_cnt; @@ -1563,7 +1566,7 @@ static int mmhub_v9_4_get_ras_error_count(const struct soc15_reg_entry *reg, mmhub_v9_4_ras_fields[i].ded_count_mask) >> mmhub_v9_4_ras_fields[i].ded_count_shift; if (ded_cnt) { - DRM_INFO("MMHUB SubBlock %s, DED %d\n", + dev_info(adev->dev, "MMHUB SubBlock %s, DED %d\n", mmhub_v9_4_ras_fields[i].name, ded_cnt); *ded_count += ded_cnt; @@ -1588,7 +1591,7 @@ static void mmhub_v9_4_query_ras_error_count(struct amdgpu_device *adev, reg_value = RREG32(SOC15_REG_ENTRY_OFFSET(mmhub_v9_4_edc_cnt_regs[i])); if (reg_value) - mmhub_v9_4_get_ras_error_count(&mmhub_v9_4_edc_cnt_regs[i], + mmhub_v9_4_get_ras_error_count(adev, &mmhub_v9_4_edc_cnt_regs[i], reg_value, &sec_count, &ded_count); } From 8f0622a19bef2dc09b511ccf8c536067521ade83 Mon Sep 17 00:00:00 2001 From: Alex Deucher Date: Fri, 3 Apr 2020 12:25:48 -0400 Subject: [PATCH 195/280] drm/amdgpu/psp: dont warn on missing optional TA's Replace dev_warn() with dev_info() and note that they are optional to avoid confusing users. 
The RAS TAs only exist on server boards and the HDCP and DTM TAs only exist on client boards. They are optional either way. Acked-by: Nirmoy Das Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c index be50867ea644..deaa26808841 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c @@ -818,7 +818,7 @@ static int psp_ras_initialize(struct psp_context *psp) if (!psp->adev->psp.ta_ras_ucode_size || !psp->adev->psp.ta_ras_start_addr) { - dev_warn(psp->adev->dev, "RAS: ras ta ucode is not available\n"); + dev_info(psp->adev->dev, "RAS: optional ras ta ucode is not available\n"); return 0; } @@ -902,7 +902,7 @@ static int psp_hdcp_initialize(struct psp_context *psp) if (!psp->adev->psp.ta_hdcp_ucode_size || !psp->adev->psp.ta_hdcp_start_addr) { - dev_warn(psp->adev->dev, "HDCP: hdcp ta ucode is not available\n"); + dev_info(psp->adev->dev, "HDCP: optional hdcp ta ucode is not available\n"); return 0; } @@ -1048,7 +1048,7 @@ static int psp_dtm_initialize(struct psp_context *psp) if (!psp->adev->psp.ta_dtm_ucode_size || !psp->adev->psp.ta_dtm_start_addr) { - dev_warn(psp->adev->dev, "DTM: dtm ta ucode is not available\n"); + dev_info(psp->adev->dev, "DTM: optional dtm ta ucode is not available\n"); return 0; } From 8c61b31e3c3f7dd843ebbf1cc02ca352b66f123f Mon Sep 17 00:00:00 2001 From: "Jerry (Fangzhi) Zuo" Date: Sun, 5 Apr 2020 16:41:09 -0400 Subject: [PATCH 196/280] drm/amd/display: Avoid create MST prop after registration [Why] Props are created at the boot stage, and creating new props after device registration is not allowed. [How] Reuse the connector property from SST if it exists.
Signed-off-by: Jerry (Fangzhi) Zuo Reviewed-by: Hersen Wu Acked-by: Rodrigo Siqueira Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c | 8 +++++--- .../gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_mst_types.c | 8 ++++++++ 2 files changed, 13 insertions(+), 3 deletions(-) diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c index bab587ab6e8d..ffffbf4ec650 100644 --- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c +++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c @@ -5929,7 +5929,8 @@ void amdgpu_dm_connector_init_helper(struct amdgpu_display_manager *dm, adev->mode_info.underscan_vborder_property, 0); - drm_connector_attach_max_bpc_property(&aconnector->base, 8, 16); + if (!aconnector->mst_port) + drm_connector_attach_max_bpc_property(&aconnector->base, 8, 16); /* This defaults to the max in the range, but we want 8bpc for non-edp. */ aconnector->base.state->max_bpc = (connector_type == DRM_MODE_CONNECTOR_eDP) ? 
16 : 8; @@ -5948,8 +5949,9 @@ void amdgpu_dm_connector_init_helper(struct amdgpu_display_manager *dm, &aconnector->base.base, dm->ddev->mode_config.hdr_output_metadata_property, 0); - drm_connector_attach_vrr_capable_property( - &aconnector->base); + if (!aconnector->mst_port) + drm_connector_attach_vrr_capable_property(&aconnector->base); + #ifdef CONFIG_DRM_AMD_DC_HDCP if (adev->dm.hdcp_workqueue) drm_connector_attach_content_protection_property(&aconnector->base, true); diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_mst_types.c b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_mst_types.c index e8208df420d9..fabbe78d5aef 100644 --- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_mst_types.c +++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_mst_types.c @@ -410,6 +410,14 @@ dm_dp_add_mst_connector(struct drm_dp_mst_topology_mgr *mgr, drm_connector_attach_encoder(&aconnector->base, &aconnector->mst_encoder->base); + connector->max_bpc_property = master->base.max_bpc_property; + if (connector->max_bpc_property) + drm_connector_attach_max_bpc_property(connector, 8, 16); + + connector->vrr_capable_property = master->base.vrr_capable_property; + if (connector->vrr_capable_property) + drm_connector_attach_vrr_capable_property(connector); + drm_object_attach_property( &connector->base, dev->mode_config.path_property, From 346d8a0a3c91888a412c2735d69daa09c00f0203 Mon Sep 17 00:00:00 2001 From: Isabel Zhang Date: Sun, 5 Apr 2020 16:41:01 -0400 Subject: [PATCH 197/280] drm/amd/display: Update stream adjust in dc_stream_adjust_vmin_vmax [Why] After v_total_min and max are updated in vrr structure, the changes are not reflected in stream adjust. When these values are read from stream adjust it does not reflect the actual state of the system. [How] Set stream adjust values equal to vrr adjust values after vrr adjust values are updated. 
Signed-off-by: Isabel Zhang Reviewed-by: Alvin Lee Acked-by: Rodrigo Siqueira Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/display/dc/core/dc.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/gpu/drm/amd/display/dc/core/dc.c b/drivers/gpu/drm/amd/display/dc/core/dc.c index f21bbb295ad3..3c9e5ed935dc 100644 --- a/drivers/gpu/drm/amd/display/dc/core/dc.c +++ b/drivers/gpu/drm/amd/display/dc/core/dc.c @@ -283,6 +283,8 @@ bool dc_stream_adjust_vmin_vmax(struct dc *dc, int i = 0; bool ret = false; + stream->adjust = *adjust; + for (i = 0; i < MAX_PIPES; i++) { struct pipe_ctx *pipe = &dc->current_state->res_ctx.pipe_ctx[i]; From e99acf7d1a9c67a1030427a0fbc2361eda6d1496 Mon Sep 17 00:00:00 2001 From: Nicholas Kazlauskas Date: Sun, 5 Apr 2020 16:40:53 -0400 Subject: [PATCH 198/280] drm/amd/display: Translate cursor position by source rect [Why] Cursor is drawn as part of the framebuffer for a plane on AMD hardware. The cursor position on the framebuffer does not change even if the source rect viewport for the cursor does. This causes the cursor to be clipped. The following IGT tests fail as a result of this issue: - kms_plane_cursor@pipe-*-viewport-size-* [How] Offset cursor position by plane source rect viewport. If the viewport is unscaled then the cursor is now correctly positioned on any plane - primary or overlay. There is still a hardware limitation for dealing with the cursor size being incorrectly scaled but that's not something we can address. Add some documentation explaining some of this in the code while we're at it. 
Signed-off-by: Nicholas Kazlauskas Reviewed-by: Zhan Liu Acked-by: Rodrigo Siqueira Signed-off-by: Alex Deucher --- .../amd/display/dc/dcn10/dcn10_hw_sequencer.c | 34 ++++++++++++++++++- 1 file changed, 33 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_hw_sequencer.c b/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_hw_sequencer.c index 0be010085575..37d4a612058a 100644 --- a/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_hw_sequencer.c +++ b/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_hw_sequencer.c @@ -3021,12 +3021,44 @@ void dcn10_set_cursor_position(struct pipe_ctx *pipe_ctx) int x_pos = pos_cpy.x; int y_pos = pos_cpy.y; - // translate cursor from stream space to plane space + /** + * DC cursor is stream space, HW cursor is plane space and drawn + * as part of the framebuffer. + * + * Cursor position can't be negative, but hotspot can be used to + * shift cursor out of the plane bounds. Hotspot must be smaller + * than the cursor size. + */ + + /** + * Translate cursor from stream space to plane space. + * + * If the cursor is scaled then we need to scale the position + * to be in the approximately correct place. We can't do anything + * about the actual size being incorrect, that's a limitation of + * the hardware. + */ x_pos = (x_pos - x_plane) * pipe_ctx->plane_state->src_rect.width / pipe_ctx->plane_state->dst_rect.width; y_pos = (y_pos - y_plane) * pipe_ctx->plane_state->src_rect.height / pipe_ctx->plane_state->dst_rect.height; + /** + * If the cursor's source viewport is clipped then we need to + * translate the cursor to appear in the correct position on + * the screen. + * + * This translation isn't affected by scaling so it needs to be + * done *after* we adjust the position for the scale factor. + */ + x_pos += pipe_ctx->plane_state->src_rect.x; + y_pos += pipe_ctx->plane_state->src_rect.y; + + /** + * If the position is negative then we need to add to the hotspot + * to shift the cursor outside the plane. 
+ */ + if (x_pos < 0) { pos_cpy.x_hotspot -= x_pos; x_pos = 0; From a754d13203d88f217c79bdb3fb659531fac14a39 Mon Sep 17 00:00:00 2001 From: Eric Yang Date: Sun, 5 Apr 2020 16:40:54 -0400 Subject: [PATCH 199/280] drm/amd/display: change default pipe_split policy for DCN1 [Why] Changing policy to dynamic will allow 4k multi display configs to be supported at DPM0 Signed-off-by: Eric Yang Reviewed-by: Tony Cheng Acked-by: Rodrigo Siqueira Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/display/dc/dcn10/dcn10_resource.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_resource.c b/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_resource.c index 8b7122249ddc..07265ca7d28c 100644 --- a/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_resource.c +++ b/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_resource.c @@ -585,7 +585,7 @@ static const struct dc_debug_options debug_defaults_drv = { .disable_pplib_clock_request = false, .disable_pplib_wm_range = false, .pplib_wm_report_mode = WM_REPORT_DEFAULT, - .pipe_split_policy = MPC_SPLIT_AVOID_MULT_DISP, + .pipe_split_policy = MPC_SPLIT_DYNAMIC, .force_single_disp_pipe_split = true, .disable_dcc = DCC_ENABLE, .voltage_align_fclk = true, From 56b922c106cd13e00b4cd58dfc4037bc21daf51a Mon Sep 17 00:00:00 2001 From: Nicholas Kazlauskas Date: Sun, 5 Apr 2020 16:40:55 -0400 Subject: [PATCH 200/280] drm/amd/display: Fix incorrect cursor pos on scaled primary plane [Why] Cursor pos is correctly adjusted from DC side for source rect offset on DCN ASIC, but only on the overlay. This is because DM offsets the cursor for primary planes only, to work around missing code in DCE for the adjustment we're now correctly doing in DC for DCN ASIC. [How] Drop the adjustment for source rect from the DM side of things and put the code where it actually belongs - in DC on the pipe level. This matches what we do for DCN now.
Signed-off-by: Nicholas Kazlauskas Reviewed-by: Zhan Liu Acked-by: Rodrigo Siqueira Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c | 6 ------ .../drm/amd/display/dc/dce110/dce110_hw_sequencer.c | 11 +++++++++++ 2 files changed, 11 insertions(+), 6 deletions(-) diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c index ffffbf4ec650..de60411eef11 100644 --- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c +++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c @@ -6274,12 +6274,6 @@ static int get_cursor_position(struct drm_plane *plane, struct drm_crtc *crtc, y <= -amdgpu_crtc->max_cursor_height) return 0; - if (crtc->primary->state) { - /* avivo cursor are offset into the total surface */ - x += crtc->primary->state->src_x >> 16; - y += crtc->primary->state->src_y >> 16; - } - if (x < 0) { xorigin = min(-x, amdgpu_crtc->max_cursor_width - 1); x = 0; diff --git a/drivers/gpu/drm/amd/display/dc/dce110/dce110_hw_sequencer.c b/drivers/gpu/drm/amd/display/dc/dce110/dce110_hw_sequencer.c index 0976e378659f..67f7ca346696 100644 --- a/drivers/gpu/drm/amd/display/dc/dce110/dce110_hw_sequencer.c +++ b/drivers/gpu/drm/amd/display/dc/dce110/dce110_hw_sequencer.c @@ -2685,6 +2685,17 @@ void dce110_set_cursor_position(struct pipe_ctx *pipe_ctx) .mirror = pipe_ctx->plane_state->horizontal_mirror }; + /** + * If the cursor's source viewport is clipped then we need to + * translate the cursor to appear in the correct position on + * the screen. + * + * This translation isn't affected by scaling so it needs to be + * done *after* we adjust the position for the scale factor. 
+ */ + pos_cpy.x += pipe_ctx->plane_state->src_rect.x; + pos_cpy.y += pipe_ctx->plane_state->src_rect.y; + if (pipe_ctx->plane_state->address.type == PLN_ADDR_TYPE_VIDEO_PROGRESSIVE) pos_cpy.enable = false; From 3efcaf77de3b12b665895508cecda45247a39f0d Mon Sep 17 00:00:00 2001 From: Nicholas Kazlauskas Date: Sun, 5 Apr 2020 16:40:56 -0400 Subject: [PATCH 201/280] drm/amd/display: Program viewport when source pos changes for DCN20 hw seq [Why] For medium updates that change nothing but the source rect position the viewport doesn't change on DCN20. We're missing the check for the position update bit that was there in the DCN10 hardware sequencer. [How] Check the position bit along with the scaling bit like we were doing with DCN20. We shouldn't actually hit a case where context != current_state in our programming/commit model but guard against it anyway since it was guarded for the other bits. Signed-off-by: Nicholas Kazlauskas Reviewed-by: Zhan Liu Acked-by: Rodrigo Siqueira Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/display/dc/dcn20/dcn20_hwseq.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/gpu/drm/amd/display/dc/dcn20/dcn20_hwseq.c b/drivers/gpu/drm/amd/display/dc/dcn20/dcn20_hwseq.c index 233318260da4..22f421e82733 100644 --- a/drivers/gpu/drm/amd/display/dc/dcn20/dcn20_hwseq.c +++ b/drivers/gpu/drm/amd/display/dc/dcn20/dcn20_hwseq.c @@ -1373,6 +1373,7 @@ static void dcn20_update_dchubp_dpp( } if (pipe_ctx->update_flags.bits.viewport || + (context == dc->current_state && plane_state->update_flags.bits.position_change) || (context == dc->current_state && plane_state->update_flags.bits.scaling_change) || (context == dc->current_state && pipe_ctx->stream->update_flags.bits.scaling)) { From 3bae20137cae6c03f58f96c0bc9f3d46f0bc17d4 Mon Sep 17 00:00:00 2001 From: Nicholas Kazlauskas Date: Sun, 5 Apr 2020 16:40:57 -0400 Subject: [PATCH 202/280] drm/amd/display: Calculate scaling ratios on every medium/full update [Why] If a plane isn't being 
actively enabled or disabled then DC won't always recalculate scaling rects and ratios for the primary plane. This results in only a partial or corrupted rect being displayed on the screen instead of scaling to fit the screen. [How] Add back the logic to recalculate the scaling rects into dc_commit_updates_for_stream since this is the expected place to do it in DC. This was previously removed a few years ago to fix an underscan issue but underscan is still functional now with this change - and it should be, since this is only updating to the latest plane state getting passed in. Signed-off-by: Nicholas Kazlauskas Reviewed-by: Aric Cyr Acked-by: Rodrigo Siqueira Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/display/dc/core/dc.c | 13 ++++++++++++- 1 file changed, 12 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/amd/display/dc/core/dc.c b/drivers/gpu/drm/amd/display/dc/core/dc.c index 3c9e5ed935dc..512703eeaf82 100644 --- a/drivers/gpu/drm/amd/display/dc/core/dc.c +++ b/drivers/gpu/drm/amd/display/dc/core/dc.c @@ -2464,7 +2464,7 @@ void dc_commit_updates_for_stream(struct dc *dc, enum surface_update_type update_type; struct dc_state *context; struct dc_context *dc_ctx = dc->ctx; - int i; + int i, j; stream_status = dc_stream_get_status(stream); context = dc->current_state; @@ -2502,6 +2502,17 @@ void dc_commit_updates_for_stream(struct dc *dc, copy_surface_update_to_plane(surface, &srf_updates[i]); + if (update_type >= UPDATE_TYPE_MED) { + for (j = 0; j < dc->res_pool->pipe_count; j++) { + struct pipe_ctx *pipe_ctx = + &context->res_ctx.pipe_ctx[j]; + + if (pipe_ctx->plane_state != surface) + continue; + + resource_build_scaling_params(pipe_ctx); + } + } } copy_stream_update_to_stream(dc, context, stream, stream_update); From d243b6ffde7a3717c38b31dbc9c8c562f066d6ef Mon Sep 17 00:00:00 2001 From: Nicholas Kazlauskas Date: Sun, 5 Apr 2020 16:41:08 -0400 Subject: [PATCH 203/280] drm/amd/display: Make cursor source translation adjustment optional [Why] 
In some usecases, like tiled display, the stream and plane configuration can be setup in a way where the caller expects DAL to perform the clipping, eg: P0: src_rect(0, 0, w, h) dst_rect(0, 0, w, h) P1: src_rect(w, 0, w, h) dst_rect(0, 0, w, h) Cursor is enabled on both streams with the same position. This can result in double cursor on tiled display, even though this behavior is technically correct from the DC interface point of view. We need a mechanism to control this dynamically. [How] This is something that should live in the DM layer based on detection of the specified configuration but it's not something that we really have enough information to deal with today. Add a flag to the cursor position state that specifies whether we want DC to do the translation or not and make it opt-in and let the DM decide when to do it. Signed-off-by: Nicholas Kazlauskas Reviewed-by: Tony Cheng Acked-by: Rodrigo Siqueira Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c | 1 + drivers/gpu/drm/amd/display/dc/dc_hw_types.h | 2 ++ .../drm/amd/display/dc/dce110/dce110_hw_sequencer.c | 10 ++++++++-- .../gpu/drm/amd/display/dc/dcn10/dcn10_hw_sequencer.c | 10 ++++++++-- 4 files changed, 19 insertions(+), 4 deletions(-) diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c index de60411eef11..2167cabd0384 100644 --- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c +++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c @@ -6283,6 +6283,7 @@ static int get_cursor_position(struct drm_plane *plane, struct drm_crtc *crtc, y = 0; } position->enable = true; + position->translate_by_source = true; position->x = x; position->y = y; position->x_hotspot = xorigin; diff --git a/drivers/gpu/drm/amd/display/dc/dc_hw_types.h b/drivers/gpu/drm/amd/display/dc/dc_hw_types.h index 25c50bcab9e9..a8dc3082e3e1 100644 --- a/drivers/gpu/drm/amd/display/dc/dc_hw_types.h +++ 
b/drivers/gpu/drm/amd/display/dc/dc_hw_types.h @@ -385,6 +385,8 @@ struct dc_cursor_position { */ bool enable; + /* Translate cursor x/y by the source rectangle for each plane. */ + bool translate_by_source; }; struct dc_cursor_mi_param { diff --git a/drivers/gpu/drm/amd/display/dc/dce110/dce110_hw_sequencer.c b/drivers/gpu/drm/amd/display/dc/dce110/dce110_hw_sequencer.c index 67f7ca346696..c279982947e1 100644 --- a/drivers/gpu/drm/amd/display/dc/dce110/dce110_hw_sequencer.c +++ b/drivers/gpu/drm/amd/display/dc/dce110/dce110_hw_sequencer.c @@ -2692,9 +2692,15 @@ void dce110_set_cursor_position(struct pipe_ctx *pipe_ctx) * * This translation isn't affected by scaling so it needs to be * done *after* we adjust the position for the scale factor. + * + * This is only done by opt-in for now since there are still + * some usecases like tiled display that might enable the + * cursor on both streams while expecting dc to clip it. */ - pos_cpy.x += pipe_ctx->plane_state->src_rect.x; - pos_cpy.y += pipe_ctx->plane_state->src_rect.y; + if (pos_cpy.translate_by_source) { + pos_cpy.x += pipe_ctx->plane_state->src_rect.x; + pos_cpy.y += pipe_ctx->plane_state->src_rect.y; + } if (pipe_ctx->plane_state->address.type == PLN_ADDR_TYPE_VIDEO_PROGRESSIVE) diff --git a/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_hw_sequencer.c b/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_hw_sequencer.c index 37d4a612058a..b0357546471b 100644 --- a/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_hw_sequencer.c +++ b/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_hw_sequencer.c @@ -3050,9 +3050,15 @@ void dcn10_set_cursor_position(struct pipe_ctx *pipe_ctx) * * This translation isn't affected by scaling so it needs to be * done *after* we adjust the position for the scale factor. + * + * This is only done by opt-in for now since there are still + * some usecases like tiled display that might enable the + * cursor on both streams while expecting dc to clip it. 
*/ - x_pos += pipe_ctx->plane_state->src_rect.x; - y_pos += pipe_ctx->plane_state->src_rect.y; + if (pos_cpy.translate_by_source) { + x_pos += pipe_ctx->plane_state->src_rect.x; + y_pos += pipe_ctx->plane_state->src_rect.y; + } /** * If the position is negative then we need to add to the hotspot From 6dbebf4da3616a14f43041b81a0881a7343ce5fc Mon Sep 17 00:00:00 2001 From: Joshua Aberback Date: Sun, 5 Apr 2020 16:41:11 -0400 Subject: [PATCH 204/280] drm/amd/display: Acknowledge wm_optimized_required [Why] If dc->clk_mgr->funcs->are_clock_states_equal is set, then wm_optimized_required is never checked. In that case, when going from a higher mode to a lower mode, wm_optimized_required remains true until the next mode change. [How] - move from else-if to unconditional or Signed-off-by: Joshua Aberback Reviewed-by: Jun Lei Reviewed-by: Nicholas Kazlauskas Reviewed-by: Yongqiang Sun Acked-by: Rodrigo Siqueira Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/display/dc/core/dc.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/amd/display/dc/core/dc.c b/drivers/gpu/drm/amd/display/dc/core/dc.c index 512703eeaf82..8489f1e56892 100644 --- a/drivers/gpu/drm/amd/display/dc/core/dc.c +++ b/drivers/gpu/drm/amd/display/dc/core/dc.c @@ -1861,8 +1861,9 @@ enum surface_update_type dc_check_update_surfaces_for_stream( // Else we fallback to mem compare. 
} else if (memcmp(&dc->current_state->bw_ctx.bw.dcn.clk, &dc->clk_mgr->clks, offsetof(struct dc_clocks, prev_p_state_change_support)) != 0) { dc->optimized_required = true; - } else if (dc->wm_optimized_required) - dc->optimized_required = true; + } + + dc->optimized_required |= dc->wm_optimized_required; } return type; From 72f5b5a308c744573fdbc6c78202c52196d2c162 Mon Sep 17 00:00:00 2001 From: Michael Strauss Date: Sun, 5 Apr 2020 16:41:12 -0400 Subject: [PATCH 205/280] drm/amd/display: Check for null fclk voltage when parsing clock table [WHY] In cases where a clock table is malformed such that fclk entries have frequencies but not voltages listed, we don't catch the error and set clocks to 0 instead of using hardcoded values as we should. [HOW] Add check for clock tables fclk entry's voltage as well Signed-off-by: Michael Strauss Reviewed-by: Eric Yang Acked-by: Rodrigo Siqueira Signed-off-by: Alex Deucher Cc: stable@vger.kernel.org --- drivers/gpu/drm/amd/display/dc/clk_mgr/dcn21/rn_clk_mgr.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn21/rn_clk_mgr.c b/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn21/rn_clk_mgr.c index ab267ddd4abe..24c5765890fa 100644 --- a/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn21/rn_clk_mgr.c +++ b/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn21/rn_clk_mgr.c @@ -643,7 +643,7 @@ static void rn_clk_mgr_helper_populate_bw_params(struct clk_bw_params *bw_params /* Find lowest DPM, FCLK is filled in reverse order*/ for (i = PP_SMU_NUM_FCLK_DPM_LEVELS - 1; i >= 0; i--) { - if (clock_table->FClocks[i].Freq != 0) { + if (clock_table->FClocks[i].Freq != 0 && clock_table->FClocks[i].Vol != 0) { j = i; break; } From 86f93e7447f91ff896b5a561a8e2220442cc8515 Mon Sep 17 00:00:00 2001 From: Vilhelm Prytz Date: Mon, 6 Apr 2020 10:33:34 +0200 Subject: [PATCH 206/280] docs: filesystems: fix typo in qnx6.rst - 'structer' replaced with 'structure' Signed-off-by: Vilhelm Prytz Signed-off-by: Jonathan 
Corbet --- Documentation/filesystems/qnx6.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Documentation/filesystems/qnx6.rst b/Documentation/filesystems/qnx6.rst index b71308314070..fd13433d362c 100644 --- a/Documentation/filesystems/qnx6.rst +++ b/Documentation/filesystems/qnx6.rst @@ -185,7 +185,7 @@ tree structures are treated as system blocks. The rational behind that is that a write request can work on a new snapshot (system area of the inactive - resp. lower serial numbered superblock) while -at the same time there is still a complete stable filesystem structer in the +at the same time there is still a complete stable filesystem structure in the other half of the system area. When finished with writing (a sync write is completed, the maximum sync leap From ed01b03018b3953c26afe123392678fba5e60bb8 Mon Sep 17 00:00:00 2001 From: Jimmy Assarsson Date: Thu, 2 Apr 2020 19:26:13 +0200 Subject: [PATCH 207/280] docs: kernel-parameters.txt: Remove nompx x86/mpx was removed in commit 45fc24e89b7c ("x86/mpx: remove MPX from arch/x86"), this removes the documentation of parameter nompx. Fixes: 45fc24e89b7c ("x86/mpx: remove MPX from arch/x86") Signed-off-by: Jimmy Assarsson Acked-by: Dave Hansen Link: https://lore.kernel.org/r/20200402172614.3020-1-jimmyassarsson@gmail.com Signed-off-by: Jonathan Corbet --- Documentation/admin-guide/kernel-parameters.txt | 4 ---- 1 file changed, 4 deletions(-) diff --git a/Documentation/admin-guide/kernel-parameters.txt b/Documentation/admin-guide/kernel-parameters.txt index b3b5aa7408df..d39a37c900d1 100644 --- a/Documentation/admin-guide/kernel-parameters.txt +++ b/Documentation/admin-guide/kernel-parameters.txt @@ -982,10 +982,6 @@ Documentation/admin-guide/dynamic-debug-howto.rst for details. - nompx [X86] Disables Intel Memory Protection Extensions. - See Documentation/x86/intel_mpx.rst for more - information about the feature. - nopku [X86] Disable Memory Protection Keys CPU feature found in some Intel CPUs. 
From cd4ca34153cc533626a65dc1fb52ea49cb3a6c65 Mon Sep 17 00:00:00 2001 From: Jimmy Assarsson Date: Thu, 2 Apr 2020 19:26:14 +0200 Subject: [PATCH 208/280] docs: kernel-parameters.txt: Fix broken references Fix remaining broken references in kernel-parameters.txt. Cc: Jonathan Corbet Signed-off-by: Jimmy Assarsson Link: https://lore.kernel.org/r/20200402172614.3020-2-jimmyassarsson@gmail.com Signed-off-by: Jonathan Corbet --- Documentation/admin-guide/kernel-parameters.txt | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/Documentation/admin-guide/kernel-parameters.txt b/Documentation/admin-guide/kernel-parameters.txt index d39a37c900d1..f7899914f32a 100644 --- a/Documentation/admin-guide/kernel-parameters.txt +++ b/Documentation/admin-guide/kernel-parameters.txt @@ -675,7 +675,7 @@ coredump_filter= [KNL] Change the default value for /proc//coredump_filter. - See also Documentation/filesystems/proc.txt. + See also Documentation/filesystems/proc.rst. coresight_cpu_debug.enable [ARM,ARM64] @@ -952,7 +952,7 @@ edid/1680x1050.bin, or edid/1920x1080.bin is given and no file with the same name exists. Details and instructions how to build your own EDID data are - available in Documentation/driver-api/edid.rst. An EDID + available in Documentation/admin-guide/edid.rst. An EDID data set will only be used for a particular connector, if its name and a colon are prepended to the EDID name. Each connector may use a unique EDID data From 869903904242bc0a989f6f47b717ffc0bb3c9864 Mon Sep 17 00:00:00 2001 From: Alyssa Ross Date: Fri, 3 Apr 2020 17:07:01 +0000 Subject: [PATCH 209/280] Documentation: sysrq: fix RST formatting "On x86" and "On SPARC" are now definition list terms, like "On PowerPC", "On other", and "On all". The Credits list is now a bulleted list, like lots of Credits lists in other files. This prevents the list from becoming a single long, unpunctuated sentence in the generated documentation. 
I also did a couple of other tiny readability improvements to the "How do I use the magic SysRq key?" section while I was there. Signed-off-by: Alyssa Ross Link: https://lore.kernel.org/r/20200403170701.10852-1-hi@alyssa.is Signed-off-by: Jonathan Corbet --- Documentation/admin-guide/sysrq.rst | 20 +++++++++++--------- 1 file changed, 11 insertions(+), 9 deletions(-) diff --git a/Documentation/admin-guide/sysrq.rst b/Documentation/admin-guide/sysrq.rst index 72b2cfb066f4..a46209f4636c 100644 --- a/Documentation/admin-guide/sysrq.rst +++ b/Documentation/admin-guide/sysrq.rst @@ -48,9 +48,10 @@ always allowed (by a user with admin privileges). How do I use the magic SysRq key? ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ -On x86 - You press the key combo :kbd:`ALT-SysRq-`. +On x86 + You press the key combo :kbd:`ALT-SysRq-`. -.. note:: + .. note:: Some keyboards may not have a key labeled 'SysRq'. The 'SysRq' key is also known as the 'Print Screen' key. Also some keyboards cannot @@ -58,14 +59,15 @@ On x86 - You press the key combo :kbd:`ALT-SysRq-`. have better luck with press :kbd:`Alt`, press :kbd:`SysRq`, release :kbd:`SysRq`, press :kbd:``, release everything. -On SPARC - You press :kbd:`ALT-STOP-`, I believe. +On SPARC + You press :kbd:`ALT-STOP-`, I believe. On the serial console (PC style standard serial ports only) You send a ``BREAK``, then within 5 seconds a command key. Sending ``BREAK`` twice is interpreted as a normal BREAK. On PowerPC - Press :kbd:`ALT - Print Screen` (or :kbd:`F13`) - :kbd:``, + Press :kbd:`ALT - Print Screen` (or :kbd:`F13`) - :kbd:``. :kbd:`Print Screen` (or :kbd:`F13`) - :kbd:`` may suffice. On other @@ -73,7 +75,7 @@ On other let me know so I can add them to this section. On all - write a character to /proc/sysrq-trigger. e.g.:: + Write a character to /proc/sysrq-trigger. 
e.g.:: echo t > /proc/sysrq-trigger @@ -282,7 +284,7 @@ Just ask them on the linux-kernel mailing list: Credits ~~~~~~~ -Written by Mydraal -Updated by Adam Sulmicki -Updated by Jeremy M. Dolan 2001/01/28 10:15:59 -Added to by Crutcher Dunnavant +- Written by Mydraal +- Updated by Adam Sulmicki +- Updated by Jeremy M. Dolan 2001/01/28 10:15:59 +- Added to by Crutcher Dunnavant From 2abf99b94c17405ae991aceedc7b3ca3e70ea299 Mon Sep 17 00:00:00 2001 From: Lukas Bulwahn Date: Mon, 30 Mar 2020 08:01:32 +0200 Subject: [PATCH 210/280] docs: driver-api: address duplicate label warning Delete identically named subsection to fix Documentation warning: Documentation/driver-api/w1.rst:11: \ WARNING: duplicate label driver-api/w1:w1 api internal to the kernel, \ other instance in Documentation/driver-api/w1.rst Signed-off-by: Lukas Bulwahn Link: https://lore.kernel.org/r/20200330060132.7773-1-lukas.bulwahn@gmail.com Signed-off-by: Jonathan Corbet --- Documentation/driver-api/w1.rst | 3 --- 1 file changed, 3 deletions(-) diff --git a/Documentation/driver-api/w1.rst b/Documentation/driver-api/w1.rst index 9963cca788a1..bda3ad60f655 100644 --- a/Documentation/driver-api/w1.rst +++ b/Documentation/driver-api/w1.rst @@ -7,9 +7,6 @@ W1: Dallas' 1-wire bus W1 API internal to the kernel ============================= -W1 API internal to the kernel ------------------------------ - include/linux/w1.h ~~~~~~~~~~~~~~~~~~ From 19905fe667b6d91305bd1db0fe98358668866114 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Joakim=20L=C3=B6nnegren?= Date: Thu, 12 Mar 2020 17:13:55 -0300 Subject: [PATCH 211/280] Documentation: driver-api/usb/writing_usb_driver.rst Updates documentation links MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit In writing_usb_driver.rst: Remove link to https://www.qbik.ch/usb/devices/ since it seems to be inactive since 2013 Update link to linux-usb mailing list archive Signed-off-by: Joakim Lönnegren Link: 
https://lore.kernel.org/r/20200312201354.285839-1-joakimlonnegren@gmail.om Signed-off-by: Jonathan Corbet --- Documentation/driver-api/usb/writing_usb_driver.rst | 7 ++----- 1 file changed, 2 insertions(+), 5 deletions(-) diff --git a/Documentation/driver-api/usb/writing_usb_driver.rst b/Documentation/driver-api/usb/writing_usb_driver.rst index 4fe1c06b6a13..0b3d9ff221bb 100644 --- a/Documentation/driver-api/usb/writing_usb_driver.rst +++ b/Documentation/driver-api/usb/writing_usb_driver.rst @@ -314,11 +314,8 @@ http://www.linux-usb.org/ Linux Hotplug Project: http://linux-hotplug.sourceforge.net/ -Linux USB Working Devices List: -http://www.qbik.ch/usb/devices/ - -linux-usb-devel Mailing List Archives: -http://marc.theaimsgroup.com/?l=linux-usb-devel +linux-usb Mailing List Archives: +https://lore.kernel.org/linux-usb/ Programming Guide for Linux USB Device Drivers: http://lmu.web.psi.ch/docu/manuals/software_manuals/linux_sl/usb_linux_programming_guide.pdf From d3ef5536274faf89e626276b833be122a16bdb81 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Sat, 4 Apr 2020 08:51:20 +0200 Subject: [PATCH 212/280] block: fix busy device checking in blk_drop_partitions bd_super is only set by get_tree_bdev and mount_bdev, and thus not by other openers like btrfs or the XFS realtime and log devices, as well as block devices directly opened from user space. Check bd_openers instead. 
Fixes: 77032ca66f86 ("Return EBUSY from BLKRRPART for mounted whole-dev fs") Signed-off-by: Christoph Hellwig Signed-off-by: Jens Axboe --- block/partitions/core.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/block/partitions/core.c b/block/partitions/core.c index b79c4513629b..1a0a829d8416 100644 --- a/block/partitions/core.c +++ b/block/partitions/core.c @@ -496,7 +496,7 @@ int blk_drop_partitions(struct gendisk *disk, struct block_device *bdev) if (!disk_part_scan_enabled(disk)) return 0; - if (bdev->bd_part_count || bdev->bd_super) + if (bdev->bd_part_count || bdev->bd_openers) return -EBUSY; res = invalidate_partition(disk, 0); if (res) From 8305f72f952cff21ce8109dc1ea4b321c8efc5af Mon Sep 17 00:00:00 2001 From: Kai-Heng Feng Date: Wed, 27 Mar 2019 17:02:54 +0800 Subject: [PATCH 213/280] libata: Return correct status in sata_pmp_eh_recover_pm() when ATA_DFLAG_DETACH is set During system resume from suspend, this can be observed on ASM1062 PMP controller: ata10.01: SATA link down (SStatus 0 SControl 330) ata10.02: hard resetting link ata10.02: SATA link down (SStatus 0 SControl 330) ata10.00: configured for UDMA/133 Kernel panic - not syncing: stack-protector: Kernel in: sata_pmp_eh_recover+0xa2b/0xa40 CPU: 2 PID: 230 Comm: scsi_eh_9 Tainted: P OE #49-Ubuntu Hardware name: System manufacturer System Product 1001 12/10/2017 Call Trace: dump_stack+0x63/0x8b panic+0xe4/0x244 ? sata_pmp_eh_recover+0xa2b/0xa40 __stack_chk_fail+0x19/0x20 sata_pmp_eh_recover+0xa2b/0xa40 ? ahci_do_softreset+0x260/0x260 [libahci] ? ahci_do_hardreset+0x140/0x140 [libahci] ? ata_phys_link_offline+0x60/0x60 ? ahci_stop_engine+0xc0/0xc0 [libahci] sata_pmp_error_handler+0x22/0x30 ahci_error_handler+0x45/0x80 [libahci] ata_scsi_port_error_handler+0x29b/0x770 ? ata_scsi_cmd_error_handler+0x101/0x140 ata_scsi_error+0x95/0xd0 ? scsi_try_target_reset+0x90/0x90 scsi_error_handler+0xd0/0x5b0 kthread+0x121/0x140 ? scsi_eh_get_sense+0x200/0x200 ? 
kthread_create_worker_on_cpu+0x70/0x70 ret_from_fork+0x22/0x40 Kernel Offset: 0xcc00000 from 0xffffffff81000000 (relocation range: 0xffffffff80000000-0xffffffffbfffffff) Since sata_pmp_eh_recover_pmp() doesn't set rc when ATA_DFLAG_DETACH is set, sata_pmp_eh_recover() continues to run. During retry it triggers the stack protector. Set correct rc in sata_pmp_eh_recover_pmp() to let sata_pmp_eh_recover() jump to pmp_fail directly. BugLink: https://bugs.launchpad.net/bugs/1821434 Cc: stable@vger.kernel.org Signed-off-by: Kai-Heng Feng Signed-off-by: Jens Axboe --- drivers/ata/libata-pmp.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/ata/libata-pmp.c b/drivers/ata/libata-pmp.c index 3ff14071617c..79f2aeeb482a 100644 --- a/drivers/ata/libata-pmp.c +++ b/drivers/ata/libata-pmp.c @@ -763,6 +763,7 @@ static int sata_pmp_eh_recover_pmp(struct ata_port *ap, if (dev->flags & ATA_DFLAG_DETACH) { detach = 1; + rc = -ENODEV; goto fail; } From 3aadcf83820a8ae675a3ad88a6e889eb9638df7f Mon Sep 17 00:00:00 2001 From: Colin Ian King Date: Sun, 5 Apr 2020 12:51:20 +0100 Subject: [PATCH 214/280] ata: ahci-imx: remove redundant assignment to ret The variable ret is being initialized with a value that is never read and it is being updated later with a new value. The initialization is redundant and can be removed. 
Addresses-Coverity: ("Unused value") Signed-off-by: Colin Ian King Signed-off-by: Jens Axboe --- drivers/ata/ahci_imx.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/ata/ahci_imx.c b/drivers/ata/ahci_imx.c index 948d2c6557f3..388baf528fa8 100644 --- a/drivers/ata/ahci_imx.c +++ b/drivers/ata/ahci_imx.c @@ -782,7 +782,7 @@ static int ahci_imx_softreset(struct ata_link *link, unsigned int *class, struct ata_host *host = dev_get_drvdata(ap->dev); struct ahci_host_priv *hpriv = host->private_data; struct imx_ahci_priv *imxpriv = hpriv->plat_data; - int ret = -EIO; + int ret; if (imxpriv->type == AHCI_IMX53) ret = ahci_pmp_retry_srst_ops.softreset(link, class, deadline); From 894fba7f434a408ec3f4d4164d2b300ee9263d38 Mon Sep 17 00:00:00 2001 From: Kai-Heng Feng Date: Fri, 7 Feb 2020 18:00:16 +0800 Subject: [PATCH 215/280] ata: ahci: Add sysfs attribute to show remapped NVMe device count Add a new sysfs attribute to show how many NVMe devices are remapped. Userspace like distro installer can use this info to ask user to change the BIOS setting. 
Signed-off-by: Kai-Heng Feng Signed-off-by: Jens Axboe --- drivers/ata/ahci.c | 28 ++++++++++++++++++++++++---- drivers/ata/ahci.h | 1 + 2 files changed, 25 insertions(+), 4 deletions(-) diff --git a/drivers/ata/ahci.c b/drivers/ata/ahci.c index ad0185c8dcee..f23a99278c1d 100644 --- a/drivers/ata/ahci.c +++ b/drivers/ata/ahci.c @@ -1495,7 +1495,7 @@ static irqreturn_t ahci_thunderx_irq_handler(int irq, void *dev_instance) static void ahci_remap_check(struct pci_dev *pdev, int bar, struct ahci_host_priv *hpriv) { - int i, count = 0; + int i; u32 cap; /* @@ -1516,13 +1516,14 @@ static void ahci_remap_check(struct pci_dev *pdev, int bar, continue; /* We've found a remapped device */ - count++; + hpriv->remapped_nvme++; } - if (!count) + if (!hpriv->remapped_nvme) return; - dev_warn(&pdev->dev, "Found %d remapped NVMe devices.\n", count); + dev_warn(&pdev->dev, "Found %u remapped NVMe devices.\n", + hpriv->remapped_nvme); dev_warn(&pdev->dev, "Switch your BIOS from RAID to AHCI mode to use them.\n"); @@ -1642,6 +1643,18 @@ static void ahci_intel_pcs_quirk(struct pci_dev *pdev, struct ahci_host_priv *hp } } +static ssize_t remapped_nvme_show(struct device *dev, + struct device_attribute *attr, + char *buf) +{ + struct ata_host *host = dev_get_drvdata(dev); + struct ahci_host_priv *hpriv = host->private_data; + + return sprintf(buf, "%u\n", hpriv->remapped_nvme); +} + +static DEVICE_ATTR_RO(remapped_nvme); + static int ahci_init_one(struct pci_dev *pdev, const struct pci_device_id *ent) { unsigned int board_id = ent->driver_data; @@ -1745,6 +1758,10 @@ static int ahci_init_one(struct pci_dev *pdev, const struct pci_device_id *ent) /* detect remapped nvme devices */ ahci_remap_check(pdev, ahci_pci_bar, hpriv); + sysfs_add_file_to_group(&pdev->dev.kobj, + &dev_attr_remapped_nvme.attr, + NULL); + /* must set flag prior to save config in order to take effect */ if (ahci_broken_devslp(pdev)) hpriv->flags |= AHCI_HFLAG_NO_DEVSLP; @@ -1896,6 +1913,9 @@ static void 
ahci_shutdown_one(struct pci_dev *pdev) static void ahci_remove_one(struct pci_dev *pdev) { + sysfs_remove_file_from_group(&pdev->dev.kobj, + &dev_attr_remapped_nvme.attr, + NULL); pm_runtime_get_noresume(&pdev->dev); ata_pci_remove_one(pdev); } diff --git a/drivers/ata/ahci.h b/drivers/ata/ahci.h index 3dbf398c92ea..d991dd46e89c 100644 --- a/drivers/ata/ahci.h +++ b/drivers/ata/ahci.h @@ -336,6 +336,7 @@ struct ahci_host_priv { u32 em_loc; /* enclosure management location */ u32 em_buf_sz; /* EM buffer size in byte */ u32 em_msg_type; /* EM message type */ + u32 remapped_nvme; /* NVMe remapped device count */ bool got_runtime_pm; /* Did we do pm_runtime_get? */ struct clk *clks[AHCI_MAX_CLKS]; /* Optional */ struct reset_control *rsts; /* Optional */ From 0e1b4271078787d3408d3dd314d80b290578cc00 Mon Sep 17 00:00:00 2001 From: Jason Yan Date: Wed, 8 Apr 2020 10:46:05 +0800 Subject: [PATCH 216/280] x86/xen: make xen_pvmmu_arch_setup() static Fix the following sparse warning: arch/x86/xen/setup.c:998:12: warning: symbol 'xen_pvmmu_arch_setup' was not declared. Should it be static? 
Reported-by: Hulk Robot Signed-off-by: Jason Yan Reviewed-by: Juergen Gross Link: https://lore.kernel.org/r/20200408024605.42394-1-yanaijie@huawei.com Signed-off-by: Juergen Gross --- arch/x86/xen/setup.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/x86/xen/setup.c b/arch/x86/xen/setup.c index 33b0e20df7fc..1a2d8a50dac4 100644 --- a/arch/x86/xen/setup.c +++ b/arch/x86/xen/setup.c @@ -985,7 +985,7 @@ void xen_enable_syscall(void) #endif /* CONFIG_X86_64 */ } -void __init xen_pvmmu_arch_setup(void) +static void __init xen_pvmmu_arch_setup(void) { HYPERVISOR_vm_assist(VMASST_CMD_enable, VMASST_TYPE_4gb_segments); HYPERVISOR_vm_assist(VMASST_CMD_enable, VMASST_TYPE_writable_pagetables); From 6de084803074dcead5da5bf5c82d189b4cf4b6b5 Mon Sep 17 00:00:00 2001 From: Vilhelm Prytz Date: Mon, 6 Apr 2020 16:01:22 +0200 Subject: [PATCH 217/280] Documentation: firmware-guide: ACPI: fix table alignment in namespace.rst Fix the alignment in the ACPI block diagram (RST table) by adding missing spaces. Signed-off-by: Vilhelm Prytz Signed-off-by: Rafael J. 
Wysocki --- Documentation/firmware-guide/acpi/namespace.rst | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/Documentation/firmware-guide/acpi/namespace.rst b/Documentation/firmware-guide/acpi/namespace.rst index 3eb763d6656d..6193582a2204 100644 --- a/Documentation/firmware-guide/acpi/namespace.rst +++ b/Documentation/firmware-guide/acpi/namespace.rst @@ -56,13 +56,13 @@ are illustrated in the following diagram:: +- - - -+ | +-------------------| | | Entry | - - - - - - - -+ | | Definition Blocks | | +- - - -+ | | +-------------------+ | - | | +- - - - - - - - - -+ | - +-|->| SSDT | | + | | +- - - - - - - - - -+ | + +-|->| SSDT | | | +-------------------+ | | | Definition Blocks | | | +- - - - - - - - - -+ | +------------------------+ - | + | OSPM Loading | \|/ +----------------+ From b5432a699fdff9266a475771bd46d740f40f76aa Mon Sep 17 00:00:00 2001 From: Jason Yan Date: Wed, 8 Apr 2020 10:44:12 +0800 Subject: [PATCH 218/280] ACPI, x86/boot: make acpi_nobgrt static Fix the following sparse warning: arch/x86/kernel/acpi/boot.c:48:5: warning: symbol 'acpi_nobgrt' was not declared. Should it be static? Reported-by: Hulk Robot Signed-off-by: Jason Yan Signed-off-by: Rafael J. 
Wysocki --- arch/x86/kernel/acpi/boot.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/x86/kernel/acpi/boot.c b/arch/x86/kernel/acpi/boot.c index 1ae5439a9a85..683ed9e12e6b 100644 --- a/arch/x86/kernel/acpi/boot.c +++ b/arch/x86/kernel/acpi/boot.c @@ -45,7 +45,7 @@ EXPORT_SYMBOL(acpi_disabled); #define PREFIX "ACPI: " int acpi_noirq; /* skip ACPI IRQ initialization */ -int acpi_nobgrt; /* skip ACPI BGRT */ +static int acpi_nobgrt; /* skip ACPI BGRT */ int acpi_pci_disabled; /* skip ACPI PCI scan and IRQ initialization */ EXPORT_SYMBOL(acpi_pci_disabled); From 4902f7fcb3bcb4ce088db97bfd194401a784cc60 Mon Sep 17 00:00:00 2001 From: Yihao Wu Date: Wed, 8 Apr 2020 18:11:36 +0800 Subject: [PATCH 219/280] cpuidle-haltpoll: Fix small typo Fix a spelling typo in cpuidle-haltpoll.c. Signed-off-by: Yihao Wu [ rjw: Subject & changelog ] Signed-off-by: Rafael J. Wysocki --- drivers/cpuidle/cpuidle-haltpoll.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/cpuidle/cpuidle-haltpoll.c b/drivers/cpuidle/cpuidle-haltpoll.c index db124bc1ca2c..fcc53215bac8 100644 --- a/drivers/cpuidle/cpuidle-haltpoll.c +++ b/drivers/cpuidle/cpuidle-haltpoll.c @@ -94,7 +94,7 @@ static void haltpoll_uninit(void) haltpoll_cpuidle_devices = NULL; } -static bool haltpool_want(void) +static bool haltpoll_want(void) { return kvm_para_has_hint(KVM_HINTS_REALTIME) || force; } @@ -110,7 +110,7 @@ static int __init haltpoll_init(void) cpuidle_poll_state_init(drv); - if (!kvm_para_available() || !haltpool_want()) + if (!kvm_para_available() || !haltpoll_want()) return -ENODEV; ret = cpuidle_register_driver(drv); From ec95f1dedc9c64ac5a8b0bdb7c276936c70fdedd Mon Sep 17 00:00:00 2001 From: Mike Marshall Date: Wed, 8 Apr 2020 08:52:40 -0400 Subject: [PATCH 220/280] orangefs: get rid of knob code... Christoph Hellwig sent in a reversion of "orangefs: remember count when reading." because: ->read_iter calls can race with each other and one or more ->flush calls. 
Remove the scheme to store the read count in the file private data as it is completely racy and can cause use after free or double free conditions. Christoph's reversion caused Orangefs not to work or to compile. I added a patch that fixed that, but intel's kbuild test robot pointed out that sending Christoph's patch followed by my patch upstream would break bisection because of the failure to compile. So I have combined the reversion plus my patch... here's the commit message that was in my patch: Logically, optimal Orangefs "pages" are 4 megabytes. Reading large Orangefs files 4096 bytes at a time is like trying to kick a dead whale down the beach. Before Christoph's "Revert orangefs: remember count when reading." I tried to give users a knob whereby they could, for example, use "count" in read(2) or bs with dd(1) to get whatever they considered an appropriate amount of bytes at a time from Orangefs and fill as many page cache pages as they could at once. Without the racy code that Christoph reverted, Orangefs won't even compile, much less work. So this replaces the logic that used the private file data that Christoph reverted with a static number of bytes to read from Orangefs. I ran tests like the following to determine what a reasonable static number of bytes might be: dd if=/pvfsmnt/asdf of=/dev/null count=128 bs=4194304 dd if=/pvfsmnt/asdf of=/dev/null count=256 bs=2097152 dd if=/pvfsmnt/asdf of=/dev/null count=512 bs=1048576 . . . dd if=/pvfsmnt/asdf of=/dev/null count=4194304 bs=128 Reads seem faster using the static number, so my "knob code" wasn't just racy, it wasn't even a good idea... 
Signed-off-by: Mike Marshall Reported-by: kbuild test robot --- fs/orangefs/file.c | 26 +---------------------- fs/orangefs/inode.c | 39 ++++++----------------------------- fs/orangefs/orangefs-kernel.h | 4 ---- 3 files changed, 7 insertions(+), 62 deletions(-) diff --git a/fs/orangefs/file.c b/fs/orangefs/file.c index c740159d9ad1..173e6ea57a47 100644 --- a/fs/orangefs/file.c +++ b/fs/orangefs/file.c @@ -346,23 +346,8 @@ static ssize_t orangefs_file_read_iter(struct kiocb *iocb, struct iov_iter *iter) { int ret; - struct orangefs_read_options *ro; - orangefs_stats.reads++; - /* - * Remember how they set "count" in read(2) or pread(2) or whatever - - * users can use count as a knob to control orangefs io size and later - * we can try to help them fill as many pages as possible in readpage. - */ - if (!iocb->ki_filp->private_data) { - iocb->ki_filp->private_data = kmalloc(sizeof *ro, GFP_KERNEL); - if (!iocb->ki_filp->private_data) - return(ENOMEM); - ro = iocb->ki_filp->private_data; - ro->blksiz = iter->count; - } - down_read(&file_inode(iocb->ki_filp)->i_rwsem); ret = orangefs_revalidate_mapping(file_inode(iocb->ki_filp)); if (ret) @@ -650,12 +635,6 @@ static int orangefs_lock(struct file *filp, int cmd, struct file_lock *fl) return rc; } -static int orangefs_file_open(struct inode * inode, struct file *file) -{ - file->private_data = NULL; - return generic_file_open(inode, file); -} - static int orangefs_flush(struct file *file, fl_owner_t id) { /* @@ -669,9 +648,6 @@ static int orangefs_flush(struct file *file, fl_owner_t id) struct inode *inode = file->f_mapping->host; int r; - kfree(file->private_data); - file->private_data = NULL; - if (inode->i_state & I_DIRTY_TIME) { spin_lock(&inode->i_lock); inode->i_state &= ~I_DIRTY_TIME; @@ -694,7 +670,7 @@ const struct file_operations orangefs_file_operations = { .lock = orangefs_lock, .unlocked_ioctl = orangefs_ioctl, .mmap = orangefs_file_mmap, - .open = orangefs_file_open, + .open = generic_file_open, .flush = 
orangefs_flush, .release = orangefs_file_release, .fsync = orangefs_fsync, diff --git a/fs/orangefs/inode.c b/fs/orangefs/inode.c index 961c0fd8675a..12ae630fbed7 100644 --- a/fs/orangefs/inode.c +++ b/fs/orangefs/inode.c @@ -259,46 +259,19 @@ static int orangefs_readpage(struct file *file, struct page *page) pgoff_t index; /* which page */ struct page *next_page; char *kaddr; - struct orangefs_read_options *ro = file->private_data; loff_t read_size; - loff_t roundedup; int buffer_index = -1; /* orangefs shared memory slot */ int slot_index; /* index into slot */ int remaining; /* - * If they set some miniscule size for "count" in read(2) - * (for example) then let's try to read a page, or the whole file - * if it is smaller than a page. Once "count" goes over a page - * then lets round up to the highest page size multiple that is - * less than or equal to "count" and do that much orangefs IO and - * try to fill as many pages as we can from it. - * - * "count" should be represented in ro->blksiz. - * - * inode->i_size = file size. + * Get up to this many bytes from Orangefs at a time and try + * to fill them into the page cache at once. Tests with dd made + * this seem like a reasonable static number, if there was + * interest perhaps this number could be made setable through + * sysfs... */ - if (ro) { - if (ro->blksiz < PAGE_SIZE) { - if (inode->i_size < PAGE_SIZE) - read_size = inode->i_size; - else - read_size = PAGE_SIZE; - } else { - roundedup = ((PAGE_SIZE - 1) & ro->blksiz) ? 
- ((ro->blksiz + PAGE_SIZE) & ~(PAGE_SIZE -1)) : - ro->blksiz; - if (roundedup > inode->i_size) - read_size = inode->i_size; - else - read_size = roundedup; - - } - } else { - read_size = PAGE_SIZE; - } - if (!read_size) - read_size = PAGE_SIZE; + read_size = 524288; if (PageDirty(page)) orangefs_launder_page(page); diff --git a/fs/orangefs/orangefs-kernel.h b/fs/orangefs/orangefs-kernel.h index ed67f39fa7ce..e12aeb9623d6 100644 --- a/fs/orangefs/orangefs-kernel.h +++ b/fs/orangefs/orangefs-kernel.h @@ -239,10 +239,6 @@ struct orangefs_write_range { kgid_t gid; }; -struct orangefs_read_options { - ssize_t blksiz; -}; - extern struct orangefs_stats orangefs_stats; /* From 0e393a9a8f2a450862964451715d68e9a96a9c34 Mon Sep 17 00:00:00 2001 From: Mike Marshall Date: Wed, 8 Apr 2020 09:05:45 -0400 Subject: [PATCH 221/280] orangefs: don't mess with I_DIRTY_TIMES in orangefs_flush Christoph Hellwig noticed that we were doing some unnecessary work in orangefs_flush: orangefs_flush just writes out data on every close(2) call. There is no need to change anything about the dirty state, especially as orangefs doesn't treat I_DIRTY_TIMES special in any way. The code seems to come from partially open coding vfs_fsync. He sent in a patch with the above commit message and also a patch that was a reversion of another Orangefs patch I had sent upstream a while ago. I had to fix his reversion patch so that it would compile which caused his "don't mess with I_DIRTY_TIMES" patch to fail to apply. So here I have just remade his patch and applied it after the fixed reversion patch. Signed-off-by: Mike Marshall --- fs/orangefs/file.c | 8 -------- 1 file changed, 8 deletions(-) diff --git a/fs/orangefs/file.c b/fs/orangefs/file.c index 173e6ea57a47..af375e049aae 100644 --- a/fs/orangefs/file.c +++ b/fs/orangefs/file.c @@ -645,16 +645,8 @@ static int orangefs_flush(struct file *file, fl_owner_t id) * on an explicit fsync call. This duplicates historical OrangeFS * behavior. 
*/ - struct inode *inode = file->f_mapping->host; int r; - if (inode->i_state & I_DIRTY_TIME) { - spin_lock(&inode->i_lock); - inode->i_state &= ~I_DIRTY_TIME; - spin_unlock(&inode->i_lock); - mark_inode_dirty_sync(inode); - } - r = filemap_write_and_wait_range(file->f_mapping, 0, LLONG_MAX); if (r > 0) return 0; From 1d3aa4a5516d2e4933fe3cca11d3349ef63bc547 Mon Sep 17 00:00:00 2001 From: Takashi Iwai Date: Wed, 8 Apr 2020 15:56:45 +0200 Subject: [PATCH 222/280] ALSA: hda/realtek - Add quirk for MSI GL63 MSI GL63 laptop requires the similar quirk like other MSI models, ALC1220_FIXUP_CLEVO_P950. The board BIOS doesn't provide a PCI SSID for the device, hence we need to take the codec SSID (1462:1275) instead. BugLink: https://bugzilla.kernel.org/show_bug.cgi?id=207157 Cc: Link: https://lore.kernel.org/r/20200408135645.21896-1-tiwai@suse.de Signed-off-by: Takashi Iwai --- sound/pci/hda/patch_realtek.c | 1 + 1 file changed, 1 insertion(+) diff --git a/sound/pci/hda/patch_realtek.c b/sound/pci/hda/patch_realtek.c index 019239190f6d..de2826f90d34 100644 --- a/sound/pci/hda/patch_realtek.c +++ b/sound/pci/hda/patch_realtek.c @@ -2455,6 +2455,7 @@ static const struct snd_pci_quirk alc882_fixup_tbl[] = { SND_PCI_QUIRK(0x1458, 0xa0b8, "Gigabyte AZ370-Gaming", ALC1220_FIXUP_GB_DUAL_CODECS), SND_PCI_QUIRK(0x1458, 0xa0cd, "Gigabyte X570 Aorus Master", ALC1220_FIXUP_CLEVO_P950), SND_PCI_QUIRK(0x1462, 0x1228, "MSI-GP63", ALC1220_FIXUP_CLEVO_P950), + SND_PCI_QUIRK(0x1462, 0x1275, "MSI-GL63", ALC1220_FIXUP_CLEVO_P950), SND_PCI_QUIRK(0x1462, 0x1276, "MSI-GL73", ALC1220_FIXUP_CLEVO_P950), SND_PCI_QUIRK(0x1462, 0x1293, "MSI-GP65", ALC1220_FIXUP_CLEVO_P950), SND_PCI_QUIRK(0x1462, 0x7350, "MSI-7350", ALC889_FIXUP_CD), From 2a48218f8e23d47bd3e23cfdfb8aa9066f7dc3e6 Mon Sep 17 00:00:00 2001 From: Takashi Iwai Date: Wed, 8 Apr 2020 16:04:48 +0200 Subject: [PATCH 223/280] ALSA: usb-audio: Add mixer workaround for TRX40 and co Some recent boards (supposedly with a new AMD platform) contain 
the USB audio class 2 device that is often tied with HD-audio. The device exposes an Input Gain Pad control (id=19, control=12) but this node doesn't behave correctly, returning an error for each inquiry of GET_MIN and GET_MAX that should have been mandatory. As a workaround, simply ignore this node by adding a usbmix_name_map table entry. The currently known devices are: * 0414:a002 - Gigabyte TRX40 Aorus Pro WiFi * 0b05:1916 - ASUS ROG Zenith II * 0b05:1917 - ASUS ROG Strix * 0db0:0d64 - MSI TRX40 Creator * 0db0:543d - MSI TRX40 BugLink: https://bugzilla.kernel.org/show_bug.cgi?id=206543 Cc: Link: https://lore.kernel.org/r/20200408140449.22319-1-tiwai@suse.de Signed-off-by: Takashi Iwai --- sound/usb/mixer_maps.c | 28 ++++++++++++++++++++++++++++ 1 file changed, 28 insertions(+) diff --git a/sound/usb/mixer_maps.c b/sound/usb/mixer_maps.c index 5ebca8013840..72b575c34860 100644 --- a/sound/usb/mixer_maps.c +++ b/sound/usb/mixer_maps.c @@ -359,6 +359,14 @@ static const struct usbmix_name_map corsair_virtuoso_map[] = { { 0 } }; +/* Some mobos shipped with a dummy HD-audio show the invalid GET_MIN/GET_MAX + * response for Input Gain Pad (id=19, control=12). Skip it. 
+ */ +static const struct usbmix_name_map asus_rog_map[] = { + { 19, NULL, 12 }, /* FU, Input Gain Pad */ + {} +}; + /* * Control map entries */ @@ -488,6 +496,26 @@ static const struct usbmix_ctl_map usbmix_ctl_maps[] = { .id = USB_ID(0x1b1c, 0x0a42), .map = corsair_virtuoso_map, }, + { /* Gigabyte TRX40 Aorus Pro WiFi */ + .id = USB_ID(0x0414, 0xa002), + .map = asus_rog_map, + }, + { /* ASUS ROG Zenith II */ + .id = USB_ID(0x0b05, 0x1916), + .map = asus_rog_map, + }, + { /* ASUS ROG Strix */ + .id = USB_ID(0x0b05, 0x1917), + .map = asus_rog_map, + }, + { /* MSI TRX40 Creator */ + .id = USB_ID(0x0db0, 0x0d64), + .map = asus_rog_map, + }, + { /* MSI TRX40 */ + .id = USB_ID(0x0db0, 0x543d), + .map = asus_rog_map, + }, { 0 } /* terminator */ }; From 3c6fd1f07ed03a04debbb9a9d782205f1ef5e2ab Mon Sep 17 00:00:00 2001 From: Takashi Iwai Date: Wed, 8 Apr 2020 16:04:49 +0200 Subject: [PATCH 224/280] ALSA: hda: Add driver blacklist The recent AMD platform exposes an HD-audio bus but without any actual codecs, which is internally tied with a USB-audio device, supposedly. It results in "no codecs" error of HD-audio bus driver, and it's nothing but a waste of resources. This patch introduces a static blacklist table for skipping such a known bogus PCI SSID entry. 
As of writing this patch, the known SSIDs are: * 1043:874f - ASUS ROG Zenith II / Strix * 1462:cb59 - MSI TRX40 Creator * 1462:cb60 - MSI TRX40 BugLink: https://bugzilla.kernel.org/show_bug.cgi?id=206543 Cc: Link: https://lore.kernel.org/r/20200408140449.22319-2-tiwai@suse.de Signed-off-by: Takashi Iwai --- sound/pci/hda/hda_intel.c | 16 ++++++++++++++++ 1 file changed, 16 insertions(+) diff --git a/sound/pci/hda/hda_intel.c b/sound/pci/hda/hda_intel.c index 92a042e34d3e..bd093593f8fb 100644 --- a/sound/pci/hda/hda_intel.c +++ b/sound/pci/hda/hda_intel.c @@ -2076,6 +2076,17 @@ static void pcm_mmap_prepare(struct snd_pcm_substream *substream, #endif } +/* Blacklist for skipping the whole probe: + * some HD-audio PCI entries are exposed without any codecs, and such devices + * should be ignored from the beginning. + */ +static const struct snd_pci_quirk driver_blacklist[] = { + SND_PCI_QUIRK(0x1043, 0x874f, "ASUS ROG Zenith II / Strix", 0), + SND_PCI_QUIRK(0x1462, 0xcb59, "MSI TRX40 Creator", 0), + SND_PCI_QUIRK(0x1462, 0xcb60, "MSI TRX40", 0), + {} +}; + static const struct hda_controller_ops pci_hda_ops = { .disable_msi_reset_irq = disable_msi_reset_irq, .pcm_mmap_prepare = pcm_mmap_prepare, @@ -2092,6 +2103,11 @@ static int azx_probe(struct pci_dev *pci, bool schedule_probe; int err; + if (snd_pci_quirk_lookup(pci, driver_blacklist)) { + dev_info(&pci->dev, "Skipping the blacklisted device\n"); + return -ENODEV; + } + if (dev >= SNDRV_CARDS) return -ENODEV; if (!enable[dev]) { From 08a1d26eb894a9dcf79f674558a284ad1ffef517 Mon Sep 17 00:00:00 2001 From: Jens Axboe Date: Wed, 8 Apr 2020 09:20:54 -0600 Subject: [PATCH 225/280] io_uring: ensure openat sets O_LARGEFILE if needed OPENAT2 correctly sets O_LARGEFILE if it has to, but that escaped the OPENAT opcode. 
Dmitry reports that his test case that compares openat() and IORING_OP_OPENAT sees failures on large files: *** sync openat openat succeeded sync write at offset 0 write succeeded sync write at offset 4294967296 write succeeded *** sync openat openat succeeded io_uring write at offset 0 write succeeded io_uring write at offset 4294967296 write succeeded *** io_uring openat openat succeeded sync write at offset 0 write succeeded sync write at offset 4294967296 write failed: File too large *** io_uring openat openat succeeded io_uring write at offset 0 write succeeded io_uring write at offset 4294967296 write failed: File too large Ensure we set O_LARGEFILE, if force_o_largefile() is true. Cc: stable@vger.kernel.org # v5.6 Fixes: 15b71abe7b52 ("io_uring: add support for IORING_OP_OPENAT") Reported-by: Dmitry Kadashev Signed-off-by: Jens Axboe --- fs/io_uring.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/fs/io_uring.c b/fs/io_uring.c index 773f55c49cd8..e71aa42e102a 100644 --- a/fs/io_uring.c +++ b/fs/io_uring.c @@ -2957,6 +2957,8 @@ static int io_openat_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe) req->open.how.mode = READ_ONCE(sqe->len); fname = u64_to_user_ptr(READ_ONCE(sqe->addr)); req->open.how.flags = READ_ONCE(sqe->open_flags); + if (force_o_largefile()) + req->open.how.flags |= O_LARGEFILE; req->open.filename = getname(fname); if (IS_ERR(req->open.filename)) { From 45097daea2f4e89bdb1c98359f78d0d6feb8e5c8 Mon Sep 17 00:00:00 2001 From: Xiaoguang Wang Date: Wed, 8 Apr 2020 22:29:58 +0800 Subject: [PATCH 226/280] io_uring: do not always copy iovec in io_req_map_rw() In io_read_prep() or io_write_prep(), io_req_map_rw() takes struct io_async_rw's fast_iov as argument to call io_import_iovec(), and if io_import_iovec() uses struct io_async_rw's fast_iov as valid iovec array, later indeed io_req_map_rw() does not need to do the memcpy operation, because they are same pointers. 
Signed-off-by: Xiaoguang Wang Signed-off-by: Jens Axboe --- fs/io_uring.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/fs/io_uring.c b/fs/io_uring.c index e71aa42e102a..b06188a50af4 100644 --- a/fs/io_uring.c +++ b/fs/io_uring.c @@ -2493,8 +2493,9 @@ static void io_req_map_rw(struct io_kiocb *req, ssize_t io_size, req->io->rw.iov = iovec; if (!req->io->rw.iov) { req->io->rw.iov = req->io->rw.fast_iov; - memcpy(req->io->rw.iov, fast_iov, - sizeof(struct iovec) * iter->nr_segs); + if (req->io->rw.iov != fast_iov) + memcpy(req->io->rw.iov, fast_iov, + sizeof(struct iovec) * iter->nr_segs); } else { req->flags |= REQ_F_NEED_CLEANUP; } From 709b302faddfac757d87df2080f900eccb1dc9e2 Mon Sep 17 00:00:00 2001 From: Pavel Begunkov Date: Wed, 8 Apr 2020 08:58:43 +0300 Subject: [PATCH 227/280] io_uring: simplify io_get_sqring Make io_get_sqring() care only about sqes themselves, not initialising the io_kiocb. Also, split it into get + consume, that will be helpful in the future. Signed-off-by: Pavel Begunkov Signed-off-by: Jens Axboe --- fs/io_uring.c | 40 ++++++++++++++++++++++------------------ 1 file changed, 22 insertions(+), 18 deletions(-) diff --git a/fs/io_uring.c b/fs/io_uring.c index b06188a50af4..08f520456db8 100644 --- a/fs/io_uring.c +++ b/fs/io_uring.c @@ -5781,8 +5781,7 @@ static void io_commit_sqring(struct io_ring_ctx *ctx) * used, it's important that those reads are done through READ_ONCE() to * prevent a re-load down the line. */ -static bool io_get_sqring(struct io_ring_ctx *ctx, struct io_kiocb *req, - const struct io_uring_sqe **sqe_ptr) +static const struct io_uring_sqe *io_get_sqe(struct io_ring_ctx *ctx) { u32 *sq_array = ctx->sq_array; unsigned head; @@ -5796,25 +5795,18 @@ static bool io_get_sqring(struct io_ring_ctx *ctx, struct io_kiocb *req, * though the application is the one updating it. 
*/ head = READ_ONCE(sq_array[ctx->cached_sq_head & ctx->sq_mask]); - if (likely(head < ctx->sq_entries)) { - /* - * All io need record the previous position, if LINK vs DARIN, - * it can be used to mark the position of the first IO in the - * link list. - */ - req->sequence = ctx->cached_sq_head; - *sqe_ptr = &ctx->sq_sqes[head]; - req->opcode = READ_ONCE((*sqe_ptr)->opcode); - req->user_data = READ_ONCE((*sqe_ptr)->user_data); - ctx->cached_sq_head++; - return true; - } + if (likely(head < ctx->sq_entries)) + return &ctx->sq_sqes[head]; /* drop invalid entries */ - ctx->cached_sq_head++; ctx->cached_sq_dropped++; WRITE_ONCE(ctx->rings->sq_dropped, ctx->cached_sq_dropped); - return false; + return NULL; +} + +static inline void io_consume_sqe(struct io_ring_ctx *ctx) +{ + ctx->cached_sq_head++; } static int io_submit_sqes(struct io_ring_ctx *ctx, unsigned int nr, @@ -5858,11 +5850,23 @@ static int io_submit_sqes(struct io_ring_ctx *ctx, unsigned int nr, submitted = -EAGAIN; break; } - if (!io_get_sqring(ctx, req, &sqe)) { + sqe = io_get_sqe(ctx); + if (!sqe) { __io_req_do_free(req); + io_consume_sqe(ctx); break; } + /* + * All io need record the previous position, if LINK vs DARIN, + * it can be used to mark the position of the first IO in the + * link list. + */ + req->sequence = ctx->cached_sq_head; + req->opcode = READ_ONCE(sqe->opcode); + req->user_data = READ_ONCE(sqe->user_data); + io_consume_sqe(ctx); + /* will complete beyond this point, count as submitted */ submitted++; From b1e50e549b1372d9742509230dc4af7dd521d984 Mon Sep 17 00:00:00 2001 From: Pavel Begunkov Date: Wed, 8 Apr 2020 08:58:44 +0300 Subject: [PATCH 228/280] io_uring: alloc req only after getting sqe As io_get_sqe() split into 2 stage get/consume, get an sqe before allocating io_kiocb, so no free_req*() for a failure case is needed, and inline back __io_req_do_free(), which has only 1 user. 
Signed-off-by: Pavel Begunkov Signed-off-by: Jens Axboe --- fs/io_uring.c | 24 +++++++++--------------- 1 file changed, 9 insertions(+), 15 deletions(-) diff --git a/fs/io_uring.c b/fs/io_uring.c index 08f520456db8..845c173d9282 100644 --- a/fs/io_uring.c +++ b/fs/io_uring.c @@ -1354,14 +1354,6 @@ static inline void io_put_file(struct io_kiocb *req, struct file *file, fput(file); } -static void __io_req_do_free(struct io_kiocb *req) -{ - if (likely(!io_is_fallback_req(req))) - kmem_cache_free(req_cachep, req); - else - clear_bit_unlock(0, (unsigned long *) req->ctx->fallback_req); -} - static void __io_req_aux_free(struct io_kiocb *req) { if (req->flags & REQ_F_NEED_CLEANUP) @@ -1392,7 +1384,10 @@ static void __io_free_req(struct io_kiocb *req) } percpu_ref_put(&req->ctx->refs); - __io_req_do_free(req); + if (likely(!io_is_fallback_req(req))) + kmem_cache_free(req_cachep, req); + else + clear_bit_unlock(0, (unsigned long *) req->ctx->fallback_req); } struct req_batch { @@ -5844,18 +5839,17 @@ static int io_submit_sqes(struct io_ring_ctx *ctx, unsigned int nr, struct io_kiocb *req; int err; + sqe = io_get_sqe(ctx); + if (unlikely(!sqe)) { + io_consume_sqe(ctx); + break; + } req = io_get_req(ctx, statep); if (unlikely(!req)) { if (!submitted) submitted = -EAGAIN; break; } - sqe = io_get_sqe(ctx); - if (!sqe) { - __io_req_do_free(req); - io_consume_sqe(ctx); - break; - } /* * All io need record the previous position, if LINK vs DARIN, From 0553b8bda8709c47863eab3fff7ac32ad04ca52b Mon Sep 17 00:00:00 2001 From: Pavel Begunkov Date: Wed, 8 Apr 2020 08:58:45 +0300 Subject: [PATCH 229/280] io_uring: remove req init from io_get_req() io_get_req() do two different things: io_kiocb allocation and initialisation. Move init part out of it and rename into io_alloc_req(). It's simpler this way and also have better data locality. 
Signed-off-by: Pavel Begunkov Signed-off-by: Jens Axboe --- fs/io_uring.c | 53 ++++++++++++++++++++++++++------------------------- 1 file changed, 27 insertions(+), 26 deletions(-) diff --git a/fs/io_uring.c b/fs/io_uring.c index 845c173d9282..7f9bf8b6e6af 100644 --- a/fs/io_uring.c +++ b/fs/io_uring.c @@ -1293,8 +1293,8 @@ static struct io_kiocb *io_get_fallback_req(struct io_ring_ctx *ctx) return NULL; } -static struct io_kiocb *io_get_req(struct io_ring_ctx *ctx, - struct io_submit_state *state) +static struct io_kiocb *io_alloc_req(struct io_ring_ctx *ctx, + struct io_submit_state *state) { gfp_t gfp = GFP_KERNEL | __GFP_NOWARN; struct io_kiocb *req; @@ -1327,22 +1327,9 @@ static struct io_kiocb *io_get_req(struct io_ring_ctx *ctx, req = state->reqs[state->free_reqs]; } -got_it: - req->io = NULL; - req->file = NULL; - req->ctx = ctx; - req->flags = 0; - /* one is dropped after submission, the other at completion */ - refcount_set(&req->refs, 2); - req->task = NULL; - req->result = 0; - INIT_IO_WORK(&req->work, io_wq_submit_work); return req; fallback: - req = io_get_fallback_req(ctx); - if (req) - goto got_it; - return NULL; + return io_get_fallback_req(ctx); } static inline void io_put_file(struct io_kiocb *req, struct file *file, @@ -5804,6 +5791,28 @@ static inline void io_consume_sqe(struct io_ring_ctx *ctx) ctx->cached_sq_head++; } +static void io_init_req(struct io_ring_ctx *ctx, struct io_kiocb *req, + const struct io_uring_sqe *sqe) +{ + /* + * All io need record the previous position, if LINK vs DARIN, + * it can be used to mark the position of the first IO in the + * link list. 
+ */ + req->sequence = ctx->cached_sq_head; + req->opcode = READ_ONCE(sqe->opcode); + req->user_data = READ_ONCE(sqe->user_data); + req->io = NULL; + req->file = NULL; + req->ctx = ctx; + req->flags = 0; + /* one is dropped after submission, the other at completion */ + refcount_set(&req->refs, 2); + req->task = NULL; + req->result = 0; + INIT_IO_WORK(&req->work, io_wq_submit_work); +} + static int io_submit_sqes(struct io_ring_ctx *ctx, unsigned int nr, struct file *ring_file, int ring_fd, struct mm_struct **mm, bool async) @@ -5844,23 +5853,15 @@ static int io_submit_sqes(struct io_ring_ctx *ctx, unsigned int nr, io_consume_sqe(ctx); break; } - req = io_get_req(ctx, statep); + req = io_alloc_req(ctx, statep); if (unlikely(!req)) { if (!submitted) submitted = -EAGAIN; break; } - /* - * All io need record the previous position, if LINK vs DARIN, - * it can be used to mark the position of the first IO in the - * link list. - */ - req->sequence = ctx->cached_sq_head; - req->opcode = READ_ONCE(sqe->opcode); - req->user_data = READ_ONCE(sqe->user_data); + io_init_req(ctx, req, sqe); io_consume_sqe(ctx); - /* will complete beyond this point, count as submitted */ submitted++; From 9c280f9087118099f50566e906b9d9d5a0fb4529 Mon Sep 17 00:00:00 2001 From: Pavel Begunkov Date: Wed, 8 Apr 2020 08:58:46 +0300 Subject: [PATCH 230/280] io_uring: don't read user-shared sqe flags twice Don't re-read userspace-shared sqe->flags, it can be exploited. sqe->flags are copied into req->flags in io_submit_sqe(), check them there instead. 
Signed-off-by: Pavel Begunkov Signed-off-by: Jens Axboe --- fs/io_uring.c | 20 ++++++++------------ 1 file changed, 8 insertions(+), 12 deletions(-) diff --git a/fs/io_uring.c b/fs/io_uring.c index 7f9bf8b6e6af..21e1c69b9c43 100644 --- a/fs/io_uring.c +++ b/fs/io_uring.c @@ -2931,7 +2931,7 @@ static int io_openat_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe) if (sqe->ioprio || sqe->buf_index) return -EINVAL; - if (sqe->flags & IOSQE_FIXED_FILE) + if (req->flags & REQ_F_FIXED_FILE) return -EBADF; if (req->flags & REQ_F_NEED_CLEANUP) return 0; @@ -2964,7 +2964,7 @@ static int io_openat2_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe) if (sqe->ioprio || sqe->buf_index) return -EINVAL; - if (sqe->flags & IOSQE_FIXED_FILE) + if (req->flags & REQ_F_FIXED_FILE) return -EBADF; if (req->flags & REQ_F_NEED_CLEANUP) return 0; @@ -3318,7 +3318,7 @@ static int io_statx_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe) if (sqe->ioprio || sqe->buf_index) return -EINVAL; - if (sqe->flags & IOSQE_FIXED_FILE) + if (req->flags & REQ_F_FIXED_FILE) return -EBADF; if (req->flags & REQ_F_NEED_CLEANUP) return 0; @@ -3395,7 +3395,7 @@ static int io_close_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe) if (sqe->ioprio || sqe->off || sqe->addr || sqe->len || sqe->rw_flags || sqe->buf_index) return -EINVAL; - if (sqe->flags & IOSQE_FIXED_FILE) + if (req->flags & REQ_F_FIXED_FILE) return -EBADF; req->close.fd = READ_ONCE(sqe->fd); @@ -5366,15 +5366,10 @@ static int io_file_get(struct io_submit_state *state, struct io_kiocb *req, } static int io_req_set_file(struct io_submit_state *state, struct io_kiocb *req, - const struct io_uring_sqe *sqe) + int fd, unsigned int flags) { - unsigned flags; - int fd; bool fixed; - flags = READ_ONCE(sqe->flags); - fd = READ_ONCE(sqe->fd); - if (!io_req_needs_file(req, fd)) return 0; @@ -5616,7 +5611,7 @@ static bool io_submit_sqe(struct io_kiocb *req, const struct io_uring_sqe *sqe, { struct io_ring_ctx *ctx = 
req->ctx; unsigned int sqe_flags; - int ret, id; + int ret, id, fd; sqe_flags = READ_ONCE(sqe->flags); @@ -5647,7 +5642,8 @@ static bool io_submit_sqe(struct io_kiocb *req, const struct io_uring_sqe *sqe, IOSQE_ASYNC | IOSQE_FIXED_FILE | IOSQE_BUFFER_SELECT); - ret = io_req_set_file(state, req, sqe); + fd = READ_ONCE(sqe->fd); + ret = io_req_set_file(state, req, fd, sqe_flags); if (unlikely(ret)) { err_req: io_cqring_add_event(req, ret); From aa317d3351dee7cb0b27db808af0cd2340dcbaef Mon Sep 17 00:00:00 2001 From: Mike Marshall Date: Wed, 8 Apr 2020 13:01:03 -0400 Subject: [PATCH 231/280] orangefs: clarify build steps for test server in orangefs.txt Signed-off-by: Mike Marshall --- Documentation/filesystems/orangefs.txt | 34 ++++++++++++++------------ 1 file changed, 19 insertions(+), 15 deletions(-) diff --git a/Documentation/filesystems/orangefs.txt b/Documentation/filesystems/orangefs.txt index f4ba94950e3f..5a3865702a71 100644 --- a/Documentation/filesystems/orangefs.txt +++ b/Documentation/filesystems/orangefs.txt @@ -38,16 +38,6 @@ DOCUMENTATION http://www.orangefs.org/documentation/ - -USERSPACE FILESYSTEM SOURCE -=========================== - -http://www.orangefs.org/download - -Orangefs versions prior to 2.9.3 would not be compatible with the -upstream version of the kernel client. - - RUNNING ORANGEFS ON A SINGLE SERVER =================================== @@ -91,6 +81,14 @@ Mount the filesystem. mount -t pvfs2 tcp://localhost:3334/orangefs /pvfsmnt +USERSPACE FILESYSTEM SOURCE +=========================== + +http://www.orangefs.org/download + +Orangefs versions prior to 2.9.3 would not be compatible with the +upstream version of the kernel client. + BUILDING ORANGEFS ON A SINGLE SERVER ==================================== @@ -102,18 +100,24 @@ You can omit --prefix if you don't care that things are sprinkled around in /usr/local. As of version 2.9.6, OrangeFS uses Berkeley DB by default, we will probably be changing the default to LMDB soon. 
-./configure --prefix=/opt/ofs --with-db-backend=lmdb +./configure --prefix=/opt/ofs --with-db-backend=lmdb --disable-usrint make make install -Create an orangefs config file. +Create an orangefs config file by running pvfs2-genconfig and +specifying a target config file. Pvfs2-genconfig will prompt you +through. Generally it works fine to take the defaults, but you +should use your server's hostname, rather than "localhost" when +it comes to that question. /opt/ofs/bin/pvfs2-genconfig /etc/pvfs2.conf Create an /etc/pvfs2tab file. +Localhost is fine for your pvfs2tab file: + echo tcp://localhost:3334/orangefs /pvfsmnt pvfs2 defaults,noauto 0 0 > \ /etc/pvfs2tab @@ -127,7 +131,7 @@ Bootstrap the server. Start the server. -/opt/osf/sbin/pvfs2-server /etc/pvfs2.conf +/opt/ofs/sbin/pvfs2-server /etc/pvfs2.conf Now the server should be running. Pvfs2-ls is a simple test to verify that the server is running. @@ -137,11 +141,11 @@ test to verify that the server is running. If stuff seems to be working, load the kernel module and turn on the client core. -/opt/ofs/sbin/pvfs2-client -p /opt/osf/sbin/pvfs2-client-core +/opt/ofs/sbin/pvfs2-client -p /opt/ofs/sbin/pvfs2-client-core Mount your filesystem. -mount -t pvfs2 tcp://localhost:3334/orangefs /pvfsmnt +mount -t pvfs2 tcp://`hostname`:3334/orangefs /pvfsmnt RUNNING XFSTESTS From 487eca11a321ef33bcf4ca5adb3c0c4954db1b58 Mon Sep 17 00:00:00 2001 From: Prike Liang Date: Tue, 7 Apr 2020 20:21:26 +0800 Subject: [PATCH 232/280] drm/amdgpu: fix gfx hang during suspend with video playback (v2) The system hangs during S3 suspend because the SMU is left waiting for the GC, which does not respond to the CP_HQD_ACTIVE register access request. The root cause is accessing the GC register while GFX CGPG is entered; it can be fixed by disabling GFX CGPG before performing suspend. v2: Disable GFX CGPG instead of using the RLC safe mode guard. 
Signed-off-by: Prike Liang Tested-by: Mengbing Wang Reviewed-by: Huang Rui Signed-off-by: Alex Deucher Cc: stable@vger.kernel.org --- drivers/gpu/drm/amd/amdgpu/amdgpu_device.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c index faa3e7102156..559dc24ef436 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c @@ -2340,8 +2340,6 @@ static int amdgpu_device_ip_suspend_phase1(struct amdgpu_device *adev) { int i, r; - amdgpu_device_set_pg_state(adev, AMD_PG_STATE_UNGATE); - amdgpu_device_set_cg_state(adev, AMD_CG_STATE_UNGATE); for (i = adev->num_ip_blocks - 1; i >= 0; i--) { if (!adev->ip_blocks[i].status.valid) @@ -3356,6 +3354,9 @@ int amdgpu_device_suspend(struct drm_device *dev, bool fbcon) } } + amdgpu_device_set_pg_state(adev, AMD_PG_STATE_UNGATE); + amdgpu_device_set_cg_state(adev, AMD_CG_STATE_UNGATE); + amdgpu_amdkfd_suspend(adev, !fbcon); amdgpu_ras_suspend(adev); From 3abd1e95e0b0aa816d4dd63cac33a423afb4a7f2 Mon Sep 17 00:00:00 2001 From: Evan Quan Date: Fri, 3 Apr 2020 13:19:14 +0800 Subject: [PATCH 233/280] drm/amd/powerplay: error out on forcing clock setting not supported For Arcturus, forcing clock to some specific level is not supported with 54.18 and onwards SMU firmware. As according to firmware team, they adopt new gfx dpm tuned parameters which can cover all the use case in a much smooth way. Thus setting through driver interface is not needed and maybe do a disservice. 
Signed-off-by: Evan Quan Reviewed-by: Alex Deucher Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/powerplay/arcturus_ppt.c | 47 +++++++++++++++++++- 1 file changed, 46 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/amd/powerplay/arcturus_ppt.c b/drivers/gpu/drm/amd/powerplay/arcturus_ppt.c index 5db8c56066ee..1ef0923f7190 100644 --- a/drivers/gpu/drm/amd/powerplay/arcturus_ppt.c +++ b/drivers/gpu/drm/amd/powerplay/arcturus_ppt.c @@ -794,8 +794,21 @@ static int arcturus_force_clk_levels(struct smu_context *smu, struct arcturus_dpm_table *dpm_table; struct arcturus_single_dpm_table *single_dpm_table; uint32_t soft_min_level, soft_max_level; + uint32_t smu_version; int ret = 0; + ret = smu_get_smc_version(smu, NULL, &smu_version); + if (ret) { + pr_err("Failed to get smu version!\n"); + return ret; + } + + if (smu_version >= 0x361200) { + pr_err("Forcing clock level is not supported with " + "54.18 and onwards SMU firmwares\n"); + return -EOPNOTSUPP; + } + soft_min_level = mask ? (ffs(mask) - 1) : 0; soft_max_level = mask ? 
(fls(mask) - 1) : 0; @@ -1512,6 +1525,38 @@ static int arcturus_set_power_profile_mode(struct smu_context *smu, return 0; } +static int arcturus_set_performance_level(struct smu_context *smu, + enum amd_dpm_forced_level level) +{ + uint32_t smu_version; + int ret; + + ret = smu_get_smc_version(smu, NULL, &smu_version); + if (ret) { + pr_err("Failed to get smu version!\n"); + return ret; + } + + switch (level) { + case AMD_DPM_FORCED_LEVEL_HIGH: + case AMD_DPM_FORCED_LEVEL_LOW: + case AMD_DPM_FORCED_LEVEL_PROFILE_STANDARD: + case AMD_DPM_FORCED_LEVEL_PROFILE_MIN_SCLK: + case AMD_DPM_FORCED_LEVEL_PROFILE_MIN_MCLK: + case AMD_DPM_FORCED_LEVEL_PROFILE_PEAK: + if (smu_version >= 0x361200) { + pr_err("Forcing clock level is not supported with " + "54.18 and onwards SMU firmwares\n"); + return -EOPNOTSUPP; + } + break; + default: + break; + } + + return smu_v11_0_set_performance_level(smu, level); +} + static void arcturus_dump_pptable(struct smu_context *smu) { struct smu_table_context *table_context = &smu->smu_table; @@ -2285,7 +2330,7 @@ static const struct pptable_funcs arcturus_ppt_funcs = { .get_profiling_clk_mask = arcturus_get_profiling_clk_mask, .get_power_profile_mode = arcturus_get_power_profile_mode, .set_power_profile_mode = arcturus_set_power_profile_mode, - .set_performance_level = smu_v11_0_set_performance_level, + .set_performance_level = arcturus_set_performance_level, /* debug (internal used) */ .dump_pptable = arcturus_dump_pptable, .get_power_limit = arcturus_get_power_limit, From 2960758cce2310774de60bbbd8d6841d436c54d9 Mon Sep 17 00:00:00 2001 From: Aaron Liu Date: Tue, 7 Apr 2020 17:46:04 +0800 Subject: [PATCH 234/280] drm/amdgpu: unify fw_write_wait for new gfx9 asics Make the fw_write_wait default case true since presumably all new gfx9 asics will have updated firmware. That is using unique WAIT_REG_MEM packet with operation=1.
Signed-off-by: Aaron Liu Tested-by: Aaron Liu Tested-by: Yuxian Dai Acked-by: Alex Deucher Acked-by: Huang Rui Signed-off-by: Alex Deucher Cc: stable@vger.kernel.org --- drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c index 608ffe3b684e..e6b113ed2f40 100644 --- a/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c @@ -1217,6 +1217,8 @@ static void gfx_v9_0_check_fw_write_wait(struct amdgpu_device *adev) adev->gfx.mec_fw_write_wait = true; break; default: + adev->gfx.me_fw_write_wait = true; + adev->gfx.mec_fw_write_wait = true; break; } } From 7e7ea24f0b46cd3078bc9af29d1c1aced89d1c8e Mon Sep 17 00:00:00 2001 From: Alex Deucher Date: Wed, 8 Apr 2020 09:30:11 -0400 Subject: [PATCH 235/280] drm/amdgpu/display: fix warning when compiling without debugfs MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit fixes unused variable warning. 
Reported-by: Eric Biggers Acked-by: Christian König Reviewed-by: Mikita Lipski Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c index 2167cabd0384..f7c5cdc10a70 100644 --- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c +++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c @@ -4723,10 +4723,10 @@ amdgpu_dm_connector_atomic_duplicate_state(struct drm_connector *connector) static int amdgpu_dm_connector_late_register(struct drm_connector *connector) { +#if defined(CONFIG_DEBUG_FS) struct amdgpu_dm_connector *amdgpu_dm_connector = to_amdgpu_dm_connector(connector); -#if defined(CONFIG_DEBUG_FS) connector_debugfs_init(amdgpu_dm_connector); #endif From b2ecb89c27a4fd110187e0afeca70557215f55a1 Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Thu, 2 Apr 2020 22:59:26 +0100 Subject: [PATCH 236/280] drm/legacy: Fix type for drm_local_map.offset drm_local_map.offset is not only used for resource_size_t but also dma_addr_t which may be of different sizes. 
Reported-by: Nathan Chancellor Fixes: 8e4ff9b56957 ("drm: Remove the dma_alloc_coherent wrapper for internal usage") Tested-by: Nathan Chancellor # build Signed-off-by: Chris Wilson Cc: Dave Airlie Cc: Nathan Chancellor Cc: Linus Torvalds Signed-off-by: Daniel Vetter Link: https://patchwork.freedesktop.org/patch/msgid/20200402215926.30714-1-chris@chris-wilson.co.uk --- include/drm/drm_legacy.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/include/drm/drm_legacy.h b/include/drm/drm_legacy.h index dcef3598f49e..aed382c17b26 100644 --- a/include/drm/drm_legacy.h +++ b/include/drm/drm_legacy.h @@ -136,7 +136,7 @@ struct drm_sg_mem { * Kernel side of a mapping */ struct drm_local_map { - resource_size_t offset; /**< Requested physical address (0 for SAREA)*/ + dma_addr_t offset; /**< Requested physical address (0 for SAREA)*/ unsigned long size; /**< Requested physical size (bytes) */ enum drm_map_type type; /**< Type of memory to map */ enum drm_map_flags flags; /**< Flags */ From 152cce0006abf7e17dfb7dc94896b044bda4e588 Mon Sep 17 00:00:00 2001 From: Marek Szyprowski Date: Tue, 10 Mar 2020 11:34:27 +0100 Subject: [PATCH 237/280] drm/bridge: analogix_dp: Split bind() into probe() and real bind() Analogix_dp driver acquires all its resources in the ->bind() callback, which is a bit against the component driver based approach, where the driver initialization is split into a probe(), where all resources are gathered, and a bind(), where all objects are created and a compound driver is initialized. Extract all the resource related operations to analogix_dp_probe() and analogix_dp_remove(), then call them before/after registration of the device components from the main Exynos DP and Rockchip DP drivers. Also move the plat_data initialization to the probe() to make it available for the analogix_dp_probe() function.
This fixes the multiple calls to the bind() of the DRM compound driver when the DP PHY driver is not yet loaded/probed: [drm] Exynos DRM: using 14400000.fimd device for DMA mapping operations exynos-drm exynos-drm: bound 14400000.fimd (ops fimd_component_ops [exynosdrm]) exynos-drm exynos-drm: bound 14450000.mixer (ops mixer_component_ops [exynosdrm]) exynos-dp 145b0000.dp-controller: no DP phy configured exynos-drm exynos-drm: failed to bind 145b0000.dp-controller (ops exynos_dp_ops [exynosdrm]): -517 exynos-drm exynos-drm: master bind failed: -517 ... [drm] Exynos DRM: using 14400000.fimd device for DMA mapping operations exynos-drm exynos-drm: bound 14400000.fimd (ops hdmi_enable [exynosdrm]) exynos-drm exynos-drm: bound 14450000.mixer (ops hdmi_enable [exynosdrm]) exynos-drm exynos-drm: bound 145b0000.dp-controller (ops hdmi_enable [exynosdrm]) exynos-drm exynos-drm: bound 14530000.hdmi (ops hdmi_enable [exynosdrm]) [drm] Supports vblank timestamp caching Rev 2 (21.10.2013). Console: switching to colour frame buffer device 170x48 exynos-drm exynos-drm: fb0: exynosdrmfb frame buffer device [drm] Initialized exynos 1.1.0 20180330 for exynos-drm on minor 1 ... 
Signed-off-by: Marek Szyprowski Acked-by: Andy Yan Reviewed-by: Andrzej Hajda Signed-off-by: Andrzej Hajda Link: https://patchwork.freedesktop.org/patch/msgid/20200310103427.26048-1-m.szyprowski@samsung.com (cherry picked from commit 83a196773b8bc6702f49df1eddc848180e350340) Signed-off-by: Maxime Ripard --- .../drm/bridge/analogix/analogix_dp_core.c | 33 +++++++++++------ drivers/gpu/drm/exynos/exynos_dp.c | 29 ++++++++------- .../gpu/drm/rockchip/analogix_dp-rockchip.c | 36 ++++++++++--------- include/drm/bridge/analogix_dp.h | 5 +-- 4 files changed, 61 insertions(+), 42 deletions(-) diff --git a/drivers/gpu/drm/bridge/analogix/analogix_dp_core.c b/drivers/gpu/drm/bridge/analogix/analogix_dp_core.c index 9ded2cef57dd..76736fb8ed94 100644 --- a/drivers/gpu/drm/bridge/analogix/analogix_dp_core.c +++ b/drivers/gpu/drm/bridge/analogix/analogix_dp_core.c @@ -1652,8 +1652,7 @@ static ssize_t analogix_dpaux_transfer(struct drm_dp_aux *aux, } struct analogix_dp_device * -analogix_dp_bind(struct device *dev, struct drm_device *drm_dev, - struct analogix_dp_plat_data *plat_data) +analogix_dp_probe(struct device *dev, struct analogix_dp_plat_data *plat_data) { struct platform_device *pdev = to_platform_device(dev); struct analogix_dp_device *dp; @@ -1756,22 +1755,30 @@ analogix_dp_bind(struct device *dev, struct drm_device *drm_dev, irq_flags, "analogix-dp", dp); if (ret) { dev_err(&pdev->dev, "failed to request irq\n"); - goto err_disable_pm_runtime; + return ERR_PTR(ret); } disable_irq(dp->irq); + return dp; +} +EXPORT_SYMBOL_GPL(analogix_dp_probe); + +int analogix_dp_bind(struct analogix_dp_device *dp, struct drm_device *drm_dev) +{ + int ret; + dp->drm_dev = drm_dev; dp->encoder = dp->plat_data->encoder; dp->aux.name = "DP-AUX"; dp->aux.transfer = analogix_dpaux_transfer; - dp->aux.dev = &pdev->dev; + dp->aux.dev = dp->dev; ret = drm_dp_aux_register(&dp->aux); if (ret) - return ERR_PTR(ret); + return ret; - pm_runtime_enable(dev); + pm_runtime_enable(dp->dev); ret = 
analogix_dp_create_bridge(drm_dev, dp); if (ret) { @@ -1779,13 +1786,12 @@ analogix_dp_bind(struct device *dev, struct drm_device *drm_dev, goto err_disable_pm_runtime; } - return dp; + return 0; err_disable_pm_runtime: + pm_runtime_disable(dp->dev); - pm_runtime_disable(dev); - - return ERR_PTR(ret); + return ret; } EXPORT_SYMBOL_GPL(analogix_dp_bind); @@ -1802,10 +1808,15 @@ void analogix_dp_unbind(struct analogix_dp_device *dp) drm_dp_aux_unregister(&dp->aux); pm_runtime_disable(dp->dev); - clk_disable_unprepare(dp->clock); } EXPORT_SYMBOL_GPL(analogix_dp_unbind); +void analogix_dp_remove(struct analogix_dp_device *dp) +{ + clk_disable_unprepare(dp->clock); +} +EXPORT_SYMBOL_GPL(analogix_dp_remove); + #ifdef CONFIG_PM int analogix_dp_suspend(struct analogix_dp_device *dp) { diff --git a/drivers/gpu/drm/exynos/exynos_dp.c b/drivers/gpu/drm/exynos/exynos_dp.c index d23d3502ca91..5ee090691390 100644 --- a/drivers/gpu/drm/exynos/exynos_dp.c +++ b/drivers/gpu/drm/exynos/exynos_dp.c @@ -159,15 +159,8 @@ static int exynos_dp_bind(struct device *dev, struct device *master, void *data) struct drm_device *drm_dev = data; int ret; - dp->dev = dev; dp->drm_dev = drm_dev; - dp->plat_data.dev_type = EXYNOS_DP; - dp->plat_data.power_on_start = exynos_dp_poweron; - dp->plat_data.power_off = exynos_dp_poweroff; - dp->plat_data.attach = exynos_dp_bridge_attach; - dp->plat_data.get_modes = exynos_dp_get_modes; - if (!dp->plat_data.panel && !dp->ptn_bridge) { ret = exynos_dp_dt_parse_panel(dp); if (ret) @@ -185,13 +178,11 @@ static int exynos_dp_bind(struct device *dev, struct device *master, void *data) dp->plat_data.encoder = encoder; - dp->adp = analogix_dp_bind(dev, dp->drm_dev, &dp->plat_data); - if (IS_ERR(dp->adp)) { + ret = analogix_dp_bind(dp->adp, dp->drm_dev); + if (ret) dp->encoder.funcs->destroy(&dp->encoder); - return PTR_ERR(dp->adp); - } - return 0; + return ret; } static void exynos_dp_unbind(struct device *dev, struct device *master, @@ -222,6 +213,7 @@ static int 
exynos_dp_probe(struct platform_device *pdev) if (!dp) return -ENOMEM; + dp->dev = dev; /* * We just use the drvdata until driver run into component * add function, and then we would set drvdata to null, so @@ -247,16 +239,29 @@ static int exynos_dp_probe(struct platform_device *pdev) /* The remote port can be either a panel or a bridge */ dp->plat_data.panel = panel; + dp->plat_data.dev_type = EXYNOS_DP; + dp->plat_data.power_on_start = exynos_dp_poweron; + dp->plat_data.power_off = exynos_dp_poweroff; + dp->plat_data.attach = exynos_dp_bridge_attach; + dp->plat_data.get_modes = exynos_dp_get_modes; dp->plat_data.skip_connector = !!bridge; + dp->ptn_bridge = bridge; out: + dp->adp = analogix_dp_probe(dev, &dp->plat_data); + if (IS_ERR(dp->adp)) + return PTR_ERR(dp->adp); + return component_add(&pdev->dev, &exynos_dp_ops); } static int exynos_dp_remove(struct platform_device *pdev) { + struct exynos_dp_device *dp = platform_get_drvdata(pdev); + component_del(&pdev->dev, &exynos_dp_ops); + analogix_dp_remove(dp->adp); return 0; } diff --git a/drivers/gpu/drm/rockchip/analogix_dp-rockchip.c b/drivers/gpu/drm/rockchip/analogix_dp-rockchip.c index f38f5e113c6b..ce98c08aa8b4 100644 --- a/drivers/gpu/drm/rockchip/analogix_dp-rockchip.c +++ b/drivers/gpu/drm/rockchip/analogix_dp-rockchip.c @@ -325,15 +325,9 @@ static int rockchip_dp_bind(struct device *dev, struct device *master, void *data) { struct rockchip_dp_device *dp = dev_get_drvdata(dev); - const struct rockchip_dp_chip_data *dp_data; struct drm_device *drm_dev = data; int ret; - dp_data = of_device_get_match_data(dev); - if (!dp_data) - return -ENODEV; - - dp->data = dp_data; dp->drm_dev = drm_dev; ret = rockchip_dp_drm_create_encoder(dp); @@ -344,16 +338,9 @@ static int rockchip_dp_bind(struct device *dev, struct device *master, dp->plat_data.encoder = &dp->encoder; - dp->plat_data.dev_type = dp->data->chip_type; - dp->plat_data.power_on_start = rockchip_dp_poweron_start; - dp->plat_data.power_off = 
rockchip_dp_powerdown; - dp->plat_data.get_modes = rockchip_dp_get_modes; - - dp->adp = analogix_dp_bind(dev, dp->drm_dev, &dp->plat_data); - if (IS_ERR(dp->adp)) { - ret = PTR_ERR(dp->adp); + ret = analogix_dp_bind(dp->adp, drm_dev); + if (ret) goto err_cleanup_encoder; - } return 0; err_cleanup_encoder: @@ -368,8 +355,6 @@ static void rockchip_dp_unbind(struct device *dev, struct device *master, analogix_dp_unbind(dp->adp); dp->encoder.funcs->destroy(&dp->encoder); - - dp->adp = ERR_PTR(-ENODEV); } static const struct component_ops rockchip_dp_component_ops = { @@ -380,10 +365,15 @@ static const struct component_ops rockchip_dp_component_ops = { static int rockchip_dp_probe(struct platform_device *pdev) { struct device *dev = &pdev->dev; + const struct rockchip_dp_chip_data *dp_data; struct drm_panel *panel = NULL; struct rockchip_dp_device *dp; int ret; + dp_data = of_device_get_match_data(dev); + if (!dp_data) + return -ENODEV; + ret = drm_of_find_panel_or_bridge(dev->of_node, 1, 0, &panel, NULL); if (ret < 0) return ret; @@ -394,7 +384,12 @@ static int rockchip_dp_probe(struct platform_device *pdev) dp->dev = dev; dp->adp = ERR_PTR(-ENODEV); + dp->data = dp_data; dp->plat_data.panel = panel; + dp->plat_data.dev_type = dp->data->chip_type; + dp->plat_data.power_on_start = rockchip_dp_poweron_start; + dp->plat_data.power_off = rockchip_dp_powerdown; + dp->plat_data.get_modes = rockchip_dp_get_modes; ret = rockchip_dp_of_probe(dp); if (ret < 0) @@ -402,12 +397,19 @@ static int rockchip_dp_probe(struct platform_device *pdev) platform_set_drvdata(pdev, dp); + dp->adp = analogix_dp_probe(dev, &dp->plat_data); + if (IS_ERR(dp->adp)) + return PTR_ERR(dp->adp); + return component_add(dev, &rockchip_dp_component_ops); } static int rockchip_dp_remove(struct platform_device *pdev) { + struct rockchip_dp_device *dp = platform_get_drvdata(pdev); + component_del(&pdev->dev, &rockchip_dp_component_ops); + analogix_dp_remove(dp->adp); return 0; } diff --git 
a/include/drm/bridge/analogix_dp.h b/include/drm/bridge/analogix_dp.h index 7aa2f93da49c..b0dcc07334a1 100644 --- a/include/drm/bridge/analogix_dp.h +++ b/include/drm/bridge/analogix_dp.h @@ -42,9 +42,10 @@ int analogix_dp_resume(struct analogix_dp_device *dp); int analogix_dp_suspend(struct analogix_dp_device *dp); struct analogix_dp_device * -analogix_dp_bind(struct device *dev, struct drm_device *drm_dev, - struct analogix_dp_plat_data *plat_data); +analogix_dp_probe(struct device *dev, struct analogix_dp_plat_data *plat_data); +int analogix_dp_bind(struct analogix_dp_device *dp, struct drm_device *drm_dev); void analogix_dp_unbind(struct analogix_dp_device *dp); +void analogix_dp_remove(struct analogix_dp_device *dp); int analogix_dp_start_crc(struct drm_connector *connector); int analogix_dp_stop_crc(struct drm_connector *connector); From d6f34f4c6b4a962eb7a86c923fea206f866a40be Mon Sep 17 00:00:00 2001 From: Juergen Gross Date: Thu, 9 Apr 2020 09:00:01 +0200 Subject: [PATCH 238/280] x86/xen: fix booting 32-bit pv guest Commit 2f62f36e62daec ("x86/xen: Make the boot CPU idle task reliable") introduced a regression for booting 32 bit Xen PV guests: the address of the initial stack needs to be a virtual one. 
Fixes: 2f62f36e62daec ("x86/xen: Make the boot CPU idle task reliable") Signed-off-by: Juergen Gross Reviewed-by: Boris Ostrovsky Link: https://lore.kernel.org/r/20200409070001.16675-1-jgross@suse.com Signed-off-by: Juergen Gross --- arch/x86/xen/xen-head.S | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/x86/xen/xen-head.S b/arch/x86/xen/xen-head.S index 7d1c4fcbe8f7..1ba601df3a37 100644 --- a/arch/x86/xen/xen-head.S +++ b/arch/x86/xen/xen-head.S @@ -38,7 +38,7 @@ SYM_CODE_START(startup_xen) #ifdef CONFIG_X86_64 mov initial_stack(%rip), %rsp #else - mov pa(initial_stack), %esp + mov initial_stack, %esp #endif #ifdef CONFIG_X86_64 From 7667e63c8af90e287f9e2d070599024cbabe63f5 Mon Sep 17 00:00:00 2001 From: Jian-Hong Pan Date: Thu, 28 Nov 2019 16:10:42 +0800 Subject: [PATCH 239/280] ahci: Add Intel Comet Lake PCH RAID PCI ID Intel Comet Lake should use the default LPM policy for mobile chipsets. So, add the PCI ID to the driver list of supported devices. Signed-off-by: Jian-Hong Pan Signed-off-by: Jens Axboe --- drivers/ata/ahci.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/ata/ahci.c b/drivers/ata/ahci.c index f23a99278c1d..0101b65250cb 100644 --- a/drivers/ata/ahci.c +++ b/drivers/ata/ahci.c @@ -410,6 +410,7 @@ static const struct pci_device_id ahci_pci_tbl[] = { { PCI_VDEVICE(INTEL, 0x22a3), board_ahci_mobile }, /* Cherry Tr. 
AHCI */ { PCI_VDEVICE(INTEL, 0x5ae3), board_ahci_mobile }, /* ApolloLake AHCI */ { PCI_VDEVICE(INTEL, 0x34d3), board_ahci_mobile }, /* Ice Lake LP AHCI */ + { PCI_VDEVICE(INTEL, 0x02d7), board_ahci_mobile }, /* Comet Lake PCH RAID */ /* JMicron 360/1/3/5/6, match class to avoid IDE function */ { PCI_VENDOR_ID_JMICRON, PCI_ANY_ID, PCI_ANY_ID, PCI_ANY_ID, From c398ecb3d611925e4a5411afdf7489914a5c0460 Mon Sep 17 00:00:00 2001 From: Pavel Begunkov Date: Thu, 9 Apr 2020 08:17:59 +0300 Subject: [PATCH 240/280] io_uring: fix fs cleanup on cqe overflow If completion queue overflow occurs, __io_cqring_fill_event() will update req->cflags, which is in a union with req->work and happens to be aliased to req->work.fs. Following io_free_req() -> io_req_work_drop_env() may get a bunch of different problems (miscount fs->users, segfault, etc) on cleaning @fs. Signed-off-by: Pavel Begunkov Signed-off-by: Jens Axboe --- fs/io_uring.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fs/io_uring.c b/fs/io_uring.c index 21e1c69b9c43..be65eda059ac 100644 --- a/fs/io_uring.c +++ b/fs/io_uring.c @@ -608,6 +608,7 @@ struct io_kiocb { }; struct io_async_ctx *io; + int cflags; bool needs_fixed_file; u8 opcode; @@ -638,7 +639,6 @@ struct io_kiocb { struct callback_head task_work; struct hlist_node hash_node; struct async_poll *apoll; - int cflags; }; struct io_wq_work work; }; From 63f818f46af9f8b3f17b9695501e8d08959feb60 Mon Sep 17 00:00:00 2001 From: "Eric W. 
Biederman" Date: Tue, 7 Apr 2020 09:43:04 -0500 Subject: [PATCH 241/280] proc: Use a dedicated lock in struct pid syzbot wrote: > ======================================================== > WARNING: possible irq lock inversion dependency detected > 5.6.0-syzkaller #0 Not tainted > -------------------------------------------------------- > swapper/1/0 just changed the state of lock: > ffffffff898090d8 (tasklist_lock){.+.?}-{2:2}, at: send_sigurg+0x9f/0x320 fs/fcntl.c:840 > but this lock took another, SOFTIRQ-unsafe lock in the past: > (&pid->wait_pidfd){+.+.}-{2:2} > > > and interrupts could create inverse lock ordering between them. > > > other info that might help us debug this: > Possible interrupt unsafe locking scenario: > > CPU0 CPU1 > ---- ---- > lock(&pid->wait_pidfd); > local_irq_disable(); > lock(tasklist_lock); > lock(&pid->wait_pidfd); > > lock(tasklist_lock); > > *** DEADLOCK *** > > 4 locks held by swapper/1/0: The problem is that because wait_pidfd.lock is taken under the tasklist lock, it must always be taken with irqs disabled, as tasklist_lock can be taken from interrupt context and if wait_pidfd.lock was already taken this would create a lock order inversion. Oleg suggested just disabling irqs where I have added extra calls to wait_pidfd.lock. That should be safe and I think the code will eventually do that. It was rightly pointed out by Christian that sharing the wait_pidfd.lock was a premature optimization. It is also true that my pre-merge window testing was insufficient. So remove the premature optimization and give struct pid a dedicated lock of its own for struct pid things. I have verified that lockdep sees all 3 paths where we take the new pid->lock and lockdep does not complain. It is my current day dream that one day pid->lock can be used to guard the task lists as well and then the tasklist_lock won't need to be held to deliver signals. That will require taking pid->lock with irqs disabled.
Acked-by: Christian Brauner Link: https://lore.kernel.org/lkml/00000000000011d66805a25cd73f@google.com/ Cc: Oleg Nesterov Cc: Christian Brauner Reported-by: syzbot+343f75cdeea091340956@syzkaller.appspotmail.com Reported-by: syzbot+832aabf700bc3ec920b9@syzkaller.appspotmail.com Reported-by: syzbot+f675f964019f884dbd0f@syzkaller.appspotmail.com Reported-by: syzbot+a9fb1457d720a55d6dc5@syzkaller.appspotmail.com Fixes: 7bc3e6e55acf ("proc: Use a list of inodes to flush from proc") Signed-off-by: "Eric W. Biederman" --- fs/proc/base.c | 10 +++++----- include/linux/pid.h | 1 + kernel/pid.c | 1 + 3 files changed, 7 insertions(+), 5 deletions(-) diff --git a/fs/proc/base.c b/fs/proc/base.c index 74f948a6b621..6042b646ab27 100644 --- a/fs/proc/base.c +++ b/fs/proc/base.c @@ -1839,9 +1839,9 @@ void proc_pid_evict_inode(struct proc_inode *ei) struct pid *pid = ei->pid; if (S_ISDIR(ei->vfs_inode.i_mode)) { - spin_lock(&pid->wait_pidfd.lock); + spin_lock(&pid->lock); hlist_del_init_rcu(&ei->sibling_inodes); - spin_unlock(&pid->wait_pidfd.lock); + spin_unlock(&pid->lock); } put_pid(pid); @@ -1877,9 +1877,9 @@ struct inode *proc_pid_make_inode(struct super_block * sb, /* Let the pid remember us for quick removal */ ei->pid = pid; if (S_ISDIR(mode)) { - spin_lock(&pid->wait_pidfd.lock); + spin_lock(&pid->lock); hlist_add_head_rcu(&ei->sibling_inodes, &pid->inodes); - spin_unlock(&pid->wait_pidfd.lock); + spin_unlock(&pid->lock); } task_dump_owner(task, 0, &inode->i_uid, &inode->i_gid); @@ -3273,7 +3273,7 @@ static const struct inode_operations proc_tgid_base_inode_operations = { void proc_flush_pid(struct pid *pid) { - proc_invalidate_siblings_dcache(&pid->inodes, &pid->wait_pidfd.lock); + proc_invalidate_siblings_dcache(&pid->inodes, &pid->lock); put_pid(pid); } diff --git a/include/linux/pid.h b/include/linux/pid.h index 01a0d4e28506..cc896f0fc4e3 100644 --- a/include/linux/pid.h +++ b/include/linux/pid.h @@ -60,6 +60,7 @@ struct pid { refcount_t count; unsigned int level; + 
spinlock_t lock; /* lists of tasks that use this pid */ struct hlist_head tasks[PIDTYPE_MAX]; struct hlist_head inodes; diff --git a/kernel/pid.c b/kernel/pid.c index efd34874b3d1..517d0855d4cf 100644 --- a/kernel/pid.c +++ b/kernel/pid.c @@ -246,6 +246,7 @@ struct pid *alloc_pid(struct pid_namespace *ns, pid_t *set_tid, get_pid_ns(ns); refcount_set(&pid->count, 1); + spin_lock_init(&pid->lock); for (type = 0; type < PIDTYPE_MAX; ++type) INIT_HLIST_HEAD(&pid->tasks[type]); From c445c1668dca9cb908f75e000ab20351bce7c416 Mon Sep 17 00:00:00 2001 From: "Thomas Hellstrom (VMware)" Date: Thu, 9 Apr 2020 18:49:25 +0200 Subject: [PATCH 242/280] drm/ttm: Temporarily disable the huge_fault() callback MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit With amdgpu and CONFIG_TRANSPARENT_HUGEPAGE_ALWAYS=y, there are errors like: BUG: non-zero pgtables_bytes on freeing mm and: BUG: Bad rss-counter state with TTM transparent huge-pages. Until we've figured out what other TTM drivers do differently compared to vmwgfx, disable the huge_fault() callback, eliminating transhuge page-table entries. Cc: Christian König Signed-off-by: Thomas Hellstrom (VMware) Reported-by: Alex Xu (Hello71) Tested-by: Alex Xu (Hello71) Acked-by: Christian König Signed-off-by: Dave Airlie Link: https://patchwork.freedesktop.org/patch/msgid/20200409164925.11912-1-thomas_os@shipmail.org --- drivers/gpu/drm/ttm/ttm_bo_vm.c | 63 --------------------------------- 1 file changed, 63 deletions(-) diff --git a/drivers/gpu/drm/ttm/ttm_bo_vm.c b/drivers/gpu/drm/ttm/ttm_bo_vm.c index 0af14835504c..0871c7d6cb8c 100644 --- a/drivers/gpu/drm/ttm/ttm_bo_vm.c +++ b/drivers/gpu/drm/ttm/ttm_bo_vm.c @@ -436,66 +436,6 @@ vm_fault_t ttm_bo_vm_fault(struct vm_fault *vmf) } EXPORT_SYMBOL(ttm_bo_vm_fault); -#ifdef CONFIG_TRANSPARENT_HUGEPAGE -/** - * ttm_pgprot_is_wrprotecting - Is a page protection value write-protecting? 
- * @prot: The page protection value - * - * Return: true if @prot is write-protecting. false otherwise. - */ -static bool ttm_pgprot_is_wrprotecting(pgprot_t prot) -{ - /* - * This is meant to say "pgprot_wrprotect(prot) == prot" in a generic - * way. Unfortunately there is no generic pgprot_wrprotect. - */ - return pte_val(pte_wrprotect(__pte(pgprot_val(prot)))) == - pgprot_val(prot); -} - -static vm_fault_t ttm_bo_vm_huge_fault(struct vm_fault *vmf, - enum page_entry_size pe_size) -{ - struct vm_area_struct *vma = vmf->vma; - pgprot_t prot; - struct ttm_buffer_object *bo = vma->vm_private_data; - vm_fault_t ret; - pgoff_t fault_page_size = 0; - bool write = vmf->flags & FAULT_FLAG_WRITE; - - switch (pe_size) { - case PE_SIZE_PMD: - fault_page_size = HPAGE_PMD_SIZE >> PAGE_SHIFT; - break; -#ifdef CONFIG_HAVE_ARCH_TRANSPARENT_HUGEPAGE_PUD - case PE_SIZE_PUD: - fault_page_size = HPAGE_PUD_SIZE >> PAGE_SHIFT; - break; -#endif - default: - WARN_ON_ONCE(1); - return VM_FAULT_FALLBACK; - } - - /* Fallback on write dirty-tracking or COW */ - if (write && ttm_pgprot_is_wrprotecting(vma->vm_page_prot)) - return VM_FAULT_FALLBACK; - - ret = ttm_bo_vm_reserve(bo, vmf); - if (ret) - return ret; - - prot = vm_get_page_prot(vma->vm_flags); - ret = ttm_bo_vm_fault_reserved(vmf, prot, 1, fault_page_size); - if (ret == VM_FAULT_RETRY && !(vmf->flags & FAULT_FLAG_RETRY_NOWAIT)) - return ret; - - dma_resv_unlock(bo->base.resv); - - return ret; -} -#endif - void ttm_bo_vm_open(struct vm_area_struct *vma) { struct ttm_buffer_object *bo = vma->vm_private_data; @@ -598,9 +538,6 @@ static const struct vm_operations_struct ttm_bo_vm_ops = { .open = ttm_bo_vm_open, .close = ttm_bo_vm_close, .access = ttm_bo_vm_access, -#ifdef CONFIG_TRANSPARENT_HUGEPAGE - .huge_fault = ttm_bo_vm_huge_fault, -#endif }; static struct ttm_buffer_object *ttm_bo_vm_lookup(struct ttm_bo_device *bdev, From 85faa7b8346ebef0606d2d0df6d3f8c76acb3654 Mon Sep 17 00:00:00 2001 From: Jens Axboe Date: Thu, 9 Apr 2020 
18:14:00 -0600 Subject: [PATCH 243/280] io_uring: punt final io_ring_ctx wait-and-free to workqueue We can't reliably wait in io_ring_ctx_wait_and_kill(), since the task_works list isn't ordered (in fact it's LIFO ordered). We could either fix this with a separate task_works list for io_uring work, or just punt the wait-and-free to async context. This ensures that task_work that comes in while we're shutting down is processed correctly. If we don't go async, we could have work past the fput() work for the ring that depends on work that won't be executed until after we're done with the wait-and-free. But as this operation is blocking, it'll never get a chance to run. This was reproduced with hundreds of thousands of sockets running memcached, haven't been able to reproduce this synthetically. Reported-by: Dan Melnic Signed-off-by: Jens Axboe --- fs/io_uring.c | 18 ++++++++++++++++-- 1 file changed, 16 insertions(+), 2 deletions(-) diff --git a/fs/io_uring.c b/fs/io_uring.c index be65eda059ac..5190bfb6a665 100644 --- a/fs/io_uring.c +++ b/fs/io_uring.c @@ -326,6 +326,8 @@ struct io_ring_ctx { spinlock_t inflight_lock; struct list_head inflight_list; } ____cacheline_aligned_in_smp; + + struct work_struct exit_work; }; /* @@ -7271,6 +7273,18 @@ static int io_remove_personalities(int id, void *p, void *data) return 0; } +static void io_ring_exit_work(struct work_struct *work) +{ + struct io_ring_ctx *ctx; + + ctx = container_of(work, struct io_ring_ctx, exit_work); + if (ctx->rings) + io_cqring_overflow_flush(ctx, true); + + wait_for_completion(&ctx->completions[0]); + io_ring_ctx_free(ctx); +} + static void io_ring_ctx_wait_and_kill(struct io_ring_ctx *ctx) { mutex_lock(&ctx->uring_lock); @@ -7298,8 +7312,8 @@ static void io_ring_ctx_wait_and_kill(struct io_ring_ctx *ctx) if (ctx->rings) io_cqring_overflow_flush(ctx, true); idr_for_each(&ctx->personality_idr, io_remove_personalities, ctx); - wait_for_completion(&ctx->completions[0]); - io_ring_ctx_free(ctx); + 
INIT_WORK(&ctx->exit_work, io_ring_exit_work); + queue_work(system_wq, &ctx->exit_work); } static int io_uring_release(struct inode *inode, struct file *file) From cb6b771b05c3026a85ed4817c1b87c5e6f41d136 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Fri, 10 Apr 2020 14:31:47 +0200 Subject: [PATCH 244/280] block: fix busy device checking in blk_drop_partitions again The previous fix had an off by one in the bd_openers checking, counting the callers blkdev_get. Fixes: d3ef5536274f ("block: fix busy device checking in blk_drop_partitions") Reported-by: Qian Cai Signed-off-by: Christoph Hellwig Tested-by: Qian Cai Signed-off-by: Jens Axboe --- block/partitions/core.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/block/partitions/core.c b/block/partitions/core.c index 1a0a829d8416..bc1ded1331b1 100644 --- a/block/partitions/core.c +++ b/block/partitions/core.c @@ -496,7 +496,7 @@ int blk_drop_partitions(struct gendisk *disk, struct block_device *bdev) if (!disk_part_scan_enabled(disk)) return 0; - if (bdev->bd_part_count || bdev->bd_openers) + if (bdev->bd_part_count || bdev->bd_openers > 1) return -EBUSY; res = invalidate_partition(disk, 0); if (res) From befacdcf47d4a19eb584ae5b43da40941e8e660c Mon Sep 17 00:00:00 2001 From: Randy Dunlap Date: Wed, 8 Apr 2020 10:29:50 -0700 Subject: [PATCH 245/280] Documentation: android: binderfs: add 'stats' mount option Add documentation of the binderfs 'stats' mount option. Description taken from the commit message. 
Signed-off-by: Randy Dunlap Acked-by: Christian Brauner Link: https://lore.kernel.org/r/baa0aa81-007d-af46-16a5-91fead0bd1b9@infradead.org Signed-off-by: Jonathan Corbet --- Documentation/admin-guide/binderfs.rst | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/Documentation/admin-guide/binderfs.rst b/Documentation/admin-guide/binderfs.rst index c009671f8434..8243af9b3510 100644 --- a/Documentation/admin-guide/binderfs.rst +++ b/Documentation/admin-guide/binderfs.rst @@ -33,6 +33,12 @@ max a per-instance limit. If ``max=`` is set then only ```` number of binder devices can be allocated in this binderfs instance. +stats + Using ``stats=global`` enables global binder statistics. + ``stats=global`` is only available for a binderfs instance mounted in the + initial user namespace. An attempt to use the option to mount a binderfs + instance in another user namespace will return a permission error. + Allocating binder Devices ------------------------- From ab6f762f0f53162d41497708b33c9a3236d3609e Mon Sep 17 00:00:00 2001 From: Sergey Senozhatsky Date: Tue, 3 Mar 2020 20:30:02 +0900 Subject: [PATCH 246/280] printk: queue wake_up_klogd irq_work only if per-CPU areas are ready printk_deferred(), similarly to printk_safe/printk_nmi, does not immediately attempt to print a new message on the consoles, avoiding calls into non-reentrant kernel paths, e.g. scheduler or timekeeping, which potentially can deadlock the system. Those printk() flavors, instead, rely on per-CPU flush irq_work to print messages from safer contexts. For same reasons (recursive scheduler or timekeeping calls) printk() uses per-CPU irq_work in order to wake up user space syslog/kmsg readers. However, only printk_safe/printk_nmi do make sure that per-CPU areas have been initialised and that it's safe to modify per-CPU irq_work. 
This means that, for instance, should printk_deferred() be invoked "too early", that is before per-CPU areas are initialised, printk_deferred() will perform illegal per-CPU access. Lech Perczak [0] reports that after commit 1b710b1b10ef ("char/random: silence a lockdep splat with printk()") user-space syslog/kmsg readers are not able to read new kernel messages. The reason is printk_deferred() being called too early (as was pointed out by Petr and John). Fix printk_deferred() and do not queue per-CPU irq_work before per-CPU areas are initialized. Link: https://lore.kernel.org/lkml/aa0732c6-5c4e-8a8b-a1c1-75ebe3dca05b@camlintechnologies.com/ Reported-by: Lech Perczak Signed-off-by: Sergey Senozhatsky Tested-by: Jann Horn Reviewed-by: Petr Mladek Cc: Greg Kroah-Hartman Cc: Theodore Ts'o Cc: John Ogness Signed-off-by: Linus Torvalds --- include/linux/printk.h | 5 ----- init/main.c | 1 - kernel/printk/internal.h | 5 +++++ kernel/printk/printk.c | 34 ++++++++++++++++++++++++++++++++++ kernel/printk/printk_safe.c | 11 +---------- 5 files changed, 40 insertions(+), 16 deletions(-) diff --git a/include/linux/printk.h b/include/linux/printk.h index 1e6108b8d15f..e061635e0409 100644 --- a/include/linux/printk.h +++ b/include/linux/printk.h @@ -202,7 +202,6 @@ __printf(1, 2) void dump_stack_set_arch_desc(const char *fmt, ...); void dump_stack_print_info(const char *log_lvl); void show_regs_print_info(const char *log_lvl); extern asmlinkage void dump_stack(void) __cold; -extern void printk_safe_init(void); extern void printk_safe_flush(void); extern void printk_safe_flush_on_panic(void); #else @@ -269,10 +268,6 @@ static inline void dump_stack(void) { } -static inline void printk_safe_init(void) -{ -} - static inline void printk_safe_flush(void) { } diff --git a/init/main.c b/init/main.c index e488213857e2..a48617f2e5e5 100644 --- a/init/main.c +++ b/init/main.c @@ -913,7 +913,6 @@ asmlinkage __visible void __init start_kernel(void) boot_init_stack_canary(); time_init(); - 
printk_safe_init(); perf_event_init(); profile_init(); call_function_init(); diff --git a/kernel/printk/internal.h b/kernel/printk/internal.h index c8e6ab689d42..b2b0f526f249 100644 --- a/kernel/printk/internal.h +++ b/kernel/printk/internal.h @@ -23,6 +23,9 @@ __printf(1, 0) int vprintk_func(const char *fmt, va_list args); void __printk_safe_enter(void); void __printk_safe_exit(void); +void printk_safe_init(void); +bool printk_percpu_data_ready(void); + #define printk_safe_enter_irqsave(flags) \ do { \ local_irq_save(flags); \ @@ -64,4 +67,6 @@ __printf(1, 0) int vprintk_func(const char *fmt, va_list args) { return 0; } #define printk_safe_enter_irq() local_irq_disable() #define printk_safe_exit_irq() local_irq_enable() +static inline void printk_safe_init(void) { } +static inline bool printk_percpu_data_ready(void) { return false; } #endif /* CONFIG_PRINTK */ diff --git a/kernel/printk/printk.c b/kernel/printk/printk.c index 633f41a11d75..9a9b6156270b 100644 --- a/kernel/printk/printk.c +++ b/kernel/printk/printk.c @@ -460,6 +460,18 @@ static char __log_buf[__LOG_BUF_LEN] __aligned(LOG_ALIGN); static char *log_buf = __log_buf; static u32 log_buf_len = __LOG_BUF_LEN; +/* + * We cannot access per-CPU data (e.g. per-CPU flush irq_work) before + * per_cpu_areas are initialised. This variable is set to true when + * it's safe to access per-CPU data. 
+ */ +static bool __printk_percpu_data_ready __read_mostly; + +bool printk_percpu_data_ready(void) +{ + return __printk_percpu_data_ready; +} + /* Return log buffer address */ char *log_buf_addr_get(void) { @@ -1146,12 +1158,28 @@ static void __init log_buf_add_cpu(void) static inline void log_buf_add_cpu(void) {} #endif /* CONFIG_SMP */ +static void __init set_percpu_data_ready(void) +{ + printk_safe_init(); + /* Make sure we set this flag only after printk_safe() init is done */ + barrier(); + __printk_percpu_data_ready = true; +} + void __init setup_log_buf(int early) { unsigned long flags; char *new_log_buf; unsigned int free; + /* + * Some archs call setup_log_buf() multiple times - first is very + * early, e.g. from setup_arch(), and second - when percpu_areas + * are initialised. + */ + if (!early) + set_percpu_data_ready(); + if (log_buf != __log_buf) return; @@ -2975,6 +3003,9 @@ static DEFINE_PER_CPU(struct irq_work, wake_up_klogd_work) = { void wake_up_klogd(void) { + if (!printk_percpu_data_ready()) + return; + preempt_disable(); if (waitqueue_active(&log_wait)) { this_cpu_or(printk_pending, PRINTK_PENDING_WAKEUP); @@ -2985,6 +3016,9 @@ void wake_up_klogd(void) void defer_console_output(void) { + if (!printk_percpu_data_ready()) + return; + preempt_disable(); __this_cpu_or(printk_pending, PRINTK_PENDING_OUTPUT); irq_work_queue(this_cpu_ptr(&wake_up_klogd_work)); diff --git a/kernel/printk/printk_safe.c b/kernel/printk/printk_safe.c index b4045e782743..d9a659a686f3 100644 --- a/kernel/printk/printk_safe.c +++ b/kernel/printk/printk_safe.c @@ -27,7 +27,6 @@ * There are situations when we want to make sure that all buffers * were handled or when IRQs are blocked. */ -static int printk_safe_irq_ready __read_mostly; #define SAFE_LOG_BUF_LEN ((1 << CONFIG_PRINTK_SAFE_LOG_BUF_SHIFT) - \ sizeof(atomic_t) - \ @@ -51,7 +50,7 @@ static DEFINE_PER_CPU(struct printk_safe_seq_buf, nmi_print_seq); /* Get flushed in a more safe context. 
*/ static void queue_flush_work(struct printk_safe_seq_buf *s) { - if (printk_safe_irq_ready) + if (printk_percpu_data_ready()) irq_work_queue(&s->work); } @@ -402,14 +401,6 @@ void __init printk_safe_init(void) #endif } - /* - * In the highly unlikely event that a NMI were to trigger at - * this moment. Make sure IRQ work is set up before this - * variable is set. - */ - barrier(); - printk_safe_irq_ready = 1; - /* Flush pending messages that did not have scheduled IRQ works. */ printk_safe_flush(); } From 25efb2ffdf991177e740b2f63e92b4ec7d310a92 Mon Sep 17 00:00:00 2001 From: Simon Gander Date: Fri, 10 Apr 2020 14:32:16 -0700 Subject: [PATCH 247/280] hfsplus: fix crash and filesystem corruption when deleting files When removing files containing extended attributes, the hfsplus driver may remove the wrong entries from the attributes b-tree, causing major filesystem damage and in some cases even kernel crashes. To remove a file, all its extended attributes have to be removed as well. The driver does this by looking up all keys in the attributes b-tree with the cnid of the file. Each of these entries then gets deleted using the key used for searching, which doesn't contain the attribute's name when it should. Since the key doesn't contain the name, the deletion routine will not find the correct entry and instead remove the one in front of it. If parent nodes have to be modified, these become corrupt as well. This causes invalid links and unsorted entries that not even macOS's fsck_hfs is able to fix. To fix this, modify the search key before an entry is deleted from the attributes b-tree by copying the found entry's key into the search key, therefore ensuring that the correct entry gets removed from the tree. 
Signed-off-by: Simon Gander Signed-off-by: Andrew Morton Reviewed-by: Anton Altaparmakov Cc: Link: http://lkml.kernel.org/r/20200327155541.1521-1-simon@tuxera.com Signed-off-by: Linus Torvalds --- fs/hfsplus/attributes.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/fs/hfsplus/attributes.c b/fs/hfsplus/attributes.c index e6d554476db4..eeebe80c6be4 100644 --- a/fs/hfsplus/attributes.c +++ b/fs/hfsplus/attributes.c @@ -292,6 +292,10 @@ static int __hfsplus_delete_attr(struct inode *inode, u32 cnid, return -ENOENT; } + /* Avoid btree corruption */ + hfs_bnode_read(fd->bnode, fd->search_key, + fd->keyoffset, fd->keylength); + err = hfs_brec_remove(fd); if (err) return err; From 9b8b17541f13809d06f6f873325305ddbb760e3e Mon Sep 17 00:00:00 2001 From: Jakub Kicinski Date: Fri, 10 Apr 2020 14:32:19 -0700 Subject: [PATCH 248/280] mm, memcg: do not high throttle allocators based on wraparound If a cgroup violates its memory.high constraints, we may end up unduly penalising it. For example, for the following hierarchy: A: max high, 20 usage A/B: 9 high, 10 usage A/C: max high, 10 usage We would end up doing the following calculation below when calculating high delay for A/B: A/B: 10 - 9 = 1... A: 20 - PAGE_COUNTER_MAX = 21, so set max_overage to 21. This gets worse with higher disparities in usage in the parent. I have no idea how this disappeared from the final version of the patch, but it is certainly Not Good(tm). This wasn't obvious in testing because, for a simple cgroup hierarchy with only one child, the result is usually roughly the same. It's only in more complex hierarchies that things go really awry (although still, the effects are limited to a maximum of 2 seconds in schedule_timeout_killable at a maximum). 
[chris@chrisdown.name: changelog] Fixes: e26733e0d0ec ("mm, memcg: throttle allocators based on ancestral memory.high") Signed-off-by: Jakub Kicinski Signed-off-by: Chris Down Signed-off-by: Andrew Morton Acked-by: Michal Hocko Cc: Johannes Weiner Cc: [5.4.x] Link: http://lkml.kernel.org/r/20200331152424.GA1019937@chrisdown.name Signed-off-by: Linus Torvalds --- mm/memcontrol.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/mm/memcontrol.c b/mm/memcontrol.c index 05b4ec2c6499..5beea03dd58a 100644 --- a/mm/memcontrol.c +++ b/mm/memcontrol.c @@ -2336,6 +2336,9 @@ static unsigned long calculate_high_delay(struct mem_cgroup *memcg, usage = page_counter_read(&memcg->memory); high = READ_ONCE(memcg->high); + if (usage <= high) + continue; + /* * Prevent division by 0 in overage calculation by acting as if * it was a threshold of 1 page From b991cee567bf045097d9426719d7f1477bd7dc59 Mon Sep 17 00:00:00 2001 From: Qiujun Huang Date: Fri, 10 Apr 2020 14:32:22 -0700 Subject: [PATCH 249/280] mm, slab_common: fix a typo in comment "eariler"->"earlier" There is a typo in comment, fix it. s/eariler/earlier/ Signed-off-by: Qiujun Huang Signed-off-by: Andrew Morton Reviewed-by: Andrew Morton Acked-by: Christoph Lameter Link: http://lkml.kernel.org/r/20200405160544.1246-1-hqjagain@gmail.com Signed-off-by: Linus Torvalds --- mm/slab_common.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/mm/slab_common.c b/mm/slab_common.c index 93ec4a574d8d..23c7500eea7d 100644 --- a/mm/slab_common.c +++ b/mm/slab_common.c @@ -731,7 +731,7 @@ static void kmemcg_rcufn(struct rcu_head *head) /* * We need to grab blocking locks. Bounce to ->work. The * work item shares the space with the RCU head and can't be - * initialized eariler. + * initialized earlier. 
*/ INIT_WORK(&s->memcg_params.work, kmemcg_workfn); queue_work(memcg_kmem_cache_wq, &s->memcg_params.work); From 2370ae4b1d5aa7eb70bd7539a420e791d4b0123b Mon Sep 17 00:00:00 2001 From: Mauro Carvalho Chehab Date: Fri, 10 Apr 2020 14:32:25 -0700 Subject: [PATCH 250/280] docs: mm: slab.h: fix a broken cross-reference There is a typo at the cross-reference link, causing this warning: include/linux/slab.h:11: WARNING: undefined label: memory-allocation (if the link has no caption the label must precede a section header) Signed-off-by: Mauro Carvalho Chehab Signed-off-by: Andrew Morton Cc: Jonathan Corbet Cc: Christoph Lameter Cc: Pekka Enberg Cc: David Rientjes Cc: Joonsoo Kim Link: http://lkml.kernel.org/r/0aeac24235d356ebd935d11e147dcc6edbb6465c.1586359676.git.mchehab+huawei@kernel.org Signed-off-by: Linus Torvalds --- include/linux/slab.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/include/linux/slab.h b/include/linux/slab.h index 03a389358562..6d454886bcaf 100644 --- a/include/linux/slab.h +++ b/include/linux/slab.h @@ -501,7 +501,7 @@ static __always_inline void *kmalloc_large(size_t size, gfp_t flags) * :ref:`Documentation/core-api/mm-api.rst <mm-api-gfp-flags>` * * The recommended usage of the @flags is described at - * :ref:`Documentation/core-api/memory-allocation.rst <memory-allocation>` + * :ref:`Documentation/core-api/memory-allocation.rst <memory_allocation>` * * Below is a brief outline of the most useful GFP flags * From e6a0a7ad1c2b6608e294fbbce60c42ba5a1304ce Mon Sep 17 00:00:00 2001 From: Randy Dunlap Date: Fri, 10 Apr 2020 14:32:29 -0700 Subject: [PATCH 251/280] mm/page_alloc.c: fix kernel-doc warning Add description of function parameter 'mt' to fix kernel-doc warning: mm/page_alloc.c:3246: warning: Function parameter or member 'mt' not described in '__putback_isolated_page' Signed-off-by: Randy Dunlap Signed-off-by: Andrew Morton Acked-by: Pankaj Gupta Link: http://lkml.kernel.org/r/02998bd4-0b82-2f15-2570-f86130304d1e@infradead.org Signed-off-by: Linus Torvalds --- mm/page_alloc.c
| 1 + 1 file changed, 1 insertion(+) diff --git a/mm/page_alloc.c b/mm/page_alloc.c index 114c56c3685d..d98441ab1036 100644 --- a/mm/page_alloc.c +++ b/mm/page_alloc.c @@ -3224,6 +3224,7 @@ int __isolate_free_page(struct page *page, unsigned int order) * __putback_isolated_page - Return a now-isolated page back where we got it * @page: Page that was isolated * @order: Order of the isolated page + * @mt: The page's pageblock's migratetype * * This function is meant to return a page pulled from the free lists via * __isolate_free_page back to the free lists they were pulled from. From 8b885f53b03e3b14003083daf64a6ed16bf561b3 Mon Sep 17 00:00:00 2001 From: Jason Yan Date: Fri, 10 Apr 2020 14:32:32 -0700 Subject: [PATCH 252/280] mm/page_alloc: make pcpu_drain_mutex and pcpu_drain static Fix the following sparse warning: mm/page_alloc.c:106:1: warning: symbol 'pcpu_drain_mutex' was not declared. Should it be static? mm/page_alloc.c:107:1: warning: symbol '__pcpu_scope_pcpu_drain' was not declared. Should it be static? 
Reported-by: Hulk Robot Signed-off-by: Jason Yan Signed-off-by: Andrew Morton Link: http://lkml.kernel.org/r/20200407023925.46438-1-yanaijie@huawei.com Signed-off-by: Linus Torvalds --- mm/page_alloc.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/mm/page_alloc.c b/mm/page_alloc.c index d98441ab1036..69827d4fa052 100644 --- a/mm/page_alloc.c +++ b/mm/page_alloc.c @@ -103,8 +103,8 @@ struct pcpu_drain { struct zone *zone; struct work_struct work; }; -DEFINE_MUTEX(pcpu_drain_mutex); -DEFINE_PER_CPU(struct pcpu_drain, pcpu_drain); +static DEFINE_MUTEX(pcpu_drain_mutex); +static DEFINE_PER_CPU(struct pcpu_drain, pcpu_drain); #ifdef CONFIG_GCC_PLUGIN_LATENT_ENTROPY volatile unsigned long latent_entropy __latent_entropy; From 783fda856e1034dee90a873f7654c418212d12d7 Mon Sep 17 00:00:00 2001 From: Changwei Ge Date: Fri, 10 Apr 2020 14:32:38 -0700 Subject: [PATCH 253/280] ocfs2: no need try to truncate file beyond i_size Linux fallocate(2) with FALLOC_FL_PUNCH_HOLE mode set, its offset can exceed the inode size. Ocfs2 now doesn't allow that offset beyond inode size. This restriction is not necessary and violates fallocate(2) semantics. If fallocate(2) offset is beyond inode size, just return success and do nothing further. Otherwise, ocfs2 will crash the kernel. kernel BUG at fs/ocfs2//alloc.c:7264! 
ocfs2_truncate_inline+0x20f/0x360 [ocfs2] ocfs2_remove_inode_range+0x23c/0xcb0 [ocfs2] __ocfs2_change_file_space+0x4a5/0x650 [ocfs2] ocfs2_fallocate+0x83/0xa0 [ocfs2] vfs_fallocate+0x148/0x230 SyS_fallocate+0x48/0x80 do_syscall_64+0x79/0x170 Signed-off-by: Changwei Ge Signed-off-by: Andrew Morton Reviewed-by: Joseph Qi Cc: Mark Fasheh Cc: Joel Becker Cc: Junxiao Bi Cc: Changwei Ge Cc: Gang He Cc: Jun Piao Cc: Link: http://lkml.kernel.org/r/20200407082754.17565-1-chge@linux.alibaba.com Signed-off-by: Linus Torvalds --- fs/ocfs2/alloc.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/fs/ocfs2/alloc.c b/fs/ocfs2/alloc.c index 65b3abbcce4e..2f834add165b 100644 --- a/fs/ocfs2/alloc.c +++ b/fs/ocfs2/alloc.c @@ -7402,6 +7402,10 @@ int ocfs2_truncate_inline(struct inode *inode, struct buffer_head *di_bh, struct ocfs2_dinode *di = (struct ocfs2_dinode *)di_bh->b_data; struct ocfs2_inline_data *idata = &di->id2.i_data; + /* No need to punch hole beyond i_size. */ + if (start >= i_size_read(inode)) + return 0; + if (end > i_size_read(inode)) end = i_size_read(inode); From 8676af1ff2d28e64e5636147821bda7524cf007d Mon Sep 17 00:00:00 2001 From: Aslan Bakirov Date: Fri, 10 Apr 2020 14:32:42 -0700 Subject: [PATCH 254/280] mm: cma: NUMA node interface I've noticed that there is no interface exposed by CMA which would let me declare contiguous memory on a particular NUMA node. This patchset adds the ability to try to allocate contiguous memory on a specific node. It will fall back to other nodes if the specified one doesn't work. Implement a new method for declaring contiguous memory on a particular node and keep cma_declare_contiguous() as a wrapper.
[akpm@linux-foundation.org: build fix] Signed-off-by: Aslan Bakirov Signed-off-by: Roman Gushchin Signed-off-by: Andrew Morton Acked-by: Michal Hocko Cc: Andreas Schaufler Cc: Mike Kravetz Cc: Rik van Riel Cc: Joonsoo Kim Link: http://lkml.kernel.org/r/20200407163840.92263-2-guro@fb.com Signed-off-by: Linus Torvalds --- include/linux/cma.h | 14 ++++++++++++-- include/linux/memblock.h | 3 +++ mm/cma.c | 16 +++++++++------- mm/memblock.c | 2 +- 4 files changed, 25 insertions(+), 10 deletions(-) diff --git a/include/linux/cma.h b/include/linux/cma.h index 190184b5ff32..6ff79fefd01f 100644 --- a/include/linux/cma.h +++ b/include/linux/cma.h @@ -4,6 +4,7 @@ #include #include +#include /* * There is always at least global CMA area and a few optional @@ -24,10 +25,19 @@ extern phys_addr_t cma_get_base(const struct cma *cma); extern unsigned long cma_get_size(const struct cma *cma); extern const char *cma_get_name(const struct cma *cma); -extern int __init cma_declare_contiguous(phys_addr_t base, +extern int __init cma_declare_contiguous_nid(phys_addr_t base, phys_addr_t size, phys_addr_t limit, phys_addr_t alignment, unsigned int order_per_bit, - bool fixed, const char *name, struct cma **res_cma); + bool fixed, const char *name, struct cma **res_cma, + int nid); +static inline int __init cma_declare_contiguous(phys_addr_t base, + phys_addr_t size, phys_addr_t limit, + phys_addr_t alignment, unsigned int order_per_bit, + bool fixed, const char *name, struct cma **res_cma) +{ + return cma_declare_contiguous_nid(base, size, limit, alignment, + order_per_bit, fixed, name, res_cma, NUMA_NO_NODE); +} extern int cma_init_reserved_mem(phys_addr_t base, phys_addr_t size, unsigned int order_per_bit, const char *name, diff --git a/include/linux/memblock.h b/include/linux/memblock.h index 079d17d96410..6bc37a731d27 100644 --- a/include/linux/memblock.h +++ b/include/linux/memblock.h @@ -348,6 +348,9 @@ static inline int memblock_get_region_node(const struct memblock_region *r) 
phys_addr_t memblock_phys_alloc_range(phys_addr_t size, phys_addr_t align, phys_addr_t start, phys_addr_t end); +phys_addr_t memblock_alloc_range_nid(phys_addr_t size, + phys_addr_t align, phys_addr_t start, + phys_addr_t end, int nid, bool exact_nid); phys_addr_t memblock_phys_alloc_try_nid(phys_addr_t size, phys_addr_t align, int nid); static inline phys_addr_t memblock_phys_alloc(phys_addr_t size, diff --git a/mm/cma.c b/mm/cma.c index be55d1988c67..0463ad2ce06b 100644 --- a/mm/cma.c +++ b/mm/cma.c @@ -220,7 +220,7 @@ int __init cma_init_reserved_mem(phys_addr_t base, phys_addr_t size, } /** - * cma_declare_contiguous() - reserve custom contiguous area + * cma_declare_contiguous_nid() - reserve custom contiguous area * @base: Base address of the reserved area optional, use 0 for any * @size: Size of the reserved area (in bytes), * @limit: End address of the reserved memory (optional, 0 for any). @@ -229,6 +229,7 @@ int __init cma_init_reserved_mem(phys_addr_t base, phys_addr_t size, * @fixed: hint about where to place the reserved area * @name: The name of the area. See function cma_init_reserved_mem() * @res_cma: Pointer to store the created cma region. + * @nid: nid of the free area to find, %NUMA_NO_NODE for any node * * This function reserves memory from early allocator. It should be * called by arch specific code once the early allocator (memblock or bootmem) @@ -238,10 +239,11 @@ int __init cma_init_reserved_mem(phys_addr_t base, phys_addr_t size, * If @fixed is true, reserve contiguous area at exactly @base. If false, * reserve in range from @base to @limit. 
*/ -int __init cma_declare_contiguous(phys_addr_t base, +int __init cma_declare_contiguous_nid(phys_addr_t base, phys_addr_t size, phys_addr_t limit, phys_addr_t alignment, unsigned int order_per_bit, - bool fixed, const char *name, struct cma **res_cma) + bool fixed, const char *name, struct cma **res_cma, + int nid) { phys_addr_t memblock_end = memblock_end_of_DRAM(); phys_addr_t highmem_start; @@ -336,14 +338,14 @@ int __init cma_declare_contiguous(phys_addr_t base, * memory in case of failure. */ if (base < highmem_start && limit > highmem_start) { - addr = memblock_phys_alloc_range(size, alignment, - highmem_start, limit); + addr = memblock_alloc_range_nid(size, alignment, + highmem_start, limit, nid, false); limit = highmem_start; } if (!addr) { - addr = memblock_phys_alloc_range(size, alignment, base, - limit); + addr = memblock_alloc_range_nid(size, alignment, base, + limit, nid, false); if (!addr) { ret = -ENOMEM; goto err; diff --git a/mm/memblock.c b/mm/memblock.c index 4d06bbaded0f..c79ba6f9920c 100644 --- a/mm/memblock.c +++ b/mm/memblock.c @@ -1349,7 +1349,7 @@ __next_mem_pfn_range_in_zone(u64 *idx, struct zone *zone, * Return: * Physical address of allocated memory block on success, %0 on failure. */ -static phys_addr_t __init memblock_alloc_range_nid(phys_addr_t size, +phys_addr_t __init memblock_alloc_range_nid(phys_addr_t size, phys_addr_t align, phys_addr_t start, phys_addr_t end, int nid, bool exact_nid) From cf11e85fc08cc6a4fe3ac2ba2e610c962bf20bc3 Mon Sep 17 00:00:00 2001 From: Roman Gushchin Date: Fri, 10 Apr 2020 14:32:45 -0700 Subject: [PATCH 255/280] mm: hugetlb: optionally allocate gigantic hugepages using cma Commit 944d9fec8d7a ("hugetlb: add support for gigantic page allocation at runtime") has added the run-time allocation of gigantic pages. However it actually works only at early stages of the system loading, when the majority of memory is free. 
After some time the memory gets fragmented by non-movable pages, so the chances to find a contiguous 1GB block are getting close to zero. Even dropping caches manually doesn't help a lot. At large scale rebooting servers in order to allocate gigantic hugepages is quite expensive and complex. At the same time keeping some constant percentage of memory in reserved hugepages even if the workload isn't using it is a big waste: not all workloads can benefit from using 1 GB pages. The following solution can solve the problem: 1) At boot time a dedicated cma area* is reserved. The size is passed as a kernel argument. 2) Run-time allocations of gigantic hugepages are performed using the cma allocator and the dedicated cma area. In this case gigantic hugepages can be allocated successfully with a high probability, however the memory isn't completely wasted if nobody is using 1GB hugepages: it can be used for pagecache, anon memory, THPs, etc. * On a multi-node machine a per-node cma area is allocated on each node. Subsequent gigantic hugetlb allocations use the first available numa node if the mask isn't specified by a user. Usage: 1) configure the kernel to allocate a cma area for hugetlb allocations: pass hugetlb_cma=10G as a kernel argument 2) allocate hugetlb pages as usual, e.g. echo 10 > /sys/kernel/mm/hugepages/hugepages-1048576kB/nr_hugepages If the option isn't enabled or the allocation of the cma area failed, the current behavior of the system is preserved. x86 and arm64 are covered by this patch, other architectures can be trivially added later. The patch contains clean-ups and fixes proposed and implemented by Aslan Bakirov and Randy Dunlap. It also contains ideas and suggestions proposed by Rik van Riel, Michal Hocko and Mike Kravetz. Thanks!
Signed-off-by: Roman Gushchin Signed-off-by: Andrew Morton Tested-by: Andreas Schaufler Acked-by: Mike Kravetz Acked-by: Michal Hocko Cc: Aslan Bakirov Cc: Randy Dunlap Cc: Rik van Riel Cc: Joonsoo Kim Link: http://lkml.kernel.org/r/20200407163840.92263-3-guro@fb.com Signed-off-by: Linus Torvalds --- .../admin-guide/kernel-parameters.txt | 8 ++ arch/arm64/mm/init.c | 6 + arch/x86/kernel/setup.c | 4 + include/linux/hugetlb.h | 12 ++ mm/hugetlb.c | 109 ++++++++++++++++++ 5 files changed, 139 insertions(+) diff --git a/Documentation/admin-guide/kernel-parameters.txt b/Documentation/admin-guide/kernel-parameters.txt index 86aae1fa099a..d7df9a8302c4 100644 --- a/Documentation/admin-guide/kernel-parameters.txt +++ b/Documentation/admin-guide/kernel-parameters.txt @@ -1475,6 +1475,14 @@ hpet_mmap= [X86, HPET_MMAP] Allow userspace to mmap HPET registers. Default set by CONFIG_HPET_MMAP_DEFAULT. + hugetlb_cma= [HW] The size of a cma area used for allocation + of gigantic hugepages. + Format: nn[KMGTPE] + + Reserve a cma area of given size and allocate gigantic + hugepages using the cma allocator. If enabled, the + boot-time allocation of gigantic hugepages is skipped. + hugepages= [HW,X86-32,IA-64] HugeTLB pages to allocate at boot. hugepagesz= [HW,IA-64,PPC,X86-64] The size of the HugeTLB pages. 
On x86-64 and powerpc, this option can be specified diff --git a/arch/arm64/mm/init.c b/arch/arm64/mm/init.c index b65dffdfb201..e42727e3568e 100644 --- a/arch/arm64/mm/init.c +++ b/arch/arm64/mm/init.c @@ -29,6 +29,7 @@ #include #include #include +#include #include #include @@ -457,6 +458,11 @@ void __init arm64_memblock_init(void) high_memory = __va(memblock_end_of_DRAM() - 1) + 1; dma_contiguous_reserve(arm64_dma32_phys_limit); + +#ifdef CONFIG_ARM64_4K_PAGES + hugetlb_cma_reserve(PUD_SHIFT - PAGE_SHIFT); +#endif + } void __init bootmem_init(void) diff --git a/arch/x86/kernel/setup.c b/arch/x86/kernel/setup.c index e6b545047f38..4b3fa6cd3106 100644 --- a/arch/x86/kernel/setup.c +++ b/arch/x86/kernel/setup.c @@ -16,6 +16,7 @@ #include #include #include +#include #include #include @@ -1157,6 +1158,9 @@ void __init setup_arch(char **cmdline_p) initmem_init(); dma_contiguous_reserve(max_pfn_mapped << PAGE_SHIFT); + if (boot_cpu_has(X86_FEATURE_GBPAGES)) + hugetlb_cma_reserve(PUD_SHIFT - PAGE_SHIFT); + /* * Reserve memory for crash kernel after SRAT is parsed so that it * won't consume hotpluggable memory. 
diff --git a/include/linux/hugetlb.h b/include/linux/hugetlb.h index 5ea05879a0a9..43a1cef8f0f1 100644 --- a/include/linux/hugetlb.h +++ b/include/linux/hugetlb.h @@ -895,4 +895,16 @@ static inline spinlock_t *huge_pte_lock(struct hstate *h, return ptl; } +#if defined(CONFIG_HUGETLB_PAGE) && defined(CONFIG_CMA) +extern void __init hugetlb_cma_reserve(int order); +extern void __init hugetlb_cma_check(void); +#else +static inline __init void hugetlb_cma_reserve(int order) +{ +} +static inline __init void hugetlb_cma_check(void) +{ +} +#endif + #endif /* _LINUX_HUGETLB_H */ diff --git a/mm/hugetlb.c b/mm/hugetlb.c index f5fb53fdfa02..cd459155d28a 100644 --- a/mm/hugetlb.c +++ b/mm/hugetlb.c @@ -28,6 +28,7 @@ #include #include #include +#include #include #include @@ -44,6 +45,9 @@ int hugetlb_max_hstate __read_mostly; unsigned int default_hstate_idx; struct hstate hstates[HUGE_MAX_HSTATE]; + +static struct cma *hugetlb_cma[MAX_NUMNODES]; + /* * Minimum page order among possible hugepage sizes, set to a proper value * at boot time. @@ -1228,6 +1232,14 @@ static void destroy_compound_gigantic_page(struct page *page, static void free_gigantic_page(struct page *page, unsigned int order) { + /* + * If the page isn't allocated using the cma allocator, + * cma_release() returns false. 
+ */ + if (IS_ENABLED(CONFIG_CMA) && + cma_release(hugetlb_cma[page_to_nid(page)], page, 1 << order)) + return; + free_contig_range(page_to_pfn(page), 1 << order); } @@ -1237,6 +1249,21 @@ static struct page *alloc_gigantic_page(struct hstate *h, gfp_t gfp_mask, { unsigned long nr_pages = 1UL << huge_page_order(h); + if (IS_ENABLED(CONFIG_CMA)) { + struct page *page; + int node; + + for_each_node_mask(node, *nodemask) { + if (!hugetlb_cma[node]) + continue; + + page = cma_alloc(hugetlb_cma[node], nr_pages, + huge_page_order(h), true); + if (page) + return page; + } + } + return alloc_contig_pages(nr_pages, gfp_mask, nid, nodemask); } @@ -1281,8 +1308,14 @@ static void update_and_free_page(struct hstate *h, struct page *page) set_compound_page_dtor(page, NULL_COMPOUND_DTOR); set_page_refcounted(page); if (hstate_is_gigantic(h)) { + /* + * Temporarily drop the hugetlb_lock, because + * we might block in free_gigantic_page(). + */ + spin_unlock(&hugetlb_lock); destroy_compound_gigantic_page(page, huge_page_order(h)); free_gigantic_page(page, huge_page_order(h)); + spin_lock(&hugetlb_lock); } else { __free_pages(page, huge_page_order(h)); } @@ -2539,6 +2572,10 @@ static void __init hugetlb_hstate_alloc_pages(struct hstate *h) for (i = 0; i < h->max_huge_pages; ++i) { if (hstate_is_gigantic(h)) { + if (IS_ENABLED(CONFIG_CMA) && hugetlb_cma[0]) { + pr_warn_once("HugeTLB: hugetlb_cma is enabled, skip boot time allocation\n"); + break; + } if (!alloc_bootmem_huge_page(h)) break; } else if (!alloc_pool_huge_page(h, @@ -3194,6 +3231,7 @@ static int __init hugetlb_init(void) default_hstate.max_huge_pages = default_hstate_max_huge_pages; } + hugetlb_cma_check(); hugetlb_init_hstates(); gather_bootmem_prealloc(); report_hugepages(); @@ -5506,3 +5544,74 @@ void move_hugetlb_state(struct page *oldpage, struct page *newpage, int reason) spin_unlock(&hugetlb_lock); } } + +#ifdef CONFIG_CMA +static unsigned long hugetlb_cma_size __initdata; +static bool cma_reserve_called 
__initdata; + +static int __init cmdline_parse_hugetlb_cma(char *p) +{ + hugetlb_cma_size = memparse(p, &p); + return 0; +} + +early_param("hugetlb_cma", cmdline_parse_hugetlb_cma); + +void __init hugetlb_cma_reserve(int order) +{ + unsigned long size, reserved, per_node; + int nid; + + cma_reserve_called = true; + + if (!hugetlb_cma_size) + return; + + if (hugetlb_cma_size < (PAGE_SIZE << order)) { + pr_warn("hugetlb_cma: cma area should be at least %lu MiB\n", + (PAGE_SIZE << order) / SZ_1M); + return; + } + + /* + * If 3 GB area is requested on a machine with 4 numa nodes, + * let's allocate 1 GB on first three nodes and ignore the last one. + */ + per_node = DIV_ROUND_UP(hugetlb_cma_size, nr_online_nodes); + pr_info("hugetlb_cma: reserve %lu MiB, up to %lu MiB per node\n", + hugetlb_cma_size / SZ_1M, per_node / SZ_1M); + + reserved = 0; + for_each_node_state(nid, N_ONLINE) { + int res; + + size = min(per_node, hugetlb_cma_size - reserved); + size = round_up(size, PAGE_SIZE << order); + + res = cma_declare_contiguous_nid(0, size, 0, PAGE_SIZE << order, + 0, false, "hugetlb", + &hugetlb_cma[nid], nid); + if (res) { + pr_warn("hugetlb_cma: reservation failed: err %d, node %d", + res, nid); + continue; + } + + reserved += size; + pr_info("hugetlb_cma: reserved %lu MiB on node %d\n", + size / SZ_1M, nid); + + if (reserved >= hugetlb_cma_size) + break; + } +} + +void __init hugetlb_cma_check(void) +{ + if (!hugetlb_cma_size || cma_reserve_called) + return; + + pr_warn("hugetlb_cma: the option isn't supported by current arch\n"); +} + +#endif /* CONFIG_CMA */ From 09ef5283fd96ac424ef0e569626f359bf9ab86c9 Mon Sep 17 00:00:00 2001 From: Jaewon Kim Date: Fri, 10 Apr 2020 14:32:48 -0700 Subject: [PATCH 256/280] mm/mmap.c: initialize align_offset explicitly for vm_unmapped_area On passing requirement to vm_unmapped_area, arch_get_unmapped_area and arch_get_unmapped_area_topdown did not set align_offset. 
Internally on both unmapped_area and unmapped_area_topdown, if info->align_mask is 0, then info->align_offset was meaningless. But commit df529cabb7a2 ("mm: mmap: add trace point of vm_unmapped_area") always prints info->align_offset even though it is uninitialized. Fix this uninitialized value issue by setting it to 0 explicitly. Before: vm_unmapped_area: addr=0x755b155000 err=0 total_vm=0x15aaf0 flags=0x1 len=0x109000 lo=0x8000 hi=0x75eed48000 mask=0x0 ofs=0x4022 After: vm_unmapped_area: addr=0x74a4ca1000 err=0 total_vm=0x168ab1 flags=0x1 len=0x9000 lo=0x8000 hi=0x753d94b000 mask=0x0 ofs=0x0 Signed-off-by: Jaewon Kim Signed-off-by: Andrew Morton Reviewed-by: Andrew Morton Cc: Matthew Wilcox (Oracle) Cc: Michel Lespinasse Cc: Borislav Petkov Link: http://lkml.kernel.org/r/20200409094035.19457-1-jaewon31.kim@samsung.com Signed-off-by: Linus Torvalds --- mm/mmap.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/mm/mmap.c b/mm/mmap.c index 8d77dbbb80fe..de07bbc0e21f 100644 --- a/mm/mmap.c +++ b/mm/mmap.c @@ -2123,6 +2123,7 @@ arch_get_unmapped_area(struct file *filp, unsigned long addr, info.low_limit = mm->mmap_base; info.high_limit = mmap_end; info.align_mask = 0; + info.align_offset = 0; return vm_unmapped_area(&info); } #endif @@ -2164,6 +2165,7 @@ arch_get_unmapped_area_topdown(struct file *filp, unsigned long addr, info.low_limit = max(PAGE_SIZE, mmap_min_addr); info.high_limit = arch_get_mmap_base(addr, mm->mmap_base); info.align_mask = 0; + info.align_offset = 0; addr = vm_unmapped_area(&info); /* From 8efd6f5b1732c4ac88b4bb6908d481d95804fa1c Mon Sep 17 00:00:00 2001 From: Arjun Roy Date: Fri, 10 Apr 2020 14:32:51 -0700 Subject: [PATCH 257/280] mm/memory.c: refactor insert_page to prepare for batched-lock insert Add helper methods for vm_insert_page()/insert_page() to prepare for vm_insert_pages(), which batch-inserts pages to reduce spinlock operations when inserting multiple consecutive pages into the user page table. 
The intention of this patch-set is to reduce atomic ops for tcp zerocopy receives, which normally hit the same spinlock multiple times consecutively. Signed-off-by: Arjun Roy Signed-off-by: Eric Dumazet Signed-off-by: Soheil Hassas Yeganeh Signed-off-by: Andrew Morton Cc: David Miller Cc: Matthew Wilcox Cc: Jason Gunthorpe Cc: Stephen Rothwell Link: http://lkml.kernel.org/r/20200128025958.43490-1-arjunroy.kdev@gmail.com Signed-off-by: Linus Torvalds --- mm/memory.c | 39 ++++++++++++++++++++++++--------------- 1 file changed, 24 insertions(+), 15 deletions(-) diff --git a/mm/memory.c b/mm/memory.c index 19874d133a66..52a3303458cb 100644 --- a/mm/memory.c +++ b/mm/memory.c @@ -1442,6 +1442,27 @@ pte_t *__get_locked_pte(struct mm_struct *mm, unsigned long addr, return pte_alloc_map_lock(mm, pmd, addr, ptl); } +static int validate_page_before_insert(struct page *page) +{ + if (PageAnon(page) || PageSlab(page) || page_has_type(page)) + return -EINVAL; + flush_dcache_page(page); + return 0; +} + +static int insert_page_into_pte_locked(struct mm_struct *mm, pte_t *pte, + unsigned long addr, struct page *page, pgprot_t prot) +{ + if (!pte_none(*pte)) + return -EBUSY; + /* Ok, finally just insert the thing.. */ + get_page(page); + inc_mm_counter_fast(mm, mm_counter_file(page)); + page_add_file_rmap(page, false); + set_pte_at(mm, addr, pte, mk_pte(page, prot)); + return 0; +} + /* * This is the old fallback for page remapping. * @@ -1457,26 +1478,14 @@ static int insert_page(struct vm_area_struct *vma, unsigned long addr, pte_t *pte; spinlock_t *ptl; - retval = -EINVAL; - if (PageAnon(page) || PageSlab(page) || page_has_type(page)) + retval = validate_page_before_insert(page); + if (retval) goto out; retval = -ENOMEM; - flush_dcache_page(page); pte = get_locked_pte(mm, addr, &ptl); if (!pte) goto out; - retval = -EBUSY; - if (!pte_none(*pte)) - goto out_unlock; - - /* Ok, finally just insert the thing..
*/ - get_page(page); - inc_mm_counter_fast(mm, mm_counter_file(page)); - page_add_file_rmap(page, false); - set_pte_at(mm, addr, pte, mk_pte(page, prot)); - - retval = 0; -out_unlock: + retval = insert_page_into_pte_locked(mm, pte, addr, page, prot); pte_unmap_unlock(pte, ptl); out: return retval; From 251a0ffeaeee2a900765d98d44880943dce1047d Mon Sep 17 00:00:00 2001 From: Arjun Roy Date: Fri, 10 Apr 2020 14:32:54 -0700 Subject: [PATCH 258/280] mm: bring sparc pte_index() semantics inline with other platforms pte_index() on platforms other than sparc returns a numerical index. On sparc, it returns a pte_t*. This presents an issue for vm_insert_pages(), which relies on pte_index() to find the offset for a pte within a pmd, for batched inserts. This patch: 1. Modifies pte_index() for sparc to return a numerical index, like other platforms, 2. Defines pte_entry() for sparc which returns a pte_t* (as pte_index() used to), 3. Converts existing sparc callers for pte_index() to use pte_entry(). [sfr@canb.auug.org.au: remove pte_entry and just directly modified pte_offset_kernel instead] Signed-off-by: Arjun Roy Signed-off-by: Stephen Rothwell Signed-off-by: Andrew Morton Reviewed-by: Mike Rapoport Cc: Eric Dumazet Cc: Soheil Hassas Yeganeh Cc: David Miller Cc: Matthew Wilcox Cc: Arjun Roy Cc: Jason Gunthorpe Link: http://lkml.kernel.org/r/20200227105045.6b421d9f@canb.auug.org.au Signed-off-by: Linus Torvalds --- arch/sparc/include/asm/pgtable_64.h | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/arch/sparc/include/asm/pgtable_64.h b/arch/sparc/include/asm/pgtable_64.h index 65494c3a420e..da527b27cf7d 100644 --- a/arch/sparc/include/asm/pgtable_64.h +++ b/arch/sparc/include/asm/pgtable_64.h @@ -907,11 +907,11 @@ static inline unsigned long pud_pfn(pud_t pud) (((address) >> PMD_SHIFT) & (PTRS_PER_PMD-1))) /* Find an entry in the third-level page table..
*/ -#define pte_index(dir, address) \ - ((pte_t *) __pmd_page(*(dir)) + \ - ((address >> PAGE_SHIFT) & (PTRS_PER_PTE - 1))) -#define pte_offset_kernel pte_index -#define pte_offset_map pte_index +#define pte_index(address) \ + ((address >> PAGE_SHIFT) & (PTRS_PER_PTE - 1)) +#define pte_offset_kernel(dir, address) \ + ((pte_t *) __pmd_page(*(dir)) + pte_index(address)) +#define pte_offset_map(dir, address) pte_offset_kernel((dir), (address)) #define pte_unmap(pte) do { } while (0) /* We cannot include at this point yet: */ From c97078bd219cbe1a878b24bb4e61d312f19ece1f Mon Sep 17 00:00:00 2001 From: Arjun Roy Date: Fri, 10 Apr 2020 14:32:58 -0700 Subject: [PATCH 259/280] mm: define pte_index as macro for x86 pte_index() is either defined as a macro (e.g. sparc64) or as an inlined function (e.g. x86). vm_insert_pages() depends on pte_index but it is not defined on all platforms (e.g. m68k). To fix compilation of vm_insert_pages() on architectures not providing pte_index(), we perform the following fix: 0. For platforms where it is meaningful, and defined as a macro, no change is needed. 1. For platforms where it is meaningful and defined as an inlined function, and we want to use it with vm_insert_pages(), we define a degenerate macro of the form: #define pte_index pte_index 2. vm_insert_pages() checks for the existence of a pte_index macro definition. If found, it implements a batched insert. If not found, it devolves to calling vm_insert_page() in a loop. This patch implements step 1 for x86. v3 of this patch fixes a compilation warning for an unused method. v2 of this patch moved a macro definition to a more readable location. 
Signed-off-by: Arjun Roy Signed-off-by: Andrew Morton Cc: David Miller Cc: Eric Dumazet Cc: Jason Gunthorpe Cc: Matthew Wilcox Cc: Soheil Hassas Yeganeh Cc: Stephen Rothwell Link: http://lkml.kernel.org/r/20200228054714.204424-1-arjunroy.kdev@gmail.com Signed-off-by: Linus Torvalds --- arch/x86/include/asm/pgtable.h | 3 +++ 1 file changed, 3 insertions(+) diff --git a/arch/x86/include/asm/pgtable.h b/arch/x86/include/asm/pgtable.h index 28838d790191..abad0da0973a 100644 --- a/arch/x86/include/asm/pgtable.h +++ b/arch/x86/include/asm/pgtable.h @@ -860,7 +860,10 @@ static inline unsigned long pmd_index(unsigned long address) * * this function returns the index of the entry in the pte page which would * control the given virtual address + * + * Also define macro so we can test if pte_index is defined for arch. */ +#define pte_index pte_index static inline unsigned long pte_index(unsigned long address) { return (address >> PAGE_SHIFT) & (PTRS_PER_PTE - 1); From 8cd3984d81d5fd5e18bccb12d7d228a114ec2508 Mon Sep 17 00:00:00 2001 From: Arjun Roy Date: Fri, 10 Apr 2020 14:33:01 -0700 Subject: [PATCH 260/280] mm/memory.c: add vm_insert_pages() Add the ability to insert multiple pages at once to a user VM with fewer PTE spinlock operations. The intention of this patch-set is to reduce atomic ops for tcp zerocopy receives, which normally hit the same spinlock multiple times consecutively.
[akpm@linux-foundation.org: pte_alloc() no longer takes the `addr' argument] [arjunroy@google.com: add missing page_count() check to vm_insert_pages()] Link: http://lkml.kernel.org/r/20200214005929.104481-1-arjunroy.kdev@gmail.com [arjunroy@google.com: vm_insert_pages() checks if pte_index defined] Link: http://lkml.kernel.org/r/20200228054714.204424-2-arjunroy.kdev@gmail.com Signed-off-by: Arjun Roy Signed-off-by: Eric Dumazet Signed-off-by: Soheil Hassas Yeganeh Signed-off-by: Andrew Morton Cc: David Miller Cc: Matthew Wilcox Cc: Jason Gunthorpe Cc: Stephen Rothwell Link: http://lkml.kernel.org/r/20200128025958.43490-2-arjunroy.kdev@gmail.com Signed-off-by: Linus Torvalds --- include/linux/mm.h | 2 + mm/memory.c | 129 ++++++++++++++++++++++++++++++++++++++++++++- 2 files changed, 129 insertions(+), 2 deletions(-) diff --git a/include/linux/mm.h b/include/linux/mm.h index e2f938c5a9d8..ed896cedd4c4 100644 --- a/include/linux/mm.h +++ b/include/linux/mm.h @@ -2689,6 +2689,8 @@ struct vm_area_struct *find_extend_vma(struct mm_struct *, unsigned long addr); int remap_pfn_range(struct vm_area_struct *, unsigned long addr, unsigned long pfn, unsigned long size, pgprot_t); int vm_insert_page(struct vm_area_struct *, unsigned long addr, struct page *); +int vm_insert_pages(struct vm_area_struct *vma, unsigned long addr, + struct page **pages, unsigned long *num); int vm_map_pages(struct vm_area_struct *vma, struct page **pages, unsigned long num); int vm_map_pages_zero(struct vm_area_struct *vma, struct page **pages, diff --git a/mm/memory.c b/mm/memory.c index 52a3303458cb..f703fe8c8346 100644 --- a/mm/memory.c +++ b/mm/memory.c @@ -1419,8 +1419,7 @@ void zap_vma_ptes(struct vm_area_struct *vma, unsigned long address, } EXPORT_SYMBOL_GPL(zap_vma_ptes); -pte_t *__get_locked_pte(struct mm_struct *mm, unsigned long addr, - spinlock_t **ptl) +static pmd_t *walk_to_pmd(struct mm_struct *mm, unsigned long addr) { pgd_t *pgd; p4d_t *p4d; @@ -1439,6 +1438,16 @@ pte_t 
*__get_locked_pte(struct mm_struct *mm, unsigned long addr, return NULL; VM_BUG_ON(pmd_trans_huge(*pmd)); + return pmd; +} + +pte_t *__get_locked_pte(struct mm_struct *mm, unsigned long addr, + spinlock_t **ptl) +{ + pmd_t *pmd = walk_to_pmd(mm, addr); + + if (!pmd) + return NULL; return pte_alloc_map_lock(mm, pmd, addr, ptl); } @@ -1491,6 +1500,122 @@ static int insert_page(struct vm_area_struct *vma, unsigned long addr, return retval; } +#ifdef pte_index +static int insert_page_in_batch_locked(struct mm_struct *mm, pmd_t *pmd, + unsigned long addr, struct page *page, pgprot_t prot) +{ + int err; + + if (!page_count(page)) + return -EINVAL; + err = validate_page_before_insert(page); + return err ? err : insert_page_into_pte_locked( + mm, pte_offset_map(pmd, addr), addr, page, prot); +} + +/* insert_pages() amortizes the cost of spinlock operations + * when inserting pages in a loop. Arch *must* define pte_index. + */ +static int insert_pages(struct vm_area_struct *vma, unsigned long addr, + struct page **pages, unsigned long *num, pgprot_t prot) +{ + pmd_t *pmd = NULL; + spinlock_t *pte_lock = NULL; + struct mm_struct *const mm = vma->vm_mm; + unsigned long curr_page_idx = 0; + unsigned long remaining_pages_total = *num; + unsigned long pages_to_write_in_pmd; + int ret; +more: + ret = -EFAULT; + pmd = walk_to_pmd(mm, addr); + if (!pmd) + goto out; + + pages_to_write_in_pmd = min_t(unsigned long, + remaining_pages_total, PTRS_PER_PTE - pte_index(addr)); + + /* Allocate the PTE if necessary; takes PMD lock once only. 
*/ + ret = -ENOMEM; + if (pte_alloc(mm, pmd)) + goto out; + pte_lock = pte_lockptr(mm, pmd); + + while (pages_to_write_in_pmd) { + int pte_idx = 0; + const int batch_size = min_t(int, pages_to_write_in_pmd, 8); + + spin_lock(pte_lock); + for (; pte_idx < batch_size; ++pte_idx) { + int err = insert_page_in_batch_locked(mm, pmd, + addr, pages[curr_page_idx], prot); + if (unlikely(err)) { + spin_unlock(pte_lock); + ret = err; + remaining_pages_total -= pte_idx; + goto out; + } + addr += PAGE_SIZE; + ++curr_page_idx; + } + spin_unlock(pte_lock); + pages_to_write_in_pmd -= batch_size; + remaining_pages_total -= batch_size; + } + if (remaining_pages_total) + goto more; + ret = 0; +out: + *num = remaining_pages_total; + return ret; +} +#endif /* ifdef pte_index */ + +/** + * vm_insert_pages - insert multiple pages into user vma, batching the pmd lock. + * @vma: user vma to map to + * @addr: target start user address of these pages + * @pages: source kernel pages + * @num: in: number of pages to map. out: number of pages that were *not* + * mapped. (0 means all pages were successfully mapped). + * + * Preferred over vm_insert_page() when inserting multiple pages. + * + * In case of error, we may have mapped a subset of the provided + * pages. It is the caller's responsibility to account for this case. + * + * The same restrictions apply as in vm_insert_page(). + */ +int vm_insert_pages(struct vm_area_struct *vma, unsigned long addr, + struct page **pages, unsigned long *num) +{ +#ifdef pte_index + const unsigned long end_addr = addr + (*num * PAGE_SIZE) - 1; + + if (addr < vma->vm_start || end_addr >= vma->vm_end) + return -EFAULT; + if (!(vma->vm_flags & VM_MIXEDMAP)) { + BUG_ON(down_read_trylock(&vma->vm_mm->mmap_sem)); + BUG_ON(vma->vm_flags & VM_PFNMAP); + vma->vm_flags |= VM_MIXEDMAP; + } + /* Defer page refcount checking till we're about to map that page. 
*/ + return insert_pages(vma, addr, pages, num, vma->vm_page_prot); +#else + unsigned long idx = 0, pgcount = *num; + int err; + + for (; idx < pgcount; ++idx) { + err = vm_insert_page(vma, addr + (PAGE_SIZE * idx), pages[idx]); + if (err) + break; + } + *num = pgcount - idx; + return err; +#endif /* ifdef pte_index */ +} +EXPORT_SYMBOL(vm_insert_pages); + /** * vm_insert_page - insert single page into user vma * @vma: user vma to map to From c62da0c35d58518ddb26ff641d2485596567fd96 Mon Sep 17 00:00:00 2001 From: Anshuman Khandual Date: Fri, 10 Apr 2020 14:33:05 -0700 Subject: [PATCH 261/280] mm/vma: define a default value for VM_DATA_DEFAULT_FLAGS There are many platforms with the exact same value for VM_DATA_DEFAULT_FLAGS. This creates a default value for VM_DATA_DEFAULT_FLAGS in line with the existing VM_STACK_DEFAULT_FLAGS. While here, also define some more macros with standard VMA access flag combinations that are used frequently across many platforms. Apart from simplification, this reduces code duplication as well. Signed-off-by: Anshuman Khandual Signed-off-by: Andrew Morton Reviewed-by: Vlastimil Babka Acked-by: Geert Uytterhoeven Cc: Richard Henderson Cc: Vineet Gupta Cc: Russell King Cc: Catalin Marinas Cc: Mark Salter Cc: Guo Ren Cc: Yoshinori Sato Cc: Brian Cain Cc: Tony Luck Cc: Michal Simek Cc: Ralf Baechle Cc: Paul Burton Cc: Nick Hu Cc: Ley Foon Tan Cc: Jonas Bonn Cc: "James E.J. Bottomley" Cc: Michael Ellerman Cc: Paul Walmsley Cc: Heiko Carstens Cc: Rich Felker Cc: "David S.
Miller" Cc: Guan Xuetao Cc: Thomas Gleixner Cc: Jeff Dike Cc: Chris Zankel Link: http://lkml.kernel.org/r/1583391014-8170-2-git-send-email-anshuman.khandual@arm.com Signed-off-by: Linus Torvalds --- arch/alpha/include/asm/page.h | 3 --- arch/arc/include/asm/page.h | 2 +- arch/arm/include/asm/page.h | 4 +--- arch/arm64/include/asm/page.h | 4 +--- arch/c6x/include/asm/page.h | 5 +---- arch/csky/include/asm/page.h | 3 --- arch/h8300/include/asm/page.h | 2 -- arch/hexagon/include/asm/page.h | 3 +-- arch/ia64/include/asm/page.h | 5 +---- arch/m68k/include/asm/page.h | 3 --- arch/microblaze/include/asm/page.h | 2 -- arch/mips/include/asm/page.h | 5 +---- arch/nds32/include/asm/page.h | 3 --- arch/nios2/include/asm/page.h | 3 +-- arch/openrisc/include/asm/page.h | 5 ----- arch/parisc/include/asm/page.h | 3 --- arch/powerpc/include/asm/page.h | 9 ++------- arch/powerpc/include/asm/page_64.h | 7 ++----- arch/riscv/include/asm/page.h | 3 +-- arch/s390/include/asm/page.h | 3 +-- arch/sh/include/asm/page.h | 3 --- arch/sparc/include/asm/page_32.h | 3 --- arch/sparc/include/asm/page_64.h | 3 --- arch/unicore32/include/asm/page.h | 3 --- arch/x86/include/asm/page_types.h | 4 +--- arch/x86/um/asm/vm-flags.h | 10 ++-------- arch/xtensa/include/asm/page.h | 3 --- include/linux/mm.h | 14 ++++++++++++++ 28 files changed, 31 insertions(+), 89 deletions(-) diff --git a/arch/alpha/include/asm/page.h b/arch/alpha/include/asm/page.h index f3fb2848470a..e241bd88880f 100644 --- a/arch/alpha/include/asm/page.h +++ b/arch/alpha/include/asm/page.h @@ -90,9 +90,6 @@ typedef struct page *pgtable_t; #define virt_addr_valid(kaddr) pfn_valid(__pa(kaddr) >> PAGE_SHIFT) #endif /* CONFIG_DISCONTIGMEM */ -#define VM_DATA_DEFAULT_FLAGS (VM_READ | VM_WRITE | VM_EXEC | \ - VM_MAYREAD | VM_MAYWRITE | VM_MAYEXEC) - #include #include diff --git a/arch/arc/include/asm/page.h b/arch/arc/include/asm/page.h index 0a32e8cfd074..b0dfed0f12be 100644 --- a/arch/arc/include/asm/page.h +++ 
b/arch/arc/include/asm/page.h @@ -102,7 +102,7 @@ typedef pte_t * pgtable_t; #define virt_addr_valid(kaddr) pfn_valid(virt_to_pfn(kaddr)) /* Default Permissions for stack/heaps pages (Non Executable) */ -#define VM_DATA_DEFAULT_FLAGS (VM_READ | VM_WRITE | VM_MAYREAD | VM_MAYWRITE | VM_MAYEXEC) +#define VM_DATA_DEFAULT_FLAGS VM_DATA_FLAGS_NON_EXEC #define WANT_PAGE_VIRTUAL 1 diff --git a/arch/arm/include/asm/page.h b/arch/arm/include/asm/page.h index c2b75cba26df..11b058a72a5b 100644 --- a/arch/arm/include/asm/page.h +++ b/arch/arm/include/asm/page.h @@ -161,9 +161,7 @@ extern int pfn_valid(unsigned long); #endif /* !__ASSEMBLY__ */ -#define VM_DATA_DEFAULT_FLAGS \ - (((current->personality & READ_IMPLIES_EXEC) ? VM_EXEC : 0) | \ - VM_READ | VM_WRITE | VM_MAYREAD | VM_MAYWRITE | VM_MAYEXEC) +#define VM_DATA_DEFAULT_FLAGS VM_DATA_FLAGS_TSK_EXEC #include diff --git a/arch/arm64/include/asm/page.h b/arch/arm64/include/asm/page.h index 75d6cd23a679..c01b52add377 100644 --- a/arch/arm64/include/asm/page.h +++ b/arch/arm64/include/asm/page.h @@ -36,9 +36,7 @@ extern int pfn_valid(unsigned long); #endif /* !__ASSEMBLY__ */ -#define VM_DATA_DEFAULT_FLAGS \ - (((current->personality & READ_IMPLIES_EXEC) ? VM_EXEC : 0) | \ - VM_READ | VM_WRITE | VM_MAYREAD | VM_MAYWRITE | VM_MAYEXEC) +#define VM_DATA_DEFAULT_FLAGS VM_DATA_FLAGS_TSK_EXEC #include diff --git a/arch/c6x/include/asm/page.h b/arch/c6x/include/asm/page.h index 70db1e7632bc..40079899084d 100644 --- a/arch/c6x/include/asm/page.h +++ b/arch/c6x/include/asm/page.h @@ -2,10 +2,7 @@ #ifndef _ASM_C6X_PAGE_H #define _ASM_C6X_PAGE_H -#define VM_DATA_DEFAULT_FLAGS \ - (VM_READ | VM_WRITE | \ - ((current->personality & READ_IMPLIES_EXEC) ? 
VM_EXEC : 0) | \ - VM_MAYREAD | VM_MAYWRITE | VM_MAYEXEC) +#define VM_DATA_DEFAULT_FLAGS VM_DATA_FLAGS_TSK_EXEC #include diff --git a/arch/csky/include/asm/page.h b/arch/csky/include/asm/page.h index 9738eacefdc7..9b98bf31d57c 100644 --- a/arch/csky/include/asm/page.h +++ b/arch/csky/include/asm/page.h @@ -85,9 +85,6 @@ extern unsigned long va_pa_offset; PHYS_OFFSET_OFFSET) #define virt_to_page(x) (mem_map + MAP_NR(x)) -#define VM_DATA_DEFAULT_FLAGS (VM_READ | VM_WRITE | VM_EXEC | \ - VM_MAYREAD | VM_MAYWRITE | VM_MAYEXEC) - #define pfn_to_kaddr(x) __va(PFN_PHYS(x)) #include diff --git a/arch/h8300/include/asm/page.h b/arch/h8300/include/asm/page.h index 8da5124ad344..53e037544239 100644 --- a/arch/h8300/include/asm/page.h +++ b/arch/h8300/include/asm/page.h @@ -6,8 +6,6 @@ #include #define MAP_NR(addr) (((uintptr_t)(addr)-PAGE_OFFSET) >> PAGE_SHIFT) -#define VM_DATA_DEFAULT_FLAGS (VM_READ | VM_WRITE | VM_EXEC | \ - VM_MAYREAD | VM_MAYWRITE | VM_MAYEXEC) #ifndef __ASSEMBLY__ extern unsigned long rom_length; diff --git a/arch/hexagon/include/asm/page.h b/arch/hexagon/include/asm/page.h index ee31f36f48f3..7cbf719c578e 100644 --- a/arch/hexagon/include/asm/page.h +++ b/arch/hexagon/include/asm/page.h @@ -93,8 +93,7 @@ struct page; #define virt_to_page(kaddr) pfn_to_page(PFN_DOWN(__pa(kaddr))) /* Default vm area behavior is non-executable. 
*/ -#define VM_DATA_DEFAULT_FLAGS (VM_READ | VM_WRITE | \ - VM_MAYREAD | VM_MAYWRITE | VM_MAYEXEC) +#define VM_DATA_DEFAULT_FLAGS VM_DATA_FLAGS_NON_EXEC #define pfn_valid(pfn) ((pfn) < max_mapnr) #define virt_addr_valid(kaddr) pfn_valid(__pa(kaddr) >> PAGE_SHIFT) diff --git a/arch/ia64/include/asm/page.h b/arch/ia64/include/asm/page.h index 5798bd2b462c..b69a5499d75b 100644 --- a/arch/ia64/include/asm/page.h +++ b/arch/ia64/include/asm/page.h @@ -218,10 +218,7 @@ get_order (unsigned long size) #define PAGE_OFFSET RGN_BASE(RGN_KERNEL) -#define VM_DATA_DEFAULT_FLAGS (VM_READ | VM_WRITE | \ - VM_MAYREAD | VM_MAYWRITE | VM_MAYEXEC | \ - (((current->personality & READ_IMPLIES_EXEC) != 0) \ - ? VM_EXEC : 0)) +#define VM_DATA_DEFAULT_FLAGS VM_DATA_FLAGS_TSK_EXEC #define GATE_ADDR RGN_BASE(RGN_GATE) diff --git a/arch/m68k/include/asm/page.h b/arch/m68k/include/asm/page.h index da546487e177..2614a1206f2f 100644 --- a/arch/m68k/include/asm/page.h +++ b/arch/m68k/include/asm/page.h @@ -65,9 +65,6 @@ extern unsigned long _ramend; #define __phys_to_pfn(paddr) ((unsigned long)((paddr) >> PAGE_SHIFT)) #define __pfn_to_phys(pfn) PFN_PHYS(pfn) -#define VM_DATA_DEFAULT_FLAGS (VM_READ | VM_WRITE | VM_EXEC | \ - VM_MAYREAD | VM_MAYWRITE | VM_MAYEXEC) - #include #endif /* _M68K_PAGE_H */ diff --git a/arch/microblaze/include/asm/page.h b/arch/microblaze/include/asm/page.h index ae7215c94706..b13463d39b38 100644 --- a/arch/microblaze/include/asm/page.h +++ b/arch/microblaze/include/asm/page.h @@ -194,8 +194,6 @@ extern int page_is_ram(unsigned long pfn); #ifdef CONFIG_MMU -#define VM_DATA_DEFAULT_FLAGS (VM_READ | VM_WRITE | VM_EXEC | \ - VM_MAYREAD | VM_MAYWRITE | VM_MAYEXEC) #endif /* CONFIG_MMU */ #endif /* __KERNEL__ */ diff --git a/arch/mips/include/asm/page.h b/arch/mips/include/asm/page.h index 0ba4ce6e2bf3..e2f503fc7a84 100644 --- a/arch/mips/include/asm/page.h +++ b/arch/mips/include/asm/page.h @@ -253,10 +253,7 @@ extern bool __virt_addr_valid(const volatile void *kaddr); 
#define virt_addr_valid(kaddr) \ __virt_addr_valid((const volatile void *) (kaddr)) -#define VM_DATA_DEFAULT_FLAGS \ - (VM_READ | VM_WRITE | \ - ((current->personality & READ_IMPLIES_EXEC) ? VM_EXEC : 0) | \ - VM_MAYREAD | VM_MAYWRITE | VM_MAYEXEC) +#define VM_DATA_DEFAULT_FLAGS VM_DATA_FLAGS_TSK_EXEC #include #include diff --git a/arch/nds32/include/asm/page.h b/arch/nds32/include/asm/page.h index 86b32014c5f9..add33a7f02c8 100644 --- a/arch/nds32/include/asm/page.h +++ b/arch/nds32/include/asm/page.h @@ -59,9 +59,6 @@ typedef struct page *pgtable_t; #endif /* !__ASSEMBLY__ */ -#define VM_DATA_DEFAULT_FLAGS (VM_READ | VM_WRITE | VM_EXEC | \ - VM_MAYREAD | VM_MAYWRITE | VM_MAYEXEC) - #endif /* __KERNEL__ */ #endif diff --git a/arch/nios2/include/asm/page.h b/arch/nios2/include/asm/page.h index 79fcac61f6ef..6a989819a7c1 100644 --- a/arch/nios2/include/asm/page.h +++ b/arch/nios2/include/asm/page.h @@ -98,8 +98,7 @@ static inline bool pfn_valid(unsigned long pfn) # define virt_to_page(vaddr) pfn_to_page(PFN_DOWN(virt_to_phys(vaddr))) # define virt_addr_valid(vaddr) pfn_valid(PFN_DOWN(virt_to_phys(vaddr))) -# define VM_DATA_DEFAULT_FLAGS (VM_READ | VM_WRITE | \ - VM_MAYREAD | VM_MAYWRITE | VM_MAYEXEC) +# define VM_DATA_DEFAULT_FLAGS VM_DATA_FLAGS_NON_EXEC #include diff --git a/arch/openrisc/include/asm/page.h b/arch/openrisc/include/asm/page.h index 01069db59454..aab6e64d6db4 100644 --- a/arch/openrisc/include/asm/page.h +++ b/arch/openrisc/include/asm/page.h @@ -86,11 +86,6 @@ typedef struct page *pgtable_t; #endif /* __ASSEMBLY__ */ - -#define VM_DATA_DEFAULT_FLAGS (VM_READ | VM_WRITE | VM_EXEC | \ - VM_MAYREAD | VM_MAYWRITE | VM_MAYEXEC) - - #include #include diff --git a/arch/parisc/include/asm/page.h b/arch/parisc/include/asm/page.h index 796ae29e9b9a..6b3f6740a6a6 100644 --- a/arch/parisc/include/asm/page.h +++ b/arch/parisc/include/asm/page.h @@ -180,9 +180,6 @@ extern int npmem_ranges; #define page_to_phys(page) (page_to_pfn(page) << PAGE_SHIFT) #define 
virt_to_page(kaddr) pfn_to_page(__pa(kaddr) >> PAGE_SHIFT) -#define VM_DATA_DEFAULT_FLAGS (VM_READ | VM_WRITE | VM_EXEC | \ - VM_MAYREAD | VM_MAYWRITE | VM_MAYEXEC) - #include #include #include diff --git a/arch/powerpc/include/asm/page.h b/arch/powerpc/include/asm/page.h index 080a0bf8e54b..3ee8df0f66e0 100644 --- a/arch/powerpc/include/asm/page.h +++ b/arch/powerpc/include/asm/page.h @@ -240,13 +240,8 @@ static inline bool pfn_valid(unsigned long pfn) * and needs to be executable. This means the whole heap ends * up being executable. */ -#define VM_DATA_DEFAULT_FLAGS32 \ - (((current->personality & READ_IMPLIES_EXEC) ? VM_EXEC : 0) | \ - VM_READ | VM_WRITE | \ - VM_MAYREAD | VM_MAYWRITE | VM_MAYEXEC) - -#define VM_DATA_DEFAULT_FLAGS64 (VM_READ | VM_WRITE | \ - VM_MAYREAD | VM_MAYWRITE | VM_MAYEXEC) +#define VM_DATA_DEFAULT_FLAGS32 VM_DATA_FLAGS_TSK_EXEC +#define VM_DATA_DEFAULT_FLAGS64 VM_DATA_FLAGS_NON_EXEC #ifdef __powerpc64__ #include diff --git a/arch/powerpc/include/asm/page_64.h b/arch/powerpc/include/asm/page_64.h index 5962797f784a..79a9b7c6a132 100644 --- a/arch/powerpc/include/asm/page_64.h +++ b/arch/powerpc/include/asm/page_64.h @@ -94,11 +94,8 @@ extern u64 ppc64_pft_size; * stack by default, so in the absence of a PT_GNU_STACK program header * we turn execute permission off. */ -#define VM_STACK_DEFAULT_FLAGS32 (VM_READ | VM_WRITE | VM_EXEC | \ - VM_MAYREAD | VM_MAYWRITE | VM_MAYEXEC) - -#define VM_STACK_DEFAULT_FLAGS64 (VM_READ | VM_WRITE | \ - VM_MAYREAD | VM_MAYWRITE | VM_MAYEXEC) +#define VM_STACK_DEFAULT_FLAGS32 VM_DATA_FLAGS_EXEC +#define VM_STACK_DEFAULT_FLAGS64 VM_DATA_FLAGS_NON_EXEC #define VM_STACK_DEFAULT_FLAGS \ (is_32bit_task() ? 
\ diff --git a/arch/riscv/include/asm/page.h b/arch/riscv/include/asm/page.h index 8ca1930caa44..2d50f76efe48 100644 --- a/arch/riscv/include/asm/page.h +++ b/arch/riscv/include/asm/page.h @@ -137,8 +137,7 @@ extern phys_addr_t __phys_addr_symbol(unsigned long x); #define virt_addr_valid(vaddr) (pfn_valid(virt_to_pfn(vaddr))) -#define VM_DATA_DEFAULT_FLAGS (VM_READ | VM_WRITE | \ - VM_MAYREAD | VM_MAYWRITE | VM_MAYEXEC) +#define VM_DATA_DEFAULT_FLAGS VM_DATA_FLAGS_NON_EXEC #include #include diff --git a/arch/s390/include/asm/page.h b/arch/s390/include/asm/page.h index f2d4c1bd3429..cc98f9b78fd4 100644 --- a/arch/s390/include/asm/page.h +++ b/arch/s390/include/asm/page.h @@ -181,8 +181,7 @@ int arch_make_page_accessible(struct page *page); #define virt_addr_valid(kaddr) pfn_valid(virt_to_pfn(kaddr)) -#define VM_DATA_DEFAULT_FLAGS (VM_READ | VM_WRITE | \ - VM_MAYREAD | VM_MAYWRITE | VM_MAYEXEC) +#define VM_DATA_DEFAULT_FLAGS VM_DATA_FLAGS_NON_EXEC #include #include diff --git a/arch/sh/include/asm/page.h b/arch/sh/include/asm/page.h index 5eef8be3e59f..ea8d68f58e39 100644 --- a/arch/sh/include/asm/page.h +++ b/arch/sh/include/asm/page.h @@ -182,9 +182,6 @@ typedef struct page *pgtable_t; #endif #define virt_addr_valid(kaddr) pfn_valid(__pa(kaddr) >> PAGE_SHIFT) -#define VM_DATA_DEFAULT_FLAGS (VM_READ | VM_WRITE | VM_EXEC | \ - VM_MAYREAD | VM_MAYWRITE | VM_MAYEXEC) - #include #include diff --git a/arch/sparc/include/asm/page_32.h b/arch/sparc/include/asm/page_32.h index b76d59edec8c..478260002836 100644 --- a/arch/sparc/include/asm/page_32.h +++ b/arch/sparc/include/asm/page_32.h @@ -133,9 +133,6 @@ extern unsigned long pfn_base; #define pfn_valid(pfn) (((pfn) >= (pfn_base)) && (((pfn)-(pfn_base)) < max_mapnr)) #define virt_addr_valid(kaddr) ((((unsigned long)(kaddr)-PAGE_OFFSET)>>PAGE_SHIFT) < max_mapnr) -#define VM_DATA_DEFAULT_FLAGS (VM_READ | VM_WRITE | VM_EXEC | \ - VM_MAYREAD | VM_MAYWRITE | VM_MAYEXEC) - #include #include diff --git 
a/arch/sparc/include/asm/page_64.h b/arch/sparc/include/asm/page_64.h index e80f2d5bf62f..254dffd85fb1 100644 --- a/arch/sparc/include/asm/page_64.h +++ b/arch/sparc/include/asm/page_64.h @@ -158,9 +158,6 @@ extern unsigned long PAGE_OFFSET; #endif /* !(__ASSEMBLY__) */ -#define VM_DATA_DEFAULT_FLAGS (VM_READ | VM_WRITE | VM_EXEC | \ - VM_MAYREAD | VM_MAYWRITE | VM_MAYEXEC) - #include #endif /* _SPARC64_PAGE_H */ diff --git a/arch/unicore32/include/asm/page.h b/arch/unicore32/include/asm/page.h index 8a89335673f9..96d6bdf180bd 100644 --- a/arch/unicore32/include/asm/page.h +++ b/arch/unicore32/include/asm/page.h @@ -69,9 +69,6 @@ extern int pfn_valid(unsigned long); #endif /* !__ASSEMBLY__ */ -#define VM_DATA_DEFAULT_FLAGS \ - (VM_READ | VM_WRITE | VM_EXEC | VM_MAYREAD | VM_MAYWRITE | VM_MAYEXEC) - #include #endif diff --git a/arch/x86/include/asm/page_types.h b/arch/x86/include/asm/page_types.h index c85e15010f48..e27aa6be6320 100644 --- a/arch/x86/include/asm/page_types.h +++ b/arch/x86/include/asm/page_types.h @@ -35,9 +35,7 @@ #define PAGE_OFFSET ((unsigned long)__PAGE_OFFSET) -#define VM_DATA_DEFAULT_FLAGS \ - (((current->personality & READ_IMPLIES_EXEC) ? VM_EXEC : 0 ) | \ - VM_READ | VM_WRITE | VM_MAYREAD | VM_MAYWRITE | VM_MAYEXEC) +#define VM_DATA_DEFAULT_FLAGS VM_DATA_FLAGS_TSK_EXEC #define __PHYSICAL_START ALIGN(CONFIG_PHYSICAL_START, \ CONFIG_PHYSICAL_ALIGN) diff --git a/arch/x86/um/asm/vm-flags.h b/arch/x86/um/asm/vm-flags.h index 7c297e9e2413..df7a3896f5dd 100644 --- a/arch/x86/um/asm/vm-flags.h +++ b/arch/x86/um/asm/vm-flags.h @@ -9,17 +9,11 @@ #ifdef CONFIG_X86_32 -#define VM_DATA_DEFAULT_FLAGS \ - (VM_READ | VM_WRITE | \ - ((current->personality & READ_IMPLIES_EXEC) ? 
VM_EXEC : 0 ) | \ - VM_MAYREAD | VM_MAYWRITE | VM_MAYEXEC) +#define VM_DATA_DEFAULT_FLAGS VM_DATA_FLAGS_TSK_EXEC #else -#define VM_DATA_DEFAULT_FLAGS (VM_READ | VM_WRITE | VM_EXEC | \ - VM_MAYREAD | VM_MAYWRITE | VM_MAYEXEC) -#define VM_STACK_DEFAULT_FLAGS (VM_GROWSDOWN | VM_READ | VM_WRITE | \ - VM_EXEC | VM_MAYREAD | VM_MAYWRITE | VM_MAYEXEC) +#define VM_STACK_DEFAULT_FLAGS (VM_GROWSDOWN | VM_DATA_FLAGS_EXEC) #endif #endif diff --git a/arch/xtensa/include/asm/page.h b/arch/xtensa/include/asm/page.h index f4771c29c7e9..37ce25ef92d6 100644 --- a/arch/xtensa/include/asm/page.h +++ b/arch/xtensa/include/asm/page.h @@ -203,8 +203,5 @@ static inline unsigned long ___pa(unsigned long va) #endif /* __ASSEMBLY__ */ -#define VM_DATA_DEFAULT_FLAGS (VM_READ | VM_WRITE | VM_EXEC | \ - VM_MAYREAD | VM_MAYWRITE | VM_MAYEXEC) - #include #endif /* _XTENSA_PAGE_H */ diff --git a/include/linux/mm.h b/include/linux/mm.h index ed896cedd4c4..33076fa149c8 100644 --- a/include/linux/mm.h +++ b/include/linux/mm.h @@ -343,6 +343,20 @@ extern unsigned int kobjsize(const void *objp); /* Bits set in the VMA until the stack is in its final location */ #define VM_STACK_INCOMPLETE_SETUP (VM_RAND_READ | VM_SEQ_READ) +#define TASK_EXEC ((current->personality & READ_IMPLIES_EXEC) ? 
VM_EXEC : 0) + +/* Common data flag combinations */ +#define VM_DATA_FLAGS_TSK_EXEC (VM_READ | VM_WRITE | TASK_EXEC | \ + VM_MAYREAD | VM_MAYWRITE | VM_MAYEXEC) +#define VM_DATA_FLAGS_NON_EXEC (VM_READ | VM_WRITE | VM_MAYREAD | \ + VM_MAYWRITE | VM_MAYEXEC) +#define VM_DATA_FLAGS_EXEC (VM_READ | VM_WRITE | VM_EXEC | \ + VM_MAYREAD | VM_MAYWRITE | VM_MAYEXEC) + +#ifndef VM_DATA_DEFAULT_FLAGS /* arch can override this */ +#define VM_DATA_DEFAULT_FLAGS VM_DATA_FLAGS_EXEC +#endif + #ifndef VM_STACK_DEFAULT_FLAGS /* arch can override this */ #define VM_STACK_DEFAULT_FLAGS VM_DATA_DEFAULT_FLAGS #endif From 6cb4d9a2870d2062e34c93bfef4d52fca3fe42d1 Mon Sep 17 00:00:00 2001 From: Anshuman Khandual Date: Fri, 10 Apr 2020 14:33:09 -0700 Subject: [PATCH 262/280] mm/vma: introduce VM_ACCESS_FLAGS There are many places where all basic VMA access flags (read, write, exec) are initialized or checked against as a group. One such example is during page fault. Existing vma_is_accessible() wrapper already creates the notion of VMA accessibility as a group of access permissions. Hence let's just create VM_ACCESS_FLAGS (VM_READ|VM_WRITE|VM_EXEC) which will not only reduce code duplication but also extend the VMA accessibility concept in general.
Signed-off-by: Anshuman Khandual Signed-off-by: Andrew Morton Reviewed-by: Vlastimil Babka Cc: Russell King Cc: Catalin Marinas Cc: Mark Salter Cc: Nick Hu Cc: Ley Foon Tan Cc: Michael Ellerman Cc: Heiko Carstens Cc: Yoshinori Sato Cc: Guan Xuetao Cc: Dave Hansen Cc: Thomas Gleixner Cc: Rob Springer Cc: Greg Kroah-Hartman Cc: Geert Uytterhoeven Link: http://lkml.kernel.org/r/1583391014-8170-3-git-send-email-anshuman.khandual@arm.com Signed-off-by: Linus Torvalds --- arch/arm/mm/fault.c | 2 +- arch/arm64/mm/fault.c | 2 +- arch/nds32/mm/fault.c | 2 +- arch/powerpc/mm/book3s64/pkeys.c | 2 +- arch/s390/mm/fault.c | 2 +- arch/unicore32/mm/fault.c | 2 +- arch/x86/mm/pkeys.c | 2 +- drivers/staging/gasket/gasket_core.c | 2 +- include/linux/mm.h | 6 +++++- mm/mmap.c | 2 +- mm/mprotect.c | 4 ++-- 11 files changed, 16 insertions(+), 12 deletions(-) diff --git a/arch/arm/mm/fault.c b/arch/arm/mm/fault.c index b598e6978b29..2dd5c41cbb8d 100644 --- a/arch/arm/mm/fault.c +++ b/arch/arm/mm/fault.c @@ -189,7 +189,7 @@ void do_bad_area(unsigned long addr, unsigned int fsr, struct pt_regs *regs) */ static inline bool access_error(unsigned int fsr, struct vm_area_struct *vma) { - unsigned int mask = VM_READ | VM_WRITE | VM_EXEC; + unsigned int mask = VM_ACCESS_FLAGS; if ((fsr & FSR_WRITE) && !(fsr & FSR_CM)) mask = VM_WRITE; diff --git a/arch/arm64/mm/fault.c b/arch/arm64/mm/fault.c index 1027851d469a..c9cedc0432d2 100644 --- a/arch/arm64/mm/fault.c +++ b/arch/arm64/mm/fault.c @@ -445,7 +445,7 @@ static int __kprobes do_page_fault(unsigned long addr, unsigned int esr, const struct fault_info *inf; struct mm_struct *mm = current->mm; vm_fault_t fault, major = 0; - unsigned long vm_flags = VM_READ | VM_WRITE | VM_EXEC; + unsigned long vm_flags = VM_ACCESS_FLAGS; unsigned int mm_flags = FAULT_FLAG_DEFAULT; if (kprobe_page_fault(regs, esr)) diff --git a/arch/nds32/mm/fault.c b/arch/nds32/mm/fault.c index 0cf0c08c7da2..f331e533edc2 100644 --- a/arch/nds32/mm/fault.c +++ 
b/arch/nds32/mm/fault.c @@ -79,7 +79,7 @@ void do_page_fault(unsigned long entry, unsigned long addr, struct vm_area_struct *vma; int si_code; vm_fault_t fault; - unsigned int mask = VM_READ | VM_WRITE | VM_EXEC; + unsigned int mask = VM_ACCESS_FLAGS; unsigned int flags = FAULT_FLAG_DEFAULT; error_code = error_code & (ITYPE_mskINST | ITYPE_mskETYPE); diff --git a/arch/powerpc/mm/book3s64/pkeys.c b/arch/powerpc/mm/book3s64/pkeys.c index 07527f1ed108..1199fc2bfaec 100644 --- a/arch/powerpc/mm/book3s64/pkeys.c +++ b/arch/powerpc/mm/book3s64/pkeys.c @@ -315,7 +315,7 @@ int __execute_only_pkey(struct mm_struct *mm) static inline bool vma_is_pkey_exec_only(struct vm_area_struct *vma) { /* Do this check first since the vm_flags should be hot */ - if ((vma->vm_flags & (VM_READ | VM_WRITE | VM_EXEC)) != VM_EXEC) + if ((vma->vm_flags & VM_ACCESS_FLAGS) != VM_EXEC) return false; return (vma_pkey(vma) == vma->vm_mm->context.execute_only_pkey); diff --git a/arch/s390/mm/fault.c b/arch/s390/mm/fault.c index d56f67745e3e..9822a1fd1c6b 100644 --- a/arch/s390/mm/fault.c +++ b/arch/s390/mm/fault.c @@ -580,7 +580,7 @@ void do_dat_exception(struct pt_regs *regs) int access; vm_fault_t fault; - access = VM_READ | VM_EXEC | VM_WRITE; + access = VM_ACCESS_FLAGS; fault = do_exception(regs, access); if (unlikely(fault)) do_fault_error(regs, access, fault); diff --git a/arch/unicore32/mm/fault.c b/arch/unicore32/mm/fault.c index a9bd08fbe588..3022104aa613 100644 --- a/arch/unicore32/mm/fault.c +++ b/arch/unicore32/mm/fault.c @@ -149,7 +149,7 @@ void do_bad_area(unsigned long addr, unsigned int fsr, struct pt_regs *regs) */ static inline bool access_error(unsigned int fsr, struct vm_area_struct *vma) { - unsigned int mask = VM_READ | VM_WRITE | VM_EXEC; + unsigned int mask = VM_ACCESS_FLAGS; if (!(fsr ^ 0x12)) /* write? 
*/ mask = VM_WRITE; diff --git a/arch/x86/mm/pkeys.c b/arch/x86/mm/pkeys.c index c6f84c0b5d7a..8873ed1438a9 100644 --- a/arch/x86/mm/pkeys.c +++ b/arch/x86/mm/pkeys.c @@ -63,7 +63,7 @@ int __execute_only_pkey(struct mm_struct *mm) static inline bool vma_is_pkey_exec_only(struct vm_area_struct *vma) { /* Do this check first since the vm_flags should be hot */ - if ((vma->vm_flags & (VM_READ | VM_WRITE | VM_EXEC)) != VM_EXEC) + if ((vma->vm_flags & VM_ACCESS_FLAGS) != VM_EXEC) return false; if (vma_pkey(vma) != vma->vm_mm->context.execute_only_pkey) return false; diff --git a/drivers/staging/gasket/gasket_core.c b/drivers/staging/gasket/gasket_core.c index cd181a64f737..8e0575fcb4c8 100644 --- a/drivers/staging/gasket/gasket_core.c +++ b/drivers/staging/gasket/gasket_core.c @@ -689,7 +689,7 @@ static bool gasket_mmap_has_permissions(struct gasket_dev *gasket_dev, /* Make sure that no wrong flags are set. */ requested_permissions = - (vma->vm_flags & (VM_WRITE | VM_READ | VM_EXEC)); + (vma->vm_flags & VM_ACCESS_FLAGS); if (requested_permissions & ~(bar_permissions)) { dev_dbg(gasket_dev->dev, "Attempting to map a region with requested permissions 0x%x, but region has permissions 0x%x.\n", diff --git a/include/linux/mm.h b/include/linux/mm.h index 33076fa149c8..4db1522d7c48 100644 --- a/include/linux/mm.h +++ b/include/linux/mm.h @@ -369,6 +369,10 @@ extern unsigned int kobjsize(const void *objp); #define VM_STACK_FLAGS (VM_STACK | VM_STACK_DEFAULT_FLAGS | VM_ACCOUNT) +/* VMA basic access permission flags */ +#define VM_ACCESS_FLAGS (VM_READ | VM_WRITE | VM_EXEC) + + /* * Special vmas that are non-mergable, non-mlock()able. 
*/ @@ -646,7 +650,7 @@ static inline bool vma_is_foreign(struct vm_area_struct *vma) static inline bool vma_is_accessible(struct vm_area_struct *vma) { - return vma->vm_flags & (VM_READ | VM_WRITE | VM_EXEC); + return vma->vm_flags & VM_ACCESS_FLAGS; } #ifdef CONFIG_SHMEM diff --git a/mm/mmap.c b/mm/mmap.c index de07bbc0e21f..f609e9ec4a25 100644 --- a/mm/mmap.c +++ b/mm/mmap.c @@ -1224,7 +1224,7 @@ static int anon_vma_compatible(struct vm_area_struct *a, struct vm_area_struct * return a->vm_end == b->vm_start && mpol_equal(vma_policy(a), vma_policy(b)) && a->vm_file == b->vm_file && - !((a->vm_flags ^ b->vm_flags) & ~(VM_READ|VM_WRITE|VM_EXEC|VM_SOFTDIRTY)) && + !((a->vm_flags ^ b->vm_flags) & ~(VM_ACCESS_FLAGS | VM_SOFTDIRTY)) && b->vm_pgoff == a->vm_pgoff + ((b->vm_start - a->vm_start) >> PAGE_SHIFT); } diff --git a/mm/mprotect.c b/mm/mprotect.c index 1d823b050329..494192ca954b 100644 --- a/mm/mprotect.c +++ b/mm/mprotect.c @@ -419,7 +419,7 @@ mprotect_fixup(struct vm_area_struct *vma, struct vm_area_struct **pprev, */ if (arch_has_pfn_modify_check() && (vma->vm_flags & (VM_PFNMAP|VM_MIXEDMAP)) && - (newflags & (VM_READ|VM_WRITE|VM_EXEC)) == 0) { + (newflags & VM_ACCESS_FLAGS) == 0) { pgprot_t new_pgprot = vm_get_page_prot(newflags); error = walk_page_range(current->mm, start, end, @@ -598,7 +598,7 @@ static int do_mprotect_pkey(unsigned long start, size_t len, newflags |= (vma->vm_flags & ~mask_off_old_flags); /* newflags >> 4 shift VM_MAY% in place of VM_% */ - if ((newflags & ~(newflags >> 4)) & (VM_READ | VM_WRITE | VM_EXEC)) { + if ((newflags & ~(newflags >> 4)) & VM_ACCESS_FLAGS) { error = -EACCES; goto out; } From 78e7c5af080b86e9f28afac5a8307ddab1d2c1a3 Mon Sep 17 00:00:00 2001 From: Anshuman Khandual Date: Fri, 10 Apr 2020 14:33:13 -0700 Subject: [PATCH 263/280] mm/special: create generic fallbacks for pte_special() and pte_mkspecial() Currently there are many platforms that dont enable ARCH_HAS_PTE_SPECIAL but required to define quite similar fallback 
stubs for special page table entry helpers such as pte_special() and pte_mkspecial(), as they get built in generic MM without a config check. This creates two generic fallback stub definitions for these helpers, eliminating much code duplication. mips platform has a special case where pte_special() and pte_mkspecial() visibility is wider than what ARCH_HAS_PTE_SPECIAL enablement requires. This restricts the visibility of those symbols in order to avoid redefinitions, which are now exposed through these new generic stubs, and the subsequent build failure. arm platform set_pte_at() definition needs to be moved into a C file just to prevent a build failure. [anshuman.khandual@arm.com: use defined(CONFIG_ARCH_HAS_PTE_SPECIAL) in mips per Thomas] Link: http://lkml.kernel.org/r/1583851924-21603-1-git-send-email-anshuman.khandual@arm.com Signed-off-by: Anshuman Khandual Signed-off-by: Andrew Morton Acked-by: Guo Ren [csky] Acked-by: Geert Uytterhoeven [m68k] Acked-by: Stafford Horne [openrisc] Acked-by: Helge Deller [parisc] Cc: Richard Henderson Cc: Ivan Kokshaysky Cc: Matt Turner Cc: Russell King Cc: Brian Cain Cc: Tony Luck Cc: Fenghua Yu Cc: Sam Creasey Cc: Michal Simek Cc: Ralf Baechle Cc: Paul Burton Cc: Nick Hu Cc: Greentime Hu Cc: Vincent Chen Cc: Ley Foon Tan Cc: Jonas Bonn Cc: Stefan Kristiansson Cc: "James E.J. Bottomley" Cc: "David S.
Miller" Cc: Jeff Dike Cc: Richard Weinberger Cc: Anton Ivanov Cc: Guan Xuetao Cc: Chris Zankel Cc: Max Filippov Cc: Thomas Bogendoerfer Link: http://lkml.kernel.org/r/1583802551-15406-1-git-send-email-anshuman.khandual@arm.com Signed-off-by: Linus Torvalds --- arch/alpha/include/asm/pgtable.h | 2 -- arch/arm/include/asm/pgtable-2level.h | 2 -- arch/arm/include/asm/pgtable.h | 15 ++------ arch/arm/mm/mmu.c | 14 ++++++++ arch/csky/include/asm/pgtable.h | 3 -- arch/hexagon/include/asm/pgtable.h | 2 -- arch/ia64/include/asm/pgtable.h | 2 -- arch/m68k/include/asm/mcf_pgtable.h | 10 ------ arch/m68k/include/asm/motorola_pgtable.h | 2 -- arch/m68k/include/asm/sun3_pgtable.h | 2 -- arch/microblaze/include/asm/pgtable.h | 4 --- arch/mips/include/asm/pgtable.h | 44 ++++++++++++++++-------- arch/nds32/include/asm/pgtable.h | 9 ----- arch/nios2/include/asm/pgtable.h | 3 -- arch/openrisc/include/asm/pgtable.h | 2 -- arch/parisc/include/asm/pgtable.h | 2 -- arch/sparc/include/asm/pgtable_32.h | 7 ---- arch/um/include/asm/pgtable.h | 10 ------ arch/unicore32/include/asm/pgtable.h | 3 -- arch/xtensa/include/asm/pgtable.h | 3 -- include/linux/mm.h | 12 +++++++ 21 files changed, 58 insertions(+), 95 deletions(-) diff --git a/arch/alpha/include/asm/pgtable.h b/arch/alpha/include/asm/pgtable.h index 299791ce14b6..0267aa8a4f86 100644 --- a/arch/alpha/include/asm/pgtable.h +++ b/arch/alpha/include/asm/pgtable.h @@ -268,7 +268,6 @@ extern inline void pud_clear(pud_t * pudp) { pud_val(*pudp) = 0; } extern inline int pte_write(pte_t pte) { return !(pte_val(pte) & _PAGE_FOW); } extern inline int pte_dirty(pte_t pte) { return pte_val(pte) & _PAGE_DIRTY; } extern inline int pte_young(pte_t pte) { return pte_val(pte) & _PAGE_ACCESSED; } -extern inline int pte_special(pte_t pte) { return 0; } extern inline pte_t pte_wrprotect(pte_t pte) { pte_val(pte) |= _PAGE_FOW; return pte; } extern inline pte_t pte_mkclean(pte_t pte) { pte_val(pte) &= ~(__DIRTY_BITS); return pte; } @@ -276,7 +275,6 @@ 
extern inline pte_t pte_mkold(pte_t pte) { pte_val(pte) &= ~(__ACCESS_BITS); ret extern inline pte_t pte_mkwrite(pte_t pte) { pte_val(pte) &= ~_PAGE_FOW; return pte; } extern inline pte_t pte_mkdirty(pte_t pte) { pte_val(pte) |= __DIRTY_BITS; return pte; } extern inline pte_t pte_mkyoung(pte_t pte) { pte_val(pte) |= __ACCESS_BITS; return pte; } -extern inline pte_t pte_mkspecial(pte_t pte) { return pte; } #define PAGE_DIR_OFFSET(tsk,address) pgd_offset((tsk),(address)) diff --git a/arch/arm/include/asm/pgtable-2level.h b/arch/arm/include/asm/pgtable-2level.h index 0d3ea35c97fe..9e084a464a97 100644 --- a/arch/arm/include/asm/pgtable-2level.h +++ b/arch/arm/include/asm/pgtable-2level.h @@ -211,8 +211,6 @@ static inline pmd_t *pmd_offset(pud_t *pud, unsigned long addr) #define pmd_addr_end(addr,end) (end) #define set_pte_ext(ptep,pte,ext) cpu_set_pte_ext(ptep,pte,ext) -#define pte_special(pte) (0) -static inline pte_t pte_mkspecial(pte_t pte) { return pte; } /* * We don't have huge page support for short descriptors, for the moment diff --git a/arch/arm/include/asm/pgtable.h b/arch/arm/include/asm/pgtable.h index 0483cf413315..befc8fcec98f 100644 --- a/arch/arm/include/asm/pgtable.h +++ b/arch/arm/include/asm/pgtable.h @@ -243,19 +243,8 @@ static inline void __sync_icache_dcache(pte_t pteval) extern void __sync_icache_dcache(pte_t pteval); #endif -static inline void set_pte_at(struct mm_struct *mm, unsigned long addr, - pte_t *ptep, pte_t pteval) -{ - unsigned long ext = 0; - - if (addr < TASK_SIZE && pte_valid_user(pteval)) { - if (!pte_special(pteval)) - __sync_icache_dcache(pteval); - ext |= PTE_EXT_NG; - } - - set_pte_ext(ptep, pteval, ext); -} +void set_pte_at(struct mm_struct *mm, unsigned long addr, + pte_t *ptep, pte_t pteval); static inline pte_t clear_pte_bit(pte_t pte, pgprot_t prot) { diff --git a/arch/arm/mm/mmu.c b/arch/arm/mm/mmu.c index 69a337df619f..ec8d0008bfa1 100644 --- a/arch/arm/mm/mmu.c +++ b/arch/arm/mm/mmu.c @@ -1646,3 +1646,17 @@ void __init 
early_mm_init(const struct machine_desc *mdesc) build_mem_type_table(); early_paging_init(mdesc); } + +void set_pte_at(struct mm_struct *mm, unsigned long addr, + pte_t *ptep, pte_t pteval) +{ + unsigned long ext = 0; + + if (addr < TASK_SIZE && pte_valid_user(pteval)) { + if (!pte_special(pteval)) + __sync_icache_dcache(pteval); + ext |= PTE_EXT_NG; + } + + set_pte_ext(ptep, pteval, ext); +} diff --git a/arch/csky/include/asm/pgtable.h b/arch/csky/include/asm/pgtable.h index 9b7764cb7645..9ab4a445ad99 100644 --- a/arch/csky/include/asm/pgtable.h +++ b/arch/csky/include/asm/pgtable.h @@ -110,9 +110,6 @@ extern unsigned long empty_zero_page[PAGE_SIZE / sizeof(unsigned long)]; extern void load_pgd(unsigned long pg_dir); extern pte_t invalid_pte_table[PTRS_PER_PTE]; -static inline int pte_special(pte_t pte) { return 0; } -static inline pte_t pte_mkspecial(pte_t pte) { return pte; } - static inline void set_pte(pte_t *p, pte_t pte) { *p = pte; diff --git a/arch/hexagon/include/asm/pgtable.h b/arch/hexagon/include/asm/pgtable.h index 2fec20ad939e..d383e8bea5b2 100644 --- a/arch/hexagon/include/asm/pgtable.h +++ b/arch/hexagon/include/asm/pgtable.h @@ -158,8 +158,6 @@ extern pgd_t swapper_pg_dir[PTRS_PER_PGD]; /* located in head.S */ /* Seems to be zero even in architectures where the zero page is firewalled? 
*/ #define FIRST_USER_ADDRESS 0UL -#define pte_special(pte) 0 -#define pte_mkspecial(pte) (pte) /* HUGETLB not working currently */ #ifdef CONFIG_HUGETLB_PAGE diff --git a/arch/ia64/include/asm/pgtable.h b/arch/ia64/include/asm/pgtable.h index d602e7c622db..0e7b645b76c6 100644 --- a/arch/ia64/include/asm/pgtable.h +++ b/arch/ia64/include/asm/pgtable.h @@ -298,7 +298,6 @@ extern unsigned long VMALLOC_END; #define pte_exec(pte) ((pte_val(pte) & _PAGE_AR_RX) != 0) #define pte_dirty(pte) ((pte_val(pte) & _PAGE_D) != 0) #define pte_young(pte) ((pte_val(pte) & _PAGE_A) != 0) -#define pte_special(pte) 0 /* * Note: we convert AR_RWX to AR_RX and AR_RW to AR_R by clearing the 2nd bit in the @@ -311,7 +310,6 @@ extern unsigned long VMALLOC_END; #define pte_mkclean(pte) (__pte(pte_val(pte) & ~_PAGE_D)) #define pte_mkdirty(pte) (__pte(pte_val(pte) | _PAGE_D)) #define pte_mkhuge(pte) (__pte(pte_val(pte))) -#define pte_mkspecial(pte) (pte) /* * Because ia64's Icache and Dcache is not coherent (on a cpu), we need to diff --git a/arch/m68k/include/asm/mcf_pgtable.h b/arch/m68k/include/asm/mcf_pgtable.h index b9f45aeded25..0031cd387b75 100644 --- a/arch/m68k/include/asm/mcf_pgtable.h +++ b/arch/m68k/include/asm/mcf_pgtable.h @@ -235,11 +235,6 @@ static inline int pte_young(pte_t pte) return pte_val(pte) & CF_PAGE_ACCESSED; } -static inline int pte_special(pte_t pte) -{ - return 0; -} - static inline pte_t pte_wrprotect(pte_t pte) { pte_val(pte) &= ~CF_PAGE_WRITABLE; @@ -312,11 +307,6 @@ static inline pte_t pte_mkcache(pte_t pte) return pte; } -static inline pte_t pte_mkspecial(pte_t pte) -{ - return pte; -} - #define swapper_pg_dir kernel_pg_dir extern pgd_t kernel_pg_dir[PTRS_PER_PGD]; diff --git a/arch/m68k/include/asm/motorola_pgtable.h b/arch/m68k/include/asm/motorola_pgtable.h index 4b91a470ad58..48f19f0ab1e7 100644 --- a/arch/m68k/include/asm/motorola_pgtable.h +++ b/arch/m68k/include/asm/motorola_pgtable.h @@ -174,7 +174,6 @@ static inline void pud_set(pud_t *pudp, pmd_t 
*pmdp) static inline int pte_write(pte_t pte) { return !(pte_val(pte) & _PAGE_RONLY); } static inline int pte_dirty(pte_t pte) { return pte_val(pte) & _PAGE_DIRTY; } static inline int pte_young(pte_t pte) { return pte_val(pte) & _PAGE_ACCESSED; } -static inline int pte_special(pte_t pte) { return 0; } static inline pte_t pte_wrprotect(pte_t pte) { pte_val(pte) |= _PAGE_RONLY; return pte; } static inline pte_t pte_mkclean(pte_t pte) { pte_val(pte) &= ~_PAGE_DIRTY; return pte; } @@ -192,7 +191,6 @@ static inline pte_t pte_mkcache(pte_t pte) pte_val(pte) = (pte_val(pte) & _CACHEMASK040) | m68k_supervisor_cachemode; return pte; } -static inline pte_t pte_mkspecial(pte_t pte) { return pte; } #define PAGE_DIR_OFFSET(tsk,address) pgd_offset((tsk),(address)) diff --git a/arch/m68k/include/asm/sun3_pgtable.h b/arch/m68k/include/asm/sun3_pgtable.h index bc4155264810..0caa18a08437 100644 --- a/arch/m68k/include/asm/sun3_pgtable.h +++ b/arch/m68k/include/asm/sun3_pgtable.h @@ -155,7 +155,6 @@ static inline void pmd_clear (pmd_t *pmdp) { pmd_val (*pmdp) = 0; } static inline int pte_write(pte_t pte) { return pte_val(pte) & SUN3_PAGE_WRITEABLE; } static inline int pte_dirty(pte_t pte) { return pte_val(pte) & SUN3_PAGE_MODIFIED; } static inline int pte_young(pte_t pte) { return pte_val(pte) & SUN3_PAGE_ACCESSED; } -static inline int pte_special(pte_t pte) { return 0; } static inline pte_t pte_wrprotect(pte_t pte) { pte_val(pte) &= ~SUN3_PAGE_WRITEABLE; return pte; } static inline pte_t pte_mkclean(pte_t pte) { pte_val(pte) &= ~SUN3_PAGE_MODIFIED; return pte; } @@ -168,7 +167,6 @@ static inline pte_t pte_mknocache(pte_t pte) { pte_val(pte) |= SUN3_PAGE_NOCACHE //static inline pte_t pte_mkcache(pte_t pte) { pte_val(pte) &= SUN3_PAGE_NOCACHE; return pte; } // until then, use: static inline pte_t pte_mkcache(pte_t pte) { return pte; } -static inline pte_t pte_mkspecial(pte_t pte) { return pte; } extern pgd_t swapper_pg_dir[PTRS_PER_PGD]; extern pgd_t kernel_pg_dir[PTRS_PER_PGD]; diff 
--git a/arch/microblaze/include/asm/pgtable.h b/arch/microblaze/include/asm/pgtable.h index 45b30878fc17..6b056f6545d8 100644 --- a/arch/microblaze/include/asm/pgtable.h +++ b/arch/microblaze/include/asm/pgtable.h @@ -77,10 +77,6 @@ extern pte_t *va_to_pte(unsigned long address); * Undefined behaviour if not.. */ -static inline int pte_special(pte_t pte) { return 0; } - -static inline pte_t pte_mkspecial(pte_t pte) { return pte; } - /* Start and end of the vmalloc area. */ /* Make sure to map the vmalloc area above the pinned kernel memory area of 32Mb. */ diff --git a/arch/mips/include/asm/pgtable.h b/arch/mips/include/asm/pgtable.h index aef5378f909c..f1801e7a4b15 100644 --- a/arch/mips/include/asm/pgtable.h +++ b/arch/mips/include/asm/pgtable.h @@ -269,6 +269,36 @@ static inline void set_pte_at(struct mm_struct *mm, unsigned long addr, */ extern pgd_t swapper_pg_dir[]; +/* + * Platform specific pte_special() and pte_mkspecial() definitions + * are required only when ARCH_HAS_PTE_SPECIAL is enabled. + */ +#if defined(CONFIG_ARCH_HAS_PTE_SPECIAL) +#if defined(CONFIG_PHYS_ADDR_T_64BIT) && defined(CONFIG_CPU_MIPS32) +static inline int pte_special(pte_t pte) +{ + return pte.pte_low & _PAGE_SPECIAL; +} + +static inline pte_t pte_mkspecial(pte_t pte) +{ + pte.pte_low |= _PAGE_SPECIAL; + return pte; +} +#else +static inline int pte_special(pte_t pte) +{ + return pte_val(pte) & _PAGE_SPECIAL; +} + +static inline pte_t pte_mkspecial(pte_t pte) +{ + pte_val(pte) |= _PAGE_SPECIAL; + return pte; +} +#endif +#endif /* CONFIG_ARCH_HAS_PTE_SPECIAL */ + /* * The following only work if pte_present() is true. * Undefined behaviour if not.. 
@@ -277,7 +307,6 @@ extern pgd_t swapper_pg_dir[]; static inline int pte_write(pte_t pte) { return pte.pte_low & _PAGE_WRITE; } static inline int pte_dirty(pte_t pte) { return pte.pte_low & _PAGE_MODIFIED; } static inline int pte_young(pte_t pte) { return pte.pte_low & _PAGE_ACCESSED; } -static inline int pte_special(pte_t pte) { return pte.pte_low & _PAGE_SPECIAL; } static inline pte_t pte_wrprotect(pte_t pte) { @@ -338,17 +367,10 @@ static inline pte_t pte_mkyoung(pte_t pte) } return pte; } - -static inline pte_t pte_mkspecial(pte_t pte) -{ - pte.pte_low |= _PAGE_SPECIAL; - return pte; -} #else static inline int pte_write(pte_t pte) { return pte_val(pte) & _PAGE_WRITE; } static inline int pte_dirty(pte_t pte) { return pte_val(pte) & _PAGE_MODIFIED; } static inline int pte_young(pte_t pte) { return pte_val(pte) & _PAGE_ACCESSED; } -static inline int pte_special(pte_t pte) { return pte_val(pte) & _PAGE_SPECIAL; } static inline pte_t pte_wrprotect(pte_t pte) { @@ -392,12 +414,6 @@ static inline pte_t pte_mkyoung(pte_t pte) return pte; } -static inline pte_t pte_mkspecial(pte_t pte) -{ - pte_val(pte) |= _PAGE_SPECIAL; - return pte; -} - #ifdef CONFIG_MIPS_HUGE_TLB_SUPPORT static inline int pte_huge(pte_t pte) { return pte_val(pte) & _PAGE_HUGE; } diff --git a/arch/nds32/include/asm/pgtable.h b/arch/nds32/include/asm/pgtable.h index 6abc58ac406d..476cc4dd1709 100644 --- a/arch/nds32/include/asm/pgtable.h +++ b/arch/nds32/include/asm/pgtable.h @@ -286,15 +286,6 @@ PTE_BIT_FUNC(mkclean, &=~_PAGE_D); PTE_BIT_FUNC(mkdirty, |=_PAGE_D); PTE_BIT_FUNC(mkold, &=~_PAGE_YOUNG); PTE_BIT_FUNC(mkyoung, |=_PAGE_YOUNG); -static inline int pte_special(pte_t pte) -{ - return 0; -} - -static inline pte_t pte_mkspecial(pte_t pte) -{ - return pte; -} /* * Mark the prot value as uncacheable and unbufferable. 
diff --git a/arch/nios2/include/asm/pgtable.h b/arch/nios2/include/asm/pgtable.h index 99985d8b7166..f98b7f4519ba 100644 --- a/arch/nios2/include/asm/pgtable.h +++ b/arch/nios2/include/asm/pgtable.h @@ -113,7 +113,6 @@ static inline int pte_dirty(pte_t pte) \ { return pte_val(pte) & _PAGE_DIRTY; } static inline int pte_young(pte_t pte) \ { return pte_val(pte) & _PAGE_ACCESSED; } -static inline int pte_special(pte_t pte) { return 0; } #define pgprot_noncached pgprot_noncached @@ -168,8 +167,6 @@ static inline pte_t pte_mkdirty(pte_t pte) return pte; } -static inline pte_t pte_mkspecial(pte_t pte) { return pte; } - static inline pte_t pte_mkyoung(pte_t pte) { pte_val(pte) |= _PAGE_ACCESSED; diff --git a/arch/openrisc/include/asm/pgtable.h b/arch/openrisc/include/asm/pgtable.h index 248d22d8faa7..7f3fb9ceb083 100644 --- a/arch/openrisc/include/asm/pgtable.h +++ b/arch/openrisc/include/asm/pgtable.h @@ -236,8 +236,6 @@ static inline int pte_write(pte_t pte) { return pte_val(pte) & _PAGE_WRITE; } static inline int pte_exec(pte_t pte) { return pte_val(pte) & _PAGE_EXEC; } static inline int pte_dirty(pte_t pte) { return pte_val(pte) & _PAGE_DIRTY; } static inline int pte_young(pte_t pte) { return pte_val(pte) & _PAGE_ACCESSED; } -static inline int pte_special(pte_t pte) { return 0; } -static inline pte_t pte_mkspecial(pte_t pte) { return pte; } static inline pte_t pte_wrprotect(pte_t pte) { diff --git a/arch/parisc/include/asm/pgtable.h b/arch/parisc/include/asm/pgtable.h index f0a365950536..9832c73a7021 100644 --- a/arch/parisc/include/asm/pgtable.h +++ b/arch/parisc/include/asm/pgtable.h @@ -377,7 +377,6 @@ static inline void pud_clear(pud_t *pud) { static inline int pte_dirty(pte_t pte) { return pte_val(pte) & _PAGE_DIRTY; } static inline int pte_young(pte_t pte) { return pte_val(pte) & _PAGE_ACCESSED; } static inline int pte_write(pte_t pte) { return pte_val(pte) & _PAGE_WRITE; } -static inline int pte_special(pte_t pte) { return 0; } static inline pte_t 
pte_mkclean(pte_t pte) { pte_val(pte) &= ~_PAGE_DIRTY; return pte; } static inline pte_t pte_mkold(pte_t pte) { pte_val(pte) &= ~_PAGE_ACCESSED; return pte; } @@ -385,7 +384,6 @@ static inline pte_t pte_wrprotect(pte_t pte) { pte_val(pte) &= ~_PAGE_WRITE; ret static inline pte_t pte_mkdirty(pte_t pte) { pte_val(pte) |= _PAGE_DIRTY; return pte; } static inline pte_t pte_mkyoung(pte_t pte) { pte_val(pte) |= _PAGE_ACCESSED; return pte; } static inline pte_t pte_mkwrite(pte_t pte) { pte_val(pte) |= _PAGE_WRITE; return pte; } -static inline pte_t pte_mkspecial(pte_t pte) { return pte; } /* * Huge pte definitions. diff --git a/arch/sparc/include/asm/pgtable_32.h b/arch/sparc/include/asm/pgtable_32.h index 6d6f44c0cad9..0de659ae0ba4 100644 --- a/arch/sparc/include/asm/pgtable_32.h +++ b/arch/sparc/include/asm/pgtable_32.h @@ -223,11 +223,6 @@ static inline int pte_young(pte_t pte) return pte_val(pte) & SRMMU_REF; } -static inline int pte_special(pte_t pte) -{ - return 0; -} - static inline pte_t pte_wrprotect(pte_t pte) { return __pte(pte_val(pte) & ~SRMMU_WRITE); @@ -258,8 +253,6 @@ static inline pte_t pte_mkyoung(pte_t pte) return __pte(pte_val(pte) | SRMMU_REF); } -#define pte_mkspecial(pte) (pte) - #define pfn_pte(pfn, prot) mk_pte(pfn_to_page(pfn), prot) static inline unsigned long pte_pfn(pte_t pte) diff --git a/arch/um/include/asm/pgtable.h b/arch/um/include/asm/pgtable.h index 2daa58df2190..b5ddf5d98bd5 100644 --- a/arch/um/include/asm/pgtable.h +++ b/arch/um/include/asm/pgtable.h @@ -167,11 +167,6 @@ static inline int pte_newprot(pte_t pte) return(pte_present(pte) && (pte_get_bits(pte, _PAGE_NEWPROT))); } -static inline int pte_special(pte_t pte) -{ - return 0; -} - /* * ================================= * Flags setting section. 
@@ -247,11 +242,6 @@ static inline pte_t pte_mknewpage(pte_t pte) return(pte); } -static inline pte_t pte_mkspecial(pte_t pte) -{ - return(pte); -} - static inline void set_pte(pte_t *pteptr, pte_t pteval) { pte_copy(*pteptr, pteval); diff --git a/arch/unicore32/include/asm/pgtable.h b/arch/unicore32/include/asm/pgtable.h index c8f7ba12f309..3b8731b3a937 100644 --- a/arch/unicore32/include/asm/pgtable.h +++ b/arch/unicore32/include/asm/pgtable.h @@ -177,7 +177,6 @@ extern struct page *empty_zero_page; #define pte_dirty(pte) (pte_val(pte) & PTE_DIRTY) #define pte_young(pte) (pte_val(pte) & PTE_YOUNG) #define pte_exec(pte) (pte_val(pte) & PTE_EXEC) -#define pte_special(pte) (0) #define PTE_BIT_FUNC(fn, op) \ static inline pte_t pte_##fn(pte_t pte) { pte_val(pte) op; return pte; } @@ -189,8 +188,6 @@ PTE_BIT_FUNC(mkdirty, |= PTE_DIRTY); PTE_BIT_FUNC(mkold, &= ~PTE_YOUNG); PTE_BIT_FUNC(mkyoung, |= PTE_YOUNG); -static inline pte_t pte_mkspecial(pte_t pte) { return pte; } - /* * Mark the prot value as uncacheable. 
*/ diff --git a/arch/xtensa/include/asm/pgtable.h b/arch/xtensa/include/asm/pgtable.h index 27ac17c9da09..8be0c0568c50 100644 --- a/arch/xtensa/include/asm/pgtable.h +++ b/arch/xtensa/include/asm/pgtable.h @@ -266,7 +266,6 @@ static inline void paging_init(void) { } static inline int pte_write(pte_t pte) { return pte_val(pte) & _PAGE_WRITABLE; } static inline int pte_dirty(pte_t pte) { return pte_val(pte) & _PAGE_DIRTY; } static inline int pte_young(pte_t pte) { return pte_val(pte) & _PAGE_ACCESSED; } -static inline int pte_special(pte_t pte) { return 0; } static inline pte_t pte_wrprotect(pte_t pte) { pte_val(pte) &= ~(_PAGE_WRITABLE | _PAGE_HW_WRITE); return pte; } @@ -280,8 +279,6 @@ static inline pte_t pte_mkyoung(pte_t pte) { pte_val(pte) |= _PAGE_ACCESSED; return pte; } static inline pte_t pte_mkwrite(pte_t pte) { pte_val(pte) |= _PAGE_WRITABLE; return pte; } -static inline pte_t pte_mkspecial(pte_t pte) - { return pte; } #define pgprot_noncached(prot) (__pgprot(pgprot_val(prot) & ~_PAGE_CA_MASK)) diff --git a/include/linux/mm.h b/include/linux/mm.h index 4db1522d7c48..5a323422d783 100644 --- a/include/linux/mm.h +++ b/include/linux/mm.h @@ -1927,6 +1927,18 @@ static inline void sync_mm_rss(struct mm_struct *mm) } #endif +#ifndef CONFIG_ARCH_HAS_PTE_SPECIAL +static inline int pte_special(pte_t pte) +{ + return 0; +} + +static inline pte_t pte_mkspecial(pte_t pte) +{ + return pte; +} +#endif + #ifndef CONFIG_ARCH_HAS_PTE_DEVMAP static inline int pte_devmap(pte_t pte) { From 96c6b598135e7cec66161e8943823470c7c8954e Mon Sep 17 00:00:00 2001 From: Logan Gunthorpe Date: Fri, 10 Apr 2020 14:33:17 -0700 Subject: [PATCH 264/280] mm/memory_hotplug: drop the flags field from struct mhp_restrictions Patch series "Allow setting caching mode in arch_add_memory() for P2PDMA", v4. Currently, the page tables created using memremap_pages() are always created with the PAGE_KERNEL cacheing mode. 
However, the P2PDMA code is creating pages for PCI BAR memory which should never be accessed through the cache and instead use either WC or UC. This still works in most cases, on x86, because the MTRR registers typically override the caching settings in the page tables for all of the IO memory to be UC-. However, this tends not to work so well on other arches or some rare x86 machines that have firmware which does not set up the MTRR registers in this way. Instead of this, this series proposes a change to arch_add_memory() to take the pgprot required by the mapping which allows us to explicitly set pagetable entries for P2PDMA memory to UC. This change is pretty routine for most of the arches: x86_64, arm64 and powerpc simply need to thread the pgprot through to where the page tables are set up. x86_32 unfortunately sets up the page tables at boot so must use _set_memory_prot() to change their caching mode. ia64, s390 and sh don't appear to have an easy way to change the page tables so, for now at least, we just return -EINVAL on such mappings and thus they will not support P2PDMA memory until the work for this is done. This should be fine as they don't yet support ZONE_DEVICE. This patch (of 7): This variable is not used anywhere and should therefore be removed from the structure. Signed-off-by: Logan Gunthorpe Signed-off-by: Andrew Morton Reviewed-by: David Hildenbrand Reviewed-by: Dan Williams Acked-by: Michal Hocko Cc: Christoph Hellwig Cc: Catalin Marinas Cc: Will Deacon Cc: Benjamin Herrenschmidt Cc: Thomas Gleixner Cc: Ingo Molnar Cc: Borislav Petkov Cc: Dave Hansen Cc: Andy Lutomirski Cc: Peter Zijlstra Cc: Eric Badger Cc: "H.
Peter Anvin" Cc: Jason Gunthorpe Cc: Michael Ellerman Cc: Paul Mackerras Link: http://lkml.kernel.org/r/20200306170846.9333-2-logang@deltatee.com Signed-off-by: Linus Torvalds --- include/linux/memory_hotplug.h | 2 -- 1 file changed, 2 deletions(-) diff --git a/include/linux/memory_hotplug.h b/include/linux/memory_hotplug.h index ef55115320fb..7c1bcff11672 100644 --- a/include/linux/memory_hotplug.h +++ b/include/linux/memory_hotplug.h @@ -59,11 +59,9 @@ enum { /* * Restrictions for the memory hotplug: - * flags: MHP_ flags * altmap: alternative allocator for memmap array */ struct mhp_restrictions { - unsigned long flags; struct vmem_altmap *altmap; }; From f5637d3b42ab0465ef71d5fb8461bce97fba95e8 Mon Sep 17 00:00:00 2001 From: Logan Gunthorpe Date: Fri, 10 Apr 2020 14:33:21 -0700 Subject: [PATCH 265/280] mm/memory_hotplug: rename mhp_restrictions to mhp_params The mhp_restrictions struct really doesn't specify anything resembling a restriction anymore so rename it to be mhp_params as it is a list of extended parameters. Signed-off-by: Logan Gunthorpe Signed-off-by: Andrew Morton Reviewed-by: David Hildenbrand Reviewed-by: Dan Williams Acked-by: Michal Hocko Cc: Andy Lutomirski Cc: Benjamin Herrenschmidt Cc: Borislav Petkov Cc: Catalin Marinas Cc: Christoph Hellwig Cc: Dave Hansen Cc: Eric Badger Cc: "H. 
Peter Anvin" Cc: Ingo Molnar Cc: Jason Gunthorpe Cc: Michael Ellerman Cc: Paul Mackerras Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: Will Deacon Link: http://lkml.kernel.org/r/20200306170846.9333-3-logang@deltatee.com Signed-off-by: Linus Torvalds --- arch/arm64/mm/mmu.c | 4 ++-- arch/ia64/mm/init.c | 4 ++-- arch/powerpc/mm/mem.c | 4 ++-- arch/s390/mm/init.c | 6 +++--- arch/sh/mm/init.c | 4 ++-- arch/x86/mm/init_32.c | 4 ++-- arch/x86/mm/init_64.c | 8 ++++---- include/linux/memory_hotplug.h | 16 ++++++++-------- mm/memory_hotplug.c | 8 ++++---- mm/memremap.c | 8 ++++---- 10 files changed, 33 insertions(+), 33 deletions(-) diff --git a/arch/arm64/mm/mmu.c b/arch/arm64/mm/mmu.c index 9b08f7c7e6f0..6d4e9c2b4ed0 100644 --- a/arch/arm64/mm/mmu.c +++ b/arch/arm64/mm/mmu.c @@ -1374,7 +1374,7 @@ static void __remove_pgd_mapping(pgd_t *pgdir, unsigned long start, u64 size) } int arch_add_memory(int nid, u64 start, u64 size, - struct mhp_restrictions *restrictions) + struct mhp_params *params) { int ret, flags = 0; @@ -1387,7 +1387,7 @@ int arch_add_memory(int nid, u64 start, u64 size, memblock_clear_nomap(start, size); ret = __add_pages(nid, start >> PAGE_SHIFT, size >> PAGE_SHIFT, - restrictions); + params); if (ret) __remove_pgd_mapping(swapper_pg_dir, __phys_to_virt(start), size); diff --git a/arch/ia64/mm/init.c b/arch/ia64/mm/init.c index b01d68a2d5d9..97bbc23ea1e3 100644 --- a/arch/ia64/mm/init.c +++ b/arch/ia64/mm/init.c @@ -670,13 +670,13 @@ mem_init (void) #ifdef CONFIG_MEMORY_HOTPLUG int arch_add_memory(int nid, u64 start, u64 size, - struct mhp_restrictions *restrictions) + struct mhp_params *params) { unsigned long start_pfn = start >> PAGE_SHIFT; unsigned long nr_pages = size >> PAGE_SHIFT; int ret; - ret = __add_pages(nid, start_pfn, nr_pages, restrictions); + ret = __add_pages(nid, start_pfn, nr_pages, params); if (ret) printk("%s: Problem encountered in __add_pages() as ret=%d\n", __func__, ret); diff --git a/arch/powerpc/mm/mem.c b/arch/powerpc/mm/mem.c 
index 9b4f5fb719e0..e1cc58115816 100644 --- a/arch/powerpc/mm/mem.c +++ b/arch/powerpc/mm/mem.c @@ -122,7 +122,7 @@ static void flush_dcache_range_chunked(unsigned long start, unsigned long stop, } int __ref arch_add_memory(int nid, u64 start, u64 size, - struct mhp_restrictions *restrictions) + struct mhp_params *params) { unsigned long start_pfn = start >> PAGE_SHIFT; unsigned long nr_pages = size >> PAGE_SHIFT; @@ -138,7 +138,7 @@ int __ref arch_add_memory(int nid, u64 start, u64 size, return -EFAULT; } - return __add_pages(nid, start_pfn, nr_pages, restrictions); + return __add_pages(nid, start_pfn, nr_pages, params); } void __ref arch_remove_memory(int nid, u64 start, u64 size, diff --git a/arch/s390/mm/init.c b/arch/s390/mm/init.c index ac44bd76db4b..e9e4a7abd0cc 100644 --- a/arch/s390/mm/init.c +++ b/arch/s390/mm/init.c @@ -268,20 +268,20 @@ device_initcall(s390_cma_mem_init); #endif /* CONFIG_CMA */ int arch_add_memory(int nid, u64 start, u64 size, - struct mhp_restrictions *restrictions) + struct mhp_params *params) { unsigned long start_pfn = PFN_DOWN(start); unsigned long size_pages = PFN_DOWN(size); int rc; - if (WARN_ON_ONCE(restrictions->altmap)) + if (WARN_ON_ONCE(params->altmap)) return -EINVAL; rc = vmem_add_mapping(start, size); if (rc) return rc; - rc = __add_pages(nid, start_pfn, size_pages, restrictions); + rc = __add_pages(nid, start_pfn, size_pages, params); if (rc) vmem_remove_mapping(start, size); return rc; diff --git a/arch/sh/mm/init.c b/arch/sh/mm/init.c index d1b1ff2be17a..e5114c053364 100644 --- a/arch/sh/mm/init.c +++ b/arch/sh/mm/init.c @@ -406,14 +406,14 @@ void __init mem_init(void) #ifdef CONFIG_MEMORY_HOTPLUG int arch_add_memory(int nid, u64 start, u64 size, - struct mhp_restrictions *restrictions) + struct mhp_params *params) { unsigned long start_pfn = PFN_DOWN(start); unsigned long nr_pages = size >> PAGE_SHIFT; int ret; /* We only have ZONE_NORMAL, so this is easy.. 
*/ - ret = __add_pages(nid, start_pfn, nr_pages, restrictions); + ret = __add_pages(nid, start_pfn, nr_pages, params); if (unlikely(ret)) printk("%s: Failed, __add_pages() == %d\n", __func__, ret); diff --git a/arch/x86/mm/init_32.c b/arch/x86/mm/init_32.c index de73992b8432..d736c8625503 100644 --- a/arch/x86/mm/init_32.c +++ b/arch/x86/mm/init_32.c @@ -819,12 +819,12 @@ void __init mem_init(void) #ifdef CONFIG_MEMORY_HOTPLUG int arch_add_memory(int nid, u64 start, u64 size, - struct mhp_restrictions *restrictions) + struct mhp_params *params) { unsigned long start_pfn = start >> PAGE_SHIFT; unsigned long nr_pages = size >> PAGE_SHIFT; - return __add_pages(nid, start_pfn, nr_pages, restrictions); + return __add_pages(nid, start_pfn, nr_pages, params); } void arch_remove_memory(int nid, u64 start, u64 size, diff --git a/arch/x86/mm/init_64.c b/arch/x86/mm/init_64.c index 0a14711d3a93..faa86a9a3b0d 100644 --- a/arch/x86/mm/init_64.c +++ b/arch/x86/mm/init_64.c @@ -843,11 +843,11 @@ static void update_end_of_memory_vars(u64 start, u64 size) } int add_pages(int nid, unsigned long start_pfn, unsigned long nr_pages, - struct mhp_restrictions *restrictions) + struct mhp_params *params) { int ret; - ret = __add_pages(nid, start_pfn, nr_pages, restrictions); + ret = __add_pages(nid, start_pfn, nr_pages, params); WARN_ON_ONCE(ret); /* update max_pfn, max_low_pfn and high_memory */ @@ -858,14 +858,14 @@ int add_pages(int nid, unsigned long start_pfn, unsigned long nr_pages, } int arch_add_memory(int nid, u64 start, u64 size, - struct mhp_restrictions *restrictions) + struct mhp_params *params) { unsigned long start_pfn = start >> PAGE_SHIFT; unsigned long nr_pages = size >> PAGE_SHIFT; init_memory_mapping(start, start + size); - return add_pages(nid, start_pfn, nr_pages, restrictions); + return add_pages(nid, start_pfn, nr_pages, params); } #define PAGE_INUSE 0xFD diff --git a/include/linux/memory_hotplug.h b/include/linux/memory_hotplug.h index 7c1bcff11672..75f0f6304735 
100644 --- a/include/linux/memory_hotplug.h +++ b/include/linux/memory_hotplug.h @@ -58,10 +58,10 @@ enum { }; /* - * Restrictions for the memory hotplug: - * altmap: alternative allocator for memmap array + * Extended parameters for memory hotplug: + * altmap: alternative allocator for memmap array (optional) */ -struct mhp_restrictions { +struct mhp_params { struct vmem_altmap *altmap; }; @@ -112,7 +112,7 @@ extern int restore_online_page_callback(online_page_callback_t callback); extern int try_online_node(int nid); extern int arch_add_memory(int nid, u64 start, u64 size, - struct mhp_restrictions *restrictions); + struct mhp_params *params); extern u64 max_mem_size; extern int memhp_online_type_from_str(const char *str); @@ -133,17 +133,17 @@ extern void __remove_pages(unsigned long start_pfn, unsigned long nr_pages, /* reasonably generic interface to expand the physical pages */ extern int __add_pages(int nid, unsigned long start_pfn, unsigned long nr_pages, - struct mhp_restrictions *restrictions); + struct mhp_params *params); #ifndef CONFIG_ARCH_HAS_ADD_PAGES static inline int add_pages(int nid, unsigned long start_pfn, - unsigned long nr_pages, struct mhp_restrictions *restrictions) + unsigned long nr_pages, struct mhp_params *params) { - return __add_pages(nid, start_pfn, nr_pages, restrictions); + return __add_pages(nid, start_pfn, nr_pages, params); } #else /* ARCH_HAS_ADD_PAGES */ int add_pages(int nid, unsigned long start_pfn, unsigned long nr_pages, - struct mhp_restrictions *restrictions); + struct mhp_params *params); #endif /* ARCH_HAS_ADD_PAGES */ #ifdef CONFIG_NUMA diff --git a/mm/memory_hotplug.c b/mm/memory_hotplug.c index 635e8e286598..fbfe7b40f552 100644 --- a/mm/memory_hotplug.c +++ b/mm/memory_hotplug.c @@ -304,12 +304,12 @@ static int check_hotplug_memory_addressable(unsigned long pfn, * add the new pages. 
*/ int __ref __add_pages(int nid, unsigned long pfn, unsigned long nr_pages, - struct mhp_restrictions *restrictions) + struct mhp_params *params) { const unsigned long end_pfn = pfn + nr_pages; unsigned long cur_nr_pages; int err; - struct vmem_altmap *altmap = restrictions->altmap; + struct vmem_altmap *altmap = params->altmap; err = check_hotplug_memory_addressable(pfn, nr_pages); if (err) @@ -1002,7 +1002,7 @@ static int online_memory_block(struct memory_block *mem, void *arg) */ int __ref add_memory_resource(int nid, struct resource *res) { - struct mhp_restrictions restrictions = {}; + struct mhp_params params = {}; u64 start, size; bool new_node = false; int ret; @@ -1030,7 +1030,7 @@ int __ref add_memory_resource(int nid, struct resource *res) new_node = ret; /* call arch's memory hotadd */ - ret = arch_add_memory(nid, start, size, &restrictions); + ret = arch_add_memory(nid, start, size, &params); if (ret < 0) goto error; diff --git a/mm/memremap.c b/mm/memremap.c index bbf457c4f166..b0b5170843ff 100644 --- a/mm/memremap.c +++ b/mm/memremap.c @@ -184,7 +184,7 @@ void *memremap_pages(struct dev_pagemap *pgmap, int nid) { struct resource *res = &pgmap->res; struct dev_pagemap *conflict_pgmap; - struct mhp_restrictions restrictions = { + struct mhp_params params = { /* * We do not want any optional features only our own memmap */ @@ -302,7 +302,7 @@ void *memremap_pages(struct dev_pagemap *pgmap, int nid) */ if (pgmap->type == MEMORY_DEVICE_PRIVATE) { error = add_pages(nid, PHYS_PFN(res->start), - PHYS_PFN(resource_size(res)), &restrictions); + PHYS_PFN(resource_size(res)), &params); } else { error = kasan_add_zero_shadow(__va(res->start), resource_size(res)); if (error) { @@ -311,7 +311,7 @@ void *memremap_pages(struct dev_pagemap *pgmap, int nid) } error = arch_add_memory(nid, res->start, resource_size(res), - &restrictions); + &params); } if (!error) { @@ -319,7 +319,7 @@ void *memremap_pages(struct dev_pagemap *pgmap, int nid) zone = 
&NODE_DATA(nid)->node_zones[ZONE_DEVICE]; move_pfn_range_to_zone(zone, PHYS_PFN(res->start), - PHYS_PFN(resource_size(res)), restrictions.altmap); + PHYS_PFN(resource_size(res)), params.altmap); } mem_hotplug_done(); From c164fbb40c43f8041f4d05ec9996d8ee343c92b1 Mon Sep 17 00:00:00 2001 From: Logan Gunthorpe Date: Fri, 10 Apr 2020 14:33:24 -0700 Subject: [PATCH 266/280] x86/mm: thread pgprot_t through init_memory_mapping() In preparation to support a pgprot_t argument for arch_add_memory(). It's required to move the prototype of init_memory_mapping() seeing the original location came before the definition of pgprot_t. Signed-off-by: Logan Gunthorpe Signed-off-by: Andrew Morton Reviewed-by: Dan Williams Acked-by: Michal Hocko Cc: Thomas Gleixner Cc: Ingo Molnar Cc: Borislav Petkov Cc: "H. Peter Anvin" Cc: Dave Hansen Cc: Andy Lutomirski Cc: Peter Zijlstra Cc: Benjamin Herrenschmidt Cc: Catalin Marinas Cc: Christoph Hellwig Cc: David Hildenbrand Cc: Eric Badger Cc: Jason Gunthorpe Cc: Michael Ellerman Cc: Paul Mackerras Cc: Will Deacon Link: http://lkml.kernel.org/r/20200306170846.9333-4-logang@deltatee.com Signed-off-by: Linus Torvalds --- arch/x86/include/asm/page_types.h | 3 --- arch/x86/include/asm/pgtable.h | 3 +++ arch/x86/kernel/amd_gart_64.c | 3 ++- arch/x86/mm/init.c | 9 +++++---- arch/x86/mm/init_32.c | 3 ++- arch/x86/mm/init_64.c | 32 +++++++++++++++++-------------- arch/x86/mm/mm_internal.h | 3 ++- arch/x86/platform/uv/bios_uv.c | 3 ++- 8 files changed, 34 insertions(+), 25 deletions(-) diff --git a/arch/x86/include/asm/page_types.h b/arch/x86/include/asm/page_types.h index e27aa6be6320..a506a411474d 100644 --- a/arch/x86/include/asm/page_types.h +++ b/arch/x86/include/asm/page_types.h @@ -71,9 +71,6 @@ static inline phys_addr_t get_max_mapped(void) bool pfn_range_is_mapped(unsigned long start_pfn, unsigned long end_pfn); -extern unsigned long init_memory_mapping(unsigned long start, - unsigned long end); - extern void initmem_init(void); #endif /* 
!__ASSEMBLY__ */ diff --git a/arch/x86/include/asm/pgtable.h b/arch/x86/include/asm/pgtable.h index abad0da0973a..4d02e64af1b3 100644 --- a/arch/x86/include/asm/pgtable.h +++ b/arch/x86/include/asm/pgtable.h @@ -1081,6 +1081,9 @@ static inline void __meminit init_trampoline_default(void) void __init poking_init(void); +unsigned long init_memory_mapping(unsigned long start, + unsigned long end, pgprot_t prot); + # ifdef CONFIG_RANDOMIZE_MEMORY void __meminit init_trampoline(void); # else diff --git a/arch/x86/kernel/amd_gart_64.c b/arch/x86/kernel/amd_gart_64.c index 4e5f50236048..16133819415c 100644 --- a/arch/x86/kernel/amd_gart_64.c +++ b/arch/x86/kernel/amd_gart_64.c @@ -744,7 +744,8 @@ int __init gart_iommu_init(void) start_pfn = PFN_DOWN(aper_base); if (!pfn_range_is_mapped(start_pfn, end_pfn)) - init_memory_mapping(start_pfn<> PAGE_SHIFT, ret >> PAGE_SHIFT); @@ -521,7 +522,7 @@ static unsigned long __init init_range_memory_mapping( */ can_use_brk_pgt = max(start, (u64)pgt_buf_end<= min(end, (u64)pgt_buf_top<> PAGE_SHIFT, - PAGE_KERNEL_LARGE), + prot), init); spin_unlock(&init_mm.page_table_lock); paddr_last = paddr_next; @@ -669,7 +672,7 @@ phys_pud_init(pud_t *pud_page, unsigned long paddr, unsigned long paddr_end, static unsigned long __meminit phys_p4d_init(p4d_t *p4d_page, unsigned long paddr, unsigned long paddr_end, - unsigned long page_size_mask, bool init) + unsigned long page_size_mask, pgprot_t prot, bool init) { unsigned long vaddr, vaddr_end, vaddr_next, paddr_next, paddr_last; @@ -679,7 +682,7 @@ phys_p4d_init(p4d_t *p4d_page, unsigned long paddr, unsigned long paddr_end, if (!pgtable_l5_enabled()) return phys_pud_init((pud_t *) p4d_page, paddr, paddr_end, - page_size_mask, init); + page_size_mask, prot, init); for (; vaddr < vaddr_end; vaddr = vaddr_next) { p4d_t *p4d = p4d_page + p4d_index(vaddr); @@ -702,13 +705,13 @@ phys_p4d_init(p4d_t *p4d_page, unsigned long paddr, unsigned long paddr_end, if (!p4d_none(*p4d)) { pud = pud_offset(p4d, 0); 
paddr_last = phys_pud_init(pud, paddr, __pa(vaddr_end), - page_size_mask, init); + page_size_mask, prot, init); continue; } pud = alloc_low_page(); paddr_last = phys_pud_init(pud, paddr, __pa(vaddr_end), - page_size_mask, init); + page_size_mask, prot, init); spin_lock(&init_mm.page_table_lock); p4d_populate_init(&init_mm, p4d, pud, init); @@ -722,7 +725,7 @@ static unsigned long __meminit __kernel_physical_mapping_init(unsigned long paddr_start, unsigned long paddr_end, unsigned long page_size_mask, - bool init) + pgprot_t prot, bool init) { bool pgd_changed = false; unsigned long vaddr, vaddr_start, vaddr_end, vaddr_next, paddr_last; @@ -743,13 +746,13 @@ __kernel_physical_mapping_init(unsigned long paddr_start, paddr_last = phys_p4d_init(p4d, __pa(vaddr), __pa(vaddr_end), page_size_mask, - init); + prot, init); continue; } p4d = alloc_low_page(); paddr_last = phys_p4d_init(p4d, __pa(vaddr), __pa(vaddr_end), - page_size_mask, init); + page_size_mask, prot, init); spin_lock(&init_mm.page_table_lock); if (pgtable_l5_enabled()) @@ -778,10 +781,10 @@ __kernel_physical_mapping_init(unsigned long paddr_start, unsigned long __meminit kernel_physical_mapping_init(unsigned long paddr_start, unsigned long paddr_end, - unsigned long page_size_mask) + unsigned long page_size_mask, pgprot_t prot) { return __kernel_physical_mapping_init(paddr_start, paddr_end, - page_size_mask, true); + page_size_mask, prot, true); } /* @@ -796,7 +799,8 @@ kernel_physical_mapping_change(unsigned long paddr_start, unsigned long page_size_mask) { return __kernel_physical_mapping_init(paddr_start, paddr_end, - page_size_mask, false); + page_size_mask, PAGE_KERNEL, + false); } #ifndef CONFIG_NUMA @@ -863,7 +867,7 @@ int arch_add_memory(int nid, u64 start, u64 size, unsigned long start_pfn = start >> PAGE_SHIFT; unsigned long nr_pages = size >> PAGE_SHIFT; - init_memory_mapping(start, start + size); + init_memory_mapping(start, start + size, PAGE_KERNEL); return add_pages(nid, start_pfn, nr_pages, 
params); } diff --git a/arch/x86/mm/mm_internal.h b/arch/x86/mm/mm_internal.h index eeae142062ed..3f37b5c80bb3 100644 --- a/arch/x86/mm/mm_internal.h +++ b/arch/x86/mm/mm_internal.h @@ -12,7 +12,8 @@ void early_ioremap_page_table_range_init(void); unsigned long kernel_physical_mapping_init(unsigned long start, unsigned long end, - unsigned long page_size_mask); + unsigned long page_size_mask, + pgprot_t prot); unsigned long kernel_physical_mapping_change(unsigned long start, unsigned long end, unsigned long page_size_mask); diff --git a/arch/x86/platform/uv/bios_uv.c b/arch/x86/platform/uv/bios_uv.c index 607f58147311..c60255da5a6c 100644 --- a/arch/x86/platform/uv/bios_uv.c +++ b/arch/x86/platform/uv/bios_uv.c @@ -352,7 +352,8 @@ void __iomem *__init efi_ioremap(unsigned long phys_addr, unsigned long size, if (type == EFI_MEMORY_MAPPED_IO) return ioremap(phys_addr, size); - last_map_pfn = init_memory_mapping(phys_addr, phys_addr + size); + last_map_pfn = init_memory_mapping(phys_addr, phys_addr + size, + PAGE_KERNEL); if ((last_map_pfn << PAGE_SHIFT) < phys_addr + size) { unsigned long top = last_map_pfn << PAGE_SHIFT; efi_ioremap(top, size - (top - phys_addr), type, attribute); From 30796e18c29942c4d64bf89c4135c975393ec1ad Mon Sep 17 00:00:00 2001 From: Logan Gunthorpe Date: Fri, 10 Apr 2020 14:33:28 -0700 Subject: [PATCH 267/280] x86/mm: introduce __set_memory_prot() For use in the 32bit arch_add_memory() to set the pgprot type of the memory to add. Signed-off-by: Logan Gunthorpe Signed-off-by: Andrew Morton Reviewed-by: Dan Williams Cc: Thomas Gleixner Cc: Ingo Molnar Cc: Borislav Petkov Cc: "H. 
Peter Anvin" Cc: Dave Hansen Cc: Andy Lutomirski Cc: Peter Zijlstra Cc: Benjamin Herrenschmidt Cc: Catalin Marinas Cc: Christoph Hellwig Cc: David Hildenbrand Cc: Eric Badger Cc: Jason Gunthorpe Cc: Michael Ellerman Cc: Michal Hocko Cc: Paul Mackerras Cc: Will Deacon Link: http://lkml.kernel.org/r/20200306170846.9333-5-logang@deltatee.com Signed-off-by: Linus Torvalds --- arch/x86/include/asm/set_memory.h | 1 + arch/x86/mm/pat/set_memory.c | 13 +++++++++++++ 2 files changed, 14 insertions(+) diff --git a/arch/x86/include/asm/set_memory.h b/arch/x86/include/asm/set_memory.h index 950532ccbc4a..ec2c0a094b5d 100644 --- a/arch/x86/include/asm/set_memory.h +++ b/arch/x86/include/asm/set_memory.h @@ -34,6 +34,7 @@ * The caller is required to take care of these. */ +int __set_memory_prot(unsigned long addr, int numpages, pgprot_t prot); int _set_memory_uc(unsigned long addr, int numpages); int _set_memory_wc(unsigned long addr, int numpages); int _set_memory_wt(unsigned long addr, int numpages); diff --git a/arch/x86/mm/pat/set_memory.c b/arch/x86/mm/pat/set_memory.c index 6d5424069e2b..59eca6a94ce7 100644 --- a/arch/x86/mm/pat/set_memory.c +++ b/arch/x86/mm/pat/set_memory.c @@ -1795,6 +1795,19 @@ static inline int cpa_clear_pages_array(struct page **pages, int numpages, CPA_PAGES_ARRAY, pages); } +/* + * __set_memory_prot is an internal helper for callers that have been passed + * a pgprot_t value from upper layers and a reservation has already been taken. + * If you want to set the pgprot to a specific page protocol, use the + * set_memory_xx() functions. 
+ */ +int __set_memory_prot(unsigned long addr, int numpages, pgprot_t prot) +{ + return change_page_attr_set_clr(&addr, numpages, prot, + __pgprot(~pgprot_val(prot)), 0, 0, + NULL); +} + int _set_memory_uc(unsigned long addr, int numpages) { /* From 4e00c5affdd4b04e6392001716333971932f3d0c Mon Sep 17 00:00:00 2001 From: Logan Gunthorpe Date: Fri, 10 Apr 2020 14:33:32 -0700 Subject: [PATCH 268/280] powerpc/mm: thread pgprot_t through create_section_mapping() In preparation to support a pgprot_t argument for arch_add_memory(). Signed-off-by: Logan Gunthorpe Signed-off-by: Andrew Morton Cc: Benjamin Herrenschmidt Cc: Paul Mackerras Cc: Michael Ellerman Cc: Andy Lutomirski Cc: Borislav Petkov Cc: Catalin Marinas Cc: Christoph Hellwig Cc: Dan Williams Cc: Dave Hansen Cc: David Hildenbrand Cc: Eric Badger Cc: "H. Peter Anvin" Cc: Ingo Molnar Cc: Jason Gunthorpe Cc: Michal Hocko Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: Will Deacon Link: http://lkml.kernel.org/r/20200306170846.9333-6-logang@deltatee.com Signed-off-by: Linus Torvalds --- arch/powerpc/include/asm/book3s/64/hash.h | 3 ++- arch/powerpc/include/asm/book3s/64/radix.h | 3 ++- arch/powerpc/include/asm/sparsemem.h | 3 ++- arch/powerpc/mm/book3s64/hash_utils.c | 5 +++-- arch/powerpc/mm/book3s64/pgtable.c | 7 ++++--- arch/powerpc/mm/book3s64/radix_pgtable.c | 18 +++++++++++------- arch/powerpc/mm/mem.c | 5 +++-- 7 files changed, 27 insertions(+), 17 deletions(-) diff --git a/arch/powerpc/include/asm/book3s/64/hash.h b/arch/powerpc/include/asm/book3s/64/hash.h index 2781ebf6add4..6fc4520092c7 100644 --- a/arch/powerpc/include/asm/book3s/64/hash.h +++ b/arch/powerpc/include/asm/book3s/64/hash.h @@ -251,7 +251,8 @@ extern int __meminit hash__vmemmap_create_mapping(unsigned long start, extern void hash__vmemmap_remove_mapping(unsigned long start, unsigned long page_size); -int hash__create_section_mapping(unsigned long start, unsigned long end, int nid); +int hash__create_section_mapping(unsigned long start, unsigned 
long end, + int nid, pgprot_t prot); int hash__remove_section_mapping(unsigned long start, unsigned long end); #endif /* !__ASSEMBLY__ */ diff --git a/arch/powerpc/include/asm/book3s/64/radix.h b/arch/powerpc/include/asm/book3s/64/radix.h index a1c60d5b50af..08c222d5b764 100644 --- a/arch/powerpc/include/asm/book3s/64/radix.h +++ b/arch/powerpc/include/asm/book3s/64/radix.h @@ -294,7 +294,8 @@ static inline unsigned long radix__get_tree_size(void) } #ifdef CONFIG_MEMORY_HOTPLUG -int radix__create_section_mapping(unsigned long start, unsigned long end, int nid); +int radix__create_section_mapping(unsigned long start, unsigned long end, + int nid, pgprot_t prot); int radix__remove_section_mapping(unsigned long start, unsigned long end); #endif /* CONFIG_MEMORY_HOTPLUG */ #endif /* __ASSEMBLY__ */ diff --git a/arch/powerpc/include/asm/sparsemem.h b/arch/powerpc/include/asm/sparsemem.h index 3192d454a733..c89b32443cff 100644 --- a/arch/powerpc/include/asm/sparsemem.h +++ b/arch/powerpc/include/asm/sparsemem.h @@ -13,7 +13,8 @@ #endif /* CONFIG_SPARSEMEM */ #ifdef CONFIG_MEMORY_HOTPLUG -extern int create_section_mapping(unsigned long start, unsigned long end, int nid); +extern int create_section_mapping(unsigned long start, unsigned long end, + int nid, pgprot_t prot); extern int remove_section_mapping(unsigned long start, unsigned long end); #ifdef CONFIG_PPC_BOOK3S_64 diff --git a/arch/powerpc/mm/book3s64/hash_utils.c b/arch/powerpc/mm/book3s64/hash_utils.c index 7e5714a69a58..8ed2411c3f39 100644 --- a/arch/powerpc/mm/book3s64/hash_utils.c +++ b/arch/powerpc/mm/book3s64/hash_utils.c @@ -809,7 +809,8 @@ int resize_hpt_for_hotplug(unsigned long new_mem_size) return 0; } -int hash__create_section_mapping(unsigned long start, unsigned long end, int nid) +int hash__create_section_mapping(unsigned long start, unsigned long end, + int nid, pgprot_t prot) { int rc; @@ -819,7 +820,7 @@ int hash__create_section_mapping(unsigned long start, unsigned long end, int nid } rc = 
htab_bolt_mapping(start, end, __pa(start), - pgprot_val(PAGE_KERNEL), mmu_linear_psize, + pgprot_val(prot), mmu_linear_psize, mmu_kernel_ssize); if (rc < 0) { diff --git a/arch/powerpc/mm/book3s64/pgtable.c b/arch/powerpc/mm/book3s64/pgtable.c index 2bf7e1b4fd82..e0bb69c616e4 100644 --- a/arch/powerpc/mm/book3s64/pgtable.c +++ b/arch/powerpc/mm/book3s64/pgtable.c @@ -171,12 +171,13 @@ void mmu_cleanup_all(void) } #ifdef CONFIG_MEMORY_HOTPLUG -int __meminit create_section_mapping(unsigned long start, unsigned long end, int nid) +int __meminit create_section_mapping(unsigned long start, unsigned long end, + int nid, pgprot_t prot) { if (radix_enabled()) - return radix__create_section_mapping(start, end, nid); + return radix__create_section_mapping(start, end, nid, prot); - return hash__create_section_mapping(start, end, nid); + return hash__create_section_mapping(start, end, nid, prot); } int __meminit remove_section_mapping(unsigned long start, unsigned long end) diff --git a/arch/powerpc/mm/book3s64/radix_pgtable.c b/arch/powerpc/mm/book3s64/radix_pgtable.c index 2a9a0cd79490..8f9edf07063a 100644 --- a/arch/powerpc/mm/book3s64/radix_pgtable.c +++ b/arch/powerpc/mm/book3s64/radix_pgtable.c @@ -254,7 +254,7 @@ static unsigned long next_boundary(unsigned long addr, unsigned long end) static int __meminit create_physical_mapping(unsigned long start, unsigned long end, - int nid) + int nid, pgprot_t _prot) { unsigned long vaddr, addr, mapping_size = 0; bool prev_exec, exec = false; @@ -290,7 +290,7 @@ static int __meminit create_physical_mapping(unsigned long start, prot = PAGE_KERNEL_X; exec = true; } else { - prot = PAGE_KERNEL; + prot = _prot; exec = false; } @@ -334,7 +334,7 @@ static void __init radix_init_pgtable(void) WARN_ON(create_physical_mapping(reg->base, reg->base + reg->size, - -1)); + -1, PAGE_KERNEL)); } /* Find out how many PID bits are supported */ @@ -713,8 +713,10 @@ static int __meminit stop_machine_change_mapping(void *data) 
spin_unlock(&init_mm.page_table_lock); pte_clear(&init_mm, params->aligned_start, params->pte); - create_physical_mapping(__pa(params->aligned_start), __pa(params->start), -1); - create_physical_mapping(__pa(params->end), __pa(params->aligned_end), -1); + create_physical_mapping(__pa(params->aligned_start), + __pa(params->start), -1, PAGE_KERNEL); + create_physical_mapping(__pa(params->end), __pa(params->aligned_end), + -1, PAGE_KERNEL); spin_lock(&init_mm.page_table_lock); return 0; } @@ -871,14 +873,16 @@ static void __meminit remove_pagetable(unsigned long start, unsigned long end) radix__flush_tlb_kernel_range(start, end); } -int __meminit radix__create_section_mapping(unsigned long start, unsigned long end, int nid) +int __meminit radix__create_section_mapping(unsigned long start, + unsigned long end, int nid, + pgprot_t prot) { if (end >= RADIX_VMALLOC_START) { pr_warn("Outside the supported range\n"); return -1; } - return create_physical_mapping(__pa(start), __pa(end), nid); + return create_physical_mapping(__pa(start), __pa(end), nid, prot); } int __meminit radix__remove_section_mapping(unsigned long start, unsigned long end) diff --git a/arch/powerpc/mm/mem.c b/arch/powerpc/mm/mem.c index e1cc58115816..bf63ab04db63 100644 --- a/arch/powerpc/mm/mem.c +++ b/arch/powerpc/mm/mem.c @@ -90,7 +90,8 @@ int memory_add_physaddr_to_nid(u64 start) } #endif -int __weak create_section_mapping(unsigned long start, unsigned long end, int nid) +int __weak create_section_mapping(unsigned long start, unsigned long end, + int nid, pgprot_t prot) { return -ENODEV; } @@ -131,7 +132,7 @@ int __ref arch_add_memory(int nid, u64 start, u64 size, resize_hpt_for_hotplug(memblock_phys_mem_size()); start = (unsigned long)__va(start); - rc = create_section_mapping(start, start + size, nid); + rc = create_section_mapping(start, start + size, nid, PAGE_KERNEL); if (rc) { pr_warn("Unable to create mapping for hot added memory 0x%llx..0x%llx: %d\n", start, start + size, rc); From 
bfeb022f8fe4c5afdcfd7a3d868fac9765f9bcad Mon Sep 17 00:00:00 2001 From: Logan Gunthorpe Date: Fri, 10 Apr 2020 14:33:36 -0700 Subject: [PATCH 269/280] mm/memory_hotplug: add pgprot_t to mhp_params devm_memremap_pages() is currently used by the PCI P2PDMA code to create struct page mappings for IO memory. At present, these mappings are created with PAGE_KERNEL which implies setting the PAT bits to be WB. However, on x86, an mtrr register will typically override this and force the cache type to be UC-. In the case firmware doesn't set this register it is effectively WB and will typically result in a machine check exception when it's accessed. Other arches are not currently likely to function correctly seeing they don't have any MTRR registers to fall back on. To solve this, provide a way to specify the pgprot value explicitly to arch_add_memory(). Of the arches that support MEMORY_HOTPLUG: x86_64, and arm64 need a simple change to pass the pgprot_t down to their respective functions which set up the page tables. For x86_32, set the page tables explicitly using __set_memory_prot() (seeing they are already mapped). For ia64, s390 and sh, reject anything but PAGE_KERNEL settings -- this should be fine, for now, seeing these architectures don't support ZONE_DEVICE. A check in __add_pages() is also added to ensure the pgprot parameter was set for all arches. Signed-off-by: Logan Gunthorpe Signed-off-by: Andrew Morton Acked-by: David Hildenbrand Acked-by: Michal Hocko Acked-by: Dan Williams Cc: Andy Lutomirski Cc: Benjamin Herrenschmidt Cc: Borislav Petkov Cc: Catalin Marinas Cc: Christoph Hellwig Cc: Dave Hansen Cc: Eric Badger Cc: "H. 
Peter Anvin" Cc: Ingo Molnar Cc: Jason Gunthorpe Cc: Michael Ellerman Cc: Paul Mackerras Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: Will Deacon Link: http://lkml.kernel.org/r/20200306170846.9333-7-logang@deltatee.com Signed-off-by: Linus Torvalds --- arch/arm64/mm/mmu.c | 3 ++- arch/ia64/mm/init.c | 3 +++ arch/powerpc/mm/mem.c | 3 ++- arch/s390/mm/init.c | 3 +++ arch/sh/mm/init.c | 3 +++ arch/x86/mm/init_32.c | 12 ++++++++++++ arch/x86/mm/init_64.c | 2 +- include/linux/memory_hotplug.h | 3 +++ mm/memory_hotplug.c | 5 ++++- mm/memremap.c | 6 +++--- 10 files changed, 36 insertions(+), 7 deletions(-) diff --git a/arch/arm64/mm/mmu.c b/arch/arm64/mm/mmu.c index 6d4e9c2b4ed0..a374e4f51a62 100644 --- a/arch/arm64/mm/mmu.c +++ b/arch/arm64/mm/mmu.c @@ -1382,7 +1382,8 @@ int arch_add_memory(int nid, u64 start, u64 size, flags = NO_BLOCK_MAPPINGS | NO_CONT_MAPPINGS; __create_pgd_mapping(swapper_pg_dir, start, __phys_to_virt(start), - size, PAGE_KERNEL, __pgd_pgtable_alloc, flags); + size, params->pgprot, __pgd_pgtable_alloc, + flags); memblock_clear_nomap(start, size); diff --git a/arch/ia64/mm/init.c b/arch/ia64/mm/init.c index 97bbc23ea1e3..d637b4ea3147 100644 --- a/arch/ia64/mm/init.c +++ b/arch/ia64/mm/init.c @@ -676,6 +676,9 @@ int arch_add_memory(int nid, u64 start, u64 size, unsigned long nr_pages = size >> PAGE_SHIFT; int ret; + if (WARN_ON_ONCE(params->pgprot.pgprot != PAGE_KERNEL.pgprot)) + return -EINVAL; + ret = __add_pages(nid, start_pfn, nr_pages, params); if (ret) printk("%s: Problem encountered in __add_pages() as ret=%d\n", diff --git a/arch/powerpc/mm/mem.c b/arch/powerpc/mm/mem.c index bf63ab04db63..041ed7cfd341 100644 --- a/arch/powerpc/mm/mem.c +++ b/arch/powerpc/mm/mem.c @@ -132,7 +132,8 @@ int __ref arch_add_memory(int nid, u64 start, u64 size, resize_hpt_for_hotplug(memblock_phys_mem_size()); start = (unsigned long)__va(start); - rc = create_section_mapping(start, start + size, nid, PAGE_KERNEL); + rc = create_section_mapping(start, start + size, 
nid, + params->pgprot); if (rc) { pr_warn("Unable to create mapping for hot added memory 0x%llx..0x%llx: %d\n", start, start + size, rc); diff --git a/arch/s390/mm/init.c b/arch/s390/mm/init.c index e9e4a7abd0cc..87b2d024e75a 100644 --- a/arch/s390/mm/init.c +++ b/arch/s390/mm/init.c @@ -277,6 +277,9 @@ int arch_add_memory(int nid, u64 start, u64 size, if (WARN_ON_ONCE(params->altmap)) return -EINVAL; + if (WARN_ON_ONCE(params->pgprot.pgprot != PAGE_KERNEL.pgprot)) + return -EINVAL; + rc = vmem_add_mapping(start, size); if (rc) return rc; diff --git a/arch/sh/mm/init.c b/arch/sh/mm/init.c index e5114c053364..b9de2d4fa57e 100644 --- a/arch/sh/mm/init.c +++ b/arch/sh/mm/init.c @@ -412,6 +412,9 @@ int arch_add_memory(int nid, u64 start, u64 size, unsigned long nr_pages = size >> PAGE_SHIFT; int ret; + if (WARN_ON_ONCE(params->pgprot.pgprot != PAGE_KERNEL.pgprot)) + return -EINVAL; + /* We only have ZONE_NORMAL, so this is easy.. */ ret = __add_pages(nid, start_pfn, nr_pages, params); if (unlikely(ret)) diff --git a/arch/x86/mm/init_32.c b/arch/x86/mm/init_32.c index ac75a8397804..4222a010057a 100644 --- a/arch/x86/mm/init_32.c +++ b/arch/x86/mm/init_32.c @@ -824,6 +824,18 @@ int arch_add_memory(int nid, u64 start, u64 size, { unsigned long start_pfn = start >> PAGE_SHIFT; unsigned long nr_pages = size >> PAGE_SHIFT; + int ret; + + /* + * The page tables were already mapped at boot so if the caller + * requests a different mapping type then we must change all the + * pages with __set_memory_prot(). 
+ */ + if (params->pgprot.pgprot != PAGE_KERNEL.pgprot) { + ret = __set_memory_prot(start, nr_pages, params->pgprot); + if (ret) + return ret; + } return __add_pages(nid, start_pfn, nr_pages, params); } diff --git a/arch/x86/mm/init_64.c b/arch/x86/mm/init_64.c index 7480de743105..3b289c2f75cd 100644 --- a/arch/x86/mm/init_64.c +++ b/arch/x86/mm/init_64.c @@ -867,7 +867,7 @@ int arch_add_memory(int nid, u64 start, u64 size, unsigned long start_pfn = start >> PAGE_SHIFT; unsigned long nr_pages = size >> PAGE_SHIFT; - init_memory_mapping(start, start + size, PAGE_KERNEL); + init_memory_mapping(start, start + size, params->pgprot); return add_pages(nid, start_pfn, nr_pages, params); } diff --git a/include/linux/memory_hotplug.h b/include/linux/memory_hotplug.h index 75f0f6304735..93d9ada74ddd 100644 --- a/include/linux/memory_hotplug.h +++ b/include/linux/memory_hotplug.h @@ -60,9 +60,12 @@ enum { /* * Extended parameters for memory hotplug: * altmap: alternative allocator for memmap array (optional) + * pgprot: page protection flags to apply to newly created page tables + * (required) */ struct mhp_params { struct vmem_altmap *altmap; + pgprot_t pgprot; }; /* diff --git a/mm/memory_hotplug.c b/mm/memory_hotplug.c index fbfe7b40f552..fc0aad0bc1f5 100644 --- a/mm/memory_hotplug.c +++ b/mm/memory_hotplug.c @@ -311,6 +311,9 @@ int __ref __add_pages(int nid, unsigned long pfn, unsigned long nr_pages, int err; struct vmem_altmap *altmap = params->altmap; + if (WARN_ON_ONCE(!params->pgprot.pgprot)) + return -EINVAL; + err = check_hotplug_memory_addressable(pfn, nr_pages); if (err) return err; @@ -1002,7 +1005,7 @@ static int online_memory_block(struct memory_block *mem, void *arg) */ int __ref add_memory_resource(int nid, struct resource *res) { - struct mhp_params params = {}; + struct mhp_params params = { .pgprot = PAGE_KERNEL }; u64 start, size; bool new_node = false; int ret; diff --git a/mm/memremap.c b/mm/memremap.c index b0b5170843ff..bc167cde3237 100644 --- 
a/mm/memremap.c +++ b/mm/memremap.c @@ -189,8 +189,8 @@ void *memremap_pages(struct dev_pagemap *pgmap, int nid) * We do not want any optional features only our own memmap */ .altmap = pgmap_altmap(pgmap), + .pgprot = PAGE_KERNEL, }; - pgprot_t pgprot = PAGE_KERNEL; int error, is_ram; bool need_devmap_managed = true; @@ -282,8 +282,8 @@ void *memremap_pages(struct dev_pagemap *pgmap, int nid) if (nid < 0) nid = numa_mem_id(); - error = track_pfn_remap(NULL, &pgprot, PHYS_PFN(res->start), 0, - resource_size(res)); + error = track_pfn_remap(NULL, &params.pgprot, PHYS_PFN(res->start), + 0, resource_size(res)); if (error) goto err_pfn_remap; From a50d8d98a87f33efa07adfa20747e13a93839a4b Mon Sep 17 00:00:00 2001 From: Logan Gunthorpe Date: Fri, 10 Apr 2020 14:33:39 -0700 Subject: [PATCH 270/280] mm/memremap: set caching mode for PCI P2PDMA memory to UC PCI BAR IO memory should never be mapped as WB, however prior to this the PAT bits were set WB and it was typically overridden by MTRR registers set by the firmware. Set PCI P2PDMA memory to be UC as this is what it currently, typically, ends up being mapped as on x86 after the MTRR registers override the cache setting. Future use-cases may need to generalize this by adding flags to select the caching type, as some P2PDMA cases may not want UC. However, those use-cases are not upstream yet and this can be changed when they arrive. Signed-off-by: Logan Gunthorpe Signed-off-by: Andrew Morton Reviewed-by: Dan Williams Cc: Christoph Hellwig Cc: Jason Gunthorpe Cc: Andy Lutomirski Cc: Benjamin Herrenschmidt Cc: Borislav Petkov Cc: Catalin Marinas Cc: Dave Hansen Cc: David Hildenbrand Cc: Eric Badger Cc: "H. 
Peter Anvin" Cc: Ingo Molnar Cc: Michael Ellerman Cc: Michal Hocko Cc: Paul Mackerras Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: Will Deacon Link: http://lkml.kernel.org/r/20200306170846.9333-8-logang@deltatee.com Signed-off-by: Linus Torvalds --- mm/memremap.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/mm/memremap.c b/mm/memremap.c index bc167cde3237..03e38b7a38f1 100644 --- a/mm/memremap.c +++ b/mm/memremap.c @@ -217,7 +217,10 @@ void *memremap_pages(struct dev_pagemap *pgmap, int nid) } break; case MEMORY_DEVICE_DEVDAX: + need_devmap_managed = false; + break; case MEMORY_DEVICE_PCI_P2PDMA: + params.pgprot = pgprot_noncached(params.pgprot); need_devmap_managed = false; break; default: From d7d27cfc5cf0766a26a8f56868c5ad5434735126 Mon Sep 17 00:00:00 2001 From: Eric Biggers Date: Fri, 10 Apr 2020 14:33:43 -0700 Subject: [PATCH 271/280] kmod: make request_module() return an error when autoloading is disabled Patch series "module autoloading fixes and cleanups", v5. This series fixes a bug where request_module() was reporting success to kernel code when module autoloading had been completely disabled via 'echo > /proc/sys/kernel/modprobe'. It also addresses the issues raised on the original thread (https://lkml.kernel.org/lkml/20200310223731.126894-1-ebiggers@kernel.org/T/#u) by documenting the modprobe sysctl, adding a self-test for the empty path case, and downgrading a user-reachable WARN_ONCE(). This patch (of 4): It's long been possible to disable kernel module autoloading completely (while still allowing manual module insertion) by setting /proc/sys/kernel/modprobe to the empty string. This can be preferable to setting it to a nonexistent file since it avoids the overhead of an attempted execve(), avoids potential deadlocks, and avoids the call to security_kernel_module_request() and thus on SELinux-based systems eliminates the need to write SELinux rules to dontaudit module_request. 
However, when module autoloading is disabled in this way, request_module() returns 0. This is broken because callers expect 0 to mean that the module was successfully loaded. Apparently this was never noticed because this method of disabling module autoloading isn't used much, and also most callers don't use the return value of request_module() since it's always necessary to check whether the module registered its functionality or not anyway. But improperly returning 0 can indeed confuse a few callers, for example get_fs_type() in fs/filesystems.c where it causes a WARNING to be hit: if (!fs && (request_module("fs-%.*s", len, name) == 0)) { fs = __get_fs_type(name, len); WARN_ONCE(!fs, "request_module fs-%.*s succeeded, but still no fs?\n", len, name); } This is easily reproduced with: echo > /proc/sys/kernel/modprobe mount -t NONEXISTENT none / It causes: request_module fs-NONEXISTENT succeeded, but still no fs? WARNING: CPU: 1 PID: 1106 at fs/filesystems.c:275 get_fs_type+0xd6/0xf0 [...] This should actually use pr_warn_once() rather than WARN_ONCE(), since it's also user-reachable if userspace immediately unloads the module. Regardless, request_module() should correctly return an error when it fails. So let's make it return -ENOENT, which matches the error when the modprobe binary doesn't exist. I've also sent patches to document and test this case. 
Signed-off-by: Eric Biggers Signed-off-by: Andrew Morton Reviewed-by: Kees Cook Reviewed-by: Jessica Yu Acked-by: Luis Chamberlain Cc: Alexei Starovoitov Cc: Greg Kroah-Hartman Cc: Jeff Vander Stoep Cc: Ben Hutchings Cc: Josh Triplett Cc: Link: http://lkml.kernel.org/r/20200310223731.126894-1-ebiggers@kernel.org Link: http://lkml.kernel.org/r/20200312202552.241885-1-ebiggers@kernel.org Signed-off-by: Linus Torvalds --- kernel/kmod.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/kernel/kmod.c b/kernel/kmod.c index 8b2b311afa95..37c3c4b97b8e 100644 --- a/kernel/kmod.c +++ b/kernel/kmod.c @@ -120,7 +120,7 @@ static int call_modprobe(char *module_name, int wait) * invoke it. * * If module auto-loading support is disabled then this function - * becomes a no-operation. + * simply returns -ENOENT. */ int __request_module(bool wait, const char *fmt, ...) { @@ -137,7 +137,7 @@ int __request_module(bool wait, const char *fmt, ...) WARN_ON_ONCE(wait && current_is_async()); if (!modprobe_path[0]) - return 0; + return -ENOENT; va_start(args, fmt); ret = vsnprintf(module_name, MODULE_NAME_LEN, fmt, args); From 26c5d78c976ca298e59a56f6101a97b618ba3539 Mon Sep 17 00:00:00 2001 From: Eric Biggers Date: Fri, 10 Apr 2020 14:33:47 -0700 Subject: [PATCH 272/280] fs/filesystems.c: downgrade user-reachable WARN_ONCE() to pr_warn_once() After request_module(), nothing is stopping the module from being unloaded until someone takes a reference to it via try_module_get(). The WARN_ONCE() in get_fs_type() is thus user-reachable, via userspace running 'rmmod' concurrently. Since WARN_ONCE() is for kernel bugs only, not for user-reachable situations, downgrade this warning to pr_warn_once(). Keep it printed once only, since the intent of this warning is to detect a bug in modprobe at boot time. Printing the warning more than once wouldn't really provide any useful extra information.
Fixes: 41124db869b7 ("fs: warn in case userspace lied about modprobe return") Signed-off-by: Eric Biggers Signed-off-by: Andrew Morton Reviewed-by: Jessica Yu Cc: Alexei Starovoitov Cc: Greg Kroah-Hartman Cc: Jeff Vander Stoep Cc: Jessica Yu Cc: Kees Cook Cc: Luis Chamberlain Cc: NeilBrown Cc: [4.13+] Link: http://lkml.kernel.org/r/20200312202552.241885-3-ebiggers@kernel.org Signed-off-by: Linus Torvalds --- fs/filesystems.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/fs/filesystems.c b/fs/filesystems.c index 77bf5f95362d..90b8d879fbaf 100644 --- a/fs/filesystems.c +++ b/fs/filesystems.c @@ -272,7 +272,9 @@ struct file_system_type *get_fs_type(const char *name) fs = __get_fs_type(name, len); if (!fs && (request_module("fs-%.*s", len, name) == 0)) { fs = __get_fs_type(name, len); - WARN_ONCE(!fs, "request_module fs-%.*s succeeded, but still no fs?\n", len, name); + if (!fs) + pr_warn_once("request_module fs-%.*s succeeded, but still no fs?\n", + len, name); } if (dot && fs && !(fs->fs_flags & FS_HAS_SUBTYPE)) { From 6e71582506258c9b2efd8f164706f2af2256cf16 Mon Sep 17 00:00:00 2001 From: Eric Biggers Date: Fri, 10 Apr 2020 14:33:50 -0700 Subject: [PATCH 273/280] docs: admin-guide: document the kernel.modprobe sysctl Document the kernel.modprobe sysctl in the same place that all the other kernel.* sysctls are documented. Make sure to mention how to use this sysctl to completely disable module autoloading, and how this sysctl relates to CONFIG_STATIC_USERMODEHELPER. 
[ebiggers@google.com: v5] Link: http://lkml.kernel.org/r/20200318230515.171692-4-ebiggers@kernel.org Signed-off-by: Eric Biggers Signed-off-by: Andrew Morton Cc: Alexei Starovoitov Cc: Greg Kroah-Hartman Cc: Jeff Vander Stoep Cc: Jessica Yu Cc: Kees Cook Cc: Luis Chamberlain Cc: NeilBrown Link: http://lkml.kernel.org/r/20200312202552.241885-4-ebiggers@kernel.org Signed-off-by: Linus Torvalds --- Documentation/admin-guide/sysctl/kernel.rst | 21 +++++++++++++++++++++ 1 file changed, 21 insertions(+) diff --git a/Documentation/admin-guide/sysctl/kernel.rst b/Documentation/admin-guide/sysctl/kernel.rst index 335696d3360d..39c95c0e13d3 100644 --- a/Documentation/admin-guide/sysctl/kernel.rst +++ b/Documentation/admin-guide/sysctl/kernel.rst @@ -446,6 +446,27 @@ Notes: successful IPC object allocation. If an IPC object allocation syscall fails, it is undefined if the value remains unmodified or is reset to -1. +modprobe: +========= + +The path to the usermode helper for autoloading kernel modules, by +default "/sbin/modprobe". This binary is executed when the kernel +requests a module. For example, if userspace passes an unknown +filesystem type to mount(), then the kernel will automatically request +the corresponding filesystem module by executing this usermode helper. +This usermode helper should insert the needed module into the kernel. + +This sysctl only affects module autoloading. It has no effect on the +ability to explicitly insert modules. + +If this sysctl is set to the empty string, then module autoloading is +completely disabled. The kernel will not try to execute a usermode +helper at all, nor will it call the kernel_module_request LSM hook. + +If CONFIG_STATIC_USERMODEHELPER=y is set in the kernel configuration, +then the configured static usermode helper overrides this sysctl, +except that the empty string is still accepted to completely disable +module autoloading as described above. 
nmi_watchdog ============ From 6d573a07528308eb77ec072c010819c359bebf6e Mon Sep 17 00:00:00 2001 From: Eric Biggers Date: Fri, 10 Apr 2020 14:33:53 -0700 Subject: [PATCH 274/280] selftests: kmod: fix handling test numbers above 9 get_test_count() and get_test_enabled() were broken for test numbers above 9 due to awk interpreting a field specification like '$0010' as octal rather than decimal. Fix it by stripping the leading zeroes. Signed-off-by: Eric Biggers Signed-off-by: Andrew Morton Acked-by: Luis Chamberlain Cc: Alexei Starovoitov Cc: Greg Kroah-Hartman Cc: Jeff Vander Stoep Cc: Jessica Yu Cc: Kees Cook Cc: NeilBrown Link: http://lkml.kernel.org/r/20200318230515.171692-5-ebiggers@kernel.org Signed-off-by: Linus Torvalds --- tools/testing/selftests/kmod/kmod.sh | 13 +++++++++---- 1 file changed, 9 insertions(+), 4 deletions(-) diff --git a/tools/testing/selftests/kmod/kmod.sh b/tools/testing/selftests/kmod/kmod.sh index 8b944cf042f6..315a43111e04 100755 --- a/tools/testing/selftests/kmod/kmod.sh +++ b/tools/testing/selftests/kmod/kmod.sh @@ -505,18 +505,23 @@ function test_num() fi } -function get_test_count() +function get_test_data() { test_num $1 - TEST_DATA=$(echo $ALL_TESTS | awk '{print $'$1'}') + local field_num=$(echo $1 | sed 's/^0*//') + echo $ALL_TESTS | awk '{print $'$field_num'}' +} + +function get_test_count() +{ + TEST_DATA=$(get_test_data $1) LAST_TWO=${TEST_DATA#*:*} echo ${LAST_TWO%:*} } function get_test_enabled() { - test_num $1 - TEST_DATA=$(echo $ALL_TESTS | awk '{print $'$1'}') + TEST_DATA=$(get_test_data $1) echo ${TEST_DATA#*:*:} } From 23756e551f35aaa9400a7e8a2660494115221801 Mon Sep 17 00:00:00 2001 From: Eric Biggers Date: Fri, 10 Apr 2020 14:33:57 -0700 Subject: [PATCH 275/280] selftests: kmod: test disabling module autoloading Test that request_module() fails with -ENOENT when /proc/sys/kernel/modprobe contains (a) a nonexistent path, and (b) an empty path. 
Case (b) is a regression test for the patch "kmod: make request_module() return an error when autoloading is disabled". Tested with 'kmod.sh -t 0010 && kmod.sh -t 0011', and also simply with 'kmod.sh' to run all kmod tests. Signed-off-by: Eric Biggers Signed-off-by: Andrew Morton Acked-by: Luis Chamberlain Cc: Alexei Starovoitov Cc: Greg Kroah-Hartman Cc: Jeff Vander Stoep Cc: Jessica Yu Cc: Kees Cook Cc: NeilBrown Link: http://lkml.kernel.org/r/20200312202552.241885-5-ebiggers@kernel.org Signed-off-by: Linus Torvalds --- tools/testing/selftests/kmod/kmod.sh | 30 ++++++++++++++++++++++++++++ 1 file changed, 30 insertions(+) diff --git a/tools/testing/selftests/kmod/kmod.sh b/tools/testing/selftests/kmod/kmod.sh index 315a43111e04..3702dbcc90a7 100755 --- a/tools/testing/selftests/kmod/kmod.sh +++ b/tools/testing/selftests/kmod/kmod.sh @@ -61,6 +61,8 @@ ALL_TESTS="$ALL_TESTS 0006:10:1" ALL_TESTS="$ALL_TESTS 0007:5:1" ALL_TESTS="$ALL_TESTS 0008:150:1" ALL_TESTS="$ALL_TESTS 0009:150:1" +ALL_TESTS="$ALL_TESTS 0010:1:1" +ALL_TESTS="$ALL_TESTS 0011:1:1" # Kselftest framework requirement - SKIP code is 4. ksft_skip=4 @@ -149,6 +151,7 @@ function load_req_mod() test_finish() { + echo "$MODPROBE" > /proc/sys/kernel/modprobe echo "Test completed" } @@ -443,6 +446,30 @@ kmod_test_0009() config_expect_result ${FUNCNAME[0]} SUCCESS } +kmod_test_0010() +{ + kmod_defaults_driver + config_num_threads 1 + echo "/KMOD_TEST_NONEXISTENT" > /proc/sys/kernel/modprobe + config_trigger ${FUNCNAME[0]} + config_expect_result ${FUNCNAME[0]} -ENOENT + echo "$MODPROBE" > /proc/sys/kernel/modprobe +} + +kmod_test_0011() +{ + kmod_defaults_driver + config_num_threads 1 + # This causes the kernel to not even try executing modprobe. The error + # code is still -ENOENT like when modprobe doesn't exist, so we can't + # easily test for the exact difference. But this still is a useful test + # since there was a bug where request_module() returned 0 in this case. 
+ echo > /proc/sys/kernel/modprobe + config_trigger ${FUNCNAME[0]} + config_expect_result ${FUNCNAME[0]} -ENOENT + echo "$MODPROBE" > /proc/sys/kernel/modprobe +} + list_tests() { echo "Test ID list:" @@ -460,6 +487,8 @@ list_tests() echo "0007 x $(get_test_count 0007) - multithreaded tests with default setup test request_module() and get_fs_type()" echo "0008 x $(get_test_count 0008) - multithreaded - push kmod_concurrent over max_modprobes for request_module()" echo "0009 x $(get_test_count 0009) - multithreaded - push kmod_concurrent over max_modprobes for get_fs_type()" + echo "0010 x $(get_test_count 0010) - test nonexistent modprobe path" + echo "0011 x $(get_test_count 0011) - test completely disabling module autoloading" } usage() @@ -616,6 +645,7 @@ test_reqs allow_user_defaults load_req_mod +MODPROBE=$( Date: Fri, 10 Apr 2020 14:34:00 -0700 Subject: [PATCH 276/280] =?UTF-8?q?change=20email=20address=20for=20Pali?= =?UTF-8?q?=20Roh=C3=A1r?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit For security reasons I stopped using gmail account and kernel address is now up-to-date alias to my personal address. People periodically send me emails to address which they found in source code of drivers, so this change reflects state where people can contact me. 
[ Added .mailmap entry as per Joe Perches - Linus ] Signed-off-by: Pali Rohár Signed-off-by: Andrew Morton Cc: Greg Kroah-Hartman Cc: Joe Perches Link: http://lkml.kernel.org/r/20200307104237.8199-1-pali@kernel.org Signed-off-by: Linus Torvalds --- .mailmap | 1 + .../ABI/testing/sysfs-platform-dell-laptop | 8 ++++---- MAINTAINERS | 16 ++++++++-------- arch/arm/mach-omap2/omap-secure.c | 2 +- arch/arm/mach-omap2/omap-secure.h | 2 +- arch/arm/mach-omap2/omap-smc.S | 2 +- drivers/char/hw_random/omap3-rom-rng.c | 4 ++-- drivers/hwmon/dell-smm-hwmon.c | 4 ++-- drivers/platform/x86/dell-laptop.c | 4 ++-- drivers/platform/x86/dell-rbtn.c | 4 ++-- drivers/platform/x86/dell-rbtn.h | 2 +- drivers/platform/x86/dell-smbios-base.c | 4 ++-- drivers/platform/x86/dell-smbios-smm.c | 2 +- drivers/platform/x86/dell-smbios.h | 2 +- drivers/platform/x86/dell-smo8800.c | 2 +- drivers/platform/x86/dell-wmi.c | 4 ++-- drivers/power/supply/bq2415x_charger.c | 4 ++-- drivers/power/supply/bq27xxx_battery.c | 2 +- drivers/power/supply/isp1704_charger.c | 2 +- drivers/power/supply/rx51_battery.c | 4 ++-- fs/udf/ecma_167.h | 2 +- fs/udf/osta_udf.h | 2 +- include/linux/power/bq2415x_charger.h | 2 +- tools/laptop/freefall/freefall.c | 2 +- 24 files changed, 42 insertions(+), 41 deletions(-) diff --git a/.mailmap b/.mailmap index 9198a93c2f5c..893266d1f7b0 100644 --- a/.mailmap +++ b/.mailmap @@ -210,6 +210,7 @@ Oleksij Rempel Oleksij Rempel Oleksij Rempel Oleksij Rempel +Pali Rohár Paolo 'Blaisorblade' Giarrusso Patrick Mochel Paul Burton diff --git a/Documentation/ABI/testing/sysfs-platform-dell-laptop b/Documentation/ABI/testing/sysfs-platform-dell-laptop index 8c6a0b8e1131..9b917c7453de 100644 --- a/Documentation/ABI/testing/sysfs-platform-dell-laptop +++ b/Documentation/ABI/testing/sysfs-platform-dell-laptop @@ -2,7 +2,7 @@ What: /sys/class/leds/dell::kbd_backlight/als_enabled Date: December 2014 KernelVersion: 3.19 Contact: Gabriele Mazzotta , - Pali Rohár + Pali Rohár Description: This 
file allows to control the automatic keyboard illumination mode on some systems that have an ambient @@ -13,7 +13,7 @@ What: /sys/class/leds/dell::kbd_backlight/als_setting Date: December 2014 KernelVersion: 3.19 Contact: Gabriele Mazzotta , - Pali Rohár + Pali Rohár Description: This file allows to specifiy the on/off threshold value, as reported by the ambient light sensor. @@ -22,7 +22,7 @@ What: /sys/class/leds/dell::kbd_backlight/start_triggers Date: December 2014 KernelVersion: 3.19 Contact: Gabriele Mazzotta , - Pali Rohár + Pali Rohár Description: This file allows to control the input triggers that turn on the keyboard backlight illumination that is @@ -45,7 +45,7 @@ What: /sys/class/leds/dell::kbd_backlight/stop_timeout Date: December 2014 KernelVersion: 3.19 Contact: Gabriele Mazzotta , - Pali Rohár + Pali Rohár Description: This file allows to specify the interval after which the keyboard illumination is disabled because of inactivity. diff --git a/MAINTAINERS b/MAINTAINERS index d5b1878f2815..ff043097ea0e 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -727,7 +727,7 @@ L: linux-alpha@vger.kernel.org F: arch/alpha/ ALPS PS/2 TOUCHPAD DRIVER -R: Pali Rohár +R: Pali Rohár F: drivers/input/mouse/alps.* ALTERA I2C CONTROLLER DRIVER @@ -4774,23 +4774,23 @@ F: drivers/net/fddi/defza.* DELL LAPTOP DRIVER M: Matthew Garrett -M: Pali Rohár +M: Pali Rohár L: platform-driver-x86@vger.kernel.org S: Maintained F: drivers/platform/x86/dell-laptop.c DELL LAPTOP FREEFALL DRIVER -M: Pali Rohár +M: Pali Rohár S: Maintained F: drivers/platform/x86/dell-smo8800.c DELL LAPTOP RBTN DRIVER -M: Pali Rohár +M: Pali Rohár S: Maintained F: drivers/platform/x86/dell-rbtn.* DELL LAPTOP SMM DRIVER -M: Pali Rohár +M: Pali Rohár S: Maintained F: drivers/hwmon/dell-smm-hwmon.c F: include/uapi/linux/i8k.h @@ -4802,7 +4802,7 @@ S: Maintained F: drivers/platform/x86/dell_rbu.c DELL SMBIOS DRIVER -M: Pali Rohár +M: Pali Rohár M: Mario Limonciello L: platform-driver-x86@vger.kernel.org S: 
Maintained @@ -4835,7 +4835,7 @@ F: drivers/platform/x86/dell-wmi-descriptor.c DELL WMI NOTIFICATIONS DRIVER M: Matthew Garrett -M: Pali Rohár +M: Pali Rohár S: Maintained F: drivers/platform/x86/dell-wmi.c @@ -11950,7 +11950,7 @@ F: drivers/media/i2c/et8ek8 F: drivers/media/i2c/ad5820.c NOKIA N900 POWER SUPPLY DRIVERS -R: Pali Rohár +R: Pali Rohár F: include/linux/power/bq2415x_charger.h F: include/linux/power/bq27xxx_battery.h F: drivers/power/supply/bq2415x_charger.c diff --git a/arch/arm/mach-omap2/omap-secure.c b/arch/arm/mach-omap2/omap-secure.c index d00e3c72e37d..f70d561f37f7 100644 --- a/arch/arm/mach-omap2/omap-secure.c +++ b/arch/arm/mach-omap2/omap-secure.c @@ -5,7 +5,7 @@ * Copyright (C) 2011 Texas Instruments, Inc. * Santosh Shilimkar * Copyright (C) 2012 Ivaylo Dimitrov - * Copyright (C) 2013 Pali Rohár + * Copyright (C) 2013 Pali Rohár */ #include diff --git a/arch/arm/mach-omap2/omap-secure.h b/arch/arm/mach-omap2/omap-secure.h index ba8c486c0454..4aaa95706d39 100644 --- a/arch/arm/mach-omap2/omap-secure.h +++ b/arch/arm/mach-omap2/omap-secure.h @@ -5,7 +5,7 @@ * Copyright (C) 2011 Texas Instruments, Inc. 
* Santosh Shilimkar * Copyright (C) 2012 Ivaylo Dimitrov - * Copyright (C) 2013 Pali Rohár + * Copyright (C) 2013 Pali Rohár */ #ifndef OMAP_ARCH_OMAP_SECURE_H #define OMAP_ARCH_OMAP_SECURE_H diff --git a/arch/arm/mach-omap2/omap-smc.S b/arch/arm/mach-omap2/omap-smc.S index d4832845a4e8..7376f528034d 100644 --- a/arch/arm/mach-omap2/omap-smc.S +++ b/arch/arm/mach-omap2/omap-smc.S @@ -6,7 +6,7 @@ * Written by Santosh Shilimkar * * Copyright (C) 2012 Ivaylo Dimitrov - * Copyright (C) 2013 Pali Rohár + * Copyright (C) 2013 Pali Rohár */ #include diff --git a/drivers/char/hw_random/omap3-rom-rng.c b/drivers/char/hw_random/omap3-rom-rng.c index a431c5cbe2be..e0d77fa048fb 100644 --- a/drivers/char/hw_random/omap3-rom-rng.c +++ b/drivers/char/hw_random/omap3-rom-rng.c @@ -4,7 +4,7 @@ * Copyright (C) 2009 Nokia Corporation * Author: Juha Yrjola * - * Copyright (C) 2013 Pali Rohár + * Copyright (C) 2013 Pali Rohár * * This file is licensed under the terms of the GNU General Public * License version 2. 
This program is licensed "as is" without any @@ -178,5 +178,5 @@ module_platform_driver(omap3_rom_rng_driver); MODULE_ALIAS("platform:omap3-rom-rng"); MODULE_AUTHOR("Juha Yrjola"); -MODULE_AUTHOR("Pali Rohár "); +MODULE_AUTHOR("Pali Rohár "); MODULE_LICENSE("GPL"); diff --git a/drivers/hwmon/dell-smm-hwmon.c b/drivers/hwmon/dell-smm-hwmon.c index d4c83009d625..ab719d372b0d 100644 --- a/drivers/hwmon/dell-smm-hwmon.c +++ b/drivers/hwmon/dell-smm-hwmon.c @@ -7,7 +7,7 @@ * Hwmon integration: * Copyright (C) 2011 Jean Delvare * Copyright (C) 2013, 2014 Guenter Roeck - * Copyright (C) 2014, 2015 Pali Rohár + * Copyright (C) 2014, 2015 Pali Rohár */ #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt @@ -86,7 +86,7 @@ static unsigned int auto_fan; #define I8K_HWMON_HAVE_FAN3 (1 << 12) MODULE_AUTHOR("Massimo Dal Zotto (dz@debian.org)"); -MODULE_AUTHOR("Pali Rohár "); +MODULE_AUTHOR("Pali Rohár "); MODULE_DESCRIPTION("Dell laptop SMM BIOS hwmon driver"); MODULE_LICENSE("GPL"); MODULE_ALIAS("i8k"); diff --git a/drivers/platform/x86/dell-laptop.c b/drivers/platform/x86/dell-laptop.c index 74e988f839e8..f8d3e3bd1bb5 100644 --- a/drivers/platform/x86/dell-laptop.c +++ b/drivers/platform/x86/dell-laptop.c @@ -4,7 +4,7 @@ * * Copyright (c) Red Hat * Copyright (c) 2014 Gabriele Mazzotta - * Copyright (c) 2014 Pali Rohár + * Copyright (c) 2014 Pali Rohár * * Based on documentation in the libsmbios package: * Copyright (C) 2005-2014 Dell Inc. 
@@ -2295,6 +2295,6 @@ module_exit(dell_exit); MODULE_AUTHOR("Matthew Garrett "); MODULE_AUTHOR("Gabriele Mazzotta "); -MODULE_AUTHOR("Pali Rohár "); +MODULE_AUTHOR("Pali Rohár "); MODULE_DESCRIPTION("Dell laptop driver"); MODULE_LICENSE("GPL"); diff --git a/drivers/platform/x86/dell-rbtn.c b/drivers/platform/x86/dell-rbtn.c index a6b856cd86bd..a89fad47ff13 100644 --- a/drivers/platform/x86/dell-rbtn.c +++ b/drivers/platform/x86/dell-rbtn.c @@ -1,7 +1,7 @@ // SPDX-License-Identifier: GPL-2.0-or-later /* Dell Airplane Mode Switch driver - Copyright (C) 2014-2015 Pali Rohár + Copyright (C) 2014-2015 Pali Rohár */ @@ -495,5 +495,5 @@ MODULE_PARM_DESC(auto_remove_rfkill, "Automatically remove rfkill devices when " "(default true)"); MODULE_DEVICE_TABLE(acpi, rbtn_ids); MODULE_DESCRIPTION("Dell Airplane Mode Switch driver"); -MODULE_AUTHOR("Pali Rohár "); +MODULE_AUTHOR("Pali Rohár "); MODULE_LICENSE("GPL"); diff --git a/drivers/platform/x86/dell-rbtn.h b/drivers/platform/x86/dell-rbtn.h index 0fdc81644458..5e030f926c58 100644 --- a/drivers/platform/x86/dell-rbtn.h +++ b/drivers/platform/x86/dell-rbtn.h @@ -1,7 +1,7 @@ /* SPDX-License-Identifier: GPL-2.0-or-later */ /* Dell Airplane Mode Switch driver - Copyright (C) 2014-2015 Pali Rohár + Copyright (C) 2014-2015 Pali Rohár */ diff --git a/drivers/platform/x86/dell-smbios-base.c b/drivers/platform/x86/dell-smbios-base.c index fe59b0ebff31..2e2cd565926a 100644 --- a/drivers/platform/x86/dell-smbios-base.c +++ b/drivers/platform/x86/dell-smbios-base.c @@ -4,7 +4,7 @@ * * Copyright (c) Red Hat * Copyright (c) 2014 Gabriele Mazzotta - * Copyright (c) 2014 Pali Rohár + * Copyright (c) 2014 Pali Rohár * * Based on documentation in the libsmbios package: * Copyright (C) 2005-2014 Dell Inc. 
@@ -645,7 +645,7 @@ module_exit(dell_smbios_exit); MODULE_AUTHOR("Matthew Garrett "); MODULE_AUTHOR("Gabriele Mazzotta "); -MODULE_AUTHOR("Pali Rohár "); +MODULE_AUTHOR("Pali Rohár "); MODULE_AUTHOR("Mario Limonciello "); MODULE_DESCRIPTION("Common functions for kernel modules using Dell SMBIOS"); MODULE_LICENSE("GPL"); diff --git a/drivers/platform/x86/dell-smbios-smm.c b/drivers/platform/x86/dell-smbios-smm.c index d6854d1c4119..97c52a839a3e 100644 --- a/drivers/platform/x86/dell-smbios-smm.c +++ b/drivers/platform/x86/dell-smbios-smm.c @@ -4,7 +4,7 @@ * * Copyright (c) Red Hat * Copyright (c) 2014 Gabriele Mazzotta - * Copyright (c) 2014 Pali Rohár + * Copyright (c) 2014 Pali Rohár * Copyright (c) 2017 Dell Inc. */ #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt diff --git a/drivers/platform/x86/dell-smbios.h b/drivers/platform/x86/dell-smbios.h index a7ff9803f41a..75fa8ea0476d 100644 --- a/drivers/platform/x86/dell-smbios.h +++ b/drivers/platform/x86/dell-smbios.h @@ -4,7 +4,7 @@ * * Copyright (c) Red Hat * Copyright (c) 2014 Gabriele Mazzotta - * Copyright (c) 2014 Pali Rohár + * Copyright (c) 2014 Pali Rohár * * Based on documentation in the libsmbios package: * Copyright (C) 2005-2014 Dell Inc. diff --git a/drivers/platform/x86/dell-smo8800.c b/drivers/platform/x86/dell-smo8800.c index b531fe8ab7e0..5d9304a7de1b 100644 --- a/drivers/platform/x86/dell-smo8800.c +++ b/drivers/platform/x86/dell-smo8800.c @@ -3,7 +3,7 @@ * dell-smo8800.c - Dell Latitude ACPI SMO88XX freefall sensor driver * * Copyright (C) 2012 Sonal Santan - * Copyright (C) 2014 Pali Rohár + * Copyright (C) 2014 Pali Rohár * * This is loosely based on lis3lv02d driver. 
*/ diff --git a/drivers/platform/x86/dell-wmi.c b/drivers/platform/x86/dell-wmi.c index 6669db2555fb..86e8dd6a8b33 100644 --- a/drivers/platform/x86/dell-wmi.c +++ b/drivers/platform/x86/dell-wmi.c @@ -3,7 +3,7 @@ * Dell WMI hotkeys * * Copyright (C) 2008 Red Hat - * Copyright (C) 2014-2015 Pali Rohár + * Copyright (C) 2014-2015 Pali Rohár * * Portions based on wistron_btns.c: * Copyright (C) 2005 Miloslav Trmac @@ -29,7 +29,7 @@ #include "dell-wmi-descriptor.h" MODULE_AUTHOR("Matthew Garrett "); -MODULE_AUTHOR("Pali Rohár "); +MODULE_AUTHOR("Pali Rohár "); MODULE_DESCRIPTION("Dell laptop WMI hotkeys driver"); MODULE_LICENSE("GPL"); diff --git a/drivers/power/supply/bq2415x_charger.c b/drivers/power/supply/bq2415x_charger.c index 532f6e4fcafb..a1f00ae1c180 100644 --- a/drivers/power/supply/bq2415x_charger.c +++ b/drivers/power/supply/bq2415x_charger.c @@ -2,7 +2,7 @@ /* * bq2415x charger driver * - * Copyright (C) 2011-2013 Pali Rohár + * Copyright (C) 2011-2013 Pali Rohár * * Datasheets: * http://www.ti.com/product/bq24150 @@ -1788,6 +1788,6 @@ static struct i2c_driver bq2415x_driver = { }; module_i2c_driver(bq2415x_driver); -MODULE_AUTHOR("Pali Rohár "); +MODULE_AUTHOR("Pali Rohár "); MODULE_DESCRIPTION("bq2415x charger driver"); MODULE_LICENSE("GPL"); diff --git a/drivers/power/supply/bq27xxx_battery.c b/drivers/power/supply/bq27xxx_battery.c index 664e50103eaa..942c92127b6d 100644 --- a/drivers/power/supply/bq27xxx_battery.c +++ b/drivers/power/supply/bq27xxx_battery.c @@ -4,7 +4,7 @@ * Copyright (C) 2008 Rodolfo Giometti * Copyright (C) 2008 Eurotech S.p.A. * Copyright (C) 2010-2011 Lars-Peter Clausen - * Copyright (C) 2011 Pali Rohár + * Copyright (C) 2011 Pali Rohár * Copyright (C) 2017 Liam Breck * * Based on a previous work by Copyright (C) 2008 Texas Instruments, Inc. 
diff --git a/drivers/power/supply/isp1704_charger.c b/drivers/power/supply/isp1704_charger.c index 4812ac1ff2df..b6efc454e4f0 100644 --- a/drivers/power/supply/isp1704_charger.c +++ b/drivers/power/supply/isp1704_charger.c @@ -3,7 +3,7 @@ * ISP1704 USB Charger Detection driver * * Copyright (C) 2010 Nokia Corporation - * Copyright (C) 2012 - 2013 Pali Rohár + * Copyright (C) 2012 - 2013 Pali Rohár */ #include diff --git a/drivers/power/supply/rx51_battery.c b/drivers/power/supply/rx51_battery.c index 8548b639ff2f..6e488ecf4dcb 100644 --- a/drivers/power/supply/rx51_battery.c +++ b/drivers/power/supply/rx51_battery.c @@ -2,7 +2,7 @@ /* * Nokia RX-51 battery driver * - * Copyright (C) 2012 Pali Rohár + * Copyright (C) 2012 Pali Rohár */ #include @@ -278,6 +278,6 @@ static struct platform_driver rx51_battery_driver = { module_platform_driver(rx51_battery_driver); MODULE_ALIAS("platform:rx51-battery"); -MODULE_AUTHOR("Pali Rohár "); +MODULE_AUTHOR("Pali Rohár "); MODULE_DESCRIPTION("Nokia RX-51 battery driver"); MODULE_LICENSE("GPL"); diff --git a/fs/udf/ecma_167.h b/fs/udf/ecma_167.h index 3fd85464abd5..736ebc5dc441 100644 --- a/fs/udf/ecma_167.h +++ b/fs/udf/ecma_167.h @@ -5,7 +5,7 @@ * http://www.ecma.ch * * Copyright (c) 2001-2002 Ben Fennema - * Copyright (c) 2017-2019 Pali Rohár + * Copyright (c) 2017-2019 Pali Rohár * All rights reserved. * * Redistribution and use in source and binary forms, with or without diff --git a/fs/udf/osta_udf.h b/fs/udf/osta_udf.h index 35e61b2cacfe..d5fbfab3ddb6 100644 --- a/fs/udf/osta_udf.h +++ b/fs/udf/osta_udf.h @@ -5,7 +5,7 @@ * http://www.osta.org * * Copyright (c) 2001-2004 Ben Fennema - * Copyright (c) 2017-2019 Pali Rohár + * Copyright (c) 2017-2019 Pali Rohár * All rights reserved. 
* * Redistribution and use in source and binary forms, with or without diff --git a/include/linux/power/bq2415x_charger.h b/include/linux/power/bq2415x_charger.h index 7a91b357e3ac..4ca08321e251 100644 --- a/include/linux/power/bq2415x_charger.h +++ b/include/linux/power/bq2415x_charger.h @@ -2,7 +2,7 @@ /* * bq2415x charger driver * - * Copyright (C) 2011-2013 Pali Rohár + * Copyright (C) 2011-2013 Pali Rohár */ #ifndef BQ2415X_CHARGER_H diff --git a/tools/laptop/freefall/freefall.c b/tools/laptop/freefall/freefall.c index d29a86cda87f..d77d7861787c 100644 --- a/tools/laptop/freefall/freefall.c +++ b/tools/laptop/freefall/freefall.c @@ -4,7 +4,7 @@ * Copyright 2008 Eric Piel * Copyright 2009 Pavel Machek * Copyright 2012 Sonal Santan - * Copyright 2014 Pali Rohár + * Copyright 2014 Pali Rohár */ #include From cb8d9937e85559ea0ad1f4f83df8ad1288fed47d Mon Sep 17 00:00:00 2001 From: kbuild test robot Date: Fri, 10 Apr 2020 14:34:03 -0700 Subject: [PATCH 277/280] drivers/dma/tegra20-apb-dma.c: fix platform_get_irq.cocci warnings Remove dev_err() messages after platform_get_irq*() failures. platform_get_irq() already prints an error. 
Generated by: scripts/coccinelle/api/platform_get_irq.cocci Fixes: 6c41ac96ad92 ("dmaengine: tegra-apb: Support COMPILE_TEST") Signed-off-by: kbuild test robot Signed-off-by: Julia Lawall Signed-off-by: Andrew Morton Reviewed-by: Dmitry Osipenko Acked-by: Thierry Reding Cc: Laxman Dewangan Cc: Vinod Koul Cc: Stephen Warren Cc: Jon Hunter Link: http://lkml.kernel.org/r/alpine.DEB.2.21.2002271133450.2973@hadrien Signed-off-by: Linus Torvalds --- drivers/dma/tegra20-apb-dma.c | 1 - 1 file changed, 1 deletion(-) diff --git a/drivers/dma/tegra20-apb-dma.c b/drivers/dma/tegra20-apb-dma.c index 265303a396ca..f6a2f42ffc51 100644 --- a/drivers/dma/tegra20-apb-dma.c +++ b/drivers/dma/tegra20-apb-dma.c @@ -1493,7 +1493,6 @@ static int tegra_dma_probe(struct platform_device *pdev) irq = platform_get_irq(pdev, i); if (irq < 0) { ret = irq; - dev_err(&pdev->dev, "No irq resource for chan %d\n", i); goto err_pm_disable; } From 3bfa7e141b0bbb818b25e0daafb65aee92e49ac4 Mon Sep 17 00:00:00 2001 From: Vasily Averin Date: Fri, 10 Apr 2020 14:34:06 -0700 Subject: [PATCH 278/280] fs/seq_file.c: seq_read(): add info message about buggy .next functions Patch series "seq_file .next functions should increase position index". In Aug 2018 NeilBrown noticed commit 1f4aace60b0e ("fs/seq_file.c: simplify seq_file iteration code and interface") "Some ->next functions do not increment *pos when they return NULL... Note that such ->next functions are buggy and should be fixed. A simple demonstration is dd if=/proc/swaps bs=1000 skip=1 Choose any block size larger than the size of /proc/swaps. This will always show the whole last line of /proc/swaps" The described problem still exists. If you lseek into the middle of the last output line, the following read will output the end of the last line and then the whole last line once again.
$ dd if=/proc/swaps bs=1 # usual output Filename Type Size Used Priority /dev/dm-0 partition 4194812 97536 -2 104+0 records in 104+0 records out 104 bytes copied $ dd if=/proc/swaps bs=40 skip=1 # last line was generated twice dd: /proc/swaps: cannot skip to specified offset v/dm-0 partition 4194812 97536 -2 /dev/dm-0 partition 4194812 97536 -2 3+1 records in 3+1 records out 131 bytes copied There are a lot of other affected files; I've found 30+, including /proc/net/ip_tables_matches and /proc/sysvipc/* I've already sent patches to the mailing lists of the affected subsystems. This patch-set fixes the problem in files related to pstore, tracing, gcov, sysvipc and other subsystems processed via linux-kernel@ mailing list directly https://bugzilla.kernel.org/show_bug.cgi?id=206283 This patch (of 4): Add debug code to seq_read() to detect missed or out-of-tree incorrect .next seq_file functions. [akpm@linux-foundation.org: s/pr_info/pr_info_ratelimited/, per Qian Cai] https://bugzilla.kernel.org/show_bug.cgi?id=206283 Signed-off-by: Vasily Averin Signed-off-by: Andrew Morton Cc: NeilBrown Cc: Al Viro Cc: Steven Rostedt Cc: Davidlohr Bueso Cc: Ingo Molnar Cc: Manfred Spraul Cc: Peter Oberparleiter Cc: Waiman Long Link: http://lkml.kernel.org/r/244674e5-760c-86bd-d08a-047042881748@virtuozzo.com Link: http://lkml.kernel.org/r/7c24087c-e280-e580-5b0c-0cdaeb14cd18@virtuozzo.com Signed-off-by: Linus Torvalds --- fs/seq_file.c | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/fs/seq_file.c b/fs/seq_file.c index 79781ebd2145..70f5fdf99bf6 100644 --- a/fs/seq_file.c +++ b/fs/seq_file.c @@ -232,9 +232,12 @@ ssize_t seq_read(struct file *file, char __user *buf, size_t size, loff_t *ppos) loff_t pos = m->index; p = m->op->next(m, p, &m->index); - if (pos == m->index) - /* Buggy ->next function */ + if (pos == m->index) { + pr_info_ratelimited("buggy seq_file .next function %ps " + "did not updated position index\n", + m->op->next); m->index++; + } if (!p || IS_ERR(p))
{ err = PTR_ERR(p); break; From f4d74ef6220c1eda0875da30457bef5c7111ab06 Mon Sep 17 00:00:00 2001 From: Vasily Averin Date: Fri, 10 Apr 2020 14:34:10 -0700 Subject: [PATCH 279/280] kernel/gcov/fs.c: gcov_seq_next() should increase position index If seq_file .next function does not change position index, read after some lseek can generate unexpected output. https://bugzilla.kernel.org/show_bug.cgi?id=206283 Signed-off-by: Vasily Averin Signed-off-by: Andrew Morton Acked-by: Peter Oberparleiter Cc: Al Viro Cc: Davidlohr Bueso Cc: Ingo Molnar Cc: Manfred Spraul Cc: NeilBrown Cc: Steven Rostedt Cc: Waiman Long Link: http://lkml.kernel.org/r/f65c6ee7-bd00-f910-2f8a-37cc67e4ff88@virtuozzo.com Signed-off-by: Linus Torvalds --- kernel/gcov/fs.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/kernel/gcov/fs.c b/kernel/gcov/fs.c index 5e891c3c2d93..82babf5aa077 100644 --- a/kernel/gcov/fs.c +++ b/kernel/gcov/fs.c @@ -108,9 +108,9 @@ static void *gcov_seq_next(struct seq_file *seq, void *data, loff_t *pos) { struct gcov_iterator *iter = data; + (*pos)++; if (gcov_iter_next(iter)) return NULL; - (*pos)++; return iter; } From 89163f93c6f969da5811af5377cc10173583123b Mon Sep 17 00:00:00 2001 From: Vasily Averin Date: Fri, 10 Apr 2020 14:34:13 -0700 Subject: [PATCH 280/280] ipc/util.c: sysvipc_find_ipc() should increase position index If seq_file .next function does not change position index, read after some lseek can generate unexpected output. 
https://bugzilla.kernel.org/show_bug.cgi?id=206283 Signed-off-by: Vasily Averin Signed-off-by: Andrew Morton Acked-by: Waiman Long Cc: Davidlohr Bueso Cc: Manfred Spraul Cc: Al Viro Cc: Ingo Molnar Cc: NeilBrown Cc: Peter Oberparleiter Cc: Steven Rostedt Link: http://lkml.kernel.org/r/b7a20945-e315-8bb0-21e6-3875c14a8494@virtuozzo.com Signed-off-by: Linus Torvalds --- ipc/util.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/ipc/util.c b/ipc/util.c index 97638eb2d7cb..7acccfded7cb 100644 --- a/ipc/util.c +++ b/ipc/util.c @@ -764,13 +764,13 @@ static struct kern_ipc_perm *sysvipc_find_ipc(struct ipc_ids *ids, loff_t pos, total++; } + *new_pos = pos + 1; if (total >= ids->in_use) return NULL; for (; pos < ipc_mni; pos++) { ipc = idr_find(&ids->ipcs_idr, pos); if (ipc != NULL) { - *new_pos = pos + 1; rcu_read_lock(); ipc_lock_object(ipc); return ipc;