From 90eef9f7120835ad4b330dfb66b7fa2ce958b208 Mon Sep 17 00:00:00 2001 From: Zheng Yongjun Date: Mon, 14 Dec 2020 21:41:18 +0800 Subject: [PATCH 001/414] RDMA: Convert comma to semicolon Replace a comma between expression statements by a semicolon. Link: https://lore.kernel.org/r/20201214134118.4349-1-zhengyongjun3@huawei.com Link: https://lore.kernel.org/r/20201214134146.4456-1-zhengyongjun3@huawei.com Link: https://lore.kernel.org/r/20201214134218.4510-1-zhengyongjun3@huawei.com Link: https://lore.kernel.org/r/20201214134243.4563-1-zhengyongjun3@huawei.com Signed-off-by: Zheng Yongjun Signed-off-by: Jason Gunthorpe --- drivers/infiniband/hw/hfi1/qsfp.c | 4 ++-- drivers/infiniband/sw/siw/siw_main.c | 2 +- drivers/infiniband/ulp/ipoib/ipoib_multicast.c | 2 +- drivers/infiniband/ulp/opa_vnic/opa_vnic_vema.c | 2 +- 4 files changed, 5 insertions(+), 5 deletions(-) diff --git a/drivers/infiniband/hw/hfi1/qsfp.c b/drivers/infiniband/hw/hfi1/qsfp.c index 8386c84c2d92..38f311f855b5 100644 --- a/drivers/infiniband/hw/hfi1/qsfp.c +++ b/drivers/infiniband/hw/hfi1/qsfp.c @@ -242,7 +242,7 @@ static int i2c_bus_write(struct hfi1_devdata *dd, struct hfi1_i2c_bus *i2c, msgs[0].buf = offset_bytes; msgs[1].addr = slave_addr; - msgs[1].flags = I2C_M_NOSTART, + msgs[1].flags = I2C_M_NOSTART; msgs[1].len = len; msgs[1].buf = data; break; @@ -290,7 +290,7 @@ static int i2c_bus_read(struct hfi1_devdata *dd, struct hfi1_i2c_bus *bus, msgs[0].buf = offset_bytes; msgs[1].addr = slave_addr; - msgs[1].flags = I2C_M_RD, + msgs[1].flags = I2C_M_RD; msgs[1].len = len; msgs[1].buf = data; break; diff --git a/drivers/infiniband/sw/siw/siw_main.c b/drivers/infiniband/sw/siw/siw_main.c index ee95cf29179d..81a294269592 100644 --- a/drivers/infiniband/sw/siw/siw_main.c +++ b/drivers/infiniband/sw/siw/siw_main.c @@ -357,7 +357,7 @@ static struct siw_device *siw_device_create(struct net_device *netdev) sizeof(base_dev->iw_ifname)); /* Disable TCP port mapping */ - base_dev->iw_driver_flags = IW_F_NO_PORT_MAP, + base_dev->iw_driver_flags = IW_F_NO_PORT_MAP; sdev->attrs.max_qp = SIW_MAX_QP; sdev->attrs.max_qp_wr = SIW_MAX_QP_WR; diff --git a/drivers/infiniband/ulp/ipoib/ipoib_multicast.c b/drivers/infiniband/ulp/ipoib/ipoib_multicast.c index 86e4ed64e4e2..e3e4447c0f51 100644 --- a/drivers/infiniband/ulp/ipoib/ipoib_multicast.c +++ b/drivers/infiniband/ulp/ipoib/ipoib_multicast.c @@ -275,7 +275,7 @@ static int ipoib_mcast_join_finish(struct ipoib_mcast *mcast, memset(&av, 0, sizeof(av)); av.type = rdma_ah_find_type(priv->ca, priv->port); - rdma_ah_set_dlid(&av, be16_to_cpu(mcast->mcmember.mlid)), + rdma_ah_set_dlid(&av, be16_to_cpu(mcast->mcmember.mlid)); rdma_ah_set_port_num(&av, priv->port); rdma_ah_set_sl(&av, mcast->mcmember.sl); rdma_ah_set_static_rate(&av, mcast->mcmember.rate); diff --git a/drivers/infiniband/ulp/opa_vnic/opa_vnic_vema.c b/drivers/infiniband/ulp/opa_vnic/opa_vnic_vema.c index 4933085a864a..cecf0f7cadf9 100644 --- a/drivers/infiniband/ulp/opa_vnic/opa_vnic_vema.c +++ b/drivers/infiniband/ulp/opa_vnic/opa_vnic_vema.c @@ -233,7 +233,7 @@ static void vema_get_class_port_info(struct opa_vnic_vema_port *port, port_info = (struct opa_class_port_info *)rsp_mad->data; memcpy(port_info, &port->class_port_info, sizeof(*port_info)); - port_info->base_version = OPA_MGMT_BASE_VERSION, + port_info->base_version = OPA_MGMT_BASE_VERSION; port_info->class_version = OPA_EMA_CLASS_VERSION; /* From 6847f4392467970aeb992b568893f16aa64ddc18 Mon Sep 17 00:00:00 2001 From: Tom Rix Date: Wed, 23 Dec 2020 11:30:41 -0800 Subject: [PATCH 002/414] RDMA/hns: remove h from printk format specifier This change fixes the checkpatch warning described in commit cbacb5ab0aa0 ("docs: printk-formats: Stop encouraging use of unnecessary %h[xudi] and %hh[xudi]") Standard integer promotion is already done and %hx and %hhx is useless so do not encourage the use of %hh[xudi] or %h[xudi]. Link: https://lore.kernel.org/r/20201223193041.122850-1-trix@redhat.com Signed-off-by: Tom Rix Reviewed-by: Leon Romanovsky Signed-off-by: Jason Gunthorpe --- drivers/infiniband/hw/hns/hns_roce_hw_v2.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/infiniband/hw/hns/hns_roce_hw_v2.c b/drivers/infiniband/hw/hns/hns_roce_hw_v2.c index 833e1f259936..4c068899c52b 100644 --- a/drivers/infiniband/hw/hns/hns_roce_hw_v2.c +++ b/drivers/infiniband/hw/hns/hns_roce_hw_v2.c @@ -361,7 +361,7 @@ static int check_send_valid(struct hns_roce_dev *hr_dev, } else if (unlikely(hr_qp->state == IB_QPS_RESET || hr_qp->state == IB_QPS_INIT || hr_qp->state == IB_QPS_RTR)) { - ibdev_err(ibdev, "failed to post WQE, QP state %hhu!\n", + ibdev_err(ibdev, "failed to post WQE, QP state %u!\n", hr_qp->state); return -EINVAL; } else if (unlikely(hr_dev->state >= HNS_ROCE_DEVICE_STATE_RST_DOWN)) { From fe91b2366a0bc2a426f8f88e110d5410f1feadd8 Mon Sep 17 00:00:00 2001 From: Tom Rix Date: Tue, 15 Dec 2020 10:35:09 -0800 Subject: [PATCH 003/414] RDMA/hfi1: remove h from printk format specifier See Documentation/core-api/printk-formats.rst. h should no longer be used in the format specifier for printk. Link: https://lore.kernel.org/r/20201215183509.2072517-1-trix@redhat.com Signed-off-by: Tom Rix Acked-by: Mike Marciniszyn Signed-off-by: Jason Gunthorpe --- drivers/infiniband/hw/hfi1/sdma.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/infiniband/hw/hfi1/sdma.c b/drivers/infiniband/hw/hfi1/sdma.c index a307d4c8b15a..27ec2851160a 100644 --- a/drivers/infiniband/hw/hfi1/sdma.c +++ b/drivers/infiniband/hw/hfi1/sdma.c @@ -1740,7 +1740,7 @@ static inline u16 sdma_gethead(struct sdma_engine *sde) sane = (hwhead == swhead); if (unlikely(!sane)) { - dd_dev_err(dd, "SDMA(%u) bad head (%s) hwhd=%hu swhd=%hu swtl=%hu cnt=%hu\n", + dd_dev_err(dd, "SDMA(%u) bad head (%s) hwhd=%u swhd=%u swtl=%u cnt=%u\n", sde->this_idx, use_dmahead ? "dma" : "kreg", hwhead, swhead, swtail, cnt); From 0ccccb045c870a34730319cb1fb9cad8c8d53f2b Mon Sep 17 00:00:00 2001 From: Zheng Yongjun Date: Wed, 16 Dec 2020 16:02:19 +0800 Subject: [PATCH 004/414] RDMA/cma: Delete useless kfree code The parameter of kfree function is NULL, so kfree code is useless, delete it. Therefore, goto expression is no longer needed, so simplify it. cma_dev_group is always pre-zero'd before reaching make_cma_ports, so the NULL set to cma_dev_group->ports is unneeded too. Link: https://lore.kernel.org/r/20201216080219.18184-1-zhengyongjun3@huawei.com Signed-off-by: Zheng Yongjun Signed-off-by: Jason Gunthorpe --- drivers/infiniband/core/cma_configfs.c | 12 ++---------- 1 file changed, 2 insertions(+), 10 deletions(-) diff --git a/drivers/infiniband/core/cma_configfs.c b/drivers/infiniband/core/cma_configfs.c index 7f70e5a7de10..9fa1653fb7b7 100644 --- a/drivers/infiniband/core/cma_configfs.c +++ b/drivers/infiniband/core/cma_configfs.c @@ -202,7 +202,6 @@ static int make_cma_ports(struct cma_dev_group *cma_dev_group, unsigned int i; unsigned int ports_num; struct cma_dev_port_group *ports; - int err; ibdev = cma_get_ib_dev(cma_dev); @@ -213,10 +212,8 @@ static int make_cma_ports(struct cma_dev_group *cma_dev_group, ports = kcalloc(ports_num, sizeof(*cma_dev_group->ports), GFP_KERNEL); - if (!ports) { - err = -ENOMEM; - goto free; - } + if (!ports) + return -ENOMEM; for (i = 0; i < ports_num; i++) { char port_str[10]; @@ -232,12 +229,7 @@ static int make_cma_ports(struct cma_dev_group *cma_dev_group, } cma_dev_group->ports = ports; - return 0; -free: - kfree(ports); - cma_dev_group->ports = NULL; - return err; } static void release_cma_dev(struct config_item *item) From aaf1226bd95b25b910617019a71d0de13f221c9d Mon Sep 17 00:00:00 2001 From: Zheng Yongjun Date: Tue, 29 Dec 2020 21:52:23 +0800 Subject: [PATCH 005/414] RDMA: Use kzalloc for allocating only one thing Use kzalloc rather than kcalloc(1,...) The semantic patch that makes this change is as follows: (http://coccinelle.lip6.fr/) // @@ @@ - kcalloc(1, + kzalloc( ...) // Link: https://lore.kernel.org/r/20201229135223.23815-1-zhengyongjun3@huawei.com Link: https://lore.kernel.org/r/20201229135232.23869-1-zhengyongjun3@huawei.com Signed-off-by: Zheng Yongjun Signed-off-by: Jason Gunthorpe --- drivers/infiniband/core/rw.c | 2 +- drivers/infiniband/hw/cxgb4/restrack.c | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/infiniband/core/rw.c b/drivers/infiniband/core/rw.c index a96030b784eb..31156e22d3e7 100644 --- a/drivers/infiniband/core/rw.c +++ b/drivers/infiniband/core/rw.c @@ -410,7 +410,7 @@ int rdma_rw_ctx_signature_init(struct rdma_rw_ctx *ctx, struct ib_qp *qp, ctx->type = RDMA_RW_SIG_MR; ctx->nr_ops = 1; - ctx->reg = kcalloc(1, sizeof(*ctx->reg), GFP_KERNEL); + ctx->reg = kzalloc(sizeof(*ctx->reg), GFP_KERNEL); if (!ctx->reg) { ret = -ENOMEM; goto out_unmap_prot_sg; diff --git a/drivers/infiniband/hw/cxgb4/restrack.c b/drivers/infiniband/hw/cxgb4/restrack.c index b32e6516d65f..ff645b955a08 100644 --- a/drivers/infiniband/hw/cxgb4/restrack.c +++ b/drivers/infiniband/hw/cxgb4/restrack.c @@ -209,7 +209,7 @@ int c4iw_fill_res_cm_id_entry(struct sk_buff *msg, epcp = (struct c4iw_ep_common *)iw_cm_id->provider_data; if (!epcp) return 0; - uep = kcalloc(1, sizeof(*uep), GFP_KERNEL); + uep = kzalloc(sizeof(*uep), GFP_KERNEL); if (!uep) return 0; From 661f385961f06f36da24cf408d461f988d0c39ad Mon Sep 17 00:00:00 2001 From: Bernard Metzler Date: Fri, 8 Jan 2021 13:58:45 +0100 Subject: [PATCH 006/414] RDMA/siw: Fix handling of zero-sized Read and Receive Queues. During connection setup, the application may choose to zero-size inbound and outbound READ queues, as well as the Receive queue. This patch fixes handling of zero-sized queues, but not prevents it. Kamal Heib says in an initial error report: When running the blktests over siw the following shift-out-of-bounds is reported, this is happening because the passed IRD or ORD from the ulp could be zero which will lead to unexpected behavior when calling roundup_pow_of_two(), fix that by blocking zero values of ORD or IRD. UBSAN: shift-out-of-bounds in ./include/linux/log2.h:57:13 shift exponent 64 is too large for 64-bit type 'long unsigned int' CPU: 20 PID: 3957 Comm: kworker/u64:13 Tainted: G S 5.10.0-rc6 #2 Hardware name: Dell Inc. PowerEdge R630/02C2CP, BIOS 2.1.5 04/11/2016 Workqueue: iw_cm_wq cm_work_handler [iw_cm] Call Trace: dump_stack+0x99/0xcb ubsan_epilogue+0x5/0x40 __ubsan_handle_shift_out_of_bounds.cold.11+0xb4/0xf3 ? down_write+0x183/0x3d0 siw_qp_modify.cold.8+0x2d/0x32 [siw] ? __local_bh_enable_ip+0xa5/0xf0 siw_accept+0x906/0x1b60 [siw] ? xa_load+0x147/0x1f0 ? siw_connect+0x17a0/0x17a0 [siw] ? lock_downgrade+0x700/0x700 ? siw_get_base_qp+0x1c2/0x340 [siw] ? _raw_spin_unlock_irqrestore+0x39/0x40 iw_cm_accept+0x1f4/0x430 [iw_cm] rdma_accept+0x3fa/0xb10 [rdma_cm] ? check_flush_dependency+0x410/0x410 ? cma_rep_recv+0x570/0x570 [rdma_cm] nvmet_rdma_queue_connect+0x1a62/0x2680 [nvmet_rdma] ? nvmet_rdma_alloc_cmds+0xce0/0xce0 [nvmet_rdma] ? lock_release+0x56e/0xcc0 ? lock_downgrade+0x700/0x700 ? lock_downgrade+0x700/0x700 ? __xa_alloc_cyclic+0xef/0x350 ? __xa_alloc+0x2d0/0x2d0 ? rdma_restrack_add+0xbe/0x2c0 [ib_core] ? __ww_mutex_die+0x190/0x190 cma_cm_event_handler+0xf2/0x500 [rdma_cm] iw_conn_req_handler+0x910/0xcb0 [rdma_cm] ? _raw_spin_unlock_irqrestore+0x39/0x40 ? trace_hardirqs_on+0x1c/0x150 ? cma_ib_handler+0x8a0/0x8a0 [rdma_cm] ? __kasan_kmalloc.constprop.7+0xc1/0xd0 cm_work_handler+0x121c/0x17a0 [iw_cm] ? iw_cm_reject+0x190/0x190 [iw_cm] ? trace_hardirqs_on+0x1c/0x150 process_one_work+0x8fb/0x16c0 ? pwq_dec_nr_in_flight+0x320/0x320 worker_thread+0x87/0xb40 ? __kthread_parkme+0xd1/0x1a0 ? process_one_work+0x16c0/0x16c0 kthread+0x35f/0x430 ? kthread_mod_delayed_work+0x180/0x180 ret_from_fork+0x22/0x30 Fixes: a531975279f3 ("rdma/siw: main include file") Fixes: f29dd55b0236 ("rdma/siw: queue pair methods") Fixes: 8b6a361b8c48 ("rdma/siw: receive path") Fixes: b9be6f18cf9e ("rdma/siw: transmit path") Fixes: 303ae1cdfdf7 ("rdma/siw: application interface") Link: https://lore.kernel.org/r/20210108125845.1803-1-bmt@zurich.ibm.com Reported-by: Kamal Heib Reported-by: Yi Zhang Reported-by: kernel test robot Signed-off-by: Bernard Metzler Signed-off-by: Jason Gunthorpe --- drivers/infiniband/sw/siw/siw.h | 2 +- drivers/infiniband/sw/siw/siw_qp.c | 289 ++++++++++++++------------ drivers/infiniband/sw/siw/siw_qp_rx.c | 26 ++- drivers/infiniband/sw/siw/siw_qp_tx.c | 4 +- drivers/infiniband/sw/siw/siw_verbs.c | 20 +- 5 files changed, 186 insertions(+), 155 deletions(-) diff --git a/drivers/infiniband/sw/siw/siw.h b/drivers/infiniband/sw/siw/siw.h index adda78996219..368959ae9a8c 100644 --- a/drivers/infiniband/sw/siw/siw.h +++ b/drivers/infiniband/sw/siw/siw.h @@ -653,7 +653,7 @@ static inline struct siw_sqe *orq_get_free(struct siw_qp *qp) { struct siw_sqe *orq_e = orq_get_tail(qp); - if (orq_e && READ_ONCE(orq_e->flags) == 0) + if (READ_ONCE(orq_e->flags) == 0) return orq_e; return NULL; diff --git a/drivers/infiniband/sw/siw/siw_qp.c b/drivers/infiniband/sw/siw/siw_qp.c index 875d36d4b1c6..ddb2e66f9f13 100644 --- a/drivers/infiniband/sw/siw/siw_qp.c +++ b/drivers/infiniband/sw/siw/siw_qp.c @@ -199,26 +199,26 @@ void siw_qp_llp_write_space(struct sock *sk) static int siw_qp_readq_init(struct siw_qp *qp, int irq_size, int orq_size) { - irq_size = roundup_pow_of_two(irq_size); - orq_size = roundup_pow_of_two(orq_size); - + if (irq_size) { + irq_size = roundup_pow_of_two(irq_size); + qp->irq = vzalloc(irq_size * sizeof(struct siw_sqe)); + if (!qp->irq) { + qp->attrs.irq_size = 0; + return -ENOMEM; + } + } + if (orq_size) { + orq_size = roundup_pow_of_two(orq_size); + qp->orq = vzalloc(orq_size * sizeof(struct siw_sqe)); + if (!qp->orq) { + qp->attrs.orq_size = 0; + qp->attrs.irq_size = 0; + vfree(qp->irq); + return -ENOMEM; + } + } qp->attrs.irq_size = irq_size; qp->attrs.orq_size = orq_size; - - qp->irq = vzalloc(irq_size * sizeof(struct siw_sqe)); - if (!qp->irq) { - siw_dbg_qp(qp, "irq malloc for %d failed\n", irq_size); - qp->attrs.irq_size = 0; - return -ENOMEM; - } - qp->orq = vzalloc(orq_size * sizeof(struct siw_sqe)); - if (!qp->orq) { - siw_dbg_qp(qp, "orq malloc for %d failed\n", orq_size); - qp->attrs.orq_size = 0; - qp->attrs.irq_size = 0; - vfree(qp->irq); - return -ENOMEM; - } siw_dbg_qp(qp, "ORD %d, IRD %d\n", orq_size, irq_size); return 0; } @@ -288,13 +288,14 @@ int siw_qp_mpa_rts(struct siw_qp *qp, enum mpa_v2_ctrl ctrl) if (ctrl & MPA_V2_RDMA_WRITE_RTR) wqe->sqe.opcode = SIW_OP_WRITE; else if (ctrl & MPA_V2_RDMA_READ_RTR) { - struct siw_sqe *rreq; + struct siw_sqe *rreq = NULL; wqe->sqe.opcode = SIW_OP_READ; spin_lock(&qp->orq_lock); - rreq = orq_get_free(qp); + if (qp->attrs.orq_size) + rreq = orq_get_free(qp); if (rreq) { siw_read_to_orq(rreq, &wqe->sqe); qp->orq_put++; @@ -877,6 +878,96 @@ void siw_read_to_orq(struct siw_sqe *rreq, struct siw_sqe *sqe) rreq->num_sge = 1; } +static int siw_activate_tx_from_sq(struct siw_qp *qp) +{ + struct siw_sqe *sqe; + struct siw_wqe *wqe = tx_wqe(qp); + int rv = 1; + + sqe = sq_get_next(qp); + if (!sqe) + return 0; + + memset(wqe->mem, 0, sizeof(*wqe->mem) * SIW_MAX_SGE); + wqe->wr_status = SIW_WR_QUEUED; + + /* First copy SQE to kernel private memory */ + memcpy(&wqe->sqe, sqe, sizeof(*sqe)); + + if (wqe->sqe.opcode >= SIW_NUM_OPCODES) { + rv = -EINVAL; + goto out; + } + if (wqe->sqe.flags & SIW_WQE_INLINE) { + if (wqe->sqe.opcode != SIW_OP_SEND && + wqe->sqe.opcode != SIW_OP_WRITE) { + rv = -EINVAL; + goto out; + } + if (wqe->sqe.sge[0].length > SIW_MAX_INLINE) { + rv = -EINVAL; + goto out; + } + wqe->sqe.sge[0].laddr = (uintptr_t)&wqe->sqe.sge[1]; + wqe->sqe.sge[0].lkey = 0; + wqe->sqe.num_sge = 1; + } + if (wqe->sqe.flags & SIW_WQE_READ_FENCE) { + /* A READ cannot be fenced */ + if (unlikely(wqe->sqe.opcode == SIW_OP_READ || + wqe->sqe.opcode == + SIW_OP_READ_LOCAL_INV)) { + siw_dbg_qp(qp, "cannot fence read\n"); + rv = -EINVAL; + goto out; + } + spin_lock(&qp->orq_lock); + + if (qp->attrs.orq_size && !siw_orq_empty(qp)) { + qp->tx_ctx.orq_fence = 1; + rv = 0; + } + spin_unlock(&qp->orq_lock); + + } else if (wqe->sqe.opcode == SIW_OP_READ || + wqe->sqe.opcode == SIW_OP_READ_LOCAL_INV) { + struct siw_sqe *rreq; + + if (unlikely(!qp->attrs.orq_size)) { + /* We negotiated not to send READ req's */ + rv = -EINVAL; + goto out; + } + wqe->sqe.num_sge = 1; + + spin_lock(&qp->orq_lock); + + rreq = orq_get_free(qp); + if (rreq) { + /* + * Make an immediate copy in ORQ to be ready + * to process loopback READ reply + */ + siw_read_to_orq(rreq, &wqe->sqe); + qp->orq_put++; + } else { + qp->tx_ctx.orq_fence = 1; + rv = 0; + } + spin_unlock(&qp->orq_lock); + } + + /* Clear SQE, can be re-used by application */ + smp_store_mb(sqe->flags, 0); + qp->sq_get++; +out: + if (unlikely(rv < 0)) { + siw_dbg_qp(qp, "error %d\n", rv); + wqe->wr_status = SIW_WR_IDLE; + } + return rv; +} + /* * Must be called with SQ locked. * To avoid complete SQ starvation by constant inbound READ requests, @@ -885,133 +976,55 @@ void siw_read_to_orq(struct siw_sqe *rreq, struct siw_sqe *sqe) */ int siw_activate_tx(struct siw_qp *qp) { - struct siw_sqe *irqe, *sqe; + struct siw_sqe *irqe; struct siw_wqe *wqe = tx_wqe(qp); - int rv = 1; + + if (!qp->attrs.irq_size) + return siw_activate_tx_from_sq(qp); irqe = &qp->irq[qp->irq_get % qp->attrs.irq_size]; - if (irqe->flags & SIW_WQE_VALID) { - sqe = sq_get_next(qp); + if (!(irqe->flags & SIW_WQE_VALID)) + return siw_activate_tx_from_sq(qp); - /* - * Avoid local WQE processing starvation in case - * of constant inbound READ request stream - */ - if (sqe && ++qp->irq_burst >= SIW_IRQ_MAXBURST_SQ_ACTIVE) { - qp->irq_burst = 0; - goto skip_irq; - } - memset(wqe->mem, 0, sizeof(*wqe->mem) * SIW_MAX_SGE); - wqe->wr_status = SIW_WR_QUEUED; - - /* start READ RESPONSE */ - wqe->sqe.opcode = SIW_OP_READ_RESPONSE; - wqe->sqe.flags = 0; - if (irqe->num_sge) { - wqe->sqe.num_sge = 1; - wqe->sqe.sge[0].length = irqe->sge[0].length; - wqe->sqe.sge[0].laddr = irqe->sge[0].laddr; - wqe->sqe.sge[0].lkey = irqe->sge[0].lkey; - } else { - wqe->sqe.num_sge = 0; - } - - /* Retain original RREQ's message sequence number for - * potential error reporting cases. - */ - wqe->sqe.sge[1].length = irqe->sge[1].length; - - wqe->sqe.rkey = irqe->rkey; - wqe->sqe.raddr = irqe->raddr; - - wqe->processed = 0; - qp->irq_get++; - - /* mark current IRQ entry free */ - smp_store_mb(irqe->flags, 0); - - goto out; + /* + * Avoid local WQE processing starvation in case + * of constant inbound READ request stream + */ + if (sq_get_next(qp) && ++qp->irq_burst >= SIW_IRQ_MAXBURST_SQ_ACTIVE) { + qp->irq_burst = 0; + return siw_activate_tx_from_sq(qp); } - sqe = sq_get_next(qp); - if (sqe) { -skip_irq: - memset(wqe->mem, 0, sizeof(*wqe->mem) * SIW_MAX_SGE); - wqe->wr_status = SIW_WR_QUEUED; + memset(wqe->mem, 0, sizeof(*wqe->mem) * SIW_MAX_SGE); + wqe->wr_status = SIW_WR_QUEUED; - /* First copy SQE to kernel private memory */ - memcpy(&wqe->sqe, sqe, sizeof(*sqe)); - - if (wqe->sqe.opcode >= SIW_NUM_OPCODES) { - rv = -EINVAL; - goto out; - } - if (wqe->sqe.flags & SIW_WQE_INLINE) { - if (wqe->sqe.opcode != SIW_OP_SEND && - wqe->sqe.opcode != SIW_OP_WRITE) { - rv = -EINVAL; - goto out; - } - if (wqe->sqe.sge[0].length > SIW_MAX_INLINE) { - rv = -EINVAL; - goto out; - } - wqe->sqe.sge[0].laddr = (uintptr_t)&wqe->sqe.sge[1]; - wqe->sqe.sge[0].lkey = 0; - wqe->sqe.num_sge = 1; - } - if (wqe->sqe.flags & SIW_WQE_READ_FENCE) { - /* A READ cannot be fenced */ - if (unlikely(wqe->sqe.opcode == SIW_OP_READ || - wqe->sqe.opcode == - SIW_OP_READ_LOCAL_INV)) { - siw_dbg_qp(qp, "cannot fence read\n"); - rv = -EINVAL; - goto out; - } - spin_lock(&qp->orq_lock); - - if (!siw_orq_empty(qp)) { - qp->tx_ctx.orq_fence = 1; - rv = 0; - } - spin_unlock(&qp->orq_lock); - - } else if (wqe->sqe.opcode == SIW_OP_READ || - wqe->sqe.opcode == SIW_OP_READ_LOCAL_INV) { - struct siw_sqe *rreq; - - wqe->sqe.num_sge = 1; - - spin_lock(&qp->orq_lock); - - rreq = orq_get_free(qp); - if (rreq) { - /* - * Make an immediate copy in ORQ to be ready - * to process loopback READ reply - */ - siw_read_to_orq(rreq, &wqe->sqe); - qp->orq_put++; - } else { - qp->tx_ctx.orq_fence = 1; - rv = 0; - } - spin_unlock(&qp->orq_lock); - } - - /* Clear SQE, can be re-used by application */ - smp_store_mb(sqe->flags, 0); - qp->sq_get++; + /* start READ RESPONSE */ + wqe->sqe.opcode = SIW_OP_READ_RESPONSE; + wqe->sqe.flags = 0; + if (irqe->num_sge) { + wqe->sqe.num_sge = 1; + wqe->sqe.sge[0].length = irqe->sge[0].length; + wqe->sqe.sge[0].laddr = irqe->sge[0].laddr; + wqe->sqe.sge[0].lkey = irqe->sge[0].lkey; } else { - rv = 0; + wqe->sqe.num_sge = 0; } -out: - if (unlikely(rv < 0)) { - siw_dbg_qp(qp, "error %d\n", rv); - wqe->wr_status = SIW_WR_IDLE; - } - return rv; + + /* Retain original RREQ's message sequence number for + * potential error reporting cases. + */ + wqe->sqe.sge[1].length = irqe->sge[1].length; + + wqe->sqe.rkey = irqe->rkey; + wqe->sqe.raddr = irqe->raddr; + + wqe->processed = 0; + qp->irq_get++; + + /* mark current IRQ entry free */ + smp_store_mb(irqe->flags, 0); + + return 1; } /* diff --git a/drivers/infiniband/sw/siw/siw_qp_rx.c b/drivers/infiniband/sw/siw/siw_qp_rx.c index 4bd1f1f84057..60116f20653c 100644 --- a/drivers/infiniband/sw/siw/siw_qp_rx.c +++ b/drivers/infiniband/sw/siw/siw_qp_rx.c @@ -680,6 +680,10 @@ static int siw_init_rresp(struct siw_qp *qp, struct siw_rx_stream *srx) } spin_lock_irqsave(&qp->sq_lock, flags); + if (unlikely(!qp->attrs.irq_size)) { + run_sq = 0; + goto error_irq; + } if (tx_work->wr_status == SIW_WR_IDLE) { /* * immediately schedule READ response w/o @@ -712,8 +716,9 @@ static int siw_init_rresp(struct siw_qp *qp, struct siw_rx_stream *srx) /* RRESP now valid as current TX wqe or placed into IRQ */ smp_store_mb(resp->flags, SIW_WQE_VALID); } else { - pr_warn("siw: [QP %u]: irq %d exceeded %d\n", qp_id(qp), - qp->irq_put % qp->attrs.irq_size, qp->attrs.irq_size); +error_irq: + pr_warn("siw: [QP %u]: IRQ exceeded or null, size %d\n", + qp_id(qp), qp->attrs.irq_size); siw_init_terminate(qp, TERM_ERROR_LAYER_RDMAP, RDMAP_ETYPE_REMOTE_OPERATION, @@ -740,6 +745,9 @@ static int siw_orqe_start_rx(struct siw_qp *qp) struct siw_sqe *orqe; struct siw_wqe *wqe = NULL; + if (unlikely(!qp->attrs.orq_size)) + return -EPROTO; + /* make sure ORQ indices are current */ smp_mb(); @@ -796,8 +804,8 @@ int siw_proc_rresp(struct siw_qp *qp) */ rv = siw_orqe_start_rx(qp); if (rv) { - pr_warn("siw: [QP %u]: ORQ empty at idx %d\n", - qp_id(qp), qp->orq_get % qp->attrs.orq_size); + pr_warn("siw: [QP %u]: ORQ empty, size %d\n", + qp_id(qp), qp->attrs.orq_size); goto error_term; } rv = siw_rresp_check_ntoh(srx, frx); @@ -1290,11 +1298,13 @@ static int siw_rdmap_complete(struct siw_qp *qp, int error) wc_status); siw_wqe_put_mem(wqe, SIW_OP_READ); - if (!error) + if (!error) { rv = siw_check_tx_fence(qp); - else - /* Disable current ORQ eleement */ - WRITE_ONCE(orq_get_current(qp)->flags, 0); + } else { + /* Disable current ORQ element */ + if (qp->attrs.orq_size) + WRITE_ONCE(orq_get_current(qp)->flags, 0); + } break; case RDMAP_RDMA_READ_REQ: diff --git a/drivers/infiniband/sw/siw/siw_qp_tx.c b/drivers/infiniband/sw/siw/siw_qp_tx.c index d19d8325588b..7989c4043db4 100644 --- a/drivers/infiniband/sw/siw/siw_qp_tx.c +++ b/drivers/infiniband/sw/siw/siw_qp_tx.c @@ -1107,8 +1107,8 @@ int siw_qp_sq_process(struct siw_qp *qp) /* * RREQ may have already been completed by inbound RRESP! */ - if (tx_type == SIW_OP_READ || - tx_type == SIW_OP_READ_LOCAL_INV) { + if ((tx_type == SIW_OP_READ || + tx_type == SIW_OP_READ_LOCAL_INV) && qp->attrs.orq_size) { /* Cleanup pending entry in ORQ */ qp->orq_put--; qp->orq[qp->orq_put % qp->attrs.orq_size].flags = 0; diff --git a/drivers/infiniband/sw/siw/siw_verbs.c b/drivers/infiniband/sw/siw/siw_verbs.c index 68fd053fc774..e389d44e5591 100644 --- a/drivers/infiniband/sw/siw/siw_verbs.c +++ b/drivers/infiniband/sw/siw/siw_verbs.c @@ -365,13 +365,23 @@ struct ib_qp *siw_create_qp(struct ib_pd *pd, if (rv) goto err_out; + num_sqe = attrs->cap.max_send_wr; + num_rqe = attrs->cap.max_recv_wr; + /* All queue indices are derived from modulo operations * on a free running 'get' (consumer) and 'put' (producer) * unsigned counter. Having queue sizes at power of two * avoids handling counter wrap around. */ - num_sqe = roundup_pow_of_two(attrs->cap.max_send_wr); - num_rqe = roundup_pow_of_two(attrs->cap.max_recv_wr); + if (num_sqe) + num_sqe = roundup_pow_of_two(num_sqe); + else { + /* Zero sized SQ is not supported */ + rv = -EINVAL; + goto err_out; + } + if (num_rqe) + num_rqe = roundup_pow_of_two(num_rqe); if (udata) qp->sendq = vmalloc_user(num_sqe * sizeof(struct siw_sqe)); @@ -379,7 +389,6 @@ struct ib_qp *siw_create_qp(struct ib_pd *pd, qp->sendq = vzalloc(num_sqe * sizeof(struct siw_sqe)); if (qp->sendq == NULL) { - siw_dbg(base_dev, "SQ size %d alloc failed\n", num_sqe); rv = -ENOMEM; goto err_out_xa; } @@ -413,7 +422,6 @@ struct ib_qp *siw_create_qp(struct ib_pd *pd, qp->recvq = vzalloc(num_rqe * sizeof(struct siw_rqe)); if (qp->recvq == NULL) { - siw_dbg(base_dev, "RQ size %d alloc failed\n", num_rqe); rv = -ENOMEM; goto err_out_xa; } @@ -966,9 +974,9 @@ int siw_post_receive(struct ib_qp *base_qp, const struct ib_recv_wr *wr, unsigned long flags; int rv = 0; - if (qp->srq) { + if (qp->srq || qp->attrs.rq_size == 0) { *bad_wr = wr; - return -EOPNOTSUPP; /* what else from errno.h? */ + return -EINVAL; } if (!rdma_is_kernel_res(&qp->base_qp.res)) { siw_dbg_qp(qp, "no kernel post_recv for user mapped rq\n"); From 3a4928cf5e3c91fc9703992358db23202ee3e82f Mon Sep 17 00:00:00 2001 From: Joe Pater <02joepater06@gmail.com> Date: Mon, 11 Jan 2021 10:32:41 +0000 Subject: [PATCH 007/414] Documentation: kernel-hacking: change 'current()' to 'current' Change 'current()' heading to 'current' to reflect usage. Signed-off-by: Joe Pater <02joepater06@gmail.com> Link: https://lore.kernel.org/r/20210111103240.7445-1-02joepater06@gmail.com Signed-off-by: Jonathan Corbet --- Documentation/kernel-hacking/hacking.rst | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/Documentation/kernel-hacking/hacking.rst b/Documentation/kernel-hacking/hacking.rst index eed2136d847f..451523424942 100644 --- a/Documentation/kernel-hacking/hacking.rst +++ b/Documentation/kernel-hacking/hacking.rst @@ -346,8 +346,8 @@ routine. Before inventing your own cache of often-used objects consider using a slab cache in ``include/linux/slab.h`` -:c:func:`current()` -------------------- +:c:macro:`current` +------------------ Defined in ``include/asm/current.h`` From 05a5f51ca566674e6a6ee9cef0af1b00bf100d67 Mon Sep 17 00:00:00 2001 From: Joe Perches Date: Sun, 10 Jan 2021 12:41:44 -0800 Subject: [PATCH 008/414] Documentation: Replace lkml.org links with lore Replace the lkml.org links with lore to better use a single source that's more likely to stay available long-term. Done by bash script: cvt_lkml_to_lore () { tmpfile=$(mktemp ./.cvt_links.XXXXXXX) header=$(echo $1 | sed 's@/lkml/@/lkml/headers/@') wget -qO - $header > $tmpfile if [[ $? == 0 ]] ; then link=$(grep -i '^Message-Id:' $tmpfile | head -1 | \ sed -r -e 's/^\s*Message-Id:\s*<\s*//' -e 's/\s*>\s*$//' -e 's@^@https://lore.kernel.org/r/@') # echo "testlink: $link" if [ -n "$link" ] ; then wget -qO - $link > /dev/null if [[ $? == 0 ]] ; then echo $link fi fi fi rm -f $tmpfile } git grep -P -o "\bhttps?://(?:www.)?lkml.org/lkml[\/\w]+" $@ | while read line ; do echo $line file=$(echo $line | cut -f1 -d':') link=$(echo $line | cut -f2- -d':') newlink=$(cvt_lkml_to_lore $link) if [[ -n "$newlink" ]] ; then sed -i -e "s#\b$link\b#$newlink#" $file fi done Link: https://lore.kernel.org/patchwork/patch/1265849/#1462688 Signed-off-by: Joe Perches Link: https://lore.kernel.org/r/77cdb7f32cfb087955bfc3600b86c40bed5d4104.camel@perches.com Signed-off-by: Jonathan Corbet --- Documentation/RCU/RTFP.txt | 94 +++++++++---------- Documentation/accounting/cgroupstats.rst | 4 +- .../admin-guide/cgroup-v1/memory.rst | 14 +-- Documentation/admin-guide/cpu-load.rst | 2 +- .../admin-guide/kernel-per-CPU-kthreads.rst | 2 +- Documentation/driver-api/gpio/driver.rst | 4 +- Documentation/gpu/todo.rst | 2 +- Documentation/power/freezing-of-tasks.rst | 2 +- Documentation/process/adding-syscalls.rst | 18 ++-- Documentation/process/submitting-patches.rst | 4 +- Documentation/scheduler/sched-deadline.rst | 2 +- Documentation/security/lsm-development.rst | 2 +- Documentation/timers/timers-howto.rst | 2 +- .../it_IT/process/adding-syscalls.rst | 18 ++-- .../it_IT/process/submitting-patches.rst | 4 +- .../translations/ja_JP/SubmittingPatches | 4 +- .../zh_CN/admin-guide/cpu-load.rst | 2 +- .../zh_CN/process/submitting-patches.rst | 4 +- 18 files changed, 92 insertions(+), 92 deletions(-) diff --git a/Documentation/RCU/RTFP.txt b/Documentation/RCU/RTFP.txt index 9bccf16736f7..3b0876c77355 100644 --- a/Documentation/RCU/RTFP.txt +++ b/Documentation/RCU/RTFP.txt @@ -683,7 +683,7 @@ Orran Krieger and Rusty Russell and Dipankar Sarma and Maneesh Soni" ,month="October" ,year="2001" ,note="Available: -\url{http://lkml.org/lkml/2001/10/13/105} +\url{https://lore.kernel.org/r/Pine.LNX.4.33.0110131015410.8707-100000@penguin.transmeta.com} [Viewed August 21, 2004]" ,annotation={ } @@ -826,7 +826,7 @@ Symposium on Distributed Computing} ,month="October" ,year="2002" ,note="Available: -\url{https://lkml.org/lkml/2002/10/24/262} +\url{https://lore.kernel.org/r/3DB86B05.447E7410@us.ibm.com} [Viewed February 15, 2014]" ,annotation={ Mingming Cao's patch to introduce RCU to SysV IPC. @@ -839,7 +839,7 @@ Symposium on Distributed Computing} ,month="March" ,year="2003" ,note="Available: -\url{http://lkml.org/lkml/2003/3/9/205} +\url{https://lore.kernel.org/r/Pine.LNX.4.44.0303091831560.2129-100000@home.transmeta.com} [Viewed March 13, 2006]" ,annotation={ Linus suggests replacing brlock with RCU and/or seqlocks: @@ -1036,15 +1036,15 @@ Add per-cpu batch counter" ,annotation={ RCU runs reasonably on a 512-CPU SGI using Manfred Spraul's patches, which may be found at: - https://lkml.org/lkml/2004/5/20/49 (split vars into cachelines) - https://lkml.org/lkml/2004/5/22/114 (cpu_quiet() patch) - https://lkml.org/lkml/2004/5/25/24 (0/5) - https://lkml.org/lkml/2004/5/25/23 (1/5) - https://lkml.org/lkml/2004/5/25/265 (works for Jack) - https://lkml.org/lkml/2004/5/25/20 (2/5) - https://lkml.org/lkml/2004/5/25/22 (3/5) - https://lkml.org/lkml/2004/5/25/19 (4/5) - https://lkml.org/lkml/2004/5/25/21 (5/5) + https://lore.kernel.org/r/40AC9823.6020709@colorfullife.com (split vars into cachelines) + https://lore.kernel.org/r/Pine.LNX.4.44.0405222141260.11106-100000@dbl.q-ag.de (cpu_quiet() patch) + https://lore.kernel.org/r/200405250535.i4P5ZJo8017583@dbl.q-ag.de (0/5) + https://lore.kernel.org/r/200405250535.i4P5ZKAQ017591@dbl.q-ag.de (1/5) + https://lore.kernel.org/r/20040525203215.GB5127@sgi.com (works for Jack) + https://lore.kernel.org/r/200405250535.i4P5ZLiR017599@dbl.q-ag.de (2/5) + https://lore.kernel.org/r/200405250535.i4P5ZMFt017607@dbl.q-ag.de (3/5) + https://lore.kernel.org/r/200405250535.i4P5ZN6g017615@dbl.q-ag.de (4/5) + https://lore.kernel.org/r/200405250535.i4P5ZO7I017623@dbl.q-ag.de (5/5) } } @@ -1106,7 +1106,7 @@ Oregon Health and Sciences University" ,month="August" ,year="2004" ,note="Available: -\url{http://lkml.org/lkml/2004/8/6/237} +\url{https://lore.kernel.org/r/20040807192424.GF3936@in.ibm.com} [Viewed June 8, 2010]" ,annotation={ Introduce rcu_dereference(). @@ -1119,7 +1119,7 @@ Oregon Health and Sciences University" ,month="August" ,year="2004" ,note="Available: -\url{http://lkml.org/lkml/2004/8/30/87} +\url{https://lore.kernel.org/r/1093873222.984.12.camel@new.localdomain} [Viewed February 17, 2005]" ,annotation={ Uses active code in rcu_read_lock() and rcu_read_unlock() to @@ -1186,7 +1186,7 @@ Oregon Health and Sciences University" ,month="October" ,year="2004" ,note="Available: -\url{http://lkml.org/lkml/2004/10/23/241} +\url{https://lore.kernel.org/r/20041023202723.GA1930@us.ibm.com} [Viewed June 8, 2010]" ,annotation={ Introduce rcu_assign_pointer(). @@ -1203,7 +1203,7 @@ Oregon Health and Sciences University" ,annotation={ James Morris posts Kaigai Kohei's patch to LKML. [Viewed December 10, 2004] - Kaigai's patch is at https://lkml.org/lkml/2004/9/27/52 + Kaigai's patch is at https://lore.kernel.org/r/200409271057.i8RAvcA1007873@mailsv.bs1.fc.nec.co.jp } } @@ -1241,7 +1241,7 @@ Oregon Health and Sciences University" ,year="2005" ,day="17" ,note="Available: -\url{http://lkml.org/lkml/2005/3/17/199} +\url{https://lore.kernel.org/r/20050318002026.GA2693@us.ibm.com} [Viewed September 5, 2005]" ,annotation={ First posting showing how RCU can be safely adapted for @@ -1256,7 +1256,7 @@ Oregon Health and Sciences University" ,year="2005" ,day="18" ,note="Available: -\url{http://lkml.org/lkml/2005/3/18/122} +\url{https://lore.kernel.org/r/Pine.OSF.4.05.10503181336310.2466-100000@da410.phys.au.dk} [Viewed March 30, 2006]" ,annotation={ Esben Neilsen suggests read-side suppression of grace-period @@ -1302,7 +1302,7 @@ Data Structures" ,month="May" ,year="2005" ,note="Available: -\url{http://lkml.org/lkml/2005/5/9/185} +\url{https://lore.kernel.org/r/20050510012444.GA3011@us.ibm.com} [Viewed May 13, 2005]" ,annotation={ First publication of working lock-based deferred free patches @@ -1385,7 +1385,7 @@ Data Structures" ,day="1" ,year="2005" ,note="Available: -\url{http://lkml.org/lkml/2005/8/1/155} +\url{https://lore.kernel.org/r/20050801171137.GA1754@us.ibm.com} [Viewed March 14, 2006]" ,annotation={ First operating counter-based realtime RCU patch posted to LKML. @@ -1399,7 +1399,7 @@ Data Structures" ,day="8" ,year="2005" ,note="Available: -\url{http://lkml.org/lkml/2005/8/8/108} +\url{https://lore.kernel.org/r/20050808144216.GA1307@us.ibm.com} [Viewed March 14, 2006]" ,annotation={ First operating counter-based realtime RCU patch posted to LKML, @@ -1415,7 +1415,7 @@ Data Structures" ,day="1" ,year="2005" ,note="Available: -\url{http://lkml.org/lkml/2005/10/1/70} +\url{https://lore.kernel.org/r/20051001182056.GA1613@us.ibm.com} [Viewed March 14, 2006]" ,annotation={ First rcutorture patch. @@ -1429,7 +1429,7 @@ Data Structures" ,day="6" ,year="2006" ,note="Available: -\url{https://lkml.org/lkml/2006/1/7/22} +\url{https://lore.kernel.org/r/20060106.231054.43576567.davem@davemloft.net} [Viewed February 29, 2012]" ,annotation={ David Miller's view on hashed arrays of locks: used to really @@ -1464,7 +1464,7 @@ Distributed Processing Symposium" ,day="20" ,year="2006" ,note="Available: -\url{http://lkml.org/lkml/2006/6/20/238} +\url{https://lore.kernel.org/r/20060408134707.22479.33814.sendpatchset@linux.site} [Viewed March 25, 2008]" ,annotation={ RCU-protected radix tree. @@ -1554,7 +1554,7 @@ Revised: ,day="28" ,year="2006" ,note="Available: -\url{http://lkml.org/lkml/2006/9/28/160} +\url{https://lore.kernel.org/r/20060928142616.GA20185@infradead.org} [Viewed March 27, 2008]" } @@ -1593,7 +1593,7 @@ Revised: ,year="2006" ,day=26 ,note="Available: -\url{http://lkml.org/lkml/2006/10/26/73} +\url{https://lore.kernel.org/r/20061026105731.GE11803@in.ibm.com} [Viewed January 26, 2009]" ,annotation={ RCU-based reader-writer lock that allows readers to proceed with @@ -1612,12 +1612,12 @@ Revised: ,year="2006" ,day=17 ,note="Available: -\url{http://lkml.org/lkml/2006/11/17/56} +\url{https://lore.kernel.org/r/20061117092925.GT7164@kernel.dk} [Viewed May 28, 2007]" ,annotation={ SRCU's grace periods are too slow for Jens, even after a factor-of-three speedup. - Sped-up version of SRCU at http://lkml.org/lkml/2006/11/17/359. + Sped-up version of SRCU at https://lore.kernel.org/r/20061118002845.GF2632@us.ibm.com. } } @@ -1629,7 +1629,7 @@ Revised: ,year="2006" ,day=19 ,note="Available: -\url{http://lkml.org/lkml/2006/11/19/69} +\url{https://lore.kernel.org/r/20061119190027.GA3676@oleg} [Viewed May 28, 2007]" ,annotation={ First cut of QRCU. Expanded/corrected versions followed. @@ -1644,7 +1644,7 @@ Revised: ,year="2006" ,day=30 ,note="Available: -\url{http://lkml.org/lkml/2006/11/29/330} +\url{https://lore.kernel.org/r/20061130015714.GC1350@oleg} [Viewed November 26, 2008]" ,annotation={ Expanded/corrected version of QRCU. @@ -1709,7 +1709,7 @@ Revised: ,year="2007" ,day=3 ,note="Available: -\url{http://lkml.org/lkml/2007/1/3/112} +\url{https://lore.kernel.org/r/20070103152738.GA16063@localdomain} [Viewed May 28, 2007]" ,annotation={ Patch for list_splice_rcu(). @@ -1737,7 +1737,7 @@ Revised: ,year="2007" ,day=28 ,note="Available: -\url{http://lkml.org/lkml/2007/1/28/34} +\url{https://lore.kernel.org/r/20070128120509.719287000@programming.kicks-ass.net} [Viewed March 27, 2008]" ,annotation={ RCU-like implementation for frequent updaters and rare readers(!). @@ -1767,7 +1767,7 @@ Revised: ,year="2007" ,day=24 ,note="Available: -\url{http://lkml.org/lkml/2007/2/25/18} +\url{https://lore.kernel.org/r/20070225062349.GA17468@linux.vnet.ibm.com} [Viewed March 27, 2008]" ,annotation={ Patch for QRCU supplying lock-free fast path. @@ -1846,7 +1846,7 @@ Revised: ,annotation={ LWN article describing Promela and spin, and also using Oleg Nesterov's QRCU as an example (with Paul McKenney's fastpath). - Merged patch at: http://lkml.org/lkml/2007/2/25/18 + Merged patch at: https://lore.kernel.org/r/20070225062349.GA17468@linux.vnet.ibm.com } } @@ -1885,7 +1885,7 @@ Revised: ,day="10" ,year="2007" ,note="Available: -\url{http://lkml.org/lkml/2007/9/10/213} +\url{https://lore.kernel.org/r/20070910183004.GA3299@linux.vnet.ibm.com} [Viewed October 25, 2007]" ,annotation={ Final patch for preemptable RCU to -rt. (Later patches were @@ -1933,7 +1933,7 @@ Revised: ,day="20" ,year="2007" ,note="Available: -\url{http://lkml.org/lkml/2007/12/20/244} +\url{https://lore.kernel.org/r/20071220142540.GB22523@Krystal} [Viewed March 27, 2008]" ,annotation={ Request for call_rcu_sched() and rcu_barrier_sched(). @@ -2013,7 +2013,7 @@ Revised: ,day="29" ,year="2008" ,note="Available: -\url{http://lkml.org/lkml/2008/1/29/208} +\url{https://lore.kernel.org/r/Pine.LNX.4.58.0801291113350.20371@gandalf.stny.rr.com} [Viewed March 27, 2008]" ,annotation={ Patch that prevents preemptible RCU from unnecessarily waking @@ -2028,7 +2028,7 @@ Revised: ,day="1" ,year="2008" ,note="Available: -\url{http://lkml.org/lkml/2008/2/2/255} +\url{https://lore.kernel.org/r/20080202214124.GA28612@linux.vnet.ibm.com} [Viewed October 18, 2008]" ,annotation={ Explanation of compilers violating dependency ordering. @@ -2088,7 +2088,7 @@ lot of {Linux} into your technology!!!" ,day="3" ,year="2008" ,note="Available: -\url{http://lkml.org/lkml/2008/6/2/539} +\url{https://lore.kernel.org/r/4844BE83.5010401@cn.fujitsu.com} [Viewed December 10, 2008]" ,annotation={ Updated RCU classic algorithm. Introduced multi-tailed list @@ -2122,7 +2122,7 @@ lot of {Linux} into your technology!!!" ,day="21" ,year="2008" ,note="Available: -\url{http://lkml.org/lkml/2008/8/21/336} +\url{https://lore.kernel.org/r/48AD8969.7060900@colorfullife.com} [Viewed December 8, 2008]" ,annotation={ State-based RCU. One key thing that this patch does is to @@ -2137,7 +2137,7 @@ lot of {Linux} into your technology!!!" ,day="6" ,year="2008" ,note="Available: -\url{http://lkml.org/lkml/2008/9/6/86} +\url{https://lore.kernel.org/r/48C2B1D2.5070801@colorfullife.com} [Viewed December 8, 2008]" ,annotation={ Manfred notes a fix required to my attempt to separate irq @@ -2183,7 +2183,7 @@ lot of {Linux} into your technology!!!" ,day="14" ,year="2009" ,note="Available: -\url{http://lkml.org/lkml/2009/1/14/449} +\url{https://lore.kernel.org/r/20090114202044.GJ6734@linux.vnet.ibm.com} [Viewed January 15, 2009]" ,annotation={ Small-footprint implementation of RCU for uniprocessor @@ -2218,7 +2218,7 @@ lot of {Linux} into your technology!!!" git://lttng.org/userspace-rcu.git http://lttng.org/cgi-bin/gitweb.cgi?p=userspace-rcu.git http://lttng.org/urcu - http://lkml.org/lkml/2009/2/5/572 + https://lore.kernel.org/r/20090206030543.GB8560@Krystal } } @@ -2258,7 +2258,7 @@ lot of {Linux} into your technology!!!" ,day="25" ,year="2009" ,note="Available: -\url{http://lkml.org/lkml/2009/6/25/306} +\url{https://lore.kernel.org/r/20090625160706.GA9467@linux.vnet.ibm.com} [Viewed August 16, 2009]" ,annotation={ First posting of expedited RCU to be accepted into -tip. @@ -2272,7 +2272,7 @@ lot of {Linux} into your technology!!!" ,day="23" ,year="2009" ,note="Available: -\url{http://lkml.org/lkml/2009/7/23/294} +\url{https://lore.kernel.org/r/20090724001429.GA17374@linux.vnet.ibm.com} [Viewed August 15, 2009]" ,annotation={ First posting of simple and fast preemptable RCU. @@ -2350,7 +2350,7 @@ lot of {Linux} into your technology!!!" ,month="December" ,year="2009" ,note="Available: -\url{http://lkml.org/lkml/2009/10/18/129} +\url{https://lore.kernel.org/r/20091018232918.GA7385@Krystal} [Viewed December 29, 2009]" ,annotation={ Mathieu proposed defer_rcu() with fixed-size per-thread pool @@ -2518,7 +2518,7 @@ lot of {Linux} into your technology!!!" ,month="January" ,year="2011" ,note="Available: -\url{https://lkml.org/lkml/2011/1/18/322} +\url{https://lore.kernel.org/r/AANLkTimajU0x1v6y3rH2+jr-bZ=tNLs1S_agXdGGAa3S@mail.gmail.com} [Viewed March 4, 2011]" ,annotation={ "The RCU-based name lookup is at the other end of the spectrum - the diff --git a/Documentation/accounting/cgroupstats.rst b/Documentation/accounting/cgroupstats.rst index b9afc48f4ea2..85186e7d4035 100644 --- a/Documentation/accounting/cgroupstats.rst +++ b/Documentation/accounting/cgroupstats.rst @@ -3,8 +3,8 @@ Control Groupstats ================== Control Groupstats is inspired by the discussion at -http://lkml.org/lkml/2007/4/11/187 and implements per cgroup statistics as -suggested by Andrew Morton in http://lkml.org/lkml/2007/4/11/263. +https://lore.kernel.org/r/461CF883.2030308@sw.ru and implements per cgroup statistics as +suggested by Andrew Morton in https://lore.kernel.org/r/20070411114927.1277d7c9.akpm@linux-foundation.org. Per cgroup statistics infrastructure re-uses code from the taskstats interface. A new set of cgroup operations are registered with commands diff --git a/Documentation/admin-guide/cgroup-v1/memory.rst b/Documentation/admin-guide/cgroup-v1/memory.rst index 52688ae34461..0936412e044e 100644 --- a/Documentation/admin-guide/cgroup-v1/memory.rst +++ b/Documentation/admin-guide/cgroup-v1/memory.rst @@ -963,21 +963,21 @@ References 2. Singh, Balbir. Memory Controller (RSS Control), http://lwn.net/Articles/222762/ 3. Emelianov, Pavel. Resource controllers based on process cgroups - http://lkml.org/lkml/2007/3/6/198 + https://lore.kernel.org/r/45ED7DEC.7010403@sw.ru 4. Emelianov, Pavel. RSS controller based on process cgroups (v2) - http://lkml.org/lkml/2007/4/9/78 + https://lore.kernel.org/r/461A3010.90403@sw.ru 5. Emelianov, Pavel. RSS controller based on process cgroups (v3) - http://lkml.org/lkml/2007/5/30/244 + https://lore.kernel.org/r/465D9739.8070209@openvz.org 6. Menage, Paul. Control Groups v10, http://lwn.net/Articles/236032/ 7. Vaidyanathan, Srinivasan, Control Groups: Pagecache accounting and control subsystem (v3), http://lwn.net/Articles/235534/ 8. Singh, Balbir. RSS controller v2 test results (lmbench), - http://lkml.org/lkml/2007/5/17/232 + https://lore.kernel.org/r/464C95D4.7070806@linux.vnet.ibm.com 9. Singh, Balbir. RSS controller v2 AIM9 results - http://lkml.org/lkml/2007/5/18/1 + https://lore.kernel.org/r/464D267A.50107@linux.vnet.ibm.com 10. Singh, Balbir. Memory controller v6 test results, - http://lkml.org/lkml/2007/8/19/36 + https://lore.kernel.org/r/20070819094658.654.84837.sendpatchset@balbir-laptop 11. Singh, Balbir. Memory controller introduction (v6), - http://lkml.org/lkml/2007/8/17/69 + https://lore.kernel.org/r/20070817084228.26003.12568.sendpatchset@balbir-laptop 12. Corbet, Jonathan, Controlling memory use in cgroups, http://lwn.net/Articles/243795/ diff --git a/Documentation/admin-guide/cpu-load.rst b/Documentation/admin-guide/cpu-load.rst index f3ada90e9ca8..21a984337080 100644 --- a/Documentation/admin-guide/cpu-load.rst +++ b/Documentation/admin-guide/cpu-load.rst @@ -107,7 +107,7 @@ will lead to quite erratic information inside ``/proc/stat``:: References ---------- -- http://lkml.org/lkml/2007/2/12/6 +- https://lore.kernel.org/r/loom.20070212T063225-663@post.gmane.org - Documentation/filesystems/proc.rst (1.8) diff --git a/Documentation/admin-guide/kernel-per-CPU-kthreads.rst b/Documentation/admin-guide/kernel-per-CPU-kthreads.rst index dc36aeb65d0a..531f689311f2 100644 --- a/Documentation/admin-guide/kernel-per-CPU-kthreads.rst +++ b/Documentation/admin-guide/kernel-per-CPU-kthreads.rst @@ -273,7 +273,7 @@ To reduce its OS jitter, do any of the following: However, there is an RFC patch from Christoph Lameter (based on an earlier one from Gilad Ben-Yossef) that reduces or even eliminates vmstat overhead for some - workloads at https://lkml.org/lkml/2013/9/4/379. + workloads at https://lore.kernel.org/r/00000140e9dfd6bd-40db3d4f-c1be-434f-8132-7820f81bb586-000000@email.amazonses.com. e. If running on high-end powerpc servers, build with CONFIG_PPC_RTAS_DAEMON=n. This prevents the RTAS daemon from running on each CPU every second or so. diff --git a/Documentation/driver-api/gpio/driver.rst b/Documentation/driver-api/gpio/driver.rst index 0fb57e298b41..d6b0d779859b 100644 --- a/Documentation/driver-api/gpio/driver.rst +++ b/Documentation/driver-api/gpio/driver.rst @@ -640,8 +640,8 @@ compliance: level and edge IRQs * [1] http://www.spinics.net/lists/linux-omap/msg120425.html -* [2] https://lkml.org/lkml/2015/9/25/494 -* [3] https://lkml.org/lkml/2015/9/25/495 +* [2] https://lore.kernel.org/r/1443209283-20781-2-git-send-email-grygorii.strashko@ti.com +* [3] https://lore.kernel.org/r/1443209283-20781-3-git-send-email-grygorii.strashko@ti.com Requesting self-owned GPIO pins diff --git a/Documentation/gpu/todo.rst b/Documentation/gpu/todo.rst index 009d8e6c7e3c..d42c22567c5d 100644 --- a/Documentation/gpu/todo.rst +++ b/Documentation/gpu/todo.rst @@ -669,7 +669,7 @@ for fbdev. https://patchwork.freedesktop.org/patch/306579/ - [RFC PATCH v2 00/13] Kernel based bootsplash - https://lkml.org/lkml/2017/12/13/764 + https://lore.kernel.org/r/20171213194755.3409-1-mstaudt@suse.de Contact: Sam Ravnborg diff --git a/Documentation/power/freezing-of-tasks.rst b/Documentation/power/freezing-of-tasks.rst index 8bd693399834..53b6a56c4635 100644 --- a/Documentation/power/freezing-of-tasks.rst +++ b/Documentation/power/freezing-of-tasks.rst @@ -134,7 +134,7 @@ Generally speaking, there is a couple of reasons to use the freezing of tasks: safeguards against race conditions that might occur in such a case. Although Linus Torvalds doesn't like the freezing of tasks, he said this in one -of the discussions on LKML (http://lkml.org/lkml/2007/4/27/608): +of the discussions on LKML (https://lore.kernel.org/r/alpine.LFD.0.98.0704271801020.9964@woody.linux-foundation.org): "RJW:> Why we freeze tasks at all or why we freeze kernel threads? diff --git a/Documentation/process/adding-syscalls.rst b/Documentation/process/adding-syscalls.rst index a3ecb236576c..02857b5ad2b5 100644 --- a/Documentation/process/adding-syscalls.rst +++ b/Documentation/process/adding-syscalls.rst @@ -548,18 +548,18 @@ References and Sources https://lwn.net/Articles/486306/ - Recommendation from Andrew Morton that all related information for a new system call should come in the same email thread: - https://lkml.org/lkml/2014/7/24/641 + https://lore.kernel.org/r/20140724144747.3041b208832bbdf9fbce5d96@linux-foundation.org - Recommendation from Michael Kerrisk that a new system call should come with - a man page: https://lkml.org/lkml/2014/6/13/309 + a man page: https://lore.kernel.org/r/CAKgNAkgMA39AfoSoA5Pe1r9N+ZzfYQNvNPvcRN7tOvRb8+v06Q@mail.gmail.com - Suggestion from Thomas Gleixner that x86 wire-up should be in a separate - commit: https://lkml.org/lkml/2014/11/19/254 + commit: https://lore.kernel.org/r/alpine.DEB.2.11.1411191249560.3909@nanos - Suggestion from Greg Kroah-Hartman that it's good for new system calls to - come with a man-page & selftest: https://lkml.org/lkml/2014/3/19/710 + come with a man-page & selftest: https://lore.kernel.org/r/20140320025530.GA25469@kroah.com - Discussion from Michael Kerrisk of new system call vs. :manpage:`prctl(2)` extension: - https://lkml.org/lkml/2014/6/3/411 + https://lore.kernel.org/r/CAHO5Pa3F2MjfTtfNxa8LbnkeeU8=YJ+9tDqxZpw7Gz59E-4AUg@mail.gmail.com - Suggestion from Ingo Molnar that system calls that involve multiple arguments should encapsulate those arguments in a struct, which includes a - size field for future extensibility: https://lkml.org/lkml/2015/7/30/117 + size field for future extensibility: https://lore.kernel.org/r/20150730083831.GA22182@gmail.com - Numbering oddities arising from (re-)use of O_* numbering space flags: - commit 75069f2b5bfb ("vfs: renumber FMODE_NONOTIFY and add to uniqueness @@ -569,9 +569,9 @@ References and Sources - commit bb458c644a59 ("Safer ABI for O_TMPFILE") - Discussion from Matthew Wilcox about restrictions on 64-bit arguments: - https://lkml.org/lkml/2008/12/12/187 + https://lore.kernel.org/r/20081212152929.GM26095@parisc-linux.org - Recommendation from Greg Kroah-Hartman that unknown flags should be - policed: https://lkml.org/lkml/2014/7/17/577 + policed: https://lore.kernel.org/r/20140717193330.GB4703@kroah.com - Recommendation from Linus Torvalds that x32 system calls should prefer compatibility with 64-bit versions rather than 32-bit versions: - https://lkml.org/lkml/2011/8/31/244 + https://lore.kernel.org/r/CA+55aFxfmwfB7jbbrXxa=K7VBYPfAvmu3XOkGrLbB1UFjX1+Ew@mail.gmail.com diff --git a/Documentation/process/submitting-patches.rst b/Documentation/process/submitting-patches.rst index 5ba54120bef7..7c97ad580e7d 100644 --- a/Documentation/process/submitting-patches.rst +++ b/Documentation/process/submitting-patches.rst @@ -769,13 +769,13 @@ Greg Kroah-Hartman, "How to piss off a kernel subsystem maintainer". NO!!!! No more huge patch bombs to linux-kernel@vger.kernel.org people! - + Kernel Documentation/process/coding-style.rst: :ref:`Documentation/process/coding-style.rst ` Linus Torvalds's mail on the canonical patch format: - + Andi Kleen, "On submitting kernel patches" Some strategies to get difficult or controversial changes in. diff --git a/Documentation/scheduler/sched-deadline.rst b/Documentation/scheduler/sched-deadline.rst index 14a2f7bf63fe..9d9be52f221a 100644 --- a/Documentation/scheduler/sched-deadline.rst +++ b/Documentation/scheduler/sched-deadline.rst @@ -707,7 +707,7 @@ Deadline Task Scheduling and how to prevent non-root users "cheat" the system? As already discussed, we are planning also to merge this work with the EDF - throttling patches [https://lkml.org/lkml/2010/2/23/239] but we still are in + throttling patches [https://lore.kernel.org/r/cover.1266931410.git.fabio@helm.retis] but we still are in the preliminary phases of the merge and we really seek feedback that would help us decide on the direction it should take. diff --git a/Documentation/security/lsm-development.rst b/Documentation/security/lsm-development.rst index 31d92bc5fdd2..ac53e5065f79 100644 --- a/Documentation/security/lsm-development.rst +++ b/Documentation/security/lsm-development.rst @@ -2,7 +2,7 @@ Linux Security Module Development ================================= -Based on https://lkml.org/lkml/2007/10/26/215, +Based on https://lore.kernel.org/r/20071026073721.618b4778@laptopd505.fenrus.org, a new LSM is accepted into the kernel when its intent (a description of what it tries to protect against and in what cases one would expect to use it) has been appropriately documented in ``Documentation/admin-guide/LSM/``. diff --git a/Documentation/timers/timers-howto.rst b/Documentation/timers/timers-howto.rst index afb0a43b8cdf..5c169e3d29a8 100644 --- a/Documentation/timers/timers-howto.rst +++ b/Documentation/timers/timers-howto.rst @@ -75,7 +75,7 @@ NON-ATOMIC CONTEXT: - Why not msleep for (1ms - 20ms)? Explained originally here: - http://lkml.org/lkml/2007/8/3/250 + https://lore.kernel.org/r/15327.1186166232@lwn.net msleep(1~20) may not do what the caller intends, and will often sleep longer (~20 ms actual sleep for any diff --git a/Documentation/translations/it_IT/process/adding-syscalls.rst b/Documentation/translations/it_IT/process/adding-syscalls.rst index bff0a82bf127..c478b6e8c292 100644 --- a/Documentation/translations/it_IT/process/adding-syscalls.rst +++ b/Documentation/translations/it_IT/process/adding-syscalls.rst @@ -611,21 +611,21 @@ Riferimenti e fonti https://lwn.net/Articles/486306/ - Raccomandazioni da Andrew Morton circa il fatto che tutte le informazioni su una nuova chiamata di sistema dovrebbero essere contenute nello stesso - filone di discussione di email: https://lkml.org/lkml/2014/7/24/641 + filone di discussione di email: https://lore.kernel.org/r/20140724144747.3041b208832bbdf9fbce5d96@linux-foundation.org - Raccomandazioni da Michael Kerrisk circa il fatto che le nuove chiamate di - sistema dovrebbero avere una pagina man: https://lkml.org/lkml/2014/6/13/309 + sistema dovrebbero avere una pagina man: https://lore.kernel.org/r/CAKgNAkgMA39AfoSoA5Pe1r9N+ZzfYQNvNPvcRN7tOvRb8+v06Q@mail.gmail.com - Consigli da Thomas Gleixner sul fatto che il collegamento all'architettura x86 dovrebbe avvenire in un *commit* differente: - https://lkml.org/lkml/2014/11/19/254 + https://lore.kernel.org/r/alpine.DEB.2.11.1411191249560.3909@nanos - Consigli da Greg Kroah-Hartman circa la bontà d'avere una pagina man e un programma di auto-verifica per le nuove chiamate di sistema: - https://lkml.org/lkml/2014/3/19/710 + https://lore.kernel.org/r/20140320025530.GA25469@kroah.com - Discussione di Michael Kerrisk sulle nuove chiamate di sistema contro - le estensioni :manpage:`prctl(2)`: https://lkml.org/lkml/2014/6/3/411 + le estensioni :manpage:`prctl(2)`: https://lore.kernel.org/r/CAHO5Pa3F2MjfTtfNxa8LbnkeeU8=YJ+9tDqxZpw7Gz59E-4AUg@mail.gmail.com - Consigli da Ingo Molnar che le chiamate di sistema con più argomenti dovrebbero incapsularli in una struttura che includa un argomento *size* per garantire l'estensibilità futura: - https://lkml.org/lkml/2015/7/30/117 + https://lore.kernel.org/r/20150730083831.GA22182@gmail.com - Un certo numero di casi strani emersi dall'uso (riuso) dei flag O_*: - commit 75069f2b5bfb ("vfs: renumber FMODE_NONOTIFY and add to uniqueness @@ -635,9 +635,9 @@ Riferimenti e fonti - commit bb458c644a59 ("Safer ABI for O_TMPFILE") - Discussion from Matthew Wilcox about restrictions on 64-bit arguments: - https://lkml.org/lkml/2008/12/12/187 + https://lore.kernel.org/r/20081212152929.GM26095@parisc-linux.org - Raccomandazioni da Greg Kroah-Hartman sul fatto che i flag sconosciuti dovrebbero - essere controllati: https://lkml.org/lkml/2014/7/17/577 + essere controllati: https://lore.kernel.org/r/20140717193330.GB4703@kroah.com - Raccomandazioni da Linus Torvalds che le chiamate di sistema x32 dovrebbero favorire la compatibilità con le versioni a 64-bit piuttosto che quelle a 32-bit: - https://lkml.org/lkml/2011/8/31/244 + https://lore.kernel.org/r/CA+55aFxfmwfB7jbbrXxa=K7VBYPfAvmu3XOkGrLbB1UFjX1+Ew@mail.gmail.com diff --git a/Documentation/translations/it_IT/process/submitting-patches.rst b/Documentation/translations/it_IT/process/submitting-patches.rst index 966cd3242a60..ae00352346ed 100644 --- a/Documentation/translations/it_IT/process/submitting-patches.rst +++ b/Documentation/translations/it_IT/process/submitting-patches.rst @@ -731,13 +731,13 @@ Greg Kroah-Hartman, "Come scocciare un manutentore di un sottosistema" No!!!! Basta gigantesche bombe patch alle persone sulla lista linux-kernel@vger.kernel.org! - + Kernel Documentation/translations/it_IT/process/coding-style.rst: :ref:`Documentation/translations/it_IT/process/coding-style.rst ` E-mail di Linus Torvalds sul formato canonico di una patch: - + Andi Kleen, "Su come sottomettere patch del kernel" Alcune strategie su come sottomettere modifiche toste o controverse. diff --git a/Documentation/translations/ja_JP/SubmittingPatches b/Documentation/translations/ja_JP/SubmittingPatches index dd0c3280ba5a..6854f5add72e 100644 --- a/Documentation/translations/ja_JP/SubmittingPatches +++ b/Documentation/translations/ja_JP/SubmittingPatches @@ -702,13 +702,13 @@ Greg Kroah-Hartman, "How to piss off a kernel subsystem maintainer". NO!!!! No more huge patch bombs to linux-kernel@vger.kernel.org people! - + Kernel Documentation/process/coding-style.rst: Linus Torvalds's mail on the canonical patch format: - + Andi Kleen, "On submitting kernel patches" Some strategies to get difficult or controversial changes in. diff --git a/Documentation/translations/zh_CN/admin-guide/cpu-load.rst b/Documentation/translations/zh_CN/admin-guide/cpu-load.rst index c972731c0e57..a73400a054ff 100644 --- a/Documentation/translations/zh_CN/admin-guide/cpu-load.rst +++ b/Documentation/translations/zh_CN/admin-guide/cpu-load.rst @@ -95,7 +95,7 @@ Linux通过``/proc/stat``和``/proc/uptime``导出各种信息,用户空间工 参考 --- -- http://lkml.org/lkml/2007/2/12/6 +- https://lore.kernel.org/r/loom.20070212T063225-663@post.gmane.org - Documentation/filesystems/proc.rst (1.8) diff --git a/Documentation/translations/zh_CN/process/submitting-patches.rst b/Documentation/translations/zh_CN/process/submitting-patches.rst index 2e7dbaad4028..4fc6d16f5196 100644 --- a/Documentation/translations/zh_CN/process/submitting-patches.rst +++ b/Documentation/translations/zh_CN/process/submitting-patches.rst @@ -668,13 +668,13 @@ Greg Kroah-Hartman, "How to piss off a kernel subsystem maintainer". NO!!!! No more huge patch bombs to linux-kernel@vger.kernel.org people! - + Kernel Documentation/process/coding-style.rst: :ref:`Documentation/translations/zh_CN/process/coding-style.rst ` Linus Torvalds's mail on the canonical patch format: - + Andi Kleen, "On submitting kernel patches" Some strategies to get difficult or controversial changes in. From 6a2195a104a4c2ca5c428169700ed8a45fbe6893 Mon Sep 17 00:00:00 2001 From: Liao Pingfang Date: Sun, 10 Jan 2021 15:59:59 +0800 Subject: [PATCH 009/414] docs: filesystems: vfs: Correct the struct name The struct name should be file_system_type instead of file_system_operations. Signed-off-by: Liao Pingfang Link: https://lore.kernel.org/r/1610265599-5101-1-git-send-email-winndows@163.com Signed-off-by: Jonathan Corbet --- Documentation/filesystems/vfs.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Documentation/filesystems/vfs.rst b/Documentation/filesystems/vfs.rst index ca52c82e5bb5..18d69a4559d6 100644 --- a/Documentation/filesystems/vfs.rst +++ b/Documentation/filesystems/vfs.rst @@ -112,7 +112,7 @@ members are defined: .. code-block:: c - struct file_system_operations { + struct file_system_type { const char *name; int fs_flags; struct dentry *(*mount) (struct file_system_type *, int, From c4c6b86acff7b3465e537fd6d9fce63f85584e78 Mon Sep 17 00:00:00 2001 From: Jiang Biao Date: Thu, 7 Jan 2021 22:11:18 +0800 Subject: [PATCH 010/414] Documentation: Fix typos found in cgroup-v2.rst Fix typos found in Documentation/admin-guide/cgroup-v2.rst. Signed-off-by: Jiang Biao Link: https://lore.kernel.org/r/20210107141118.9530-1-benbjiang@tencent.com Signed-off-by: Jonathan Corbet --- Documentation/admin-guide/cgroup-v2.rst | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/Documentation/admin-guide/cgroup-v2.rst b/Documentation/admin-guide/cgroup-v2.rst index 63521cd36ce5..e0f6ff7cfa93 100644 --- a/Documentation/admin-guide/cgroup-v2.rst +++ b/Documentation/admin-guide/cgroup-v2.rst @@ -1558,7 +1558,7 @@ IO Interface Files 8:0 rbytes=90430464 wbytes=299008000 rios=8950 wios=1252 dbytes=50331648 dios=3021 io.cost.qos - A read-write nested-keyed file with exists only on the root + A read-write nested-keyed file which exists only on the root cgroup. This file configures the Quality of Service of the IO cost @@ -1613,7 +1613,7 @@ IO Interface Files automatic mode can be restored by setting "ctrl" to "auto". io.cost.model - A read-write nested-keyed file with exists only on the root + A read-write nested-keyed file which exists only on the root cgroup. This file configures the cost model of the IO cost model based From 85430c22e5ae87eff7b491d7a363c81325b94b8a Mon Sep 17 00:00:00 2001 From: Hao Li Date: Wed, 6 Jan 2021 09:50:00 +0800 Subject: [PATCH 011/414] Documentation/dax: Update description of DAX policy changing After commit 77573fa310d9 ("fs: Kill DCACHE_DONTCACHE dentry even if DCACHE_REFERENCED is set"), changes to DAX policy will take effect as soon as all references to this file are gone. Update the documentation accordingly. Signed-off-by: Hao Li Reviewed-by: Ira Weiny Link: https://lore.kernel.org/r/20210106015000.5263-1-lihao2018.fnst@cn.fujitsu.com Signed-off-by: Jonathan Corbet --- Documentation/filesystems/dax.txt | 17 +++-------------- 1 file changed, 3 insertions(+), 14 deletions(-) diff --git a/Documentation/filesystems/dax.txt b/Documentation/filesystems/dax.txt index 8fdb78f3c6c9..e03c20564f3a 100644 --- a/Documentation/filesystems/dax.txt +++ b/Documentation/filesystems/dax.txt @@ -83,20 +83,9 @@ Summary directories. This has runtime constraints and limitations that are described in 6) below. - 6. When changing the S_DAX policy via toggling the persistent FS_XFLAG_DAX flag, - the change in behaviour for existing regular files may not occur - immediately. If the change must take effect immediately, the administrator - needs to: - - a) stop the application so there are no active references to the data set - the policy change will affect - - b) evict the data set from kernel caches so it will be re-instantiated when - the application is restarted. This can be achieved by: - - i. drop-caches - ii. a filesystem unmount and mount cycle - iii. a system reboot + 6. When changing the S_DAX policy via toggling the persistent FS_XFLAG_DAX + flag, the change to existing regular files won't take effect until the + files are closed by all processes. Details From 7178b4a7d69ca96db6e0bb3593c202c2dd323089 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jonathan=20Neusch=C3=A4fer?= Date: Fri, 1 Jan 2021 22:52:13 +0100 Subject: [PATCH 012/414] docs: Include ext4 documentation via filesystems/ MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The documentation for other filesystems is already included via filesystems/index.rst. Include ext4 in the same way and remove it from the top-level table of contents. Signed-off-by: Jonathan Neuschäfer Link: https://lore.kernel.org/r/20210101215215.1047826-1-j.neuschaefer@gmx.net Signed-off-by: Jonathan Corbet --- Documentation/filesystems/index.rst | 1 + Documentation/index.rst | 11 ----------- 2 files changed, 1 insertion(+), 11 deletions(-) diff --git a/Documentation/filesystems/index.rst b/Documentation/filesystems/index.rst index 7be9b46d85d9..1f76b1cb3348 100644 --- a/Documentation/filesystems/index.rst +++ b/Documentation/filesystems/index.rst @@ -83,6 +83,7 @@ Documentation for filesystem implementations. erofs ext2 ext3 + ext4/index f2fs gfs2 gfs2-uevents diff --git a/Documentation/index.rst b/Documentation/index.rst index 5888e8a7272f..31f2adc8542d 100644 --- a/Documentation/index.rst +++ b/Documentation/index.rst @@ -171,17 +171,6 @@ implementation. x86/index xtensa/index -Filesystem Documentation ------------------------- - -The documentation in this section are provided by specific filesystem -subprojects. - -.. toctree:: - :maxdepth: 2 - - filesystems/ext4/index - Other documentation ------------------- From 7594bb08fb68ffe7e77c2e77ecc33c1e21631ead Mon Sep 17 00:00:00 2001 From: Johannes Thumshirn Date: Sat, 19 Dec 2020 00:35:25 +0900 Subject: [PATCH 013/414] Documentation: document dma device use for mcb Hannes reported a problem with setting up dma transfers on a mcb device. The problem boiled down to the use of a wrong 'device' for the dma functions. Document how to setup dma transfers for a IP core on a mcb carrier. Reported-by: Hannes Duerr Signed-off-by: Johannes Thumshirn Link: https://lore.kernel.org/r/3bdc8f76b30c2b0e2a2bfab06c2e73797ddc9384.1608305690.git.johannes.thumshirn@wdc.com Signed-off-by: Jonathan Corbet --- Documentation/driver-api/men-chameleon-bus.rst | 12 ++++++++++++ 1 file changed, 12 insertions(+) diff --git a/Documentation/driver-api/men-chameleon-bus.rst b/Documentation/driver-api/men-chameleon-bus.rst index 1b1f048aa748..6f0b9ee47595 100644 --- a/Documentation/driver-api/men-chameleon-bus.rst +++ b/Documentation/driver-api/men-chameleon-bus.rst @@ -18,6 +18,7 @@ MEN Chameleon Bus 4.1 The driver structure 4.2 Probing and attaching 4.3 Initializing the driver + 4.4 Using DMA Introduction @@ -173,3 +174,14 @@ module at the MCB core:: The module_mcb_driver() macro can be used to reduce the above code:: module_mcb_driver(foo_driver); + +Using DMA +--------- + +To make use of the kernel's DMA-API's function, you will need to use the +carrier device's 'struct device'. Fortunately 'struct mcb_device' embeds a +pointer (->dma_dev) to the carrier's device for DMA purposes:: + + ret = dma_set_mask_and_coherent(&mdev->dma_dev, DMA_BIT_MASK(dma_bits)); + if (rc) + /* Handle errors */ From ee037040f808848bf7355edd081d33bdb1cfd011 Mon Sep 17 00:00:00 2001 From: Marc Koderer Date: Mon, 28 Dec 2020 07:04:15 +0100 Subject: [PATCH 014/414] samples/kprobes: Remove misleading comment The example file supports many architectures not only x86 and PPC. Signed-off-by: Marc Koderer Cc: trivial@kernel.org Link: https://lore.kernel.org/r/20201228060415.2194-1-marc@koderer.com Signed-off-by: Jonathan Corbet --- samples/kprobes/kprobe_example.c | 1 - 1 file changed, 1 deletion(-) diff --git a/samples/kprobes/kprobe_example.c b/samples/kprobes/kprobe_example.c index 365905cb24b1..192aa68db0c0 100644 --- a/samples/kprobes/kprobe_example.c +++ b/samples/kprobes/kprobe_example.c @@ -1,6 +1,5 @@ // SPDX-License-Identifier: GPL-2.0-only /* - * NOTE: This example is works on x86 and powerpc. * Here's a sample kernel module showing the use of kprobes to dump a * stack trace and selected registers when kernel_clone() is called. * From b8e724fd71173fad2d7d6d70d18d8b3379cd4626 Mon Sep 17 00:00:00 2001 From: Yanteng Si Date: Tue, 12 Jan 2021 19:52:56 +0800 Subject: [PATCH 015/414] doc/zh_CN: add mips index.rst translation This patch translates Documentation/mips/index.rst into Chinese. Signed-off-by: Yanteng Si Reviewed-by: Alex Shi Reviewed-by: Jiaxun Yang Link: https://lore.kernel.org/r/20210112115259.217944-1-siyanteng@loongson.cn Signed-off-by: Jonathan Corbet --- .../translations/zh_CN/mips/index.rst | 29 +++++++++++++++++++ 1 file changed, 29 insertions(+) create mode 100644 Documentation/translations/zh_CN/mips/index.rst diff --git a/Documentation/translations/zh_CN/mips/index.rst b/Documentation/translations/zh_CN/mips/index.rst new file mode 100644 index 000000000000..2c7b836a3da5 --- /dev/null +++ b/Documentation/translations/zh_CN/mips/index.rst @@ -0,0 +1,29 @@ +.. SPDX-License-Identifier: GPL-2.0 + +.. include:: ../disclaimer-zh_CN.rst + +:Original: :ref:`Documentation/mips/index.rst ` +:Translator: Yanteng Si + +.. _cn_index: + + +=========================== +MIPS特性文档 +=========================== + +.. toctree:: + :maxdepth: 2 + :numbered: + + booting + ingenic-tcu + + features + +.. only:: subproject and html + + Indices + ======= + + * :ref:`genindex` From 7fd3954b0c523bdfdf98bf8e637387e33d497bf1 Mon Sep 17 00:00:00 2001 From: Yanteng Si Date: Tue, 12 Jan 2021 19:52:57 +0800 Subject: [PATCH 016/414] doc/zh_CN: add mips booting.rst translation This patch translates Documentation/mips/booting.rst into Chinese. Signed-off-by: Yanteng Si Reviewed-by: Alex Shi Link: https://lore.kernel.org/r/20210112115259.217944-2-siyanteng@loongson.cn Signed-off-by: Jonathan Corbet --- .../translations/zh_CN/mips/booting.rst | 31 +++++++++++++++++++ 1 file changed, 31 insertions(+) create mode 100644 Documentation/translations/zh_CN/mips/booting.rst diff --git a/Documentation/translations/zh_CN/mips/booting.rst b/Documentation/translations/zh_CN/mips/booting.rst new file mode 100644 index 000000000000..3099d0fff7a6 --- /dev/null +++ b/Documentation/translations/zh_CN/mips/booting.rst @@ -0,0 +1,31 @@ +.. SPDX-License-Identifier: GPL-2.0 + +.. include:: ../disclaimer-zh_CN.rst + +:Original: :ref:`Documentation/mips/booting.rst ` +:Translator: Yanteng Si + +.. _cn_booting: + +BMIPS设备树引导 +------------------------ + + 一些bootloaders只支持在内核镜像开始地址处的单一入口点。而其它 + bootloaders将跳转到ELF的开始地址处。两种方案都支持的;因为 + CONFIG_BOOT_RAW=y and CONFIG_NO_EXCEPT_FILL=y, 所以第一条指令 + 会立即跳转到kernel_entry()入口处执行。 + + 与arch/arm情况(b)类似,dt感知的引导加载程序需要设置以下寄存器: + + a0 : 0 + + a1 : 0xffffffff + + a2 : RAM中指向设备树块的物理指针(在chapterII中定义)。 + 设备树可以位于前512MB物理地址空间(0x00000000 - + 0x1fffffff)的任何位置,以64位边界对齐。 + + 传统bootloaders不会使用这样的约定,并且它们不传入DT块。 + 在这种情况下,Linux将通过选中CONFIG_DT_*查找DTB。 + + 以上约定只在32位系统中定义,因为目前没有任何64位的BMIPS实现。 From 72bc9d08868d267d36a5be511f7e1e0a23e75cd5 Mon Sep 17 00:00:00 2001 From: Yanteng Si Date: Tue, 12 Jan 2021 19:52:58 +0800 Subject: [PATCH 017/414] doc/zh_CN: add mips features.rst translation This patch translates Documentation/mips/features.rst into Chinese. Signed-off-by: Yanteng Si Reviewed-by: Alex Shi Link: https://lore.kernel.org/r/20210112115259.217944-3-siyanteng@loongson.cn Signed-off-by: Jonathan Corbet --- Documentation/translations/zh_CN/mips/features.rst | 10 ++++++++++ 1 file changed, 10 insertions(+) create mode 100644 Documentation/translations/zh_CN/mips/features.rst diff --git a/Documentation/translations/zh_CN/mips/features.rst b/Documentation/translations/zh_CN/mips/features.rst new file mode 100644 index 000000000000..7e67f81a0982 --- /dev/null +++ b/Documentation/translations/zh_CN/mips/features.rst @@ -0,0 +1,10 @@ +.. SPDX-License-Identifier: GPL-2.0 + +.. include:: ../disclaimer-zh_CN.rst + +:Original: :ref:`Documentation/mips/features.rst ` +:Translator: Yanteng Si + +.. _cn_features: + +.. kernel-feat:: $srctree/Documentation/features mips From 419b1d4ed1cb9a7167b53ec2d9ae5ff56c6ee4a8 Mon Sep 17 00:00:00 2001 From: Yanteng Si Date: Tue, 12 Jan 2021 19:52:59 +0800 Subject: [PATCH 018/414] doc/zh_CN: add mips ingenic-tcu.rst translation This patch translates Documentation/mips/ingenic-tcu.rst into Chinese. Signed-off-by: Yanteng Si Reviewed-by: Alex Shi Link: https://lore.kernel.org/r/20210112115259.217944-4-siyanteng@loongson.cn Signed-off-by: Jonathan Corbet --- .../translations/zh_CN/mips/ingenic-tcu.rst | 69 +++++++++++++++++++ 1 file changed, 69 insertions(+) create mode 100644 Documentation/translations/zh_CN/mips/ingenic-tcu.rst diff --git a/Documentation/translations/zh_CN/mips/ingenic-tcu.rst b/Documentation/translations/zh_CN/mips/ingenic-tcu.rst new file mode 100644 index 000000000000..72b5d409ed89 --- /dev/null +++ b/Documentation/translations/zh_CN/mips/ingenic-tcu.rst @@ -0,0 +1,69 @@ +.. SPDX-License-Identifier: GPL-2.0 + +.. include:: ../disclaimer-zh_CN.rst + +:Original: :ref:`Documentation/mips/ingenic-tcu.rst ` +:Translator: Yanteng Si + +.. _cn_ingenic-tcu: + +=============================================== +君正 JZ47xx SoC定时器/计数器硬件单元 +=============================================== + +君正 JZ47xx SoC中的定时器/计数器单元(TCU)是一个多功能硬件块。它有多达 +8个通道,可以用作计数器,计时器,或脉冲宽度调制器。 + +- JZ4725B, JZ4750, JZ4755 只有6个TCU通道。其它SoC都有8个通道。 + +- JZ4725B引入了一个独立的通道,称为操作系统计时器(OST)。这是一个32位可 + 编程定时器。在JZ4760B及以上型号上,它是64位的。 + +- 每个TCU通道都有自己的时钟源,可以通过 TCSR 寄存器设置通道的父级时钟 + 源(pclk、ext、rtc)、开关以及分频。 + + - 看门狗和OST硬件模块在它们的寄存器空间中也有相同形式的TCSR寄存器。 + - 用于关闭/开启的 TCU 寄存器也可以关闭/开启看门狗和 OST 时钟。 + +- 每个TCU通道在两种模式的其中一种模式下运行: + + - 模式 TCU1:通道无法在睡眠模式下运行,但更易于操作。 + - 模式 TCU2:通道可以在睡眠模式下运行,但操作比 TCU1 通道复杂一些。 + +- 每个 TCU 通道的模式取决于使用的SoC: + + - 在最老的SoC(高于JZ4740),八个通道都运行在TCU1模式。 + - 在 JZ4725B,通道5运行在TCU2,其它通道则运行在TCU1。 + - 在最新的SoC(JZ4750及之后),通道1-2运行在TCU2,其它通道则运行 + 在TCU1。 + +- 每个通道都可以生成中断。有些通道共享一条中断线,而有些没有,其在SoC型 + 号之间的变更: + + - 在很老的SoC(JZ4740及更低),通道0和通道1有它们自己的中断线;通 + 道2-7共享最后一条中断线。 + - 在 JZ4725B,通道0有它自己的中断线;通道1-5共享一条中断线;OST + 使用最后一条中断线。 + - 在比较新的SoC(JZ4750及以后),通道5有它自己的中断线;通 + 道0-4和(如果是8通道)6-7全部共享一条中断线;OST使用最后一条中 + 断线。 + +实现 +==== + +TCU硬件的功能分布在多个驱动程序: + +=========== ===== +时钟 drivers/clk/ingenic/tcu.c +中断 drivers/irqchip/irq-ingenic-tcu.c +定时器 drivers/clocksource/ingenic-timer.c +OST drivers/clocksource/ingenic-ost.c +脉冲宽度调制器 drivers/pwm/pwm-jz4740.c +看门狗 drivers/watchdog/jz4740_wdt.c +=========== ===== + +因为可以从相同的寄存器控制属于不同驱动程序和框架的TCU的各种功能,所以 +所有这些驱动程序都通过相同的控制总线通用接口访问它们的寄存器。 + +有关TCU驱动程序的设备树绑定的更多信息,请参阅: +Documentation/devicetree/bindings/timer/ingenic,tcu.yaml. From bad07664a5a15948ff959a5dbdb5211d3655baea Mon Sep 17 00:00:00 2001 From: Xiao Yang Date: Wed, 16 Dec 2020 15:17:55 +0800 Subject: [PATCH 019/414] RDMA/rxe: Add check for supported QP types Current rdma_rxe only supports five QP types, attempting to create any others should return an error - the type check was missed. Link: https://lore.kernel.org/r/20201216071755.149449-2-yangx.jy@cn.fujitsu.com Signed-off-by: Xiao Yang Signed-off-by: Jason Gunthorpe --- drivers/infiniband/sw/rxe/rxe_qp.c | 11 +++++++++++ 1 file changed, 11 insertions(+) diff --git a/drivers/infiniband/sw/rxe/rxe_qp.c b/drivers/infiniband/sw/rxe/rxe_qp.c index 656a5b4be847..65c8df812aeb 100644 --- a/drivers/infiniband/sw/rxe/rxe_qp.c +++ b/drivers/infiniband/sw/rxe/rxe_qp.c @@ -62,6 +62,17 @@ int rxe_qp_chk_init(struct rxe_dev *rxe, struct ib_qp_init_attr *init) struct rxe_port *port; int port_num = init->port_num; + switch(init->qp_type) { + case IB_QPT_SMI: + case IB_QPT_GSI: + case IB_QPT_RC: + case IB_QPT_UC: + case IB_QPT_UD: + break; + default: + return -EOPNOTSUPP; + } + if (!init->recv_cq || !init->send_cq) { pr_warn("missing cq\n"); goto err1; From 1d11c1b7f9ff28196d66d995f11fcf3101301fe9 Mon Sep 17 00:00:00 2001 From: Bob Pearson Date: Wed, 16 Dec 2020 17:15:44 -0600 Subject: [PATCH 020/414] RDMA/rxe: Remove unneeded RXE_POOL_ATOMIC flag Remove RXE_POOL_ATOMIC flag from rxe_type_info for AH objects. These objects are now allocated by rdma/core so there is no further reason for this flag. Link: https://lore.kernel.org/r/20201216231550.27224-2-rpearson@hpe.com Signed-off-by: Bob Pearson Signed-off-by: Jason Gunthorpe --- drivers/infiniband/sw/rxe/rxe_pool.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/infiniband/sw/rxe/rxe_pool.c b/drivers/infiniband/sw/rxe/rxe_pool.c index b374eb53e2fe..11571ff64a8f 100644 --- a/drivers/infiniband/sw/rxe/rxe_pool.c +++ b/drivers/infiniband/sw/rxe/rxe_pool.c @@ -25,7 +25,7 @@ struct rxe_type_info rxe_type_info[RXE_NUM_TYPES] = { [RXE_TYPE_AH] = { .name = "rxe-ah", .size = sizeof(struct rxe_ah), - .flags = RXE_POOL_ATOMIC | RXE_POOL_NO_ALLOC, + .flags = RXE_POOL_NO_ALLOC, }, [RXE_TYPE_SRQ] = { .name = "rxe-srq", From c06ee3a0147e016b6127eb25432739a4f4b151f5 Mon Sep 17 00:00:00 2001 From: Bob Pearson Date: Wed, 16 Dec 2020 17:15:45 -0600 Subject: [PATCH 021/414] RDMA/rxe: Let pools support both keys and indices Allow both indices and keys to exist for objects in pools. Previously you were limited to one or the other. This is required for later implementing rxe memory windows. Link: https://lore.kernel.org/r/20201216231550.27224-3-rpearson@hpe.com Signed-off-by: Bob Pearson Signed-off-by: Jason Gunthorpe --- drivers/infiniband/sw/rxe/rxe_pool.c | 73 ++++++++++++++-------------- drivers/infiniband/sw/rxe/rxe_pool.h | 32 +++++++----- 2 files changed, 58 insertions(+), 47 deletions(-) diff --git a/drivers/infiniband/sw/rxe/rxe_pool.c b/drivers/infiniband/sw/rxe/rxe_pool.c index 11571ff64a8f..f1ecf5983742 100644 --- a/drivers/infiniband/sw/rxe/rxe_pool.c +++ b/drivers/infiniband/sw/rxe/rxe_pool.c @@ -94,18 +94,18 @@ static int rxe_pool_init_index(struct rxe_pool *pool, u32 max, u32 min) goto out; } - pool->max_index = max; - pool->min_index = min; + pool->index.max_index = max; + pool->index.min_index = min; size = BITS_TO_LONGS(max - min + 1) * sizeof(long); - pool->table = kmalloc(size, GFP_KERNEL); - if (!pool->table) { + pool->index.table = kmalloc(size, GFP_KERNEL); + if (!pool->index.table) { err = -ENOMEM; goto out; } - pool->table_size = size; - bitmap_zero(pool->table, max - min + 1); + pool->index.table_size = size; + bitmap_zero(pool->index.table, max - min + 1); out: return err; @@ -127,7 +127,8 @@ int rxe_pool_init( pool->max_elem = max_elem; pool->elem_size = ALIGN(size, RXE_POOL_ALIGN); pool->flags = rxe_type_info[type].flags; - pool->tree = RB_ROOT; + pool->index.tree = RB_ROOT; + pool->key.tree = RB_ROOT; pool->cleanup = rxe_type_info[type].cleanup; atomic_set(&pool->num_elem, 0); @@ -145,8 +146,8 @@ int rxe_pool_init( } if (rxe_type_info[type].flags & RXE_POOL_KEY) { - pool->key_offset = rxe_type_info[type].key_offset; - pool->key_size = rxe_type_info[type].key_size; + pool->key.key_offset = rxe_type_info[type].key_offset; + pool->key.key_size = rxe_type_info[type].key_size; } pool->state = RXE_POOL_STATE_VALID; @@ -160,7 +161,7 @@ static void rxe_pool_release(struct kref *kref) struct rxe_pool *pool = container_of(kref, struct rxe_pool, ref_cnt); pool->state = RXE_POOL_STATE_INVALID; - kfree(pool->table); + kfree(pool->index.table); } static void rxe_pool_put(struct rxe_pool *pool) @@ -185,27 +186,27 @@ void rxe_pool_cleanup(struct rxe_pool *pool) static u32 alloc_index(struct rxe_pool *pool) { u32 index; - u32 range = pool->max_index - pool->min_index + 1; + u32 range = pool->index.max_index - pool->index.min_index + 1; - index = find_next_zero_bit(pool->table, range, pool->last); + index = find_next_zero_bit(pool->index.table, range, pool->index.last); if (index >= range) - index = find_first_zero_bit(pool->table, range); + index = find_first_zero_bit(pool->index.table, range); WARN_ON_ONCE(index >= range); - set_bit(index, pool->table); - pool->last = index; - return index + pool->min_index; + set_bit(index, pool->index.table); + pool->index.last = index; + return index + pool->index.min_index; } static void insert_index(struct rxe_pool *pool, struct rxe_pool_entry *new) { - struct rb_node **link = &pool->tree.rb_node; + struct rb_node **link = &pool->index.tree.rb_node; struct rb_node *parent = NULL; struct rxe_pool_entry *elem; while (*link) { parent = *link; - elem = rb_entry(parent, struct rxe_pool_entry, node); + elem = rb_entry(parent, struct rxe_pool_entry, index_node); if (elem->index == new->index) { pr_warn("element already exists!\n"); @@ -218,25 +219,25 @@ static void insert_index(struct rxe_pool *pool, struct rxe_pool_entry *new) link = &(*link)->rb_right; } - rb_link_node(&new->node, parent, link); - rb_insert_color(&new->node, &pool->tree); + rb_link_node(&new->index_node, parent, link); + rb_insert_color(&new->index_node, &pool->index.tree); out: return; } static void insert_key(struct rxe_pool *pool, struct rxe_pool_entry *new) { - struct rb_node **link = &pool->tree.rb_node; + struct rb_node **link = &pool->key.tree.rb_node; struct rb_node *parent = NULL; struct rxe_pool_entry *elem; int cmp; while (*link) { parent = *link; - elem = rb_entry(parent, struct rxe_pool_entry, node); + elem = rb_entry(parent, struct rxe_pool_entry, key_node); - cmp = memcmp((u8 *)elem + pool->key_offset, - (u8 *)new + pool->key_offset, pool->key_size); + cmp = memcmp((u8 *)elem + pool->key.key_offset, + (u8 *)new + pool->key.key_offset, pool->key.key_size); if (cmp == 0) { pr_warn("key already exists!\n"); @@ -249,8 +250,8 @@ static void insert_key(struct rxe_pool *pool, struct rxe_pool_entry *new) link = &(*link)->rb_right; } - rb_link_node(&new->node, parent, link); - rb_insert_color(&new->node, &pool->tree); + rb_link_node(&new->key_node, parent, link); + rb_insert_color(&new->key_node, &pool->key.tree); out: return; } @@ -262,7 +263,7 @@ void rxe_add_key(void *arg, void *key) unsigned long flags; write_lock_irqsave(&pool->pool_lock, flags); - memcpy((u8 *)elem + pool->key_offset, key, pool->key_size); + memcpy((u8 *)elem + pool->key.key_offset, key, pool->key.key_size); insert_key(pool, elem); write_unlock_irqrestore(&pool->pool_lock, flags); } @@ -274,7 +275,7 @@ void rxe_drop_key(void *arg) unsigned long flags; write_lock_irqsave(&pool->pool_lock, flags); - rb_erase(&elem->node, &pool->tree); + rb_erase(&elem->key_node, &pool->key.tree); write_unlock_irqrestore(&pool->pool_lock, flags); } @@ -297,8 +298,8 @@ void rxe_drop_index(void *arg) unsigned long flags; write_lock_irqsave(&pool->pool_lock, flags); - clear_bit(elem->index - pool->min_index, pool->table); - rb_erase(&elem->node, &pool->tree); + clear_bit(elem->index - pool->index.min_index, pool->index.table); + rb_erase(&elem->index_node, &pool->index.tree); write_unlock_irqrestore(&pool->pool_lock, flags); } @@ -402,10 +403,10 @@ void *rxe_pool_get_index(struct rxe_pool *pool, u32 index) if (pool->state != RXE_POOL_STATE_VALID) goto out; - node = pool->tree.rb_node; + node = pool->index.tree.rb_node; while (node) { - elem = rb_entry(node, struct rxe_pool_entry, node); + elem = rb_entry(node, struct rxe_pool_entry, index_node); if (elem->index > index) node = node->rb_left; @@ -434,13 +435,13 @@ void *rxe_pool_get_key(struct rxe_pool *pool, void *key) if (pool->state != RXE_POOL_STATE_VALID) goto out; - node = pool->tree.rb_node; + node = pool->key.tree.rb_node; while (node) { - elem = rb_entry(node, struct rxe_pool_entry, node); + elem = rb_entry(node, struct rxe_pool_entry, key_node); - cmp = memcmp((u8 *)elem + pool->key_offset, - key, pool->key_size); + cmp = memcmp((u8 *)elem + pool->key.key_offset, + key, pool->key.key_size); if (cmp > 0) node = node->rb_left; diff --git a/drivers/infiniband/sw/rxe/rxe_pool.h b/drivers/infiniband/sw/rxe/rxe_pool.h index 432745ffc8d4..3d722aae5f15 100644 --- a/drivers/infiniband/sw/rxe/rxe_pool.h +++ b/drivers/infiniband/sw/rxe/rxe_pool.h @@ -56,8 +56,11 @@ struct rxe_pool_entry { struct kref ref_cnt; struct list_head list; - /* only used if indexed or keyed */ - struct rb_node node; + /* only used if keyed */ + struct rb_node key_node; + + /* only used if indexed */ + struct rb_node index_node; u32 index; }; @@ -74,15 +77,22 @@ struct rxe_pool { unsigned int max_elem; atomic_t num_elem; - /* only used if indexed or keyed */ - struct rb_root tree; - unsigned long *table; - size_t table_size; - u32 max_index; - u32 min_index; - u32 last; - size_t key_offset; - size_t key_size; + /* only used if indexed */ + struct { + struct rb_root tree; + unsigned long *table; + size_t table_size; + u32 last; + u32 max_index; + u32 min_index; + } index; + + /* only used if keyed */ + struct { + struct rb_root tree; + size_t key_offset; + size_t key_size; + } key; }; /* initialize a pool of objects with given limit on From b994d49ef4afa28dc335ee2b4b734939c7a1d95f Mon Sep 17 00:00:00 2001 From: Bob Pearson Date: Wed, 16 Dec 2020 17:15:46 -0600 Subject: [PATCH 022/414] RDMA/rxe: Add elem_offset field to rxe_type_info The rxe verbs objects each include an rdma-core object 'ib_xxx' and a rxe_pool_entry 'pelem' in addition to rxe specific data. Originally these all had pelem first and ib_xxx second. Currently about half have ib_xxx first and half have pelem first. Saving the offset of the pelem field in rxe_type info will enable making the rxe_pool APIs type safe as the pelem field continues to vary. Link: https://lore.kernel.org/r/20201216231550.27224-4-rpearson@hpe.com Signed-off-by: Bob Pearson Signed-off-by: Jason Gunthorpe --- drivers/infiniband/sw/rxe/rxe_pool.c | 10 ++++++++++ drivers/infiniband/sw/rxe/rxe_pool.h | 1 + 2 files changed, 11 insertions(+) diff --git a/drivers/infiniband/sw/rxe/rxe_pool.c b/drivers/infiniband/sw/rxe/rxe_pool.c index f1ecf5983742..4d667b78af9b 100644 --- a/drivers/infiniband/sw/rxe/rxe_pool.c +++ b/drivers/infiniband/sw/rxe/rxe_pool.c @@ -15,21 +15,25 @@ struct rxe_type_info rxe_type_info[RXE_NUM_TYPES] = { [RXE_TYPE_UC] = { .name = "rxe-uc", .size = sizeof(struct rxe_ucontext), + .elem_offset = offsetof(struct rxe_ucontext, pelem), .flags = RXE_POOL_NO_ALLOC, }, [RXE_TYPE_PD] = { .name = "rxe-pd", .size = sizeof(struct rxe_pd), + .elem_offset = offsetof(struct rxe_pd, pelem), .flags = RXE_POOL_NO_ALLOC, }, [RXE_TYPE_AH] = { .name = "rxe-ah", .size = sizeof(struct rxe_ah), + .elem_offset = offsetof(struct rxe_ah, pelem), .flags = RXE_POOL_NO_ALLOC, }, [RXE_TYPE_SRQ] = { .name = "rxe-srq", .size = sizeof(struct rxe_srq), + .elem_offset = offsetof(struct rxe_srq, pelem), .flags = RXE_POOL_INDEX | RXE_POOL_NO_ALLOC, .min_index = RXE_MIN_SRQ_INDEX, .max_index = RXE_MAX_SRQ_INDEX, @@ -37,6 +41,7 @@ struct rxe_type_info rxe_type_info[RXE_NUM_TYPES] = { [RXE_TYPE_QP] = { .name = "rxe-qp", .size = sizeof(struct rxe_qp), + .elem_offset = offsetof(struct rxe_qp, pelem), .cleanup = rxe_qp_cleanup, .flags = RXE_POOL_INDEX, .min_index = RXE_MIN_QP_INDEX, @@ -45,12 +50,14 @@ struct rxe_type_info rxe_type_info[RXE_NUM_TYPES] = { [RXE_TYPE_CQ] = { .name = "rxe-cq", .size = sizeof(struct rxe_cq), + .elem_offset = offsetof(struct rxe_cq, pelem), .flags = RXE_POOL_NO_ALLOC, .cleanup = rxe_cq_cleanup, }, [RXE_TYPE_MR] = { .name = "rxe-mr", .size = sizeof(struct rxe_mem), + .elem_offset = offsetof(struct rxe_mem, pelem), .cleanup = rxe_mem_cleanup, .flags = RXE_POOL_INDEX, .max_index = RXE_MAX_MR_INDEX, @@ -59,6 +66,7 @@ struct rxe_type_info rxe_type_info[RXE_NUM_TYPES] = { [RXE_TYPE_MW] = { .name = "rxe-mw", .size = sizeof(struct rxe_mem), + .elem_offset = offsetof(struct rxe_mem, pelem), .flags = RXE_POOL_INDEX, .max_index = RXE_MAX_MW_INDEX, .min_index = RXE_MIN_MW_INDEX, @@ -66,6 +74,7 @@ struct rxe_type_info rxe_type_info[RXE_NUM_TYPES] = { [RXE_TYPE_MC_GRP] = { .name = "rxe-mc_grp", .size = sizeof(struct rxe_mc_grp), + .elem_offset = offsetof(struct rxe_mc_grp, pelem), .cleanup = rxe_mc_cleanup, .flags = RXE_POOL_KEY, .key_offset = offsetof(struct rxe_mc_grp, mgid), @@ -74,6 +83,7 @@ struct rxe_type_info rxe_type_info[RXE_NUM_TYPES] = { [RXE_TYPE_MC_ELEM] = { .name = "rxe-mc_elem", .size = sizeof(struct rxe_mc_elem), + .elem_offset = offsetof(struct rxe_mc_elem, pelem), .flags = RXE_POOL_ATOMIC, }, }; diff --git a/drivers/infiniband/sw/rxe/rxe_pool.h b/drivers/infiniband/sw/rxe/rxe_pool.h index 3d722aae5f15..b37df6198e8a 100644 --- a/drivers/infiniband/sw/rxe/rxe_pool.h +++ b/drivers/infiniband/sw/rxe/rxe_pool.h @@ -36,6 +36,7 @@ struct rxe_pool_entry; struct rxe_type_info { const char *name; size_t size; + size_t elem_offset; void (*cleanup)(struct rxe_pool_entry *obj); enum rxe_pool_flags flags; u32 max_index; From 2622aa718a6a774ba302ca002adc62eeab9cdf28 Mon Sep 17 00:00:00 2001 From: Bob Pearson Date: Wed, 16 Dec 2020 17:15:47 -0600 Subject: [PATCH 023/414] RDMA/rxe: Make pool lookup and alloc APIs type safe The allocate, lookup index, lookup key and cleanup routines in rxe_pool.c currently are not type safe against relocating the pelem field in the objects. Planned changes to move allocation of objects into rdma-core make addressing this a requirement. Use the elem_offset field in rxe_type_info make these APIs safe against moving the pelem field. Link: https://lore.kernel.org/r/20201216231550.27224-5-rpearson@hpe.com Signed-off-by: Bob Pearson Signed-off-by: Jason Gunthorpe --- drivers/infiniband/sw/rxe/rxe_pool.c | 55 +++++++++++++++++++--------- 1 file changed, 38 insertions(+), 17 deletions(-) diff --git a/drivers/infiniband/sw/rxe/rxe_pool.c b/drivers/infiniband/sw/rxe/rxe_pool.c index 4d667b78af9b..2873ecfb84c2 100644 --- a/drivers/infiniband/sw/rxe/rxe_pool.c +++ b/drivers/infiniband/sw/rxe/rxe_pool.c @@ -315,7 +315,9 @@ void rxe_drop_index(void *arg) void *rxe_alloc(struct rxe_pool *pool) { + struct rxe_type_info *info = &rxe_type_info[pool->type]; struct rxe_pool_entry *elem; + u8 *obj; unsigned long flags; might_sleep_if(!(pool->flags & RXE_POOL_ATOMIC)); @@ -334,16 +336,17 @@ void *rxe_alloc(struct rxe_pool *pool) if (atomic_inc_return(&pool->num_elem) > pool->max_elem) goto out_cnt; - elem = kzalloc(rxe_type_info[pool->type].size, - (pool->flags & RXE_POOL_ATOMIC) ? - GFP_ATOMIC : GFP_KERNEL); - if (!elem) + obj = kzalloc(info->size, (pool->flags & RXE_POOL_ATOMIC) ? + GFP_ATOMIC : GFP_KERNEL); + if (!obj) goto out_cnt; + elem = (struct rxe_pool_entry *)(obj + info->elem_offset); + elem->pool = pool; kref_init(&elem->ref_cnt); - return elem; + return obj; out_cnt: atomic_dec(&pool->num_elem); @@ -391,12 +394,17 @@ void rxe_elem_release(struct kref *kref) struct rxe_pool_entry *elem = container_of(kref, struct rxe_pool_entry, ref_cnt); struct rxe_pool *pool = elem->pool; + struct rxe_type_info *info = &rxe_type_info[pool->type]; + u8 *obj; if (pool->cleanup) pool->cleanup(elem); - if (!(pool->flags & RXE_POOL_NO_ALLOC)) - kfree(elem); + if (!(pool->flags & RXE_POOL_NO_ALLOC)) { + obj = (u8 *)elem - info->elem_offset; + kfree(obj); + } + atomic_dec(&pool->num_elem); ib_device_put(&pool->rxe->ib_dev); rxe_pool_put(pool); @@ -404,8 +412,10 @@ void rxe_elem_release(struct kref *kref) void *rxe_pool_get_index(struct rxe_pool *pool, u32 index) { - struct rb_node *node = NULL; - struct rxe_pool_entry *elem = NULL; + struct rxe_type_info *info = &rxe_type_info[pool->type]; + struct rb_node *node; + struct rxe_pool_entry *elem; + u8 *obj = NULL; unsigned long flags; read_lock_irqsave(&pool->pool_lock, flags); @@ -422,21 +432,28 @@ void *rxe_pool_get_index(struct rxe_pool *pool, u32 index) node = node->rb_left; else if (elem->index < index) node = node->rb_right; - else { - kref_get(&elem->ref_cnt); + else break; - } + } + + if (node) { + kref_get(&elem->ref_cnt); + obj = (u8 *)elem - info->elem_offset; + } else { + obj = NULL; } out: read_unlock_irqrestore(&pool->pool_lock, flags); - return node ? elem : NULL; + return obj; } void *rxe_pool_get_key(struct rxe_pool *pool, void *key) { - struct rb_node *node = NULL; - struct rxe_pool_entry *elem = NULL; + struct rxe_type_info *info = &rxe_type_info[pool->type]; + struct rb_node *node; + struct rxe_pool_entry *elem; + u8 *obj = NULL; int cmp; unsigned long flags; @@ -461,10 +478,14 @@ void *rxe_pool_get_key(struct rxe_pool *pool, void *key) break; } - if (node) + if (node) { kref_get(&elem->ref_cnt); + obj = (u8 *)elem - info->elem_offset; + } else { + obj = NULL; + } out: read_unlock_irqrestore(&pool->pool_lock, flags); - return node ? elem : NULL; + return obj; } From 91a42c5becb685e1ae73554726906dfe74a8afdd Mon Sep 17 00:00:00 2001 From: Bob Pearson Date: Wed, 16 Dec 2020 17:15:48 -0600 Subject: [PATCH 024/414] RDMA/rxe: Make add/drop key/index APIs type safe Replace 'void *' parameters with 'struct rxe_pool_entry *' and use a macro to allow: rxe_add_index, rxe_drop_index, rxe_add_key, rxe_drop_key and rxe_add_to_pool APIs to be type safe against changing the position of pelem in the objects. Link: https://lore.kernel.org/r/20201216231550.27224-6-rpearson@hpe.com Signed-off-by: Bob Pearson Signed-off-by: Jason Gunthorpe --- drivers/infiniband/sw/rxe/rxe_pool.c | 14 +++++--------- drivers/infiniband/sw/rxe/rxe_pool.h | 20 +++++++++++++++----- drivers/infiniband/sw/rxe/rxe_verbs.c | 16 ++++++++-------- 3 files changed, 28 insertions(+), 22 deletions(-) diff --git a/drivers/infiniband/sw/rxe/rxe_pool.c b/drivers/infiniband/sw/rxe/rxe_pool.c index 2873ecfb84c2..c8a6d0d7f01a 100644 --- a/drivers/infiniband/sw/rxe/rxe_pool.c +++ b/drivers/infiniband/sw/rxe/rxe_pool.c @@ -266,9 +266,8 @@ static void insert_key(struct rxe_pool *pool, struct rxe_pool_entry *new) return; } -void rxe_add_key(void *arg, void *key) +void __rxe_add_key(struct rxe_pool_entry *elem, void *key) { - struct rxe_pool_entry *elem = arg; struct rxe_pool *pool = elem->pool; unsigned long flags; @@ -278,9 +277,8 @@ void rxe_add_key(void *arg, void *key) write_unlock_irqrestore(&pool->pool_lock, flags); } -void rxe_drop_key(void *arg) +void __rxe_drop_key(struct rxe_pool_entry *elem) { - struct rxe_pool_entry *elem = arg; struct rxe_pool *pool = elem->pool; unsigned long flags; @@ -289,9 +287,8 @@ void rxe_drop_key(void *arg) write_unlock_irqrestore(&pool->pool_lock, flags); } -void rxe_add_index(void *arg) +void __rxe_add_index(struct rxe_pool_entry *elem) { - struct rxe_pool_entry *elem = arg; struct rxe_pool *pool = elem->pool; unsigned long flags; @@ -301,9 +298,8 @@ void rxe_add_index(void *arg) write_unlock_irqrestore(&pool->pool_lock, flags); } -void rxe_drop_index(void *arg) +void __rxe_drop_index(struct rxe_pool_entry *elem) { - struct rxe_pool_entry *elem = arg; struct rxe_pool *pool = elem->pool; unsigned long flags; @@ -356,7 +352,7 @@ void *rxe_alloc(struct rxe_pool *pool) return NULL; } -int rxe_add_to_pool(struct rxe_pool *pool, struct rxe_pool_entry *elem) +int __rxe_add_to_pool(struct rxe_pool *pool, struct rxe_pool_entry *elem) { unsigned long flags; diff --git a/drivers/infiniband/sw/rxe/rxe_pool.h b/drivers/infiniband/sw/rxe/rxe_pool.h index b37df6198e8a..5db0bdde185e 100644 --- a/drivers/infiniband/sw/rxe/rxe_pool.h +++ b/drivers/infiniband/sw/rxe/rxe_pool.h @@ -110,23 +110,33 @@ void rxe_pool_cleanup(struct rxe_pool *pool); void *rxe_alloc(struct rxe_pool *pool); /* connect already allocated object to pool */ -int rxe_add_to_pool(struct rxe_pool *pool, struct rxe_pool_entry *elem); +int __rxe_add_to_pool(struct rxe_pool *pool, struct rxe_pool_entry *elem); + +#define rxe_add_to_pool(pool, obj) __rxe_add_to_pool(pool, &(obj)->pelem) /* assign an index to an indexed object and insert object into * pool's rb tree */ -void rxe_add_index(void *elem); +void __rxe_add_index(struct rxe_pool_entry *elem); + +#define rxe_add_index(obj) __rxe_add_index(&(obj)->pelem) /* drop an index and remove object from rb tree */ -void rxe_drop_index(void *elem); +void __rxe_drop_index(struct rxe_pool_entry *elem); + +#define rxe_drop_index(obj) __rxe_drop_index(&(obj)->pelem) /* assign a key to a keyed object and insert object into * pool's rb tree */ -void rxe_add_key(void *elem, void *key); +void __rxe_add_key(struct rxe_pool_entry *elem, void *key); + +#define rxe_add_key(obj, key) __rxe_add_key(&(obj)->pelem, key) /* remove elem from rb tree */ -void rxe_drop_key(void *elem); +void __rxe_drop_key(struct rxe_pool_entry *elem); + +#define rxe_drop_key(obj) __rxe_drop_key(&(obj)->pelem) /* lookup an indexed object from index. takes a reference on object */ void *rxe_pool_get_index(struct rxe_pool *pool, u32 index); diff --git a/drivers/infiniband/sw/rxe/rxe_verbs.c b/drivers/infiniband/sw/rxe/rxe_verbs.c index a031514e2f41..7483a33bcec5 100644 --- a/drivers/infiniband/sw/rxe/rxe_verbs.c +++ b/drivers/infiniband/sw/rxe/rxe_verbs.c @@ -106,12 +106,12 @@ static enum rdma_link_layer rxe_get_link_layer(struct ib_device *dev, return IB_LINK_LAYER_ETHERNET; } -static int rxe_alloc_ucontext(struct ib_ucontext *uctx, struct ib_udata *udata) +static int rxe_alloc_ucontext(struct ib_ucontext *ibuc, struct ib_udata *udata) { - struct rxe_dev *rxe = to_rdev(uctx->device); - struct rxe_ucontext *uc = to_ruc(uctx); + struct rxe_dev *rxe = to_rdev(ibuc->device); + struct rxe_ucontext *uc = to_ruc(ibuc); - return rxe_add_to_pool(&rxe->uc_pool, &uc->pelem); + return rxe_add_to_pool(&rxe->uc_pool, uc); } static void rxe_dealloc_ucontext(struct ib_ucontext *ibuc) @@ -145,7 +145,7 @@ static int rxe_alloc_pd(struct ib_pd *ibpd, struct ib_udata *udata) struct rxe_dev *rxe = to_rdev(ibpd->device); struct rxe_pd *pd = to_rpd(ibpd); - return rxe_add_to_pool(&rxe->pd_pool, &pd->pelem); + return rxe_add_to_pool(&rxe->pd_pool, pd); } static int rxe_dealloc_pd(struct ib_pd *ibpd, struct ib_udata *udata) @@ -169,7 +169,7 @@ static int rxe_create_ah(struct ib_ah *ibah, if (err) return err; - err = rxe_add_to_pool(&rxe->ah_pool, &ah->pelem); + err = rxe_add_to_pool(&rxe->ah_pool, ah); if (err) return err; @@ -273,7 +273,7 @@ static int rxe_create_srq(struct ib_srq *ibsrq, struct ib_srq_init_attr *init, if (err) goto err1; - err = rxe_add_to_pool(&rxe->srq_pool, &srq->pelem); + err = rxe_add_to_pool(&rxe->srq_pool, srq); if (err) goto err1; @@ -774,7 +774,7 @@ static int rxe_create_cq(struct ib_cq *ibcq, const struct ib_cq_init_attr *attr, if (err) return err; - return rxe_add_to_pool(&rxe->cq_pool, &cq->pelem); + return rxe_add_to_pool(&rxe->cq_pool, cq); } static int rxe_destroy_cq(struct ib_cq *ibcq, struct ib_udata *udata) From 3853c35e243d56238159e8365b6aca410bdd4576 Mon Sep 17 00:00:00 2001 From: Bob Pearson Date: Wed, 16 Dec 2020 17:15:49 -0600 Subject: [PATCH 025/414] RDMA/rxe: Add unlocked versions of pool APIs The existing pool APIs use the rw_lock pool_lock to protect critical sections that change the pool state. This does not correctly implement a typical sequence like the following elem = if found use elem else elem = Which is racy if multiple threads are attempting to perform this at the same time. We want the second thread to use the elem created by the first thread not create two equivalent elems. This patch adds new APIs that are the same as existing APIs but do not take the pool_lock. A caller can then take the lock and perform a sequence of pool operations and then release the lock. Link: https://lore.kernel.org/r/20201216231550.27224-7-rpearson@hpe.com Signed-off-by: Bob Pearson Signed-off-by: Jason Gunthorpe --- drivers/infiniband/sw/rxe/rxe_pool.c | 84 +++++++++++++++++++++------- drivers/infiniband/sw/rxe/rxe_pool.h | 41 ++++++++++++-- 2 files changed, 98 insertions(+), 27 deletions(-) diff --git a/drivers/infiniband/sw/rxe/rxe_pool.c b/drivers/infiniband/sw/rxe/rxe_pool.c index c8a6d0d7f01a..d26730eec720 100644 --- a/drivers/infiniband/sw/rxe/rxe_pool.c +++ b/drivers/infiniband/sw/rxe/rxe_pool.c @@ -266,65 +266,89 @@ static void insert_key(struct rxe_pool *pool, struct rxe_pool_entry *new) return; } +void __rxe_add_key_nl(struct rxe_pool_entry *elem, void *key) +{ + struct rxe_pool *pool = elem->pool; + + memcpy((u8 *)elem + pool->key.key_offset, key, pool->key.key_size); + insert_key(pool, elem); +} + void __rxe_add_key(struct rxe_pool_entry *elem, void *key) { struct rxe_pool *pool = elem->pool; unsigned long flags; write_lock_irqsave(&pool->pool_lock, flags); - memcpy((u8 *)elem + pool->key.key_offset, key, pool->key.key_size); - insert_key(pool, elem); + __rxe_add_key_nl(elem, key); write_unlock_irqrestore(&pool->pool_lock, flags); } +void __rxe_drop_key_nl(struct rxe_pool_entry *elem) +{ + struct rxe_pool *pool = elem->pool; + + rb_erase(&elem->key_node, &pool->key.tree); +} + void __rxe_drop_key(struct rxe_pool_entry *elem) { struct rxe_pool *pool = elem->pool; unsigned long flags; write_lock_irqsave(&pool->pool_lock, flags); - rb_erase(&elem->key_node, &pool->key.tree); + __rxe_drop_key_nl(elem); write_unlock_irqrestore(&pool->pool_lock, flags); } +void __rxe_add_index_nl(struct rxe_pool_entry *elem) +{ + struct rxe_pool *pool = elem->pool; + + elem->index = alloc_index(pool); + insert_index(pool, elem); +} + void __rxe_add_index(struct rxe_pool_entry *elem) { struct rxe_pool *pool = elem->pool; unsigned long flags; write_lock_irqsave(&pool->pool_lock, flags); - elem->index = alloc_index(pool); - insert_index(pool, elem); + __rxe_add_index_nl(elem); write_unlock_irqrestore(&pool->pool_lock, flags); } +void __rxe_drop_index_nl(struct rxe_pool_entry *elem) +{ + struct rxe_pool *pool = elem->pool; + + clear_bit(elem->index - pool->index.min_index, pool->index.table); + rb_erase(&elem->index_node, &pool->index.tree); +} + void __rxe_drop_index(struct rxe_pool_entry *elem) { struct rxe_pool *pool = elem->pool; unsigned long flags; write_lock_irqsave(&pool->pool_lock, flags); - clear_bit(elem->index - pool->index.min_index, pool->index.table); - rb_erase(&elem->index_node, &pool->index.tree); + __rxe_drop_index_nl(elem); write_unlock_irqrestore(&pool->pool_lock, flags); } -void *rxe_alloc(struct rxe_pool *pool) +void *rxe_alloc_nl(struct rxe_pool *pool) { struct rxe_type_info *info = &rxe_type_info[pool->type]; struct rxe_pool_entry *elem; u8 *obj; - unsigned long flags; might_sleep_if(!(pool->flags & RXE_POOL_ATOMIC)); - read_lock_irqsave(&pool->pool_lock, flags); - if (pool->state != RXE_POOL_STATE_VALID) { - read_unlock_irqrestore(&pool->pool_lock, flags); + if (pool->state != RXE_POOL_STATE_VALID) return NULL; - } + kref_get(&pool->ref_cnt); - read_unlock_irqrestore(&pool->pool_lock, flags); if (!ib_device_try_get(&pool->rxe->ib_dev)) goto out_put_pool; @@ -352,12 +376,22 @@ void *rxe_alloc(struct rxe_pool *pool) return NULL; } +void *rxe_alloc(struct rxe_pool *pool) +{ + u8 *obj; + unsigned long flags; + + read_lock_irqsave(&pool->pool_lock, flags); + obj = rxe_alloc_nl(pool); + read_unlock_irqrestore(&pool->pool_lock, flags); + + return obj; +} + int __rxe_add_to_pool(struct rxe_pool *pool, struct rxe_pool_entry *elem) { unsigned long flags; - might_sleep_if(!(pool->flags & RXE_POOL_ATOMIC)); - read_lock_irqsave(&pool->pool_lock, flags); if (pool->state != RXE_POOL_STATE_VALID) { read_unlock_irqrestore(&pool->pool_lock, flags); @@ -444,16 +478,13 @@ void *rxe_pool_get_index(struct rxe_pool *pool, u32 index) return obj; } -void *rxe_pool_get_key(struct rxe_pool *pool, void *key) +void *rxe_pool_get_key_nl(struct rxe_pool *pool, void *key) { struct rxe_type_info *info = &rxe_type_info[pool->type]; struct rb_node *node; struct rxe_pool_entry *elem; u8 *obj = NULL; int cmp; - unsigned long flags; - - read_lock_irqsave(&pool->pool_lock, flags); if (pool->state != RXE_POOL_STATE_VALID) goto out; @@ -482,6 +513,17 @@ void *rxe_pool_get_key(struct rxe_pool *pool, void *key) } out: - read_unlock_irqrestore(&pool->pool_lock, flags); + return obj; +} + +void *rxe_pool_get_key(struct rxe_pool *pool, void *key) +{ + u8 *obj = NULL; + unsigned long flags; + + read_lock_irqsave(&pool->pool_lock, flags); + obj = rxe_pool_get_key_nl(pool, key); + read_unlock_irqrestore(&pool->pool_lock, flags); + return obj; } diff --git a/drivers/infiniband/sw/rxe/rxe_pool.h b/drivers/infiniband/sw/rxe/rxe_pool.h index 5db0bdde185e..373e08554c1c 100644 --- a/drivers/infiniband/sw/rxe/rxe_pool.h +++ b/drivers/infiniband/sw/rxe/rxe_pool.h @@ -109,41 +109,70 @@ void rxe_pool_cleanup(struct rxe_pool *pool); /* allocate an object from pool */ void *rxe_alloc(struct rxe_pool *pool); +/* allocate an object from pool - no lock */ +void *rxe_alloc_nl(struct rxe_pool *pool); + /* connect already allocated object to pool */ int __rxe_add_to_pool(struct rxe_pool *pool, struct rxe_pool_entry *elem); #define rxe_add_to_pool(pool, obj) __rxe_add_to_pool(pool, &(obj)->pelem) /* assign an index to an indexed object and insert object into - * pool's rb tree + * pool's rb tree with and without holding the pool_lock */ void __rxe_add_index(struct rxe_pool_entry *elem); #define rxe_add_index(obj) __rxe_add_index(&(obj)->pelem) -/* drop an index and remove object from rb tree */ +void __rxe_add_index_nl(struct rxe_pool_entry *elem); + +#define rxe_add_index_nl(obj) __rxe_add_index_nl(&(obj)->pelem) + +/* drop an index and remove object from rb tree + * with and without holding the pool_lock + */ void __rxe_drop_index(struct rxe_pool_entry *elem); #define rxe_drop_index(obj) __rxe_drop_index(&(obj)->pelem) +void __rxe_drop_index_nl(struct rxe_pool_entry *elem); + +#define rxe_drop_index_nl(obj) __rxe_drop_index_nl(&(obj)->pelem) + /* assign a key to a keyed object and insert object into - * pool's rb tree + * pool's rb tree with and without holding pool_lock */ void __rxe_add_key(struct rxe_pool_entry *elem, void *key); #define rxe_add_key(obj, key) __rxe_add_key(&(obj)->pelem, key) -/* remove elem from rb tree */ +void __rxe_add_key_nl(struct rxe_pool_entry *elem, void *key); + +#define rxe_add_key_nl(obj, key) __rxe_add_key_nl(&(obj)->pelem, key) + +/* remove elem from rb tree with and without holding pool_lock */ void __rxe_drop_key(struct rxe_pool_entry *elem); #define rxe_drop_key(obj) __rxe_drop_key(&(obj)->pelem) -/* lookup an indexed object from index. takes a reference on object */ +void __rxe_drop_key_nl(struct rxe_pool_entry *elem); + +#define rxe_drop_key_nl(obj) __rxe_drop_key_nl(&(obj)->pelem) + +/* lookup an indexed object from index with and without holding pool_lock. + * takes a reference on object + */ void *rxe_pool_get_index(struct rxe_pool *pool, u32 index); -/* lookup keyed object from key. takes a reference on the object */ +void *rxe_pool_get_index_nl(struct rxe_pool *pool, u32 index); + +/* lookup keyed object from key with and without holding pool_lock. + * takes a reference on the objecti + */ void *rxe_pool_get_key(struct rxe_pool *pool, void *key); +void *rxe_pool_get_key_nl(struct rxe_pool *pool, void *key); + /* cleanup an object when all references are dropped */ void rxe_elem_release(struct kref *kref); From 8a48ac7f6c24973863b42d03156d5e34c46c2971 Mon Sep 17 00:00:00 2001 From: Bob Pearson Date: Wed, 16 Dec 2020 17:15:50 -0600 Subject: [PATCH 026/414] RDMA/rxe: Fix race in rxe_mcast.c Fix a race in rxe_mcast.c that occurs when two QPs try at the same time to attach a multicast address. Both QPs lookup the mgid address in a pool of multicast groups and if they do not find it create a new group elem. Fix this by locking the lookup/alloc/add key sequence and using the unlocked APIs added in this patch set. Link: https://lore.kernel.org/r/20201216231550.27224-8-rpearson@hpe.com Signed-off-by: Bob Pearson Signed-off-by: Jason Gunthorpe --- drivers/infiniband/sw/rxe/rxe_mcast.c | 64 +++++++++++++++++---------- 1 file changed, 40 insertions(+), 24 deletions(-) diff --git a/drivers/infiniband/sw/rxe/rxe_mcast.c b/drivers/infiniband/sw/rxe/rxe_mcast.c index c02315aed8d1..5be47ce7d319 100644 --- a/drivers/infiniband/sw/rxe/rxe_mcast.c +++ b/drivers/infiniband/sw/rxe/rxe_mcast.c @@ -7,45 +7,61 @@ #include "rxe.h" #include "rxe_loc.h" +/* caller should hold mc_grp_pool->pool_lock */ +static struct rxe_mc_grp *create_grp(struct rxe_dev *rxe, + struct rxe_pool *pool, + union ib_gid *mgid) +{ + int err; + struct rxe_mc_grp *grp; + + grp = rxe_alloc_nl(&rxe->mc_grp_pool); + if (!grp) + return ERR_PTR(-ENOMEM); + + INIT_LIST_HEAD(&grp->qp_list); + spin_lock_init(&grp->mcg_lock); + grp->rxe = rxe; + rxe_add_key_nl(grp, mgid); + + err = rxe_mcast_add(rxe, mgid); + if (unlikely(err)) { + rxe_drop_key_nl(grp); + rxe_drop_ref(grp); + return ERR_PTR(err); + } + + return grp; +} + int rxe_mcast_get_grp(struct rxe_dev *rxe, union ib_gid *mgid, struct rxe_mc_grp **grp_p) { int err; struct rxe_mc_grp *grp; + struct rxe_pool *pool = &rxe->mc_grp_pool; + unsigned long flags; - if (rxe->attr.max_mcast_qp_attach == 0) { - err = -EINVAL; - goto err1; - } + if (rxe->attr.max_mcast_qp_attach == 0) + return -EINVAL; - grp = rxe_pool_get_key(&rxe->mc_grp_pool, mgid); + write_lock_irqsave(&pool->pool_lock, flags); + + grp = rxe_pool_get_key_nl(pool, mgid); if (grp) goto done; - grp = rxe_alloc(&rxe->mc_grp_pool); - if (!grp) { - err = -ENOMEM; - goto err1; + grp = create_grp(rxe, pool, mgid); + if (IS_ERR(grp)) { + write_unlock_irqrestore(&pool->pool_lock, flags); + err = PTR_ERR(grp); + return err; } - INIT_LIST_HEAD(&grp->qp_list); - spin_lock_init(&grp->mcg_lock); - grp->rxe = rxe; - - rxe_add_key(grp, mgid); - - err = rxe_mcast_add(rxe, mgid); - if (err) - goto err2; - done: + write_unlock_irqrestore(&pool->pool_lock, flags); *grp_p = grp; return 0; - -err2: - rxe_drop_ref(grp); -err1: - return err; } int rxe_mcast_add_grp_elem(struct rxe_dev *rxe, struct rxe_qp *qp, From 174a6db25f0da2784f55fcaaca494e3fd1d1029f Mon Sep 17 00:00:00 2001 From: Lukas Bulwahn Date: Wed, 13 Jan 2021 08:00:23 +0100 Subject: [PATCH 027/414] doc/zh_CN: adjust table markup in mips/ingenic-tcu.rst Commit 419b1d4ed1cb ("doc/zh_CN: add mips ingenic-tcu.rst translation") introduces a warning with make htmldocs: ./Documentation/translations/zh_CN/mips/ingenic-tcu.rst: 61: WARNING: Malformed table. Text in column margin in table line 6. Adjust the table markup to address this warning. Signed-off-by: Lukas Bulwahn Fixes: 419b1d4ed1cb ("doc/zh_CN: add mips ingenic-tcu.rst translation") Reviewed-by: Alex Shi Reported-by: Stephen Rothwell Link: https://lore.kernel.org/r/20210113070023.25064-1-lukas.bulwahn@gmail.com Signed-off-by: Jonathan Corbet --- Documentation/translations/zh_CN/mips/ingenic-tcu.rst | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/Documentation/translations/zh_CN/mips/ingenic-tcu.rst b/Documentation/translations/zh_CN/mips/ingenic-tcu.rst index 72b5d409ed89..9324a0a26430 100644 --- a/Documentation/translations/zh_CN/mips/ingenic-tcu.rst +++ b/Documentation/translations/zh_CN/mips/ingenic-tcu.rst @@ -53,14 +53,14 @@ TCU硬件的功能分布在多个驱动程序: -=========== ===== +============== =================================== 时钟 drivers/clk/ingenic/tcu.c 中断 drivers/irqchip/irq-ingenic-tcu.c 定时器 drivers/clocksource/ingenic-timer.c OST drivers/clocksource/ingenic-ost.c 脉冲宽度调制器 drivers/pwm/pwm-jz4740.c 看门狗 drivers/watchdog/jz4740_wdt.c -=========== ===== +============== =================================== 因为可以从相同的寄存器控制属于不同驱动程序和框架的TCU的各种功能,所以 所有这些驱动程序都通过相同的控制总线通用接口访问它们的寄存器。 From f302589b2d2a02f911f46e4d5e7ab9841da4eebc Mon Sep 17 00:00:00 2001 From: Lukas Bulwahn Date: Wed, 13 Jan 2021 08:05:57 +0100 Subject: [PATCH 028/414] doc/zh_CN: mips: use doc references instead The Chinese mips translations refer to non-existing labels in the original documentation. Hence, make htmldocs warns about those undefined labels on all files in ./Documentation/translations/zh_CN/mips/. Replace the references to non-existing labels with suitable doc references. Signed-off-by: Lukas Bulwahn Fixes: 419b1d4ed1cb ("doc/zh_CN: add mips ingenic-tcu.rst translation") Fixes: 72bc9d08868d ("doc/zh_CN: add mips features.rst translation") Fixes: 7fd3954b0c52 ("doc/zh_CN: add mips booting.rst translation") Fixes: b8e724fd7117 ("doc/zh_CN: add mips index.rst translation") Reported-by: Stephen Rothwell Link: https://lore.kernel.org/r/20210113070557.28792-1-lukas.bulwahn@gmail.com Signed-off-by: Jonathan Corbet --- Documentation/translations/zh_CN/mips/booting.rst | 2 +- Documentation/translations/zh_CN/mips/features.rst | 2 +- Documentation/translations/zh_CN/mips/index.rst | 2 +- Documentation/translations/zh_CN/mips/ingenic-tcu.rst | 2 +- 4 files changed, 4 insertions(+), 4 deletions(-) diff --git a/Documentation/translations/zh_CN/mips/booting.rst b/Documentation/translations/zh_CN/mips/booting.rst index 3099d0fff7a6..96453e1b962e 100644 --- a/Documentation/translations/zh_CN/mips/booting.rst +++ b/Documentation/translations/zh_CN/mips/booting.rst @@ -2,7 +2,7 @@ .. include:: ../disclaimer-zh_CN.rst -:Original: :ref:`Documentation/mips/booting.rst ` +:Original: :doc:`../../../mips/booting` :Translator: Yanteng Si .. _cn_booting: diff --git a/Documentation/translations/zh_CN/mips/features.rst b/Documentation/translations/zh_CN/mips/features.rst index 7e67f81a0982..93d93d06b1b3 100644 --- a/Documentation/translations/zh_CN/mips/features.rst +++ b/Documentation/translations/zh_CN/mips/features.rst @@ -2,7 +2,7 @@ .. include:: ../disclaimer-zh_CN.rst -:Original: :ref:`Documentation/mips/features.rst ` +:Original: :doc:`../../../mips/features` :Translator: Yanteng Si .. _cn_features: diff --git a/Documentation/translations/zh_CN/mips/index.rst b/Documentation/translations/zh_CN/mips/index.rst index 2c7b836a3da5..27a2eae8484a 100644 --- a/Documentation/translations/zh_CN/mips/index.rst +++ b/Documentation/translations/zh_CN/mips/index.rst @@ -2,7 +2,7 @@ .. include:: ../disclaimer-zh_CN.rst -:Original: :ref:`Documentation/mips/index.rst ` +:Original: :doc:`../../../mips/index` :Translator: Yanteng Si .. _cn_index: diff --git a/Documentation/translations/zh_CN/mips/ingenic-tcu.rst b/Documentation/translations/zh_CN/mips/ingenic-tcu.rst index 9324a0a26430..f04ba407384a 100644 --- a/Documentation/translations/zh_CN/mips/ingenic-tcu.rst +++ b/Documentation/translations/zh_CN/mips/ingenic-tcu.rst @@ -2,7 +2,7 @@ .. include:: ../disclaimer-zh_CN.rst -:Original: :ref:`Documentation/mips/ingenic-tcu.rst ` +:Original: :doc:`../../../mips/ingenic-tcu` :Translator: Yanteng Si .. _cn_ingenic-tcu: From 097d43d8570457c1af9b09fab15412697b177f35 Mon Sep 17 00:00:00 2001 From: Daeseok Youn Date: Thu, 14 Jan 2021 16:08:17 +0900 Subject: [PATCH 029/414] mm: memblock: remove return value of memblock_free_all() No one checks the return value of memblock_free_all(). Make the return value void. memblock_free_all() is used on mem_init() for each architecture, and the total count of freed pages will be added to _totalram_pages variable by calling totalram_pages_add(). so do not need to return total count of freed pages. Signed-off-by: Daeseok Youn Reviewed-by: David Hildenbrand Signed-off-by: Mike Rapoport --- include/linux/memblock.h | 2 +- mm/memblock.c | 6 +----- 2 files changed, 2 insertions(+), 6 deletions(-) diff --git a/include/linux/memblock.h b/include/linux/memblock.h index b93c44b9121e..acf1e6a85066 100644 --- a/include/linux/memblock.h +++ b/include/linux/memblock.h @@ -117,7 +117,7 @@ int memblock_mark_mirror(phys_addr_t base, phys_addr_t size); int memblock_mark_nomap(phys_addr_t base, phys_addr_t size); int memblock_clear_nomap(phys_addr_t base, phys_addr_t size); -unsigned long memblock_free_all(void); +void memblock_free_all(void); void reset_node_managed_pages(pg_data_t *pgdat); void reset_all_zones_managed_pages(void); diff --git a/mm/memblock.c b/mm/memblock.c index d24bcfa88d2f..daf06f2847ed 100644 --- a/mm/memblock.c +++ b/mm/memblock.c @@ -2087,10 +2087,8 @@ void __init reset_all_zones_managed_pages(void) /** * memblock_free_all - release free pages to the buddy allocator - * - * Return: the number of pages actually released. */ -unsigned long __init memblock_free_all(void) +void __init memblock_free_all(void) { unsigned long pages; @@ -2099,8 +2097,6 @@ unsigned long __init memblock_free_all(void) pages = free_low_memory_core_early(); totalram_pages_add(pages); - - return pages; } #if defined(CONFIG_DEBUG_FS) && defined(CONFIG_ARCH_KEEP_MEMBLOCK) From 7490fd1fe836ba3c7eda7a4b1cfd9e44389ffda5 Mon Sep 17 00:00:00 2001 From: Jack Wang Date: Thu, 17 Dec 2020 15:18:57 +0100 Subject: [PATCH 030/414] RDMA/rtrs: Extend ibtrs_cq_qp_create rtrs does not have same limit for both max_send_wr and max_recv_wr, To allow client and server set different values, export in a separate parameter for rtrs_cq_qp_create. Also fix the type accordingly, u32 should be used instead of u16. Fixes: c0894b3ea69d ("RDMA/rtrs: core: lib functions shared between client and server modules") Link: https://lore.kernel.org/r/20201217141915.56989-2-jinpu.wang@cloud.ionos.com Signed-off-by: Jack Wang Reviewed-by: Md Haris Iqbal Signed-off-by: Jason Gunthorpe --- drivers/infiniband/ulp/rtrs/rtrs-clt.c | 4 ++-- drivers/infiniband/ulp/rtrs/rtrs-pri.h | 5 +++-- drivers/infiniband/ulp/rtrs/rtrs-srv.c | 5 +++-- drivers/infiniband/ulp/rtrs/rtrs.c | 14 ++++++++------ 4 files changed, 16 insertions(+), 12 deletions(-) diff --git a/drivers/infiniband/ulp/rtrs/rtrs-clt.c b/drivers/infiniband/ulp/rtrs/rtrs-clt.c index 67f86c405a26..719254fc83a1 100644 --- a/drivers/infiniband/ulp/rtrs/rtrs-clt.c +++ b/drivers/infiniband/ulp/rtrs/rtrs-clt.c @@ -1511,7 +1511,7 @@ static void destroy_con(struct rtrs_clt_con *con) static int create_con_cq_qp(struct rtrs_clt_con *con) { struct rtrs_clt_sess *sess = to_clt_sess(con->c.sess); - u16 wr_queue_size; + u32 wr_queue_size; int err, cq_vector; struct rtrs_msg_rkey_rsp *rsp; @@ -1573,7 +1573,7 @@ static int create_con_cq_qp(struct rtrs_clt_con *con) cq_vector = con->cpu % sess->s.dev->ib_dev->num_comp_vectors; err = rtrs_cq_qp_create(&sess->s, &con->c, sess->max_send_sge, cq_vector, wr_queue_size, wr_queue_size, - IB_POLL_SOFTIRQ); + wr_queue_size, IB_POLL_SOFTIRQ); /* * In case of error we do not bother to clean previous allocations, * since destroy_con_cq_qp() must be called. diff --git a/drivers/infiniband/ulp/rtrs/rtrs-pri.h b/drivers/infiniband/ulp/rtrs/rtrs-pri.h index 3f2918671dbe..d5621e6fad1b 100644 --- a/drivers/infiniband/ulp/rtrs/rtrs-pri.h +++ b/drivers/infiniband/ulp/rtrs/rtrs-pri.h @@ -303,8 +303,9 @@ int rtrs_post_rdma_write_imm_empty(struct rtrs_con *con, struct ib_cqe *cqe, struct ib_send_wr *head); int rtrs_cq_qp_create(struct rtrs_sess *rtrs_sess, struct rtrs_con *con, - u32 max_send_sge, int cq_vector, u16 cq_size, - u16 wr_queue_size, enum ib_poll_context poll_ctx); + u32 max_send_sge, int cq_vector, int cq_size, + u32 max_send_wr, u32 max_recv_wr, + enum ib_poll_context poll_ctx); void rtrs_cq_qp_destroy(struct rtrs_con *con); void rtrs_init_hb(struct rtrs_sess *sess, struct ib_cqe *cqe, diff --git a/drivers/infiniband/ulp/rtrs/rtrs-srv.c b/drivers/infiniband/ulp/rtrs/rtrs-srv.c index c42fd470c4eb..ed4628f032bb 100644 --- a/drivers/infiniband/ulp/rtrs/rtrs-srv.c +++ b/drivers/infiniband/ulp/rtrs/rtrs-srv.c @@ -1586,7 +1586,7 @@ static int create_con(struct rtrs_srv_sess *sess, struct rtrs_sess *s = &sess->s; struct rtrs_srv_con *con; - u16 cq_size, wr_queue_size; + u32 cq_size, wr_queue_size; int err, cq_vector; con = kzalloc(sizeof(*con), GFP_KERNEL); @@ -1630,7 +1630,8 @@ static int create_con(struct rtrs_srv_sess *sess, /* TODO: SOFTIRQ can be faster, but be careful with softirq context */ err = rtrs_cq_qp_create(&sess->s, &con->c, 1, cq_vector, cq_size, - wr_queue_size, IB_POLL_WORKQUEUE); + wr_queue_size, wr_queue_size, + IB_POLL_WORKQUEUE); if (err) { rtrs_err(s, "rtrs_cq_qp_create(), err: %d\n", err); goto free_con; diff --git a/drivers/infiniband/ulp/rtrs/rtrs.c b/drivers/infiniband/ulp/rtrs/rtrs.c index 2e3a849e0a77..df52427f1710 100644 --- a/drivers/infiniband/ulp/rtrs/rtrs.c +++ b/drivers/infiniband/ulp/rtrs/rtrs.c @@ -231,14 +231,14 @@ static int create_cq(struct rtrs_con *con, int cq_vector, u16 cq_size, } static int create_qp(struct rtrs_con *con, struct ib_pd *pd, - u16 wr_queue_size, u32 max_sge) + u32 max_send_wr, u32 max_recv_wr, u32 max_sge) { struct ib_qp_init_attr init_attr = {NULL}; struct rdma_cm_id *cm_id = con->cm_id; int ret; - init_attr.cap.max_send_wr = wr_queue_size; - init_attr.cap.max_recv_wr = wr_queue_size; + init_attr.cap.max_send_wr = max_send_wr; + init_attr.cap.max_recv_wr = max_recv_wr; init_attr.cap.max_recv_sge = 1; init_attr.event_handler = qp_event_handler; init_attr.qp_context = con; @@ -260,8 +260,9 @@ static int create_qp(struct rtrs_con *con, struct ib_pd *pd, } int rtrs_cq_qp_create(struct rtrs_sess *sess, struct rtrs_con *con, - u32 max_send_sge, int cq_vector, u16 cq_size, - u16 wr_queue_size, enum ib_poll_context poll_ctx) + u32 max_send_sge, int cq_vector, int cq_size, + u32 max_send_wr, u32 max_recv_wr, + enum ib_poll_context poll_ctx) { int err; @@ -269,7 +270,8 @@ int rtrs_cq_qp_create(struct rtrs_sess *sess, struct rtrs_con *con, if (err) return err; - err = create_qp(con, sess->dev->ib_pd, wr_queue_size, max_send_sge); + err = create_qp(con, sess->dev->ib_pd, max_send_wr, max_recv_wr, + max_send_sge); if (err) { ib_free_cq(con->cq); con->cq = NULL; From 99f0c3807973359bba8f37d9198eea59fe38c32a Mon Sep 17 00:00:00 2001 From: Jack Wang Date: Thu, 17 Dec 2020 15:18:59 +0100 Subject: [PATCH 031/414] RDMA/rtrs-srv: Release lock before call into close_sess In this error case, we don't need hold mutex to call close_sess. Fixes: 9cb837480424 ("RDMA/rtrs: server: main functionality") Link: https://lore.kernel.org/r/20201217141915.56989-4-jinpu.wang@cloud.ionos.com Signed-off-by: Jack Wang Tested-by: Lutz Pogrell Signed-off-by: Jason Gunthorpe --- drivers/infiniband/ulp/rtrs/rtrs-srv.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/infiniband/ulp/rtrs/rtrs-srv.c b/drivers/infiniband/ulp/rtrs/rtrs-srv.c index ed4628f032bb..341661f42add 100644 --- a/drivers/infiniband/ulp/rtrs/rtrs-srv.c +++ b/drivers/infiniband/ulp/rtrs/rtrs-srv.c @@ -1863,8 +1863,8 @@ static int rtrs_rdma_connect(struct rdma_cm_id *cm_id, return rtrs_rdma_do_reject(cm_id, -ECONNRESET); close_and_return_err: - close_sess(sess); mutex_unlock(&srv->paths_mutex); + close_sess(sess); return err; } From f991fdac813f1598a9bb17b602ce04812ba9148c Mon Sep 17 00:00:00 2001 From: Jack Wang Date: Thu, 17 Dec 2020 15:19:00 +0100 Subject: [PATCH 032/414] RDMA/rtrs-srv: Use sysfs_remove_file_self for disconnect Remove self first to avoid deadlock, we don't want to use close_work to remove sess sysfs. Fixes: 91b11610af8d ("RDMA/rtrs: server: sysfs interface functions") Link: https://lore.kernel.org/r/20201217141915.56989-5-jinpu.wang@cloud.ionos.com Signed-off-by: Jack Wang Tested-by: Lutz Pogrell Signed-off-by: Jason Gunthorpe --- drivers/infiniband/ulp/rtrs/rtrs-srv-sysfs.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/infiniband/ulp/rtrs/rtrs-srv-sysfs.c b/drivers/infiniband/ulp/rtrs/rtrs-srv-sysfs.c index d2edff3b8f0d..cca3a0acbabc 100644 --- a/drivers/infiniband/ulp/rtrs/rtrs-srv-sysfs.c +++ b/drivers/infiniband/ulp/rtrs/rtrs-srv-sysfs.c @@ -51,6 +51,8 @@ static ssize_t rtrs_srv_disconnect_store(struct kobject *kobj, sockaddr_to_str((struct sockaddr *)&sess->s.dst_addr, str, sizeof(str)); rtrs_info(s, "disconnect for path %s requested\n", str); + /* first remove sysfs itself to avoid deadlock */ + sysfs_remove_file_self(&sess->kobj, &attr->attr); close_sess(sess); return count; From f47e4e3e71724f625958b0059f6c8ac5d44d27ef Mon Sep 17 00:00:00 2001 From: Jack Wang Date: Thu, 17 Dec 2020 15:19:01 +0100 Subject: [PATCH 033/414] RDMA/rtrs-clt: Set mininum limit when create QP Currently rtrs when create_qp use a coarse numbers (bigger in general), which leads to hardware create more resources which only waste memory with no benefits. - SERVICE con, For max_send_wr/max_recv_wr, it's 2 times SERVICE_CON_QUEUE_DEPTH + 2 - IO con For max_send_wr/max_recv_wr, it's sess->queue_depth * 3 + 1 Fixes: 6a98d71daea1 ("RDMA/rtrs: client: main functionality") Link: https://lore.kernel.org/r/20201217141915.56989-6-jinpu.wang@cloud.ionos.com Signed-off-by: Jack Wang Reviewed-by: Md Haris Iqbal Signed-off-by: Jason Gunthorpe --- drivers/infiniband/ulp/rtrs/rtrs-clt.c | 19 ++++++++++++------- 1 file changed, 12 insertions(+), 7 deletions(-) diff --git a/drivers/infiniband/ulp/rtrs/rtrs-clt.c b/drivers/infiniband/ulp/rtrs/rtrs-clt.c index 719254fc83a1..b3fb5fb93815 100644 --- a/drivers/infiniband/ulp/rtrs/rtrs-clt.c +++ b/drivers/infiniband/ulp/rtrs/rtrs-clt.c @@ -1511,7 +1511,7 @@ static void destroy_con(struct rtrs_clt_con *con) static int create_con_cq_qp(struct rtrs_clt_con *con) { struct rtrs_clt_sess *sess = to_clt_sess(con->c.sess); - u32 wr_queue_size; + u32 max_send_wr, max_recv_wr, cq_size; int err, cq_vector; struct rtrs_msg_rkey_rsp *rsp; @@ -1523,7 +1523,8 @@ static int create_con_cq_qp(struct rtrs_clt_con *con) * + 2 for drain and heartbeat * in case qp gets into error state */ - wr_queue_size = SERVICE_CON_QUEUE_DEPTH * 3 + 2; + max_send_wr = SERVICE_CON_QUEUE_DEPTH * 2 + 2; + max_recv_wr = SERVICE_CON_QUEUE_DEPTH * 2 + 2; /* We must be the first here */ if (WARN_ON(sess->s.dev)) return -EINVAL; @@ -1555,25 +1556,29 @@ static int create_con_cq_qp(struct rtrs_clt_con *con) /* Shared between connections */ sess->s.dev_ref++; - wr_queue_size = + max_send_wr = min_t(int, sess->s.dev->ib_dev->attrs.max_qp_wr, /* QD * (REQ + RSP + FR REGS or INVS) + drain */ sess->queue_depth * 3 + 1); + max_recv_wr = + min_t(int, sess->s.dev->ib_dev->attrs.max_qp_wr, + sess->queue_depth * 3 + 1); } /* alloc iu to recv new rkey reply when server reports flags set */ if (sess->flags == RTRS_MSG_NEW_RKEY_F || con->c.cid == 0) { - con->rsp_ius = rtrs_iu_alloc(wr_queue_size, sizeof(*rsp), + con->rsp_ius = rtrs_iu_alloc(max_recv_wr, sizeof(*rsp), GFP_KERNEL, sess->s.dev->ib_dev, DMA_FROM_DEVICE, rtrs_clt_rdma_done); if (!con->rsp_ius) return -ENOMEM; - con->queue_size = wr_queue_size; + con->queue_size = max_recv_wr; } + cq_size = max_send_wr + max_recv_wr; cq_vector = con->cpu % sess->s.dev->ib_dev->num_comp_vectors; err = rtrs_cq_qp_create(&sess->s, &con->c, sess->max_send_sge, - cq_vector, wr_queue_size, wr_queue_size, - wr_queue_size, IB_POLL_SOFTIRQ); + cq_vector, cq_size, max_send_wr, + max_recv_wr, IB_POLL_SOFTIRQ); /* * In case of error we do not bother to clean previous allocations, * since destroy_con_cq_qp() must be called. From f77c4839ee8f4612dcb6601602329096030bd813 Mon Sep 17 00:00:00 2001 From: Guoqing Jiang Date: Thu, 17 Dec 2020 15:19:02 +0100 Subject: [PATCH 034/414] RDMA/rtrs-srv: Jump to dereg_mr label if allocate iu fails The rtrs_iu_free is called in rtrs_iu_alloc if memory is limited, so we don't need to free the same iu again. Fixes: 9cb837480424 ("RDMA/rtrs: server: main functionality") Link: https://lore.kernel.org/r/20201217141915.56989-7-jinpu.wang@cloud.ionos.com Signed-off-by: Guoqing Jiang Reviewed-by: Gioh Kim Signed-off-by: Jack Wang Signed-off-by: Jason Gunthorpe --- drivers/infiniband/ulp/rtrs/rtrs-srv.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/drivers/infiniband/ulp/rtrs/rtrs-srv.c b/drivers/infiniband/ulp/rtrs/rtrs-srv.c index 341661f42add..92a216ddd9fd 100644 --- a/drivers/infiniband/ulp/rtrs/rtrs-srv.c +++ b/drivers/infiniband/ulp/rtrs/rtrs-srv.c @@ -651,7 +651,7 @@ static int map_cont_bufs(struct rtrs_srv_sess *sess) if (!srv_mr->iu) { err = -ENOMEM; rtrs_err(ss, "rtrs_iu_alloc(), err: %d\n", err); - goto free_iu; + goto dereg_mr; } } /* Eventually dma addr for each chunk can be cached */ @@ -667,7 +667,6 @@ static int map_cont_bufs(struct rtrs_srv_sess *sess) srv_mr = &sess->mrs[mri]; sgt = &srv_mr->sgt; mr = srv_mr->mr; -free_iu: rtrs_iu_free(srv_mr->iu, sess->s.dev->ib_dev, 1); dereg_mr: ib_dereg_mr(mr); From 424774c9f3fa100ef7d9cfb9ee211e2ba1cd5119 Mon Sep 17 00:00:00 2001 From: Guoqing Jiang Date: Thu, 17 Dec 2020 15:19:03 +0100 Subject: [PATCH 035/414] RDMA/rtrs: Call kobject_put in the failure path Per the comment of kobject_init_and_add, we need to free the memory by call kobject_put. Fixes: 215378b838df ("RDMA/rtrs: client: sysfs interface functions") Fixes: 91b11610af8d ("RDMA/rtrs: server: sysfs interface functions") Link: https://lore.kernel.org/r/20201217141915.56989-8-jinpu.wang@cloud.ionos.com Signed-off-by: Guoqing Jiang Reviewed-by: Md Haris Iqbal Reviewed-by: Gioh Kim Signed-off-by: Jack Wang Signed-off-by: Jason Gunthorpe --- drivers/infiniband/ulp/rtrs/rtrs-clt-sysfs.c | 2 ++ drivers/infiniband/ulp/rtrs/rtrs-srv-sysfs.c | 3 ++- 2 files changed, 4 insertions(+), 1 deletion(-) diff --git a/drivers/infiniband/ulp/rtrs/rtrs-clt-sysfs.c b/drivers/infiniband/ulp/rtrs/rtrs-clt-sysfs.c index ba00f0de14ca..ad77659800cd 100644 --- a/drivers/infiniband/ulp/rtrs/rtrs-clt-sysfs.c +++ b/drivers/infiniband/ulp/rtrs/rtrs-clt-sysfs.c @@ -408,6 +408,7 @@ int rtrs_clt_create_sess_files(struct rtrs_clt_sess *sess) "%s", str); if (err) { pr_err("kobject_init_and_add: %d\n", err); + kobject_put(&sess->kobj); return err; } err = sysfs_create_group(&sess->kobj, &rtrs_clt_sess_attr_group); @@ -419,6 +420,7 @@ int rtrs_clt_create_sess_files(struct rtrs_clt_sess *sess) &sess->kobj, "stats"); if (err) { pr_err("kobject_init_and_add: %d\n", err); + kobject_put(&sess->stats->kobj_stats); goto remove_group; } diff --git a/drivers/infiniband/ulp/rtrs/rtrs-srv-sysfs.c b/drivers/infiniband/ulp/rtrs/rtrs-srv-sysfs.c index cca3a0acbabc..0a3886629cae 100644 --- a/drivers/infiniband/ulp/rtrs/rtrs-srv-sysfs.c +++ b/drivers/infiniband/ulp/rtrs/rtrs-srv-sysfs.c @@ -236,6 +236,7 @@ static int rtrs_srv_create_stats_files(struct rtrs_srv_sess *sess) &sess->kobj, "stats"); if (err) { rtrs_err(s, "kobject_init_and_add(): %d\n", err); + kobject_put(&sess->stats->kobj_stats); return err; } err = sysfs_create_group(&sess->stats->kobj_stats, @@ -292,8 +293,8 @@ int rtrs_srv_create_sess_files(struct rtrs_srv_sess *sess) sysfs_remove_group(&sess->kobj, &rtrs_srv_sess_attr_group); put_kobj: kobject_del(&sess->kobj); - kobject_put(&sess->kobj); destroy_root: + kobject_put(&sess->kobj); rtrs_srv_destroy_once_sysfs_root_folders(sess); return err; From 7b47b27fcb4ece35fc8b0e9e7379ab14befb076a Mon Sep 17 00:00:00 2001 From: Guoqing Jiang Date: Thu, 17 Dec 2020 15:19:04 +0100 Subject: [PATCH 036/414] RDMA/rtrs-clt: Consolidate rtrs_clt_destroy_sysfs_root_{folder,files} Since the two functions are called together, let's consolidate them in a new function rtrs_clt_destroy_sysfs_root. Link: https://lore.kernel.org/r/20201217141915.56989-9-jinpu.wang@cloud.ionos.com Signed-off-by: Guoqing Jiang Signed-off-by: Jack Wang Signed-off-by: Jason Gunthorpe --- drivers/infiniband/ulp/rtrs/rtrs-clt-sysfs.c | 9 +++------ drivers/infiniband/ulp/rtrs/rtrs-clt.c | 6 ++---- drivers/infiniband/ulp/rtrs/rtrs-clt.h | 3 +-- 3 files changed, 6 insertions(+), 12 deletions(-) diff --git a/drivers/infiniband/ulp/rtrs/rtrs-clt-sysfs.c b/drivers/infiniband/ulp/rtrs/rtrs-clt-sysfs.c index ad77659800cd..b6a0abf40589 100644 --- a/drivers/infiniband/ulp/rtrs/rtrs-clt-sysfs.c +++ b/drivers/infiniband/ulp/rtrs/rtrs-clt-sysfs.c @@ -471,15 +471,12 @@ int rtrs_clt_create_sysfs_root_files(struct rtrs_clt *clt) return sysfs_create_group(&clt->dev.kobj, &rtrs_clt_attr_group); } -void rtrs_clt_destroy_sysfs_root_folders(struct rtrs_clt *clt) +void rtrs_clt_destroy_sysfs_root(struct rtrs_clt *clt) { + sysfs_remove_group(&clt->dev.kobj, &rtrs_clt_attr_group); + if (clt->kobj_paths) { kobject_del(clt->kobj_paths); kobject_put(clt->kobj_paths); } } - -void rtrs_clt_destroy_sysfs_root_files(struct rtrs_clt *clt) -{ - sysfs_remove_group(&clt->dev.kobj, &rtrs_clt_attr_group); -} diff --git a/drivers/infiniband/ulp/rtrs/rtrs-clt.c b/drivers/infiniband/ulp/rtrs/rtrs-clt.c index b3fb5fb93815..99fc34950032 100644 --- a/drivers/infiniband/ulp/rtrs/rtrs-clt.c +++ b/drivers/infiniband/ulp/rtrs/rtrs-clt.c @@ -2707,8 +2707,7 @@ struct rtrs_clt *rtrs_clt_open(struct rtrs_clt_ops *ops, rtrs_clt_close_conns(sess, true); kobject_put(&sess->kobj); } - rtrs_clt_destroy_sysfs_root_files(clt); - rtrs_clt_destroy_sysfs_root_folders(clt); + rtrs_clt_destroy_sysfs_root(clt); free_clt(clt); out: @@ -2725,8 +2724,7 @@ void rtrs_clt_close(struct rtrs_clt *clt) struct rtrs_clt_sess *sess, *tmp; /* Firstly forbid sysfs access */ - rtrs_clt_destroy_sysfs_root_files(clt); - rtrs_clt_destroy_sysfs_root_folders(clt); + rtrs_clt_destroy_sysfs_root(clt); /* Now it is safe to iterate over all paths without locks */ list_for_each_entry_safe(sess, tmp, &clt->paths_list, s.entry) { diff --git a/drivers/infiniband/ulp/rtrs/rtrs-clt.h b/drivers/infiniband/ulp/rtrs/rtrs-clt.h index b8dbd701b3cb..a97a068c4c28 100644 --- a/drivers/infiniband/ulp/rtrs/rtrs-clt.h +++ b/drivers/infiniband/ulp/rtrs/rtrs-clt.h @@ -243,8 +243,7 @@ ssize_t rtrs_clt_reset_all_help(struct rtrs_clt_stats *stats, /* rtrs-clt-sysfs.c */ int rtrs_clt_create_sysfs_root_files(struct rtrs_clt *clt); -void rtrs_clt_destroy_sysfs_root_folders(struct rtrs_clt *clt); -void rtrs_clt_destroy_sysfs_root_files(struct rtrs_clt *clt); +void rtrs_clt_destroy_sysfs_root(struct rtrs_clt *clt); int rtrs_clt_create_sess_files(struct rtrs_clt_sess *sess); void rtrs_clt_destroy_sess_files(struct rtrs_clt_sess *sess, From 25a033f5a75873cfdd36eca3c702363b682afb42 Mon Sep 17 00:00:00 2001 From: Guoqing Jiang Date: Thu, 17 Dec 2020 15:19:05 +0100 Subject: [PATCH 037/414] RDMA/rtrs-clt: Kill wait_for_inflight_permits Let's wait the inflight permits before free it. Link: https://lore.kernel.org/r/20201217141915.56989-10-jinpu.wang@cloud.ionos.com Signed-off-by: Guoqing Jiang Reviewed-by: Md Haris Iqbal Signed-off-by: Jack Wang Signed-off-by: Jason Gunthorpe --- drivers/infiniband/ulp/rtrs/rtrs-clt.c | 17 ++++++----------- 1 file changed, 6 insertions(+), 11 deletions(-) diff --git a/drivers/infiniband/ulp/rtrs/rtrs-clt.c b/drivers/infiniband/ulp/rtrs/rtrs-clt.c index 99fc34950032..6a5b72ad5ba1 100644 --- a/drivers/infiniband/ulp/rtrs/rtrs-clt.c +++ b/drivers/infiniband/ulp/rtrs/rtrs-clt.c @@ -1318,6 +1318,12 @@ static int alloc_permits(struct rtrs_clt *clt) static void free_permits(struct rtrs_clt *clt) { + if (clt->permits_map) { + size_t sz = clt->queue_depth; + + wait_event(clt->permits_wait, + find_first_bit(clt->permits_map, sz) >= sz); + } kfree(clt->permits_map); clt->permits_map = NULL; kfree(clt->permits); @@ -2607,19 +2613,8 @@ static struct rtrs_clt *alloc_clt(const char *sessname, size_t paths_num, return clt; } -static void wait_for_inflight_permits(struct rtrs_clt *clt) -{ - if (clt->permits_map) { - size_t sz = clt->queue_depth; - - wait_event(clt->permits_wait, - find_first_bit(clt->permits_map, sz) >= sz); - } -} - static void free_clt(struct rtrs_clt *clt) { - wait_for_inflight_permits(clt); free_permits(clt); free_percpu(clt->pcpu_path); mutex_destroy(&clt->paths_ev_mutex); From 88a8c54db98a83649afc21f15eb69d95f757872d Mon Sep 17 00:00:00 2001 From: Guoqing Jiang Date: Thu, 17 Dec 2020 15:19:06 +0100 Subject: [PATCH 038/414] RDMA/rtrs-clt: Remove unnecessary 'goto out' This is not needed since the label is just after the place. Link: https://lore.kernel.org/r/20201217141915.56989-11-jinpu.wang@cloud.ionos.com Signed-off-by: Guoqing Jiang Signed-off-by: Jack Wang Signed-off-by: Jason Gunthorpe --- drivers/infiniband/ulp/rtrs/rtrs-clt.c | 1 - 1 file changed, 1 deletion(-) diff --git a/drivers/infiniband/ulp/rtrs/rtrs-clt.c b/drivers/infiniband/ulp/rtrs/rtrs-clt.c index 6a5b72ad5ba1..d99fb1a1c194 100644 --- a/drivers/infiniband/ulp/rtrs/rtrs-clt.c +++ b/drivers/infiniband/ulp/rtrs/rtrs-clt.c @@ -2434,7 +2434,6 @@ static int rtrs_send_sess_info(struct rtrs_clt_sess *sess) err = -ECONNRESET; else err = -ETIMEDOUT; - goto out; } out: From 11f7b3940df35f21f069c62902a24ee4b1b9fcd3 Mon Sep 17 00:00:00 2001 From: Guoqing Jiang Date: Thu, 17 Dec 2020 15:19:07 +0100 Subject: [PATCH 039/414] RDMA/rtrs-clt: Kill rtrs_clt_change_state It is just a wrapper of rtrs_clt_change_state_get_old, and we can reuse rtrs_clt_change_state_get_old with add the checking of 'old_state' is valid or not. Link: https://lore.kernel.org/r/20201217141915.56989-12-jinpu.wang@cloud.ionos.com Signed-off-by: Guoqing Jiang Signed-off-by: Jack Wang Signed-off-by: Jason Gunthorpe --- drivers/infiniband/ulp/rtrs/rtrs-clt.c | 27 ++++++++++---------------- 1 file changed, 10 insertions(+), 17 deletions(-) diff --git a/drivers/infiniband/ulp/rtrs/rtrs-clt.c b/drivers/infiniband/ulp/rtrs/rtrs-clt.c index d99fb1a1c194..39dc8423d7df 100644 --- a/drivers/infiniband/ulp/rtrs/rtrs-clt.c +++ b/drivers/infiniband/ulp/rtrs/rtrs-clt.c @@ -1359,21 +1359,14 @@ static bool rtrs_clt_change_state_get_old(struct rtrs_clt_sess *sess, bool changed; spin_lock_irq(&sess->state_wq.lock); - *old_state = sess->state; + if (old_state) + *old_state = sess->state; changed = __rtrs_clt_change_state(sess, new_state); spin_unlock_irq(&sess->state_wq.lock); return changed; } -static bool rtrs_clt_change_state(struct rtrs_clt_sess *sess, - enum rtrs_clt_state new_state) -{ - enum rtrs_clt_state old_state; - - return rtrs_clt_change_state_get_old(sess, new_state, &old_state); -} - static void rtrs_clt_hb_err_handler(struct rtrs_con *c) { struct rtrs_clt_con *con = container_of(c, typeof(*con), c); @@ -1799,7 +1792,7 @@ static int rtrs_rdma_conn_rejected(struct rtrs_clt_con *con, static void rtrs_clt_close_conns(struct rtrs_clt_sess *sess, bool wait) { - if (rtrs_clt_change_state(sess, RTRS_CLT_CLOSING)) + if (rtrs_clt_change_state_get_old(sess, RTRS_CLT_CLOSING, NULL)) queue_work(rtrs_wq, &sess->close_work); if (wait) flush_work(&sess->close_work); @@ -2185,7 +2178,7 @@ static void rtrs_clt_close_work(struct work_struct *work) cancel_delayed_work_sync(&sess->reconnect_dwork); rtrs_clt_stop_and_destroy_conns(sess); - rtrs_clt_change_state(sess, RTRS_CLT_CLOSED); + rtrs_clt_change_state_get_old(sess, RTRS_CLT_CLOSED, NULL); } static int init_conns(struct rtrs_clt_sess *sess) @@ -2237,7 +2230,7 @@ static int init_conns(struct rtrs_clt_sess *sess) * doing rdma_resolve_addr(), switch to CONNECTION_ERR state * manually to keep reconnecting. */ - rtrs_clt_change_state(sess, RTRS_CLT_CONNECTING_ERR); + rtrs_clt_change_state_get_old(sess, RTRS_CLT_CONNECTING_ERR, NULL); return err; } @@ -2254,7 +2247,7 @@ static void rtrs_clt_info_req_done(struct ib_cq *cq, struct ib_wc *wc) if (unlikely(wc->status != IB_WC_SUCCESS)) { rtrs_err(sess->clt, "Sess info request send failed: %s\n", ib_wc_status_msg(wc->status)); - rtrs_clt_change_state(sess, RTRS_CLT_CONNECTING_ERR); + rtrs_clt_change_state_get_old(sess, RTRS_CLT_CONNECTING_ERR, NULL); return; } @@ -2378,7 +2371,7 @@ static void rtrs_clt_info_rsp_done(struct ib_cq *cq, struct ib_wc *wc) out: rtrs_clt_update_wc_stats(con); rtrs_iu_free(iu, sess->s.dev->ib_dev, 1); - rtrs_clt_change_state(sess, state); + rtrs_clt_change_state_get_old(sess, state, NULL); } static int rtrs_send_sess_info(struct rtrs_clt_sess *sess) @@ -2443,7 +2436,7 @@ static int rtrs_send_sess_info(struct rtrs_clt_sess *sess) rtrs_iu_free(rx_iu, sess->s.dev->ib_dev, 1); if (unlikely(err)) /* If we've never taken async path because of malloc problems */ - rtrs_clt_change_state(sess, RTRS_CLT_CONNECTING_ERR); + rtrs_clt_change_state_get_old(sess, RTRS_CLT_CONNECTING_ERR, NULL); return err; } @@ -2500,7 +2493,7 @@ static void rtrs_clt_reconnect_work(struct work_struct *work) /* Stop everything */ rtrs_clt_stop_and_destroy_conns(sess); msleep(RTRS_RECONNECT_BACKOFF); - if (rtrs_clt_change_state(sess, RTRS_CLT_CONNECTING)) { + if (rtrs_clt_change_state_get_old(sess, RTRS_CLT_CONNECTING, NULL)) { err = init_sess(sess); if (err) goto reconnect_again; @@ -2509,7 +2502,7 @@ static void rtrs_clt_reconnect_work(struct work_struct *work) return; reconnect_again: - if (rtrs_clt_change_state(sess, RTRS_CLT_RECONNECTING)) { + if (rtrs_clt_change_state_get_old(sess, RTRS_CLT_RECONNECTING, NULL)) { sess->stats->reconnects.fail_cnt++; delay_ms = clt->reconnect_delay_sec * 1000; queue_delayed_work(rtrs_wq, &sess->reconnect_dwork, From 7a8732a6f9a288b5b4ab5cce62d4016894f525a5 Mon Sep 17 00:00:00 2001 From: Guoqing Jiang Date: Thu, 17 Dec 2020 15:19:08 +0100 Subject: [PATCH 040/414] RDMA/rtrs-clt: Rename __rtrs_clt_change_state to rtrs_clt_change_state Let's rename it to rtrs_clt_change_state since the previous one is killed. Also update the comment to make it more clear. Link: https://lore.kernel.org/r/20201217141915.56989-13-jinpu.wang@cloud.ionos.com Signed-off-by: Guoqing Jiang Reviewed-by: Md Haris Iqbal Signed-off-by: Jack Wang Signed-off-by: Jason Gunthorpe --- drivers/infiniband/ulp/rtrs/rtrs-clt.c | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/drivers/infiniband/ulp/rtrs/rtrs-clt.c b/drivers/infiniband/ulp/rtrs/rtrs-clt.c index 39dc8423d7df..3c90718f668d 100644 --- a/drivers/infiniband/ulp/rtrs/rtrs-clt.c +++ b/drivers/infiniband/ulp/rtrs/rtrs-clt.c @@ -178,18 +178,18 @@ struct rtrs_clt_con *rtrs_permit_to_clt_con(struct rtrs_clt_sess *sess, } /** - * __rtrs_clt_change_state() - change the session state through session state + * rtrs_clt_change_state() - change the session state through session state * machine. * * @sess: client session to change the state of. * @new_state: state to change to. * - * returns true if successful, false if the requested state can not be set. + * returns true if sess's state is changed to new state, otherwise return false. * * Locks: * state_wq lock must be hold. */ -static bool __rtrs_clt_change_state(struct rtrs_clt_sess *sess, +static bool rtrs_clt_change_state(struct rtrs_clt_sess *sess, enum rtrs_clt_state new_state) { enum rtrs_clt_state old_state; @@ -286,7 +286,7 @@ static bool rtrs_clt_change_state_from_to(struct rtrs_clt_sess *sess, spin_lock_irq(&sess->state_wq.lock); if (sess->state == old_state) - changed = __rtrs_clt_change_state(sess, new_state); + changed = rtrs_clt_change_state(sess, new_state); spin_unlock_irq(&sess->state_wq.lock); return changed; @@ -1361,7 +1361,7 @@ static bool rtrs_clt_change_state_get_old(struct rtrs_clt_sess *sess, spin_lock_irq(&sess->state_wq.lock); if (old_state) *old_state = sess->state; - changed = __rtrs_clt_change_state(sess, new_state); + changed = rtrs_clt_change_state(sess, new_state); spin_unlock_irq(&sess->state_wq.lock); return changed; From 8537f2de6519945890a2b0f3739b23f32b5c0a89 Mon Sep 17 00:00:00 2001 From: Jack Wang Date: Thu, 17 Dec 2020 15:19:09 +0100 Subject: [PATCH 041/414] RDMA/rtrs-srv: Fix missing wr_cqe We had a few places wr_cqe is not set, which could lead to NULL pointer deref or GPF in error case. Fixes: 9cb837480424 ("RDMA/rtrs: server: main functionality") Link: https://lore.kernel.org/r/20201217141915.56989-14-jinpu.wang@cloud.ionos.com Signed-off-by: Jack Wang Reviewed-by: Md Haris Iqbal Signed-off-by: Guoqing Jiang Signed-off-by: Jason Gunthorpe --- drivers/infiniband/ulp/rtrs/rtrs-srv.c | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/drivers/infiniband/ulp/rtrs/rtrs-srv.c b/drivers/infiniband/ulp/rtrs/rtrs-srv.c index 92a216ddd9fd..f59731c5a96a 100644 --- a/drivers/infiniband/ulp/rtrs/rtrs-srv.c +++ b/drivers/infiniband/ulp/rtrs/rtrs-srv.c @@ -267,6 +267,7 @@ static int rdma_write_sg(struct rtrs_srv_op *id) WARN_ON_ONCE(rkey != wr->rkey); wr->wr.opcode = IB_WR_RDMA_WRITE; + wr->wr.wr_cqe = &io_comp_cqe; wr->wr.ex.imm_data = 0; wr->wr.send_flags = 0; @@ -294,6 +295,7 @@ static int rdma_write_sg(struct rtrs_srv_op *id) inv_wr.sg_list = NULL; inv_wr.num_sge = 0; inv_wr.opcode = IB_WR_SEND_WITH_INV; + inv_wr.wr_cqe = &io_comp_cqe; inv_wr.send_flags = 0; inv_wr.ex.invalidate_rkey = rkey; } @@ -304,6 +306,7 @@ static int rdma_write_sg(struct rtrs_srv_op *id) srv_mr = &sess->mrs[id->msg_id]; rwr.wr.opcode = IB_WR_REG_MR; + rwr.wr.wr_cqe = &local_reg_cqe; rwr.wr.num_sge = 0; rwr.mr = srv_mr->mr; rwr.wr.send_flags = 0; @@ -379,6 +382,7 @@ static int send_io_resp_imm(struct rtrs_srv_con *con, struct rtrs_srv_op *id, if (need_inval) { if (likely(sg_cnt)) { + inv_wr.wr_cqe = &io_comp_cqe; inv_wr.sg_list = NULL; inv_wr.num_sge = 0; inv_wr.opcode = IB_WR_SEND_WITH_INV; @@ -421,6 +425,7 @@ static int send_io_resp_imm(struct rtrs_srv_con *con, struct rtrs_srv_op *id, srv_mr = &sess->mrs[id->msg_id]; rwr.wr.next = &imm_wr; rwr.wr.opcode = IB_WR_REG_MR; + rwr.wr.wr_cqe = &local_reg_cqe; rwr.wr.num_sge = 0; rwr.wr.send_flags = 0; rwr.mr = srv_mr->mr; From eab098246625e91c1cbd6e8f75b09e4c9c28a9fc Mon Sep 17 00:00:00 2001 From: Guoqing Jiang Date: Thu, 17 Dec 2020 15:19:10 +0100 Subject: [PATCH 042/414] RDMA/rtrs-clt: Refactor the failure cases in alloc_clt Make all failure cases go to the common path to avoid duplicate code. And some issued existed before. 1. clt need to be freed to avoid memory leak. 2. return ERR_PTR(-ENOMEM) if kobject_create_and_add fails, because rtrs_clt_open checks the return value of by call "IS_ERR(clt)". Fixes: 6a98d71daea1 ("RDMA/rtrs: client: main functionality") Link: https://lore.kernel.org/r/20201217141915.56989-15-jinpu.wang@cloud.ionos.com Signed-off-by: Guoqing Jiang Reviewed-by: Md Haris Iqbal Signed-off-by: Jack Wang Signed-off-by: Jason Gunthorpe --- drivers/infiniband/ulp/rtrs/rtrs-clt.c | 25 ++++++++++++------------- 1 file changed, 12 insertions(+), 13 deletions(-) diff --git a/drivers/infiniband/ulp/rtrs/rtrs-clt.c b/drivers/infiniband/ulp/rtrs/rtrs-clt.c index 3c90718f668d..493f45a33b5e 100644 --- a/drivers/infiniband/ulp/rtrs/rtrs-clt.c +++ b/drivers/infiniband/ulp/rtrs/rtrs-clt.c @@ -2568,11 +2568,8 @@ static struct rtrs_clt *alloc_clt(const char *sessname, size_t paths_num, clt->dev.class = rtrs_clt_dev_class; clt->dev.release = rtrs_clt_dev_release; err = dev_set_name(&clt->dev, "%s", sessname); - if (err) { - free_percpu(clt->pcpu_path); - kfree(clt); - return ERR_PTR(err); - } + if (err) + goto err; /* * Suppress user space notification until * sysfs files are created @@ -2580,29 +2577,31 @@ static struct rtrs_clt *alloc_clt(const char *sessname, size_t paths_num, dev_set_uevent_suppress(&clt->dev, true); err = device_register(&clt->dev); if (err) { - free_percpu(clt->pcpu_path); put_device(&clt->dev); - return ERR_PTR(err); + goto err; } clt->kobj_paths = kobject_create_and_add("paths", &clt->dev.kobj); if (!clt->kobj_paths) { - free_percpu(clt->pcpu_path); - device_unregister(&clt->dev); - return NULL; + err = -ENOMEM; + goto err_dev; } err = rtrs_clt_create_sysfs_root_files(clt); if (err) { - free_percpu(clt->pcpu_path); kobject_del(clt->kobj_paths); kobject_put(clt->kobj_paths); - device_unregister(&clt->dev); - return ERR_PTR(err); + goto err_dev; } dev_set_uevent_suppress(&clt->dev, false); kobject_uevent(&clt->dev.kobj, KOBJ_ADD); return clt; +err_dev: + device_unregister(&clt->dev); +err: + free_percpu(clt->pcpu_path); + kfree(clt); + return ERR_PTR(err); } static void free_clt(struct rtrs_clt *clt) From b38041d50add1c881fbc60eb2be93b58fc58ea21 Mon Sep 17 00:00:00 2001 From: Jack Wang Date: Thu, 17 Dec 2020 15:19:11 +0100 Subject: [PATCH 043/414] RDMA/rtrs: Do not signal for heatbeat For HB, there is no need to generate signal for completion. Also remove a comment accordingly. Fixes: c0894b3ea69d ("RDMA/rtrs: core: lib functions shared between client and server modules") Link: https://lore.kernel.org/r/20201217141915.56989-16-jinpu.wang@cloud.ionos.com Signed-off-by: Jack Wang Reported-by: Gioh Kim Signed-off-by: Jason Gunthorpe --- drivers/infiniband/ulp/rtrs/rtrs-clt.c | 1 - drivers/infiniband/ulp/rtrs/rtrs-srv.c | 1 - drivers/infiniband/ulp/rtrs/rtrs.c | 4 ++-- 3 files changed, 2 insertions(+), 4 deletions(-) diff --git a/drivers/infiniband/ulp/rtrs/rtrs-clt.c b/drivers/infiniband/ulp/rtrs/rtrs-clt.c index 493f45a33b5e..2053bf99418a 100644 --- a/drivers/infiniband/ulp/rtrs/rtrs-clt.c +++ b/drivers/infiniband/ulp/rtrs/rtrs-clt.c @@ -664,7 +664,6 @@ static void rtrs_clt_rdma_done(struct ib_cq *cq, struct ib_wc *wc) case IB_WC_RDMA_WRITE: /* * post_send() RDMA write completions of IO reqs (read/write) - * and hb */ break; diff --git a/drivers/infiniband/ulp/rtrs/rtrs-srv.c b/drivers/infiniband/ulp/rtrs/rtrs-srv.c index f59731c5a96a..de2d5b742c39 100644 --- a/drivers/infiniband/ulp/rtrs/rtrs-srv.c +++ b/drivers/infiniband/ulp/rtrs/rtrs-srv.c @@ -1242,7 +1242,6 @@ static void rtrs_srv_rdma_done(struct ib_cq *cq, struct ib_wc *wc) case IB_WC_SEND: /* * post_send() RDMA write completions of IO reqs (read/write) - * and hb */ atomic_add(srv->queue_depth, &con->sq_wr_avail); diff --git a/drivers/infiniband/ulp/rtrs/rtrs.c b/drivers/infiniband/ulp/rtrs/rtrs.c index df52427f1710..97af8f0bb806 100644 --- a/drivers/infiniband/ulp/rtrs/rtrs.c +++ b/drivers/infiniband/ulp/rtrs/rtrs.c @@ -310,7 +310,7 @@ void rtrs_send_hb_ack(struct rtrs_sess *sess) imm = rtrs_to_imm(RTRS_HB_ACK_IMM, 0); err = rtrs_post_rdma_write_imm_empty(usr_con, sess->hb_cqe, imm, - IB_SEND_SIGNALED, NULL); + 0, NULL); if (err) { sess->hb_err_handler(usr_con); return; @@ -339,7 +339,7 @@ static void hb_work(struct work_struct *work) } imm = rtrs_to_imm(RTRS_HB_MSG_IMM, 0); err = rtrs_post_rdma_write_imm_empty(usr_con, sess->hb_cqe, imm, - IB_SEND_SIGNALED, NULL); + 0, NULL); if (err) { sess->hb_err_handler(usr_con); return; From aaed465f761700dace9ab39521013cddaae4f5a3 Mon Sep 17 00:00:00 2001 From: Jack Wang Date: Thu, 17 Dec 2020 15:19:12 +0100 Subject: [PATCH 044/414] RDMA/rtrs-clt: Use bitmask to check sess->flags We may want to add new flags, so it's better to use bitmask to check flags. Fixes: 6a98d71daea1 ("RDMA/rtrs: client: main functionality") Link: https://lore.kernel.org/r/20201217141915.56989-17-jinpu.wang@cloud.ionos.com Signed-off-by: Jack Wang Signed-off-by: Gioh Kim Signed-off-by: Jason Gunthorpe --- drivers/infiniband/ulp/rtrs/rtrs-clt.c | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/drivers/infiniband/ulp/rtrs/rtrs-clt.c b/drivers/infiniband/ulp/rtrs/rtrs-clt.c index 2053bf99418a..7644c3f627ca 100644 --- a/drivers/infiniband/ulp/rtrs/rtrs-clt.c +++ b/drivers/infiniband/ulp/rtrs/rtrs-clt.c @@ -494,7 +494,7 @@ static void rtrs_clt_recv_done(struct rtrs_clt_con *con, struct ib_wc *wc) int err; struct rtrs_clt_sess *sess = to_clt_sess(con->c.sess); - WARN_ON(sess->flags != RTRS_MSG_NEW_RKEY_F); + WARN_ON((sess->flags & RTRS_MSG_NEW_RKEY_F) == 0); iu = container_of(wc->wr_cqe, struct rtrs_iu, cqe); err = rtrs_iu_post_recv(&con->c, iu); @@ -514,7 +514,7 @@ static void rtrs_clt_rkey_rsp_done(struct rtrs_clt_con *con, struct ib_wc *wc) u32 buf_id; int err; - WARN_ON(sess->flags != RTRS_MSG_NEW_RKEY_F); + WARN_ON((sess->flags & RTRS_MSG_NEW_RKEY_F) == 0); iu = container_of(wc->wr_cqe, struct rtrs_iu, cqe); @@ -621,12 +621,12 @@ static void rtrs_clt_rdma_done(struct ib_cq *cq, struct ib_wc *wc) } else if (imm_type == RTRS_HB_MSG_IMM) { WARN_ON(con->c.cid); rtrs_send_hb_ack(&sess->s); - if (sess->flags == RTRS_MSG_NEW_RKEY_F) + if (sess->flags & RTRS_MSG_NEW_RKEY_F) return rtrs_clt_recv_done(con, wc); } else if (imm_type == RTRS_HB_ACK_IMM) { WARN_ON(con->c.cid); sess->s.hb_missed_cnt = 0; - if (sess->flags == RTRS_MSG_NEW_RKEY_F) + if (sess->flags & RTRS_MSG_NEW_RKEY_F) return rtrs_clt_recv_done(con, wc); } else { rtrs_wrn(con->c.sess, "Unknown IMM type %u\n", @@ -654,7 +654,7 @@ static void rtrs_clt_rdma_done(struct ib_cq *cq, struct ib_wc *wc) WARN_ON(!(wc->wc_flags & IB_WC_WITH_INVALIDATE || wc->wc_flags & IB_WC_WITH_IMM)); WARN_ON(wc->wr_cqe->done != rtrs_clt_rdma_done); - if (sess->flags == RTRS_MSG_NEW_RKEY_F) { + if (sess->flags & RTRS_MSG_NEW_RKEY_F) { if (wc->wc_flags & IB_WC_WITH_INVALIDATE) return rtrs_clt_recv_done(con, wc); @@ -679,7 +679,7 @@ static int post_recv_io(struct rtrs_clt_con *con, size_t q_size) struct rtrs_clt_sess *sess = to_clt_sess(con->c.sess); for (i = 0; i < q_size; i++) { - if (sess->flags == RTRS_MSG_NEW_RKEY_F) { + if (sess->flags & RTRS_MSG_NEW_RKEY_F) { struct rtrs_iu *iu = &con->rsp_ius[i]; err = rtrs_iu_post_recv(&con->c, iu); @@ -1563,7 +1563,7 @@ static int create_con_cq_qp(struct rtrs_clt_con *con) sess->queue_depth * 3 + 1); } /* alloc iu to recv new rkey reply when server reports flags set */ - if (sess->flags == RTRS_MSG_NEW_RKEY_F || con->c.cid == 0) { + if (sess->flags & RTRS_MSG_NEW_RKEY_F || con->c.cid == 0) { con->rsp_ius = rtrs_iu_alloc(max_recv_wr, sizeof(*rsp), GFP_KERNEL, sess->s.dev->ib_dev, DMA_FROM_DEVICE, From e8ae7ddb48a1b81fd1e67da34a0cb59daf0445d6 Mon Sep 17 00:00:00 2001 From: Jack Wang Date: Thu, 17 Dec 2020 15:19:13 +0100 Subject: [PATCH 045/414] RDMA/rtrs-srv: Do not signal REG_MR We do not need to wait for REG_MR completion, so remove the SIGNAL flag. Fixes: 9cb837480424 ("RDMA/rtrs: server: main functionality") Link: https://lore.kernel.org/r/20201217141915.56989-18-jinpu.wang@cloud.ionos.com Signed-off-by: Jack Wang Signed-off-by: Gioh Kim Signed-off-by: Jason Gunthorpe --- drivers/infiniband/ulp/rtrs/rtrs-srv.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/infiniband/ulp/rtrs/rtrs-srv.c b/drivers/infiniband/ulp/rtrs/rtrs-srv.c index de2d5b742c39..7a018e5de708 100644 --- a/drivers/infiniband/ulp/rtrs/rtrs-srv.c +++ b/drivers/infiniband/ulp/rtrs/rtrs-srv.c @@ -818,7 +818,7 @@ static int process_info_req(struct rtrs_srv_con *con, rwr[mri].wr.opcode = IB_WR_REG_MR; rwr[mri].wr.wr_cqe = &local_reg_cqe; rwr[mri].wr.num_sge = 0; - rwr[mri].wr.send_flags = mri ? 0 : IB_SEND_SIGNALED; + rwr[mri].wr.send_flags = 0; rwr[mri].mr = mr; rwr[mri].key = mr->rkey; rwr[mri].access = (IB_ACCESS_LOCAL_WRITE | From 6f5d1b3016d650f351e65c645a5eee5394547dd0 Mon Sep 17 00:00:00 2001 From: Jack Wang Date: Thu, 17 Dec 2020 15:19:14 +0100 Subject: [PATCH 046/414] RDMA/rtrs-srv: Init wr_cnt as 1 Fix up wr_avail accounting. if wr_cnt is 0, then we do SIGNAL for first wr, in completion we add queue_depth back, which is not right in the sense of tracking for available wr. So fix it by init wr_cnt to 1. Fixes: 9cb837480424 ("RDMA/rtrs: server: main functionality") Link: https://lore.kernel.org/r/20201217141915.56989-19-jinpu.wang@cloud.ionos.com Signed-off-by: Jack Wang Signed-off-by: Gioh Kim Signed-off-by: Jason Gunthorpe --- drivers/infiniband/ulp/rtrs/rtrs-srv.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/infiniband/ulp/rtrs/rtrs-srv.c b/drivers/infiniband/ulp/rtrs/rtrs-srv.c index 7a018e5de708..918f1cf140cd 100644 --- a/drivers/infiniband/ulp/rtrs/rtrs-srv.c +++ b/drivers/infiniband/ulp/rtrs/rtrs-srv.c @@ -1603,7 +1603,7 @@ static int create_con(struct rtrs_srv_sess *sess, con->c.cm_id = cm_id; con->c.sess = &sess->s; con->c.cid = cid; - atomic_set(&con->wr_cnt, 0); + atomic_set(&con->wr_cnt, 1); if (con->c.cid == 0) { /* From 7fbc3c373eefc291ff96d48496106c106b7f81c6 Mon Sep 17 00:00:00 2001 From: Jack Wang Date: Thu, 17 Dec 2020 15:19:15 +0100 Subject: [PATCH 047/414] RDMA/rtrs: Fix KASAN: stack-out-of-bounds bug When KASAN is enabled, we notice warning below: [ 483.436975] ================================================================== [ 483.437234] BUG: KASAN: stack-out-of-bounds in _mlx5_ib_post_send+0x188a/0x2560 [mlx5_ib] [ 483.437430] Read of size 4 at addr ffff88a195fd7d30 by task kworker/1:3/6954 [ 483.437731] CPU: 1 PID: 6954 Comm: kworker/1:3 Kdump: loaded Tainted: G O 5.4.82-pserver #5.4.82-1+feature+linux+5.4.y+dbg+20201210.1532+987e7a6~deb10 [ 483.437976] Hardware name: Supermicro Super Server/X11DDW-L, BIOS 3.3 02/21/2020 [ 483.438168] Workqueue: rtrs_server_wq hb_work [rtrs_core] [ 483.438323] Call Trace: [ 483.438486] dump_stack+0x96/0xe0 [ 483.438646] ? _mlx5_ib_post_send+0x188a/0x2560 [mlx5_ib] [ 483.438802] print_address_description.constprop.6+0x1b/0x220 [ 483.438966] ? _mlx5_ib_post_send+0x188a/0x2560 [mlx5_ib] [ 483.439133] ? _mlx5_ib_post_send+0x188a/0x2560 [mlx5_ib] [ 483.439285] __kasan_report.cold.9+0x1a/0x32 [ 483.439444] ? _mlx5_ib_post_send+0x188a/0x2560 [mlx5_ib] [ 483.439597] kasan_report+0x10/0x20 [ 483.439752] _mlx5_ib_post_send+0x188a/0x2560 [mlx5_ib] [ 483.439910] ? update_sd_lb_stats+0xfb1/0xfc0 [ 483.440073] ? set_reg_wr+0x520/0x520 [mlx5_ib] [ 483.440222] ? update_group_capacity+0x340/0x340 [ 483.440377] ? find_busiest_group+0x314/0x870 [ 483.440526] ? update_sd_lb_stats+0xfc0/0xfc0 [ 483.440683] ? __bitmap_and+0x6f/0x100 [ 483.440832] ? __lock_acquire+0xa2/0x2150 [ 483.440979] ? __lock_acquire+0xa2/0x2150 [ 483.441128] ? __lock_acquire+0xa2/0x2150 [ 483.441279] ? debug_lockdep_rcu_enabled+0x23/0x60 [ 483.441430] ? lock_downgrade+0x390/0x390 [ 483.441582] ? __lock_acquire+0xa2/0x2150 [ 483.441729] ? __lock_acquire+0xa2/0x2150 [ 483.441876] ? newidle_balance+0x425/0x8f0 [ 483.442024] ? __lock_acquire+0xa2/0x2150 [ 483.442172] ? debug_lockdep_rcu_enabled+0x23/0x60 [ 483.442330] hb_work+0x15d/0x1d0 [rtrs_core] [ 483.442479] ? schedule_hb+0x50/0x50 [rtrs_core] [ 483.442627] ? lock_downgrade+0x390/0x390 [ 483.442781] ? process_one_work+0x40d/0xa50 [ 483.442931] process_one_work+0x4ee/0xa50 [ 483.443082] ? pwq_dec_nr_in_flight+0x110/0x110 [ 483.443231] ? do_raw_spin_lock+0x119/0x1d0 [ 483.443383] worker_thread+0x65/0x5c0 [ 483.443532] ? process_one_work+0xa50/0xa50 [ 483.451839] kthread+0x1e2/0x200 [ 483.451983] ? kthread_create_on_node+0xc0/0xc0 [ 483.452139] ret_from_fork+0x3a/0x50 The problem is we use wrong type when send wr, hw driver expect the type of IB_WR_RDMA_WRITE_WITH_IMM wr should be ib_rdma_wr, and doing container_of to access member. The fix is simple use ib_rdma_wr instread of ib_send_wr. Fixes: c0894b3ea69d ("RDMA/rtrs: core: lib functions shared between client and server modules") Link: https://lore.kernel.org/r/20201217141915.56989-20-jinpu.wang@cloud.ionos.com Signed-off-by: Jack Wang Reviewed-by: Gioh Kim Signed-off-by: Jason Gunthorpe --- drivers/infiniband/ulp/rtrs/rtrs.c | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/drivers/infiniband/ulp/rtrs/rtrs.c b/drivers/infiniband/ulp/rtrs/rtrs.c index 97af8f0bb806..d13aff0aa816 100644 --- a/drivers/infiniband/ulp/rtrs/rtrs.c +++ b/drivers/infiniband/ulp/rtrs/rtrs.c @@ -182,16 +182,16 @@ int rtrs_post_rdma_write_imm_empty(struct rtrs_con *con, struct ib_cqe *cqe, u32 imm_data, enum ib_send_flags flags, struct ib_send_wr *head) { - struct ib_send_wr wr; + struct ib_rdma_wr wr; - wr = (struct ib_send_wr) { - .wr_cqe = cqe, - .send_flags = flags, - .opcode = IB_WR_RDMA_WRITE_WITH_IMM, - .ex.imm_data = cpu_to_be32(imm_data), + wr = (struct ib_rdma_wr) { + .wr.wr_cqe = cqe, + .wr.send_flags = flags, + .wr.opcode = IB_WR_RDMA_WRITE_WITH_IMM, + .wr.ex.imm_data = cpu_to_be32(imm_data), }; - return rtrs_post_send(con->qp, head, &wr); + return rtrs_post_send(con->qp, head, &wr.wr); } EXPORT_SYMBOL_GPL(rtrs_post_rdma_write_imm_empty); From cf6d6fc27936025e851b3dc88be3b68c40d5fd08 Mon Sep 17 00:00:00 2001 From: Thorsten Leemhuis Date: Sat, 16 Jan 2021 15:35:42 +0100 Subject: [PATCH 048/414] docs: process/howto.rst: make sections on bug reporting match practice The file Documentation/process/howto.rst points to bugzilla.kernel.org as the primary place to report kernel bugs to. For most of the kernel that's the wrong place, as the MAINTAINERS file shows. Adjust those sections to make them match current practice. This change also removes a contradiction with the recently added text Documentation/admin-guide/reporting-issues.rst, which is a reason for a 'this needs further discussion' warning note in there. The change is thus a prerequisite to remove that warning, nevertheless it is left for now to make sure people review the text's approach more carefully. Signed-off-by: Thorsten Leemhuis Acked-by: Randy Dunlap Link: https://lore.kernel.org/r/20210116143542.69199-1-linux@leemhuis.info Signed-off-by: Jonathan Corbet --- Documentation/process/howto.rst | 20 ++++++++++---------- 1 file changed, 10 insertions(+), 10 deletions(-) diff --git a/Documentation/process/howto.rst b/Documentation/process/howto.rst index 7a5c105e34d4..e4beeca57e5f 100644 --- a/Documentation/process/howto.rst +++ b/Documentation/process/howto.rst @@ -342,16 +342,10 @@ Adventurous testers are very welcome to runtime-test the linux-next. Bug Reporting ------------- -https://bugzilla.kernel.org is where the Linux kernel developers track kernel -bugs. Users are encouraged to report all bugs that they find in this -tool. For details on how to use the kernel bugzilla, please see: - - https://bugzilla.kernel.org/page.cgi?id=faq.html - The file 'Documentation/admin-guide/reporting-issues.rst' in the main kernel -source directory has a good template for how to report a possible kernel bug, -and details what kind of information is needed by the kernel developers to help -track down the problem. +source directory describes how to report a possible kernel bug, and details +what kind of information is needed by the kernel developers to help track +down the problem. Managing bug reports @@ -364,7 +358,13 @@ improve your skills, and other developers will be aware of your presence. Fixing bugs is one of the best ways to get merits among other developers, because not many people like wasting time fixing other people's bugs. -To work in the already reported bug reports, go to https://bugzilla.kernel.org. +To work on already reported bug reports, find a subsystem you are interested in. +Check the MAINTAINERS file where bugs for that subsystem get reported to; often +it will be a mailing list, rarely a bugtracker. Search the archives of said +place for recent reports and help where you see fit. You may also want to check +https://bugzilla.kernel.org for bug reports; only a handful of kernel subsystems +use it actively for reporting or tracking, nevertheless bugs for the whole +kernel get filed there. Mailing lists From 33eb12f2966ef9c529ce7138711a9c150a17931e Mon Sep 17 00:00:00 2001 From: Patrisious Haddad Date: Wed, 30 Dec 2020 15:02:40 +0200 Subject: [PATCH 049/414] RDMA/nldev: Return an error message on failure to turn auto mode The bounded counter can't be reconfigured to be in auto mode, in attempt to do it, the user will get an error, but without any hint why. Update nldev interface to return an error message through extack mechanism. Link: https://lore.kernel.org/r/20201230130240.180737-1-leon@kernel.org Signed-off-by: Patrisious Haddad Signed-off-by: Leon Romanovsky Signed-off-by: Jason Gunthorpe --- drivers/infiniband/core/counters.c | 64 ++++++++++++++++-------------- drivers/infiniband/core/nldev.c | 4 +- include/rdma/rdma_counter.h | 3 +- 3 files changed, 37 insertions(+), 34 deletions(-) diff --git a/drivers/infiniband/core/counters.c b/drivers/infiniband/core/counters.c index 92745522250e..2b9a1ee7a160 100644 --- a/drivers/infiniband/core/counters.c +++ b/drivers/infiniband/core/counters.c @@ -10,30 +10,35 @@ #define ALL_AUTO_MODE_MASKS (RDMA_COUNTER_MASK_QP_TYPE | RDMA_COUNTER_MASK_PID) -static int __counter_set_mode(struct rdma_counter_mode *curr, +static int __counter_set_mode(struct rdma_port_counter *port_counter, enum rdma_nl_counter_mode new_mode, enum rdma_nl_counter_mask new_mask) { - if ((new_mode == RDMA_COUNTER_MODE_AUTO) && - ((new_mask & (~ALL_AUTO_MODE_MASKS)) || - (curr->mode != RDMA_COUNTER_MODE_NONE))) - return -EINVAL; + if (new_mode == RDMA_COUNTER_MODE_AUTO && port_counter->num_counters) + if (new_mask & ~ALL_AUTO_MODE_MASKS || + port_counter->mode.mode != RDMA_COUNTER_MODE_NONE) + return -EINVAL; - curr->mode = new_mode; - curr->mask = new_mask; + port_counter->mode.mode = new_mode; + port_counter->mode.mask = new_mask; return 0; } /** * rdma_counter_set_auto_mode() - Turn on/off per-port auto mode * - * When @on is true, the @mask must be set; When @on is false, it goes - * into manual mode if there's any counter, so that the user is able to - * manually access them. + * @dev: Device to operate + * @port: Port to use + * @mask: Mask to configure + * @extack: Message to the user + * + * Return 0 on success. */ int rdma_counter_set_auto_mode(struct ib_device *dev, u8 port, - bool on, enum rdma_nl_counter_mask mask) + enum rdma_nl_counter_mask mask, + struct netlink_ext_ack *extack) { + enum rdma_nl_counter_mode mode = RDMA_COUNTER_MODE_AUTO; struct rdma_port_counter *port_counter; int ret; @@ -42,23 +47,23 @@ int rdma_counter_set_auto_mode(struct ib_device *dev, u8 port, return -EOPNOTSUPP; mutex_lock(&port_counter->lock); - if (on) { - ret = __counter_set_mode(&port_counter->mode, - RDMA_COUNTER_MODE_AUTO, mask); - } else { - if (port_counter->mode.mode != RDMA_COUNTER_MODE_AUTO) { - ret = -EINVAL; - goto out; - } - - if (port_counter->num_counters) - ret = __counter_set_mode(&port_counter->mode, - RDMA_COUNTER_MODE_MANUAL, 0); - else - ret = __counter_set_mode(&port_counter->mode, - RDMA_COUNTER_MODE_NONE, 0); + if (mask) { + ret = __counter_set_mode(port_counter, mode, mask); + if (ret) + NL_SET_ERR_MSG( + extack, + "Turning on auto mode is not allowed when there is bound QP"); + goto out; } + if (port_counter->mode.mode != RDMA_COUNTER_MODE_AUTO) { + ret = -EINVAL; + goto out; + } + + mode = (port_counter->num_counters) ? RDMA_COUNTER_MODE_MANUAL : + RDMA_COUNTER_MODE_NONE; + ret = __counter_set_mode(port_counter, mode, 0); out: mutex_unlock(&port_counter->lock); return ret; @@ -122,8 +127,8 @@ static struct rdma_counter *alloc_and_bind(struct ib_device *dev, u8 port, mutex_lock(&port_counter->lock); switch (mode) { case RDMA_COUNTER_MODE_MANUAL: - ret = __counter_set_mode(&port_counter->mode, - RDMA_COUNTER_MODE_MANUAL, 0); + ret = __counter_set_mode(port_counter, RDMA_COUNTER_MODE_MANUAL, + 0); if (ret) { mutex_unlock(&port_counter->lock); goto err_mode; @@ -170,8 +175,7 @@ static void rdma_counter_free(struct rdma_counter *counter) port_counter->num_counters--; if (!port_counter->num_counters && (port_counter->mode.mode == RDMA_COUNTER_MODE_MANUAL)) - __counter_set_mode(&port_counter->mode, RDMA_COUNTER_MODE_NONE, - 0); + __counter_set_mode(port_counter, RDMA_COUNTER_MODE_NONE, 0); mutex_unlock(&port_counter->lock); diff --git a/drivers/infiniband/core/nldev.c b/drivers/infiniband/core/nldev.c index 08366e254b1d..d306049c22a2 100644 --- a/drivers/infiniband/core/nldev.c +++ b/drivers/infiniband/core/nldev.c @@ -1768,9 +1768,7 @@ static int nldev_stat_set_doit(struct sk_buff *skb, struct nlmsghdr *nlh, if (tb[RDMA_NLDEV_ATTR_STAT_AUTO_MODE_MASK]) mask = nla_get_u32( tb[RDMA_NLDEV_ATTR_STAT_AUTO_MODE_MASK]); - - ret = rdma_counter_set_auto_mode(device, port, - mask ? true : false, mask); + ret = rdma_counter_set_auto_mode(device, port, mask, extack); if (ret) goto err_msg; } else { diff --git a/include/rdma/rdma_counter.h b/include/rdma/rdma_counter.h index eb99856e8b30..e75cf9742e04 100644 --- a/include/rdma/rdma_counter.h +++ b/include/rdma/rdma_counter.h @@ -46,7 +46,8 @@ struct rdma_counter { void rdma_counter_init(struct ib_device *dev); void rdma_counter_release(struct ib_device *dev); int rdma_counter_set_auto_mode(struct ib_device *dev, u8 port, - bool on, enum rdma_nl_counter_mask mask); + enum rdma_nl_counter_mask mask, + struct netlink_ext_ack *extack); int rdma_counter_bind_qp_auto(struct ib_qp *qp, u8 port); int rdma_counter_unbind_qp(struct ib_qp *qp, bool force); From c305f1b408dc0eac0a4a65a3c92e50a0086ee71d Mon Sep 17 00:00:00 2001 From: SeongJae Park Date: Sun, 17 Jan 2021 11:09:29 +0100 Subject: [PATCH 050/414] Documentation/kokr/howto: Replace HTTP links with HTTPS ones: Documentation/process Apply this commit to Korean: e7b4311ebcac ("Replace HTTP links with HTTPS ones: Documentation/process") Signed-off-by: SeongJae Park Link: https://lore.kernel.org/r/20210117100931.9347-2-sj38.park@gmail.com Signed-off-by: Jonathan Corbet --- Documentation/translations/ko_KR/howto.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Documentation/translations/ko_KR/howto.rst b/Documentation/translations/ko_KR/howto.rst index 240d29be38f2..0a42ffdea918 100644 --- a/Documentation/translations/ko_KR/howto.rst +++ b/Documentation/translations/ko_KR/howto.rst @@ -583,7 +583,7 @@ Pat이라는 이름을 가진 여자가 있을 수도 있는 것이다. 리눅 "The Perfect Patch" - http://www.ozlabs.org/~akpm/stuff/tpp.txt + https://www.ozlabs.org/~akpm/stuff/tpp.txt 이 모든 것을 하는 것은 매우 어려운 일이다. 완벽히 소화하는 데는 적어도 몇년이 From 0a610e5cd930cb7e09264d8e85ddb7fafa1cfc4b Mon Sep 17 00:00:00 2001 From: SeongJae Park Date: Sun, 17 Jan 2021 11:09:30 +0100 Subject: [PATCH 051/414] docs/kokr: make reporting-bugs.rst obsolete Translate this commit to Korean: da514157c4f0 ("docs: make reporting-bugs.rst obsolete") Signed-off-by: SeongJae Park Link: https://lore.kernel.org/r/20210117100931.9347-3-sj38.park@gmail.com Signed-off-by: Jonathan Corbet --- Documentation/translations/ko_KR/howto.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Documentation/translations/ko_KR/howto.rst b/Documentation/translations/ko_KR/howto.rst index 0a42ffdea918..787f1e85f8a0 100644 --- a/Documentation/translations/ko_KR/howto.rst +++ b/Documentation/translations/ko_KR/howto.rst @@ -345,7 +345,7 @@ https://bugzilla.kernel.org 는 리눅스 커널 개발자들이 커널의 버 https://bugzilla.kernel.org/page.cgi?id=faq.html -메인 커널 소스 디렉토리에 있는 :ref:`admin-guide/reporting-bugs.rst ` +메인 커널 소스 디렉토리에 있는 'Documentation/admin-guide/reporting-issues.rst' 파일은 커널 버그라고 생각되는 것을 보고하는 방법에 관한 좋은 템플릿이며 문제를 추적하기 위해서 커널 개발자들이 필요로 하는 정보가 무엇들인지를 상세히 설명하고 있다. From e651fdb281854a718eea438c70c0e1c1d336fc0c Mon Sep 17 00:00:00 2001 From: SeongJae Park Date: Sun, 17 Jan 2021 11:09:31 +0100 Subject: [PATCH 052/414] docs/kokr: Link memory-barriers.txt to rst This commit links Korean translation of 'memory-barriers.txt' in the translations index rst file as the original version is linked in 'Documentation/staging/index.rst'. Signed-off-by: SeongJae Park Link: https://lore.kernel.org/r/20210117100931.9347-4-sj38.park@gmail.com Signed-off-by: Jonathan Corbet --- Documentation/translations/ko_KR/index.rst | 15 +++++++++++++++ 1 file changed, 15 insertions(+) diff --git a/Documentation/translations/ko_KR/index.rst b/Documentation/translations/ko_KR/index.rst index 27995c4233de..b9e27d20b039 100644 --- a/Documentation/translations/ko_KR/index.rst +++ b/Documentation/translations/ko_KR/index.rst @@ -10,3 +10,18 @@ :maxdepth: 1 howto + + +리눅스 커널 메모리 배리어 +------------------------- + +.. raw:: latex + + \footnotesize + +.. include:: ./memory-barriers.txt + :literal: + +.. raw:: latex + + \normalsize From f0ea149eee6bce70f7c55ebf8bca93a0b02bfbd0 Mon Sep 17 00:00:00 2001 From: Lee Jones Date: Wed, 13 Jan 2021 16:33:15 +0000 Subject: [PATCH 053/414] docs: submitting-patches: Emphasise the requirement to Cc: stable when using Fixes: tag Clear-up any confusion surrounding the Fixes: tag with regards to the need to Cc: the stable mailing list when submitting stable patch candidates. Signed-off-by: Lee Jones Reviewed-by: Greg Kroah-Hartman Cc: Greg Kroah-Hartman Cc: Jonathan Corbet Cc: linux-doc@vger.kernel.org Link: https://lore.kernel.org/r/20210113163315.1331064-1-lee.jones@linaro.org Signed-off-by: Jonathan Corbet --- Documentation/process/submitting-patches.rst | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/Documentation/process/submitting-patches.rst b/Documentation/process/submitting-patches.rst index 7c97ad580e7d..7f48cccc75cd 100644 --- a/Documentation/process/submitting-patches.rst +++ b/Documentation/process/submitting-patches.rst @@ -556,6 +556,11 @@ which stable kernel versions should receive your fix. This is the preferred method for indicating a bug fixed by the patch. See :ref:`describe_changes` for more details. +Note: Attaching a Fixes: tag does not subvert the stable kernel rules +process nor the requirement to Cc: stable@vger.kernel.org on all stable +patch candidates. For more information, please read +:ref:`Documentation/process/stable-kernel-rules.rst ` + .. _the_canonical_patch_format: The canonical patch format From 1bbd4380744f637a759e0a7bb7d8d1c38282e0c3 Mon Sep 17 00:00:00 2001 From: Yangyang Li Date: Thu, 7 Jan 2021 16:36:29 +0800 Subject: [PATCH 054/414] RDMA/hns: Create CQ with selected CQN for bank load balance In order to improve performance by balancing the load between different banks of cache, the CQC cache is desigend to choose one of 4 banks according to lower 2 bits of CQN. The hns driver needs to count the number of CQ on each bank and then assigns the CQ being created to the bank with the minimum load first. Link: https://lore.kernel.org/r/1610008589-35770-1-git-send-email-liweihang@huawei.com Signed-off-by: Yangyang Li Signed-off-by: Weihang Li Signed-off-by: Jason Gunthorpe --- drivers/infiniband/hw/hns/hns_roce_cq.c | 115 +++++++++++++++++--- drivers/infiniband/hw/hns/hns_roce_device.h | 10 +- drivers/infiniband/hw/hns/hns_roce_main.c | 8 +- 3 files changed, 105 insertions(+), 28 deletions(-) diff --git a/drivers/infiniband/hw/hns/hns_roce_cq.c b/drivers/infiniband/hw/hns/hns_roce_cq.c index 8533fc2d8df2..ffb7f7e5c641 100644 --- a/drivers/infiniband/hw/hns/hns_roce_cq.c +++ b/drivers/infiniband/hw/hns/hns_roce_cq.c @@ -38,11 +38,74 @@ #include "hns_roce_hem.h" #include "hns_roce_common.h" +static u8 get_least_load_bankid_for_cq(struct hns_roce_bank *bank) +{ + u32 least_load = bank[0].inuse; + u8 bankid = 0; + u32 bankcnt; + u8 i; + + for (i = 1; i < HNS_ROCE_CQ_BANK_NUM; i++) { + bankcnt = bank[i].inuse; + if (bankcnt < least_load) { + least_load = bankcnt; + bankid = i; + } + } + + return bankid; +} + +static int alloc_cqn(struct hns_roce_dev *hr_dev, struct hns_roce_cq *hr_cq) +{ + struct hns_roce_cq_table *cq_table = &hr_dev->cq_table; + struct hns_roce_bank *bank; + u8 bankid; + int id; + + mutex_lock(&cq_table->bank_mutex); + bankid = get_least_load_bankid_for_cq(cq_table->bank); + bank = &cq_table->bank[bankid]; + + id = ida_alloc_range(&bank->ida, bank->min, bank->max, GFP_KERNEL); + if (id < 0) { + mutex_unlock(&cq_table->bank_mutex); + return id; + } + + /* the lower 2 bits is bankid */ + hr_cq->cqn = (id << CQ_BANKID_SHIFT) | bankid; + bank->inuse++; + mutex_unlock(&cq_table->bank_mutex); + + return 0; +} + +static inline u8 get_cq_bankid(unsigned long cqn) +{ + /* The lower 2 bits of CQN are used to hash to different banks */ + return (u8)(cqn & GENMASK(1, 0)); +} + +static void free_cqn(struct hns_roce_dev *hr_dev, unsigned long cqn) +{ + struct hns_roce_cq_table *cq_table = &hr_dev->cq_table; + struct hns_roce_bank *bank; + + bank = &cq_table->bank[get_cq_bankid(cqn)]; + + ida_free(&bank->ida, cqn >> CQ_BANKID_SHIFT); + + mutex_lock(&cq_table->bank_mutex); + bank->inuse--; + mutex_unlock(&cq_table->bank_mutex); +} + static int alloc_cqc(struct hns_roce_dev *hr_dev, struct hns_roce_cq *hr_cq) { + struct hns_roce_cq_table *cq_table = &hr_dev->cq_table; struct ib_device *ibdev = &hr_dev->ib_dev; struct hns_roce_cmd_mailbox *mailbox; - struct hns_roce_cq_table *cq_table; u64 mtts[MTT_MIN_COUNT] = { 0 }; dma_addr_t dma_handle; int ret; @@ -54,13 +117,6 @@ static int alloc_cqc(struct hns_roce_dev *hr_dev, struct hns_roce_cq *hr_cq) return -EINVAL; } - cq_table = &hr_dev->cq_table; - ret = hns_roce_bitmap_alloc(&cq_table->bitmap, &hr_cq->cqn); - if (ret) { - ibdev_err(ibdev, "failed to alloc CQ bitmap, ret = %d.\n", ret); - return ret; - } - /* Get CQC memory HEM(Hardware Entry Memory) table */ ret = hns_roce_table_get(hr_dev, &cq_table->table, hr_cq->cqn); if (ret) { @@ -110,7 +166,6 @@ static int alloc_cqc(struct hns_roce_dev *hr_dev, struct hns_roce_cq *hr_cq) hns_roce_table_put(hr_dev, &cq_table->table, hr_cq->cqn); err_out: - hns_roce_bitmap_free(&cq_table->bitmap, hr_cq->cqn, BITMAP_NO_RR); return ret; } @@ -138,7 +193,6 @@ static void free_cqc(struct hns_roce_dev *hr_dev, struct hns_roce_cq *hr_cq) wait_for_completion(&hr_cq->free); hns_roce_table_put(hr_dev, &cq_table->table, hr_cq->cqn); - hns_roce_bitmap_free(&cq_table->bitmap, hr_cq->cqn, BITMAP_NO_RR); } static int alloc_cq_buf(struct hns_roce_dev *hr_dev, struct hns_roce_cq *hr_cq, @@ -298,11 +352,17 @@ int hns_roce_create_cq(struct ib_cq *ib_cq, const struct ib_cq_init_attr *attr, goto err_cq_buf; } + ret = alloc_cqn(hr_dev, hr_cq); + if (ret) { + ibdev_err(ibdev, "failed to alloc CQN, ret = %d.\n", ret); + goto err_cq_db; + } + ret = alloc_cqc(hr_dev, hr_cq); if (ret) { ibdev_err(ibdev, "failed to alloc CQ context, ret = %d.\n", ret); - goto err_cq_db; + goto err_cqn; } /* @@ -326,6 +386,8 @@ int hns_roce_create_cq(struct ib_cq *ib_cq, const struct ib_cq_init_attr *attr, err_cqc: free_cqc(hr_dev, hr_cq); +err_cqn: + free_cqn(hr_dev, hr_cq->cqn); err_cq_db: free_cq_db(hr_dev, hr_cq, udata); err_cq_buf: @@ -341,9 +403,11 @@ int hns_roce_destroy_cq(struct ib_cq *ib_cq, struct ib_udata *udata) if (hr_dev->hw->destroy_cq) hr_dev->hw->destroy_cq(ib_cq, udata); - free_cq_buf(hr_dev, hr_cq); - free_cq_db(hr_dev, hr_cq, udata); free_cqc(hr_dev, hr_cq); + free_cqn(hr_dev, hr_cq->cqn); + free_cq_db(hr_dev, hr_cq, udata); + free_cq_buf(hr_dev, hr_cq); + return 0; } @@ -402,18 +466,33 @@ void hns_roce_cq_event(struct hns_roce_dev *hr_dev, u32 cqn, int event_type) complete(&hr_cq->free); } -int hns_roce_init_cq_table(struct hns_roce_dev *hr_dev) +void hns_roce_init_cq_table(struct hns_roce_dev *hr_dev) { struct hns_roce_cq_table *cq_table = &hr_dev->cq_table; + unsigned int reserved_from_bot; + unsigned int i; + mutex_init(&cq_table->bank_mutex); xa_init(&cq_table->array); - return hns_roce_bitmap_init(&cq_table->bitmap, hr_dev->caps.num_cqs, - hr_dev->caps.num_cqs - 1, - hr_dev->caps.reserved_cqs, 0); + reserved_from_bot = hr_dev->caps.reserved_cqs; + + for (i = 0; i < reserved_from_bot; i++) { + cq_table->bank[get_cq_bankid(i)].inuse++; + cq_table->bank[get_cq_bankid(i)].min++; + } + + for (i = 0; i < HNS_ROCE_CQ_BANK_NUM; i++) { + ida_init(&cq_table->bank[i].ida); + cq_table->bank[i].max = hr_dev->caps.num_cqs / + HNS_ROCE_CQ_BANK_NUM - 1; + } } void hns_roce_cleanup_cq_table(struct hns_roce_dev *hr_dev) { - hns_roce_bitmap_cleanup(&hr_dev->cq_table.bitmap); + int i; + + for (i = 0; i < HNS_ROCE_CQ_BANK_NUM; i++) + ida_destroy(&hr_dev->cq_table.bank[i].ida); } diff --git a/drivers/infiniband/hw/hns/hns_roce_device.h b/drivers/infiniband/hw/hns/hns_roce_device.h index 55d538625e36..c46b330a8c0a 100644 --- a/drivers/infiniband/hw/hns/hns_roce_device.h +++ b/drivers/infiniband/hw/hns/hns_roce_device.h @@ -119,6 +119,9 @@ #define SRQ_DB_REG 0x230 #define HNS_ROCE_QP_BANK_NUM 8 +#define HNS_ROCE_CQ_BANK_NUM 4 + +#define CQ_BANKID_SHIFT 2 /* The chip implementation of the consumer index is calculated * according to twice the actual EQ depth @@ -536,9 +539,10 @@ struct hns_roce_qp_table { }; struct hns_roce_cq_table { - struct hns_roce_bitmap bitmap; struct xarray array; struct hns_roce_hem_table table; + struct hns_roce_bank bank[HNS_ROCE_CQ_BANK_NUM]; + struct mutex bank_mutex; }; struct hns_roce_srq_table { @@ -779,7 +783,7 @@ struct hns_roce_caps { u32 max_cqes; u32 min_cqes; u32 min_wqes; - int reserved_cqs; + u32 reserved_cqs; int reserved_srqs; int num_aeq_vectors; int num_comp_vectors; @@ -1164,7 +1168,7 @@ int hns_roce_mtr_map(struct hns_roce_dev *hr_dev, struct hns_roce_mtr *mtr, int hns_roce_init_pd_table(struct hns_roce_dev *hr_dev); int hns_roce_init_mr_table(struct hns_roce_dev *hr_dev); -int hns_roce_init_cq_table(struct hns_roce_dev *hr_dev); +void hns_roce_init_cq_table(struct hns_roce_dev *hr_dev); int hns_roce_init_qp_table(struct hns_roce_dev *hr_dev); int hns_roce_init_srq_table(struct hns_roce_dev *hr_dev); diff --git a/drivers/infiniband/hw/hns/hns_roce_main.c b/drivers/infiniband/hw/hns/hns_roce_main.c index d9179bae4989..2b78b1ff63d3 100644 --- a/drivers/infiniband/hw/hns/hns_roce_main.c +++ b/drivers/infiniband/hw/hns/hns_roce_main.c @@ -748,11 +748,7 @@ static int hns_roce_setup_hca(struct hns_roce_dev *hr_dev) goto err_pd_table_free; } - ret = hns_roce_init_cq_table(hr_dev); - if (ret) { - dev_err(dev, "Failed to init completion queue table.\n"); - goto err_mr_table_free; - } + hns_roce_init_cq_table(hr_dev); ret = hns_roce_init_qp_table(hr_dev); if (ret) { @@ -777,8 +773,6 @@ static int hns_roce_setup_hca(struct hns_roce_dev *hr_dev) err_cq_table_free: hns_roce_cleanup_cq_table(hr_dev); - -err_mr_table_free: hns_roce_cleanup_mr_table(hr_dev); err_pd_table_free: From 6970613b4c6a86501c1faa5a86dafa40e97b794e Mon Sep 17 00:00:00 2001 From: Marc Koderer Date: Tue, 12 Jan 2021 16:40:54 +0100 Subject: [PATCH 055/414] samples/kprobes: Add ARM support Plenty of architectures are already supported and this adds ARM support. Signed-off-by: Marc Koderer Acked-by: Masami Hiramatsu Link: https://lore.kernel.org/r/20210112154054.17138-1-marc@koderer.com Signed-off-by: Jonathan Corbet --- samples/kprobes/kprobe_example.c | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/samples/kprobes/kprobe_example.c b/samples/kprobes/kprobe_example.c index 192aa68db0c0..331dcf151532 100644 --- a/samples/kprobes/kprobe_example.c +++ b/samples/kprobes/kprobe_example.c @@ -43,6 +43,10 @@ static int __kprobes handler_pre(struct kprobe *p, struct pt_regs *regs) " pstate = 0x%lx\n", p->symbol_name, p->addr, (long)regs->pc, (long)regs->pstate); #endif +#ifdef CONFIG_ARM + pr_info("<%s> pre_handler: p->addr = 0x%p, pc = 0x%lx, cpsr = 0x%lx\n", + p->symbol_name, p->addr, (long)regs->ARM_pc, (long)regs->ARM_cpsr); +#endif #ifdef CONFIG_S390 pr_info("<%s> pre_handler: p->addr, 0x%p, ip = 0x%lx, flags = 0x%lx\n", p->symbol_name, p->addr, regs->psw.addr, regs->flags); @@ -72,6 +76,10 @@ static void __kprobes handler_post(struct kprobe *p, struct pt_regs *regs, pr_info("<%s> post_handler: p->addr = 0x%p, pstate = 0x%lx\n", p->symbol_name, p->addr, (long)regs->pstate); #endif +#ifdef CONFIG_ARM + pr_info("<%s> post_handler: p->addr = 0x%p, cpsr = 0x%lx\n", + p->symbol_name, p->addr, (long)regs->ARM_cpsr); +#endif #ifdef CONFIG_S390 pr_info("<%s> pre_handler: p->addr, 0x%p, flags = 0x%lx\n", p->symbol_name, p->addr, regs->flags); From 1a63f9cce7b7872567f9a40708689e263d948b21 Mon Sep 17 00:00:00 2001 From: Milan Lakhani Date: Tue, 12 Jan 2021 13:41:01 +0000 Subject: [PATCH 056/414] docs: Remove make headers_check from checklist Remove the make headers_check step from submit-checklist.rst as this is no longer functional. Signed-off-by: Milan Lakhani Link: https://lore.kernel.org/r/1610458861-2832-1-git-send-email-milan.lakhani@codethink.co.uk Signed-off-by: Jonathan Corbet --- Documentation/process/submit-checklist.rst | 14 ++++++-------- 1 file changed, 6 insertions(+), 8 deletions(-) diff --git a/Documentation/process/submit-checklist.rst b/Documentation/process/submit-checklist.rst index 230ee42f872f..f709beaf02c9 100644 --- a/Documentation/process/submit-checklist.rst +++ b/Documentation/process/submit-checklist.rst @@ -89,30 +89,28 @@ and elsewhere regarding submitting Linux kernel patches. Patches that change userspace interfaces should be CCed to linux-api@vger.kernel.org. -19) Check that it all passes ``make headers_check``. - -20) Has been checked with injection of at least slab and page-allocation +19) Has been checked with injection of at least slab and page-allocation failures. See ``Documentation/fault-injection/``. If the new code is substantial, addition of subsystem-specific fault injection might be appropriate. -21) Newly-added code has been compiled with ``gcc -W`` (use +20) Newly-added code has been compiled with ``gcc -W`` (use ``make EXTRA_CFLAGS=-W``). This will generate lots of noise, but is good for finding bugs like "warning: comparison between signed and unsigned". -22) Tested after it has been merged into the -mm patchset to make sure +21) Tested after it has been merged into the -mm patchset to make sure that it still works with all of the other queued patches and various changes in the VM, VFS, and other subsystems. -23) All memory barriers {e.g., ``barrier()``, ``rmb()``, ``wmb()``} need a +22) All memory barriers {e.g., ``barrier()``, ``rmb()``, ``wmb()``} need a comment in the source code that explains the logic of what they are doing and why. -24) If any ioctl's are added by the patch, then also update +23) If any ioctl's are added by the patch, then also update ``Documentation/userspace-api/ioctl/ioctl-number.rst``. -25) If your modified source code depends on or uses any of the kernel +24) If your modified source code depends on or uses any of the kernel APIs or features that are related to the following ``Kconfig`` symbols, then test multiple builds with the related ``Kconfig`` symbols disabled and/or ``=m`` (if that option is available) [not all of these at the From 96c0f7c0b9ac00fff928b9e81d134639a4e4ba56 Mon Sep 17 00:00:00 2001 From: Rolf Eike Beer Date: Tue, 12 Jan 2021 14:19:36 +0100 Subject: [PATCH 057/414] Documentation: fix typos in split page table lock description Signed-off-by: Rolf Eike Beer Link: https://lore.kernel.org/r/2338863.uUFqZTUbry@devpool47 Signed-off-by: Jonathan Corbet --- Documentation/vm/split_page_table_lock.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Documentation/vm/split_page_table_lock.rst b/Documentation/vm/split_page_table_lock.rst index ff51f4a5494d..c08919662704 100644 --- a/Documentation/vm/split_page_table_lock.rst +++ b/Documentation/vm/split_page_table_lock.rst @@ -32,7 +32,7 @@ There are helpers to lock/unlock a table and other accessor functions: Split page table lock for PTE tables is enabled compile-time if CONFIG_SPLIT_PTLOCK_CPUS (usually 4) is less or equal to NR_CPUS. -If split lock is disabled, all tables guaded by mm->page_table_lock. +If split lock is disabled, all tables are guarded by mm->page_table_lock. Split page table lock for PMD tables is enabled, if it's enabled for PTE tables and the architecture supports it (see below). From 52042e2db45290f6a512d525518488b7bf143531 Mon Sep 17 00:00:00 2001 From: Mauro Carvalho Chehab Date: Thu, 14 Jan 2021 09:04:47 +0100 Subject: [PATCH 058/414] scripts: kernel-doc: validate kernel-doc markup with the actual names Kernel-doc currently expects that the kernel-doc markup to come just before the function/enum/struct/union/typedef prototype. Yet, if it find things like: /** * refcount_add - add a value to a refcount * @i: the value to add to the refcount * @r: the refcount */ static inline void __refcount_add(int i, refcount_t *r, int *oldp); static inline void refcount_add(int i, refcount_t *r); Kernel-doc will do the wrong thing: foobar.h:6: warning: Function parameter or member 'oldp' not described in '__refcount_add' .. c:function:: void __refcount_add (int i, refcount_t *r, int *oldp) add a value to a refcount **Parameters** ``int i`` the value to add to the refcount ``refcount_t *r`` the refcount ``int *oldp`` *undescribed* Basically, it will document "__refcount_add" with the kernel-doc markup for refcount_add. If both functions have the same arguments, this won't even produce any warning! Add a logic to check if the kernel-doc identifier matches the actual name of the C function or data structure that will be documented. Signed-off-by: Mauro Carvalho Chehab Link: https://lore.kernel.org/r/081546f141a496d6cabb99a4adc140444c705e93.1610610937.git.mchehab+huawei@kernel.org Signed-off-by: Jonathan Corbet --- scripts/kernel-doc | 62 ++++++++++++++++++++++++++++++++++------------ 1 file changed, 46 insertions(+), 16 deletions(-) diff --git a/scripts/kernel-doc b/scripts/kernel-doc index 6325bec3f66f..a9a92e623dbc 100755 --- a/scripts/kernel-doc +++ b/scripts/kernel-doc @@ -382,6 +382,9 @@ my $inline_doc_state; # 'function', 'struct', 'union', 'enum', 'typedef' my $decl_type; +# Name of the kernel-doc identifier for non-DOC markups +my $identifier; + my $doc_start = '^/\*\*\s*$'; # Allow whitespace at end of comment start. my $doc_end = '\*/'; my $doc_com = '\s*\*\s*'; @@ -1203,6 +1206,11 @@ sub dump_struct($$) { $declaration_name = $2; my $members = $3; + if ($identifier ne $declaration_name) { + print STDERR "${file}:$.: warning: expecting prototype for $decl_type $identifier. Prototype was for $decl_type $declaration_name instead\n"; + return; + } + # ignore members marked private: $members =~ s/\/\*\s*private:.*?\/\*\s*public:.*?\*\///gosi; $members =~ s/\/\*\s*private:.*//gosi; @@ -1391,6 +1399,11 @@ sub dump_enum($$) { } if ($members) { + if ($identifier ne $declaration_name) { + print STDERR "${file}:$.: warning: expecting prototype for enum $identifier. Prototype was for enum $declaration_name instead\n"; + return; + } + my %_members; $members =~ s/\s+$//; @@ -1451,6 +1464,11 @@ sub dump_typedef($$) { my $args = $3; $return_type =~ s/^\s+//; + if ($identifier ne $declaration_name) { + print STDERR "${file}:$.: warning: expecting prototype for typedef $identifier. Prototype was for typedef $declaration_name instead\n"; + return; + } + create_parameterlist($args, ',', $file, $declaration_name); output_declaration($declaration_name, @@ -1477,6 +1495,11 @@ sub dump_typedef($$) { if ($x =~ /typedef.*\s+(\w+)\s*;/) { $declaration_name = $1; + if ($identifier ne $declaration_name) { + print STDERR "${file}:$.: warning: expecting prototype for typedef $identifier. Prototype was for typedef $declaration_name instead\n"; + return; + } + output_declaration($declaration_name, 'typedef', {'typedef' => $declaration_name, @@ -1796,6 +1819,11 @@ sub dump_function($$) { return; } + if ($identifier ne $declaration_name) { + print STDERR "${file}:$.: warning: expecting prototype for $identifier(). Prototype was for $declaration_name() instead\n"; + return; + } + my $prms = join " ", @parameterlist; check_sections($file, $declaration_name, "function", $sectcheck, $prms); @@ -1878,6 +1906,7 @@ sub tracepoint_munge($) { "$prototype\n"; } else { $prototype = "static inline void trace_$tracepointname($tracepointargs)"; + $identifier = "trace_$identifier"; } } @@ -2041,7 +2070,6 @@ sub process_normal() { # sub process_name($$) { my $file = shift; - my $identifier; my $descr; if (/$doc_block/o) { @@ -2054,12 +2082,19 @@ sub process_name($$) { } else { $section = $1; } - } - elsif (/$doc_decl/o) { + } elsif (/$doc_decl/o) { $identifier = $1; - if (/\s*([\w\s]+?)(\(\))?\s*-/) { + if (/\s*([\w\s]+?)(\(\))?\s*([-:].*)?$/) { $identifier = $1; } + if ($identifier =~ m/^(struct|union|enum|typedef)\b\s*(\S*)/) { + $decl_type = $1; + $identifier = $2; + } else { + $decl_type = 'function'; + $identifier =~ s/^define\s+//; + } + $identifier =~ s/\s+$//; $state = STATE_BODY; # if there's no @param blocks need to set up default section @@ -2067,7 +2102,7 @@ sub process_name($$) { $contents = ""; $section = $section_default; $new_start_line = $. + 1; - if (/-(.*)/) { + if (/[-:](.*)/) { # strip leading/trailing/multiple spaces $descr= $1; $descr =~ s/^\s*//; @@ -2085,20 +2120,15 @@ sub process_name($$) { ++$warnings; } - if ($identifier =~ m/^struct\b/) { - $decl_type = 'struct'; - } elsif ($identifier =~ m/^union\b/) { - $decl_type = 'union'; - } elsif ($identifier =~ m/^enum\b/) { - $decl_type = 'enum'; - } elsif ($identifier =~ m/^typedef\b/) { - $decl_type = 'typedef'; - } else { - $decl_type = 'function'; + if ($identifier eq "") { + print STDERR "${file}:$.: warning: wrong kernel-doc identifier on line:\n"; + print STDERR $_; + ++$warnings; + $state = STATE_NORMAL; } if ($verbose) { - print STDERR "${file}:$.: info: Scanning doc for $identifier\n"; + print STDERR "${file}:$.: info: Scanning doc for $decl_type $identifier\n"; } } else { print STDERR "${file}:$.: warning: Cannot understand $_ on line $.", From f6919d56388c95dba2e630670a77c380e4616c50 Mon Sep 17 00:00:00 2001 From: Selvin Xavier Date: Thu, 7 Jan 2021 01:43:27 -0800 Subject: [PATCH 059/414] RDMA/bnxt_re: Code refactor while populating user MRs Refactor code that populates MR page buffer list. Instead of allocating a pbl_tbl to hold the buffer list, pass the struct ib_umem directly to bnxt_qplib_alloc_init_hwq() as done for other user space memories. Fix the PBL level to handle the above mentioned change. Also, remove an unwanted flag from the input to bnxt_qplib_reg_mr() function. Link: https://lore.kernel.org/r/1610012608-14528-2-git-send-email-selvin.xavier@broadcom.com Signed-off-by: Devesh Sharma Signed-off-by: Selvin Xavier Signed-off-by: Jason Gunthorpe --- drivers/infiniband/hw/bnxt_re/ib_verbs.c | 41 ++++-------------------- drivers/infiniband/hw/bnxt_re/qplib_sp.c | 17 ++++------ drivers/infiniband/hw/bnxt_re/qplib_sp.h | 2 +- 3 files changed, 13 insertions(+), 47 deletions(-) diff --git a/drivers/infiniband/hw/bnxt_re/ib_verbs.c b/drivers/infiniband/hw/bnxt_re/ib_verbs.c index 401bdc9e931e..00d2a5a678a9 100644 --- a/drivers/infiniband/hw/bnxt_re/ib_verbs.c +++ b/drivers/infiniband/hw/bnxt_re/ib_verbs.c @@ -469,7 +469,6 @@ static int bnxt_re_create_fence_mr(struct bnxt_re_pd *pd) struct bnxt_re_mr *mr = NULL; dma_addr_t dma_addr = 0; struct ib_mw *mw; - u64 pbl_tbl; int rc; dma_addr = dma_map_single(dev, fence->va, BNXT_RE_FENCE_BYTES, @@ -504,9 +503,8 @@ static int bnxt_re_create_fence_mr(struct bnxt_re_pd *pd) mr->ib_mr.lkey = mr->qplib_mr.lkey; mr->qplib_mr.va = (u64)(unsigned long)fence->va; mr->qplib_mr.total_size = BNXT_RE_FENCE_BYTES; - pbl_tbl = dma_addr; - rc = bnxt_qplib_reg_mr(&rdev->qplib_res, &mr->qplib_mr, &pbl_tbl, - BNXT_RE_FENCE_PBL_SIZE, false, PAGE_SIZE); + rc = bnxt_qplib_reg_mr(&rdev->qplib_res, &mr->qplib_mr, NULL, + BNXT_RE_FENCE_PBL_SIZE, PAGE_SIZE); if (rc) { ibdev_err(&rdev->ibdev, "Failed to register fence-MR\n"); goto fail; @@ -3589,7 +3587,6 @@ struct ib_mr *bnxt_re_get_dma_mr(struct ib_pd *ib_pd, int mr_access_flags) struct bnxt_re_pd *pd = container_of(ib_pd, struct bnxt_re_pd, ib_pd); struct bnxt_re_dev *rdev = pd->rdev; struct bnxt_re_mr *mr; - u64 pbl = 0; int rc; mr = kzalloc(sizeof(*mr), GFP_KERNEL); @@ -3608,7 +3605,7 @@ struct ib_mr *bnxt_re_get_dma_mr(struct ib_pd *ib_pd, int mr_access_flags) mr->qplib_mr.hwq.level = PBL_LVL_MAX; mr->qplib_mr.total_size = -1; /* Infinte length */ - rc = bnxt_qplib_reg_mr(&rdev->qplib_res, &mr->qplib_mr, &pbl, 0, false, + rc = bnxt_qplib_reg_mr(&rdev->qplib_res, &mr->qplib_mr, NULL, 0, PAGE_SIZE); if (rc) goto fail_mr; @@ -3779,19 +3776,6 @@ int bnxt_re_dealloc_mw(struct ib_mw *ib_mw) return rc; } -static int fill_umem_pbl_tbl(struct ib_umem *umem, u64 *pbl_tbl_orig, - int page_shift) -{ - u64 *pbl_tbl = pbl_tbl_orig; - u64 page_size = BIT_ULL(page_shift); - struct ib_block_iter biter; - - rdma_umem_for_each_dma_block(umem, &biter, page_size) - *pbl_tbl++ = rdma_block_iter_dma_address(&biter); - - return pbl_tbl - pbl_tbl_orig; -} - /* uverbs */ struct ib_mr *bnxt_re_reg_user_mr(struct ib_pd *ib_pd, u64 start, u64 length, u64 virt_addr, int mr_access_flags, @@ -3801,7 +3785,6 @@ struct ib_mr *bnxt_re_reg_user_mr(struct ib_pd *ib_pd, u64 start, u64 length, struct bnxt_re_dev *rdev = pd->rdev; struct bnxt_re_mr *mr; struct ib_umem *umem; - u64 *pbl_tbl = NULL; unsigned long page_size; int umem_pgs, rc; @@ -3855,30 +3838,18 @@ struct ib_mr *bnxt_re_reg_user_mr(struct ib_pd *ib_pd, u64 start, u64 length, } umem_pgs = ib_umem_num_dma_blocks(umem, page_size); - pbl_tbl = kcalloc(umem_pgs, sizeof(*pbl_tbl), GFP_KERNEL); - if (!pbl_tbl) { - rc = -ENOMEM; - goto free_umem; - } - - /* Map umem buf ptrs to the PBL */ - umem_pgs = fill_umem_pbl_tbl(umem, pbl_tbl, order_base_2(page_size)); - rc = bnxt_qplib_reg_mr(&rdev->qplib_res, &mr->qplib_mr, pbl_tbl, - umem_pgs, false, page_size); + rc = bnxt_qplib_reg_mr(&rdev->qplib_res, &mr->qplib_mr, umem, + umem_pgs, page_size); if (rc) { ibdev_err(&rdev->ibdev, "Failed to register user MR"); - goto fail; + goto free_umem; } - kfree(pbl_tbl); - mr->ib_mr.lkey = mr->qplib_mr.lkey; mr->ib_mr.rkey = mr->qplib_mr.lkey; atomic_inc(&rdev->mr_count); return &mr->ib_mr; -fail: - kfree(pbl_tbl); free_umem: ib_umem_release(umem); free_mrw: diff --git a/drivers/infiniband/hw/bnxt_re/qplib_sp.c b/drivers/infiniband/hw/bnxt_re/qplib_sp.c index 6316179583a6..22cb46a7e147 100644 --- a/drivers/infiniband/hw/bnxt_re/qplib_sp.c +++ b/drivers/infiniband/hw/bnxt_re/qplib_sp.c @@ -650,16 +650,15 @@ int bnxt_qplib_dereg_mrw(struct bnxt_qplib_res *res, struct bnxt_qplib_mrw *mrw, } int bnxt_qplib_reg_mr(struct bnxt_qplib_res *res, struct bnxt_qplib_mrw *mr, - u64 *pbl_tbl, int num_pbls, bool block, u32 buf_pg_size) + struct ib_umem *umem, int num_pbls, u32 buf_pg_size) { struct bnxt_qplib_rcfw *rcfw = res->rcfw; struct bnxt_qplib_hwq_attr hwq_attr = {}; struct bnxt_qplib_sg_info sginfo = {}; struct creq_register_mr_resp resp; struct cmdq_register_mr req; - int pg_ptrs, pages, i, rc; u16 cmd_flags = 0, level; - dma_addr_t **pbl_ptr; + int pages, rc, pg_ptrs; u32 pg_size; if (num_pbls) { @@ -683,9 +682,10 @@ int bnxt_qplib_reg_mr(struct bnxt_qplib_res *res, struct bnxt_qplib_mrw *mr, /* Use system PAGE_SIZE */ hwq_attr.res = res; hwq_attr.depth = pages; - hwq_attr.stride = PAGE_SIZE; + hwq_attr.stride = buf_pg_size; hwq_attr.type = HWQ_TYPE_MR; hwq_attr.sginfo = &sginfo; + hwq_attr.sginfo->umem = umem; hwq_attr.sginfo->npages = pages; hwq_attr.sginfo->pgsize = PAGE_SIZE; hwq_attr.sginfo->pgshft = PAGE_SHIFT; @@ -695,11 +695,6 @@ int bnxt_qplib_reg_mr(struct bnxt_qplib_res *res, struct bnxt_qplib_mrw *mr, "SP: Reg MR memory allocation failed\n"); return -ENOMEM; } - /* Write to the hwq */ - pbl_ptr = (dma_addr_t **)mr->hwq.pbl_ptr; - for (i = 0; i < num_pbls; i++) - pbl_ptr[PTR_PG(i)][PTR_IDX(i)] = - (pbl_tbl[i] & PAGE_MASK) | PTU_PTE_VALID; } RCFW_CMD_PREP(req, REGISTER_MR, cmd_flags); @@ -711,7 +706,7 @@ int bnxt_qplib_reg_mr(struct bnxt_qplib_res *res, struct bnxt_qplib_mrw *mr, req.pbl = 0; pg_size = PAGE_SIZE; } else { - level = mr->hwq.level + 1; + level = mr->hwq.level; req.pbl = cpu_to_le64(mr->hwq.pbl[PBL_LVL_0].pg_map_arr[0]); } pg_size = buf_pg_size ? buf_pg_size : PAGE_SIZE; @@ -728,7 +723,7 @@ int bnxt_qplib_reg_mr(struct bnxt_qplib_res *res, struct bnxt_qplib_mrw *mr, req.mr_size = cpu_to_le64(mr->total_size); rc = bnxt_qplib_rcfw_send_message(rcfw, (void *)&req, - (void *)&resp, NULL, block); + (void *)&resp, NULL, false); if (rc) goto fail; diff --git a/drivers/infiniband/hw/bnxt_re/qplib_sp.h b/drivers/infiniband/hw/bnxt_re/qplib_sp.h index 967890cd81f2..bc228340684f 100644 --- a/drivers/infiniband/hw/bnxt_re/qplib_sp.h +++ b/drivers/infiniband/hw/bnxt_re/qplib_sp.h @@ -254,7 +254,7 @@ int bnxt_qplib_alloc_mrw(struct bnxt_qplib_res *res, int bnxt_qplib_dereg_mrw(struct bnxt_qplib_res *res, struct bnxt_qplib_mrw *mrw, bool block); int bnxt_qplib_reg_mr(struct bnxt_qplib_res *res, struct bnxt_qplib_mrw *mr, - u64 *pbl_tbl, int num_pbls, bool block, u32 buf_pg_size); + struct ib_umem *umem, int num_pbls, u32 buf_pg_size); int bnxt_qplib_free_mrw(struct bnxt_qplib_res *res, struct bnxt_qplib_mrw *mr); int bnxt_qplib_alloc_fast_reg_mr(struct bnxt_qplib_res *res, struct bnxt_qplib_mrw *mr, int max); From c930af5ab4311f1f47601db289debee20ac19c1c Mon Sep 17 00:00:00 2001 From: Selvin Xavier Date: Thu, 7 Jan 2021 01:43:28 -0800 Subject: [PATCH 060/414] RDMA/bnxt_re: Allow bigger MR creation Allow users to create bigger MRs. Remove the check that prevented creating MRs with number of pages more than 512. Link: https://lore.kernel.org/r/1610012608-14528-3-git-send-email-selvin.xavier@broadcom.com Signed-off-by: Devesh Sharma Signed-off-by: Selvin Xavier Signed-off-by: Jason Gunthorpe --- drivers/infiniband/hw/bnxt_re/ib_verbs.c | 8 -------- drivers/infiniband/hw/bnxt_re/qplib_sp.c | 14 ++------------ 2 files changed, 2 insertions(+), 20 deletions(-) diff --git a/drivers/infiniband/hw/bnxt_re/ib_verbs.c b/drivers/infiniband/hw/bnxt_re/ib_verbs.c index 00d2a5a678a9..ba515efd4fdc 100644 --- a/drivers/infiniband/hw/bnxt_re/ib_verbs.c +++ b/drivers/infiniband/hw/bnxt_re/ib_verbs.c @@ -3829,14 +3829,6 @@ struct ib_mr *bnxt_re_reg_user_mr(struct ib_pd *ib_pd, u64 start, u64 length, } mr->qplib_mr.total_size = length; - if (page_size == BNXT_RE_PAGE_SIZE_4K && - length > BNXT_RE_MAX_MR_SIZE_LOW) { - ibdev_err(&rdev->ibdev, "Requested MR Sz:%llu Max sup:%llu", - length, (u64)BNXT_RE_MAX_MR_SIZE_LOW); - rc = -EINVAL; - goto free_umem; - } - umem_pgs = ib_umem_num_dma_blocks(umem, page_size); rc = bnxt_qplib_reg_mr(&rdev->qplib_res, &mr->qplib_mr, umem, umem_pgs, page_size); diff --git a/drivers/infiniband/hw/bnxt_re/qplib_sp.c b/drivers/infiniband/hw/bnxt_re/qplib_sp.c index 22cb46a7e147..049b3576302b 100644 --- a/drivers/infiniband/hw/bnxt_re/qplib_sp.c +++ b/drivers/infiniband/hw/bnxt_re/qplib_sp.c @@ -658,24 +658,14 @@ int bnxt_qplib_reg_mr(struct bnxt_qplib_res *res, struct bnxt_qplib_mrw *mr, struct creq_register_mr_resp resp; struct cmdq_register_mr req; u16 cmd_flags = 0, level; - int pages, rc, pg_ptrs; + int pages, rc; u32 pg_size; if (num_pbls) { + pages = roundup_pow_of_two(num_pbls); /* Allocate memory for the non-leaf pages to store buf ptrs. * Non-leaf pages always uses system PAGE_SIZE */ - pg_ptrs = roundup_pow_of_two(num_pbls); - pages = pg_ptrs >> MAX_PBL_LVL_1_PGS_SHIFT; - if (!pages) - pages++; - - if (pages > MAX_PBL_LVL_1_PGS) { - dev_err(&res->pdev->dev, - "SP: Reg MR: pages requested (0x%x) exceeded max (0x%x)\n", - pages, MAX_PBL_LVL_1_PGS); - return -ENOMEM; - } /* Free the hwq if it already exist, must be a rereg */ if (mr->hwq.max_elements) bnxt_qplib_free_hwq(res, &mr->hwq); From 8ebe0e2a7efa7d94e2c3306a76c2b0c387465595 Mon Sep 17 00:00:00 2001 From: Max Gurtovoy Date: Sun, 10 Jan 2021 11:19:01 +0000 Subject: [PATCH 061/414] IB/isert: Remove unneeded new lines The Linux convention is to have only 1 new line between functions. Link: https://lore.kernel.org/r/20210110111903.486681-1-mgurtovoy@nvidia.com Reviewed-by: Israel Rukshin Signed-off-by: Max Gurtovoy Reviewed-by: Sagi Grimberg Signed-off-by: Jason Gunthorpe --- drivers/infiniband/ulp/isert/ib_isert.c | 2 -- 1 file changed, 2 deletions(-) diff --git a/drivers/infiniband/ulp/isert/ib_isert.c b/drivers/infiniband/ulp/isert/ib_isert.c index 2ba27221ea85..5958929b7dec 100644 --- a/drivers/infiniband/ulp/isert/ib_isert.c +++ b/drivers/infiniband/ulp/isert/ib_isert.c @@ -71,7 +71,6 @@ static int isert_sg_tablesize_set(const char *val, const struct kernel_param *kp return param_set_int(val, kp); } - static inline bool isert_prot_cmd(struct isert_conn *conn, struct se_cmd *cmd) { @@ -79,7 +78,6 @@ isert_prot_cmd(struct isert_conn *conn, struct se_cmd *cmd) cmd->prot_op != TARGET_PROT_NORMAL); } - static void isert_qp_event_callback(struct ib_event *e, void *context) { From ec53a2a6540d6025f0ff205af0ba635c9b1cce02 Mon Sep 17 00:00:00 2001 From: Max Gurtovoy Date: Sun, 10 Jan 2021 11:19:02 +0000 Subject: [PATCH 062/414] IB/isert: Remove unneeded semicolon No need to add semicolon after closing bracket. Link: https://lore.kernel.org/r/20210110111903.486681-2-mgurtovoy@nvidia.com Reviewed-by: Israel Rukshin Signed-off-by: Max Gurtovoy Reviewed-by: Sagi Grimberg Signed-off-by: Jason Gunthorpe --- drivers/infiniband/ulp/isert/ib_isert.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/infiniband/ulp/isert/ib_isert.c b/drivers/infiniband/ulp/isert/ib_isert.c index 5958929b7dec..96514f675427 100644 --- a/drivers/infiniband/ulp/isert/ib_isert.c +++ b/drivers/infiniband/ulp/isert/ib_isert.c @@ -1991,7 +1991,7 @@ isert_set_dif_domain(struct se_cmd *se_cmd, struct ib_sig_domain *domain) if (se_cmd->prot_type == TARGET_DIF_TYPE1_PROT || se_cmd->prot_type == TARGET_DIF_TYPE2_PROT) domain->sig.dif.ref_remap = true; -}; +} static int isert_set_sig_attrs(struct se_cmd *se_cmd, struct ib_sig_attrs *sig_attrs) From a6dc16b6996388d016df83fb92eae16242ab7ac5 Mon Sep 17 00:00:00 2001 From: Max Gurtovoy Date: Sun, 10 Jan 2021 11:19:03 +0000 Subject: [PATCH 063/414] IB/isert: Simplify signature cap check Use if/else clause instead of "condition ? val1 : val2" to make the code cleaner and simpler. Link: https://lore.kernel.org/r/20210110111903.486681-3-mgurtovoy@nvidia.com Reviewed-by: Israel Rukshin Signed-off-by: Max Gurtovoy Acked-by: Sagi Grimberg Signed-off-by: Jason Gunthorpe --- drivers/infiniband/ulp/isert/ib_isert.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/drivers/infiniband/ulp/isert/ib_isert.c b/drivers/infiniband/ulp/isert/ib_isert.c index 96514f675427..7305ed8976c2 100644 --- a/drivers/infiniband/ulp/isert/ib_isert.c +++ b/drivers/infiniband/ulp/isert/ib_isert.c @@ -230,8 +230,10 @@ isert_create_device_ib_res(struct isert_device *device) } /* Check signature cap */ - device->pi_capable = ib_dev->attrs.device_cap_flags & - IB_DEVICE_INTEGRITY_HANDOVER ? true : false; + if (ib_dev->attrs.device_cap_flags & IB_DEVICE_INTEGRITY_HANDOVER) + device->pi_capable = true; + else + device->pi_capable = false; return 0; } From 8798e4ad0abe0ba1221928a46561981c510be0c6 Mon Sep 17 00:00:00 2001 From: Yishai Hadas Date: Wed, 30 Dec 2020 15:01:19 +0200 Subject: [PATCH 064/414] RDMA/mlx5: Use the correct obj_id upon DEVX TIR creation Use the correct obj_id upon DEVX TIR creation by strictly taking the tirn 24 bits and not the general obj_id which is 32 bits. Fixes: 7efce3691d33 ("IB/mlx5: Add obj create and destroy functionality") Link: https://lore.kernel.org/r/20201230130121.180350-2-leon@kernel.org Signed-off-by: Yishai Hadas Signed-off-by: Leon Romanovsky Signed-off-by: Jason Gunthorpe --- drivers/infiniband/hw/mlx5/devx.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/drivers/infiniband/hw/mlx5/devx.c b/drivers/infiniband/hw/mlx5/devx.c index 819c142857d6..ff8e17d7f7ca 100644 --- a/drivers/infiniband/hw/mlx5/devx.c +++ b/drivers/infiniband/hw/mlx5/devx.c @@ -1064,7 +1064,9 @@ static void devx_obj_build_destroy_cmd(void *in, void *out, void *din, MLX5_SET(general_obj_in_cmd_hdr, din, opcode, MLX5_CMD_OP_DESTROY_RQT); break; case MLX5_CMD_OP_CREATE_TIR: - MLX5_SET(general_obj_in_cmd_hdr, din, opcode, MLX5_CMD_OP_DESTROY_TIR); + *obj_id = MLX5_GET(create_tir_out, out, tirn); + MLX5_SET(destroy_tir_in, din, opcode, MLX5_CMD_OP_DESTROY_TIR); + MLX5_SET(destroy_tir_in, din, tirn, *obj_id); break; case MLX5_CMD_OP_CREATE_TIS: MLX5_SET(general_obj_in_cmd_hdr, din, opcode, MLX5_CMD_OP_DESTROY_TIS); From 1368ead04c361bb4595ace9122c79dd75e54a650 Mon Sep 17 00:00:00 2001 From: Yishai Hadas Date: Wed, 30 Dec 2020 15:01:21 +0200 Subject: [PATCH 065/414] RDMA/mlx5: Use strict get/set operations for obj_id Use strict get/set operations for obj_id based on the specific object type. This comes to prevent any miss match between the general header to the legacy header commands. Link: https://lore.kernel.org/r/20201230130121.180350-4-leon@kernel.org Signed-off-by: Yishai Hadas Signed-off-by: Leon Romanovsky Signed-off-by: Jason Gunthorpe --- drivers/infiniband/hw/mlx5/devx.c | 199 ++++++++++++++++++++++-------- 1 file changed, 146 insertions(+), 53 deletions(-) diff --git a/drivers/infiniband/hw/mlx5/devx.c b/drivers/infiniband/hw/mlx5/devx.c index ff8e17d7f7ca..e6d2709eb09e 100644 --- a/drivers/infiniband/hw/mlx5/devx.c +++ b/drivers/infiniband/hw/mlx5/devx.c @@ -94,13 +94,13 @@ struct devx_umem { struct mlx5_core_dev *mdev; struct ib_umem *umem; u32 dinlen; - u32 dinbox[MLX5_ST_SZ_DW(general_obj_in_cmd_hdr)]; + u32 dinbox[MLX5_ST_SZ_DW(destroy_umem_in)]; }; struct devx_umem_reg_cmd { void *in; u32 inlen; - u32 out[MLX5_ST_SZ_DW(general_obj_out_cmd_hdr)]; + u32 out[MLX5_ST_SZ_DW(create_umem_out)]; }; static struct mlx5_ib_ucontext * @@ -111,8 +111,8 @@ devx_ufile2uctx(const struct uverbs_attr_bundle *attrs) int mlx5_ib_devx_create(struct mlx5_ib_dev *dev, bool is_user) { - u32 in[MLX5_ST_SZ_DW(create_uctx_in)] = {0}; - u32 out[MLX5_ST_SZ_DW(general_obj_out_cmd_hdr)] = {0}; + u32 in[MLX5_ST_SZ_DW(create_uctx_in)] = {}; + u32 out[MLX5_ST_SZ_DW(create_uctx_out)] = {}; void *uctx; int err; u16 uid; @@ -138,14 +138,14 @@ int mlx5_ib_devx_create(struct mlx5_ib_dev *dev, bool is_user) if (err) return err; - uid = MLX5_GET(general_obj_out_cmd_hdr, out, obj_id); + uid = MLX5_GET(create_uctx_out, out, uid); return uid; } void mlx5_ib_devx_destroy(struct mlx5_ib_dev *dev, u16 uid) { - u32 in[MLX5_ST_SZ_DW(destroy_uctx_in)] = {0}; - u32 out[MLX5_ST_SZ_DW(general_obj_out_cmd_hdr)] = {0}; + u32 in[MLX5_ST_SZ_DW(destroy_uctx_in)] = {}; + u32 out[MLX5_ST_SZ_DW(destroy_uctx_out)] = {}; MLX5_SET(destroy_uctx_in, in, opcode, MLX5_CMD_OP_DESTROY_UCTX); MLX5_SET(destroy_uctx_in, in, uid, uid); @@ -288,6 +288,80 @@ static u64 get_enc_obj_id(u32 opcode, u32 obj_id) return ((u64)opcode << 32) | obj_id; } +static u32 devx_get_created_obj_id(const void *in, const void *out, u16 opcode) +{ + switch (opcode) { + case MLX5_CMD_OP_CREATE_GENERAL_OBJECT: + return MLX5_GET(general_obj_out_cmd_hdr, out, obj_id); + case MLX5_CMD_OP_CREATE_UMEM: + return MLX5_GET(create_umem_out, out, umem_id); + case MLX5_CMD_OP_CREATE_MKEY: + return MLX5_GET(create_mkey_out, out, mkey_index); + case MLX5_CMD_OP_CREATE_CQ: + return MLX5_GET(create_cq_out, out, cqn); + case MLX5_CMD_OP_ALLOC_PD: + return MLX5_GET(alloc_pd_out, out, pd); + case MLX5_CMD_OP_ALLOC_TRANSPORT_DOMAIN: + return MLX5_GET(alloc_transport_domain_out, out, + transport_domain); + case MLX5_CMD_OP_CREATE_RMP: + return MLX5_GET(create_rmp_out, out, rmpn); + case MLX5_CMD_OP_CREATE_SQ: + return MLX5_GET(create_sq_out, out, sqn); + case MLX5_CMD_OP_CREATE_RQ: + return MLX5_GET(create_rq_out, out, rqn); + case MLX5_CMD_OP_CREATE_RQT: + return MLX5_GET(create_rqt_out, out, rqtn); + case MLX5_CMD_OP_CREATE_TIR: + return MLX5_GET(create_tir_out, out, tirn); + case MLX5_CMD_OP_CREATE_TIS: + return MLX5_GET(create_tis_out, out, tisn); + case MLX5_CMD_OP_ALLOC_Q_COUNTER: + return MLX5_GET(alloc_q_counter_out, out, counter_set_id); + case MLX5_CMD_OP_CREATE_FLOW_TABLE: + return MLX5_GET(create_flow_table_out, out, table_id); + case MLX5_CMD_OP_CREATE_FLOW_GROUP: + return MLX5_GET(create_flow_group_out, out, group_id); + case MLX5_CMD_OP_SET_FLOW_TABLE_ENTRY: + return MLX5_GET(set_fte_in, in, flow_index); + case MLX5_CMD_OP_ALLOC_FLOW_COUNTER: + return MLX5_GET(alloc_flow_counter_out, out, flow_counter_id); + case MLX5_CMD_OP_ALLOC_PACKET_REFORMAT_CONTEXT: + return MLX5_GET(alloc_packet_reformat_context_out, out, + packet_reformat_id); + case MLX5_CMD_OP_ALLOC_MODIFY_HEADER_CONTEXT: + return MLX5_GET(alloc_modify_header_context_out, out, + modify_header_id); + case MLX5_CMD_OP_CREATE_SCHEDULING_ELEMENT: + return MLX5_GET(create_scheduling_element_out, out, + scheduling_element_id); + case MLX5_CMD_OP_ADD_VXLAN_UDP_DPORT: + return MLX5_GET(add_vxlan_udp_dport_in, in, vxlan_udp_port); + case MLX5_CMD_OP_SET_L2_TABLE_ENTRY: + return MLX5_GET(set_l2_table_entry_in, in, table_index); + case MLX5_CMD_OP_CREATE_QP: + return MLX5_GET(create_qp_out, out, qpn); + case MLX5_CMD_OP_CREATE_SRQ: + return MLX5_GET(create_srq_out, out, srqn); + case MLX5_CMD_OP_CREATE_XRC_SRQ: + return MLX5_GET(create_xrc_srq_out, out, xrc_srqn); + case MLX5_CMD_OP_CREATE_DCT: + return MLX5_GET(create_dct_out, out, dctn); + case MLX5_CMD_OP_CREATE_XRQ: + return MLX5_GET(create_xrq_out, out, xrqn); + case MLX5_CMD_OP_ATTACH_TO_MCG: + return MLX5_GET(attach_to_mcg_in, in, qpn); + case MLX5_CMD_OP_ALLOC_XRCD: + return MLX5_GET(alloc_xrcd_out, out, xrcd); + case MLX5_CMD_OP_CREATE_PSV: + return MLX5_GET(create_psv_out, out, psv0_index); + default: + /* The entry must match to one of the devx_is_obj_create_cmd */ + WARN_ON(true); + return 0; + } +} + static u64 devx_get_obj_id(const void *in) { u16 opcode = MLX5_GET(general_obj_in_cmd_hdr, in, opcode); @@ -399,8 +473,8 @@ static u64 devx_get_obj_id(const void *in) break; case MLX5_CMD_OP_QUERY_MODIFY_HEADER_CONTEXT: obj_id = get_enc_obj_id(MLX5_CMD_OP_ALLOC_MODIFY_HEADER_CONTEXT, - MLX5_GET(general_obj_in_cmd_hdr, in, - obj_id)); + MLX5_GET(query_modify_header_context_in, + in, modify_header_id)); break; case MLX5_CMD_OP_QUERY_SCHEDULING_ELEMENT: obj_id = get_enc_obj_id(MLX5_CMD_OP_CREATE_SCHEDULING_ELEMENT, @@ -1019,65 +1093,76 @@ static void devx_obj_build_destroy_cmd(void *in, void *out, void *din, u32 *dinlen, u32 *obj_id) { - u16 obj_type = MLX5_GET(general_obj_in_cmd_hdr, in, obj_type); + u16 opcode = MLX5_GET(general_obj_in_cmd_hdr, in, opcode); u16 uid = MLX5_GET(general_obj_in_cmd_hdr, in, uid); - *obj_id = MLX5_GET(general_obj_out_cmd_hdr, out, obj_id); + *obj_id = devx_get_created_obj_id(in, out, opcode); *dinlen = MLX5_ST_SZ_BYTES(general_obj_in_cmd_hdr); - - MLX5_SET(general_obj_in_cmd_hdr, din, obj_id, *obj_id); MLX5_SET(general_obj_in_cmd_hdr, din, uid, uid); - switch (MLX5_GET(general_obj_in_cmd_hdr, in, opcode)) { + switch (opcode) { case MLX5_CMD_OP_CREATE_GENERAL_OBJECT: MLX5_SET(general_obj_in_cmd_hdr, din, opcode, MLX5_CMD_OP_DESTROY_GENERAL_OBJECT); - MLX5_SET(general_obj_in_cmd_hdr, din, obj_type, obj_type); + MLX5_SET(general_obj_in_cmd_hdr, din, obj_id, *obj_id); + MLX5_SET(general_obj_in_cmd_hdr, din, obj_type, + MLX5_GET(general_obj_in_cmd_hdr, in, obj_type)); break; case MLX5_CMD_OP_CREATE_UMEM: - MLX5_SET(general_obj_in_cmd_hdr, din, opcode, + MLX5_SET(destroy_umem_in, din, opcode, MLX5_CMD_OP_DESTROY_UMEM); + MLX5_SET(destroy_umem_in, din, umem_id, *obj_id); break; case MLX5_CMD_OP_CREATE_MKEY: - MLX5_SET(general_obj_in_cmd_hdr, din, opcode, MLX5_CMD_OP_DESTROY_MKEY); + MLX5_SET(destroy_mkey_in, din, opcode, + MLX5_CMD_OP_DESTROY_MKEY); + MLX5_SET(destroy_mkey_in, in, mkey_index, *obj_id); break; case MLX5_CMD_OP_CREATE_CQ: - MLX5_SET(general_obj_in_cmd_hdr, din, opcode, MLX5_CMD_OP_DESTROY_CQ); + MLX5_SET(destroy_cq_in, din, opcode, MLX5_CMD_OP_DESTROY_CQ); + MLX5_SET(destroy_cq_in, din, cqn, *obj_id); break; case MLX5_CMD_OP_ALLOC_PD: - MLX5_SET(general_obj_in_cmd_hdr, din, opcode, MLX5_CMD_OP_DEALLOC_PD); + MLX5_SET(dealloc_pd_in, din, opcode, MLX5_CMD_OP_DEALLOC_PD); + MLX5_SET(dealloc_pd_in, din, pd, *obj_id); break; case MLX5_CMD_OP_ALLOC_TRANSPORT_DOMAIN: - MLX5_SET(general_obj_in_cmd_hdr, din, opcode, + MLX5_SET(dealloc_transport_domain_in, din, opcode, MLX5_CMD_OP_DEALLOC_TRANSPORT_DOMAIN); + MLX5_SET(dealloc_transport_domain_in, din, transport_domain, + *obj_id); break; case MLX5_CMD_OP_CREATE_RMP: - MLX5_SET(general_obj_in_cmd_hdr, din, opcode, MLX5_CMD_OP_DESTROY_RMP); + MLX5_SET(destroy_rmp_in, din, opcode, MLX5_CMD_OP_DESTROY_RMP); + MLX5_SET(destroy_rmp_in, din, rmpn, *obj_id); break; case MLX5_CMD_OP_CREATE_SQ: - MLX5_SET(general_obj_in_cmd_hdr, din, opcode, MLX5_CMD_OP_DESTROY_SQ); + MLX5_SET(destroy_sq_in, din, opcode, MLX5_CMD_OP_DESTROY_SQ); + MLX5_SET(destroy_sq_in, din, sqn, *obj_id); break; case MLX5_CMD_OP_CREATE_RQ: - MLX5_SET(general_obj_in_cmd_hdr, din, opcode, MLX5_CMD_OP_DESTROY_RQ); + MLX5_SET(destroy_rq_in, din, opcode, MLX5_CMD_OP_DESTROY_RQ); + MLX5_SET(destroy_rq_in, din, rqn, *obj_id); break; case MLX5_CMD_OP_CREATE_RQT: - MLX5_SET(general_obj_in_cmd_hdr, din, opcode, MLX5_CMD_OP_DESTROY_RQT); + MLX5_SET(destroy_rqt_in, din, opcode, MLX5_CMD_OP_DESTROY_RQT); + MLX5_SET(destroy_rqt_in, din, rqtn, *obj_id); break; case MLX5_CMD_OP_CREATE_TIR: - *obj_id = MLX5_GET(create_tir_out, out, tirn); MLX5_SET(destroy_tir_in, din, opcode, MLX5_CMD_OP_DESTROY_TIR); MLX5_SET(destroy_tir_in, din, tirn, *obj_id); break; case MLX5_CMD_OP_CREATE_TIS: - MLX5_SET(general_obj_in_cmd_hdr, din, opcode, MLX5_CMD_OP_DESTROY_TIS); + MLX5_SET(destroy_tis_in, din, opcode, MLX5_CMD_OP_DESTROY_TIS); + MLX5_SET(destroy_tis_in, din, tisn, *obj_id); break; case MLX5_CMD_OP_ALLOC_Q_COUNTER: - MLX5_SET(general_obj_in_cmd_hdr, din, opcode, + MLX5_SET(dealloc_q_counter_in, din, opcode, MLX5_CMD_OP_DEALLOC_Q_COUNTER); + MLX5_SET(dealloc_q_counter_in, din, counter_set_id, *obj_id); break; case MLX5_CMD_OP_CREATE_FLOW_TABLE: *dinlen = MLX5_ST_SZ_BYTES(destroy_flow_table_in); - *obj_id = MLX5_GET(create_flow_table_out, out, table_id); MLX5_SET(destroy_flow_table_in, din, other_vport, MLX5_GET(create_flow_table_in, in, other_vport)); MLX5_SET(destroy_flow_table_in, din, vport_number, @@ -1085,12 +1170,11 @@ static void devx_obj_build_destroy_cmd(void *in, void *out, void *din, MLX5_SET(destroy_flow_table_in, din, table_type, MLX5_GET(create_flow_table_in, in, table_type)); MLX5_SET(destroy_flow_table_in, din, table_id, *obj_id); - MLX5_SET(general_obj_in_cmd_hdr, din, opcode, + MLX5_SET(destroy_flow_table_in, din, opcode, MLX5_CMD_OP_DESTROY_FLOW_TABLE); break; case MLX5_CMD_OP_CREATE_FLOW_GROUP: *dinlen = MLX5_ST_SZ_BYTES(destroy_flow_group_in); - *obj_id = MLX5_GET(create_flow_group_out, out, group_id); MLX5_SET(destroy_flow_group_in, din, other_vport, MLX5_GET(create_flow_group_in, in, other_vport)); MLX5_SET(destroy_flow_group_in, din, vport_number, @@ -1100,12 +1184,11 @@ static void devx_obj_build_destroy_cmd(void *in, void *out, void *din, MLX5_SET(destroy_flow_group_in, din, table_id, MLX5_GET(create_flow_group_in, in, table_id)); MLX5_SET(destroy_flow_group_in, din, group_id, *obj_id); - MLX5_SET(general_obj_in_cmd_hdr, din, opcode, + MLX5_SET(destroy_flow_group_in, din, opcode, MLX5_CMD_OP_DESTROY_FLOW_GROUP); break; case MLX5_CMD_OP_SET_FLOW_TABLE_ENTRY: *dinlen = MLX5_ST_SZ_BYTES(delete_fte_in); - *obj_id = MLX5_GET(set_fte_in, in, flow_index); MLX5_SET(delete_fte_in, din, other_vport, MLX5_GET(set_fte_in, in, other_vport)); MLX5_SET(delete_fte_in, din, vport_number, @@ -1115,63 +1198,70 @@ static void devx_obj_build_destroy_cmd(void *in, void *out, void *din, MLX5_SET(delete_fte_in, din, table_id, MLX5_GET(set_fte_in, in, table_id)); MLX5_SET(delete_fte_in, din, flow_index, *obj_id); - MLX5_SET(general_obj_in_cmd_hdr, din, opcode, + MLX5_SET(delete_fte_in, din, opcode, MLX5_CMD_OP_DELETE_FLOW_TABLE_ENTRY); break; case MLX5_CMD_OP_ALLOC_FLOW_COUNTER: - MLX5_SET(general_obj_in_cmd_hdr, din, opcode, + MLX5_SET(dealloc_flow_counter_in, din, opcode, MLX5_CMD_OP_DEALLOC_FLOW_COUNTER); + MLX5_SET(dealloc_flow_counter_in, din, flow_counter_id, + *obj_id); break; case MLX5_CMD_OP_ALLOC_PACKET_REFORMAT_CONTEXT: - MLX5_SET(general_obj_in_cmd_hdr, din, opcode, + MLX5_SET(dealloc_packet_reformat_context_in, din, opcode, MLX5_CMD_OP_DEALLOC_PACKET_REFORMAT_CONTEXT); + MLX5_SET(dealloc_packet_reformat_context_in, din, + packet_reformat_id, *obj_id); break; case MLX5_CMD_OP_ALLOC_MODIFY_HEADER_CONTEXT: - MLX5_SET(general_obj_in_cmd_hdr, din, opcode, + MLX5_SET(dealloc_modify_header_context_in, din, opcode, MLX5_CMD_OP_DEALLOC_MODIFY_HEADER_CONTEXT); + MLX5_SET(dealloc_modify_header_context_in, din, + modify_header_id, *obj_id); break; case MLX5_CMD_OP_CREATE_SCHEDULING_ELEMENT: *dinlen = MLX5_ST_SZ_BYTES(destroy_scheduling_element_in); - *obj_id = MLX5_GET(create_scheduling_element_out, out, - scheduling_element_id); MLX5_SET(destroy_scheduling_element_in, din, scheduling_hierarchy, MLX5_GET(create_scheduling_element_in, in, scheduling_hierarchy)); MLX5_SET(destroy_scheduling_element_in, din, scheduling_element_id, *obj_id); - MLX5_SET(general_obj_in_cmd_hdr, din, opcode, + MLX5_SET(destroy_scheduling_element_in, din, opcode, MLX5_CMD_OP_DESTROY_SCHEDULING_ELEMENT); break; case MLX5_CMD_OP_ADD_VXLAN_UDP_DPORT: *dinlen = MLX5_ST_SZ_BYTES(delete_vxlan_udp_dport_in); - *obj_id = MLX5_GET(add_vxlan_udp_dport_in, in, vxlan_udp_port); MLX5_SET(delete_vxlan_udp_dport_in, din, vxlan_udp_port, *obj_id); - MLX5_SET(general_obj_in_cmd_hdr, din, opcode, + MLX5_SET(delete_vxlan_udp_dport_in, din, opcode, MLX5_CMD_OP_DELETE_VXLAN_UDP_DPORT); break; case MLX5_CMD_OP_SET_L2_TABLE_ENTRY: *dinlen = MLX5_ST_SZ_BYTES(delete_l2_table_entry_in); - *obj_id = MLX5_GET(set_l2_table_entry_in, in, table_index); MLX5_SET(delete_l2_table_entry_in, din, table_index, *obj_id); - MLX5_SET(general_obj_in_cmd_hdr, din, opcode, + MLX5_SET(delete_l2_table_entry_in, din, opcode, MLX5_CMD_OP_DELETE_L2_TABLE_ENTRY); break; case MLX5_CMD_OP_CREATE_QP: - MLX5_SET(general_obj_in_cmd_hdr, din, opcode, MLX5_CMD_OP_DESTROY_QP); + MLX5_SET(destroy_qp_in, din, opcode, MLX5_CMD_OP_DESTROY_QP); + MLX5_SET(destroy_qp_in, din, qpn, *obj_id); break; case MLX5_CMD_OP_CREATE_SRQ: - MLX5_SET(general_obj_in_cmd_hdr, din, opcode, MLX5_CMD_OP_DESTROY_SRQ); + MLX5_SET(destroy_srq_in, din, opcode, MLX5_CMD_OP_DESTROY_SRQ); + MLX5_SET(destroy_srq_in, din, srqn, *obj_id); break; case MLX5_CMD_OP_CREATE_XRC_SRQ: - MLX5_SET(general_obj_in_cmd_hdr, din, opcode, + MLX5_SET(destroy_xrc_srq_in, din, opcode, MLX5_CMD_OP_DESTROY_XRC_SRQ); + MLX5_SET(destroy_xrc_srq_in, din, xrc_srqn, *obj_id); break; case MLX5_CMD_OP_CREATE_DCT: - MLX5_SET(general_obj_in_cmd_hdr, din, opcode, MLX5_CMD_OP_DESTROY_DCT); + MLX5_SET(destroy_dct_in, din, opcode, MLX5_CMD_OP_DESTROY_DCT); + MLX5_SET(destroy_dct_in, din, dctn, *obj_id); break; case MLX5_CMD_OP_CREATE_XRQ: - MLX5_SET(general_obj_in_cmd_hdr, din, opcode, MLX5_CMD_OP_DESTROY_XRQ); + MLX5_SET(destroy_xrq_in, din, opcode, MLX5_CMD_OP_DESTROY_XRQ); + MLX5_SET(destroy_xrq_in, din, xrqn, *obj_id); break; case MLX5_CMD_OP_ATTACH_TO_MCG: *dinlen = MLX5_ST_SZ_BYTES(detach_from_mcg_in); @@ -1180,16 +1270,19 @@ static void devx_obj_build_destroy_cmd(void *in, void *out, void *din, memcpy(MLX5_ADDR_OF(detach_from_mcg_in, din, multicast_gid), MLX5_ADDR_OF(attach_to_mcg_in, in, multicast_gid), MLX5_FLD_SZ_BYTES(attach_to_mcg_in, multicast_gid)); - MLX5_SET(general_obj_in_cmd_hdr, din, opcode, MLX5_CMD_OP_DETACH_FROM_MCG); + MLX5_SET(detach_from_mcg_in, din, opcode, + MLX5_CMD_OP_DETACH_FROM_MCG); + MLX5_SET(detach_from_mcg_in, din, qpn, *obj_id); break; case MLX5_CMD_OP_ALLOC_XRCD: - MLX5_SET(general_obj_in_cmd_hdr, din, opcode, MLX5_CMD_OP_DEALLOC_XRCD); + MLX5_SET(dealloc_xrcd_in, din, opcode, + MLX5_CMD_OP_DEALLOC_XRCD); + MLX5_SET(dealloc_xrcd_in, din, xrcd, *obj_id); break; case MLX5_CMD_OP_CREATE_PSV: - MLX5_SET(general_obj_in_cmd_hdr, din, opcode, + MLX5_SET(destroy_psv_in, din, opcode, MLX5_CMD_OP_DESTROY_PSV); - MLX5_SET(destroy_psv_in, din, psvn, - MLX5_GET(create_psv_out, out, psv0_index)); + MLX5_SET(destroy_psv_in, din, psvn, *obj_id); break; default: /* The entry must match to one of the devx_is_obj_create_cmd */ From 5bf0e4b80b2a8b52e8e986f8501e10d4a56c3a01 Mon Sep 17 00:00:00 2001 From: Max Gurtovoy Date: Mon, 11 Jan 2021 14:57:51 +0000 Subject: [PATCH 066/414] IB/iser: Remove unneeded semicolons No need to add semicolon after closing bracket. Link: https://lore.kernel.org/r/20210111145754.56727-2-mgurtovoy@nvidia.com Reviewed-by: Israel Rukshin Signed-off-by: Max Gurtovoy Reviewed-by: Sagi Grimberg Signed-off-by: Jason Gunthorpe --- drivers/infiniband/ulp/iser/iser_memory.c | 3 +-- drivers/infiniband/ulp/iser/iser_verbs.c | 2 +- 2 files changed, 2 insertions(+), 3 deletions(-) diff --git a/drivers/infiniband/ulp/iser/iser_memory.c b/drivers/infiniband/ulp/iser/iser_memory.c index d4e057fac219..afec40da9b58 100644 --- a/drivers/infiniband/ulp/iser/iser_memory.c +++ b/drivers/infiniband/ulp/iser/iser_memory.c @@ -169,7 +169,7 @@ iser_set_dif_domain(struct scsi_cmnd *sc, struct ib_sig_domain *domain) domain->sig.dif.ref_escape = true; if (sc->prot_flags & SCSI_PROT_REF_INCREMENT) domain->sig.dif.ref_remap = true; -}; +} static int iser_set_sig_attrs(struct scsi_cmnd *sc, struct ib_sig_attrs *sig_attrs) @@ -390,4 +390,3 @@ int iser_reg_mem_fastreg(struct iscsi_iser_task *task, return err; } - diff --git a/drivers/infiniband/ulp/iser/iser_verbs.c b/drivers/infiniband/ulp/iser/iser_verbs.c index 2bd18b006893..136f6c4492e0 100644 --- a/drivers/infiniband/ulp/iser/iser_verbs.c +++ b/drivers/infiniband/ulp/iser/iser_verbs.c @@ -685,7 +685,7 @@ static void iser_cleanup_handler(struct rdma_cm_id *cma_id, iser_disconnected_handler(cma_id); iser_free_ib_conn_res(iser_conn, destroy); complete(&iser_conn->ib_completion); -}; +} static int iser_cma_handler(struct rdma_cm_id *cma_id, struct rdma_cm_event *event) { From 429c76133fbbf3cd590ac43882c2c2544d821ab0 Mon Sep 17 00:00:00 2001 From: Max Gurtovoy Date: Mon, 11 Jan 2021 14:57:52 +0000 Subject: [PATCH 067/414] IB/iser: Protect iscsi_max_lun module param using callback Remove the check from the module_init function. Link: https://lore.kernel.org/r/20210111145754.56727-3-mgurtovoy@nvidia.com Reviewed-by: Israel Rukshin Signed-off-by: Max Gurtovoy Reviewed-by: Sagi Grimberg Signed-off-by: Jason Gunthorpe --- drivers/infiniband/ulp/iser/iscsi_iser.c | 27 ++++++++++++++++++------ 1 file changed, 20 insertions(+), 7 deletions(-) diff --git a/drivers/infiniband/ulp/iser/iscsi_iser.c b/drivers/infiniband/ulp/iser/iscsi_iser.c index 4792b9bf400f..906f52873d63 100644 --- a/drivers/infiniband/ulp/iser/iscsi_iser.c +++ b/drivers/infiniband/ulp/iser/iscsi_iser.c @@ -89,9 +89,15 @@ int iser_debug_level = 0; module_param_named(debug_level, iser_debug_level, int, S_IRUGO | S_IWUSR); MODULE_PARM_DESC(debug_level, "Enable debug tracing if > 0 (default:disabled)"); +static int iscsi_iser_set(const char *val, const struct kernel_param *kp); +static const struct kernel_param_ops iscsi_iser_size_ops = { + .set = iscsi_iser_set, + .get = param_get_uint, +}; + static unsigned int iscsi_max_lun = 512; -module_param_named(max_lun, iscsi_max_lun, uint, S_IRUGO); -MODULE_PARM_DESC(max_lun, "Max LUNs to allow per session (default:512"); +module_param_cb(max_lun, &iscsi_iser_size_ops, &iscsi_max_lun, S_IRUGO); +MODULE_PARM_DESC(max_lun, "Max LUNs to allow per session, should > 0 (default:512)"); unsigned int iser_max_sectors = ISER_DEF_MAX_SECTORS; module_param_named(max_sectors, iser_max_sectors, uint, S_IRUGO | S_IWUSR); @@ -110,6 +116,18 @@ int iser_pi_guard; module_param_named(pi_guard, iser_pi_guard, int, S_IRUGO); MODULE_PARM_DESC(pi_guard, "T10-PI guard_type [deprecated]"); +static int iscsi_iser_set(const char *val, const struct kernel_param *kp) +{ + int ret; + unsigned int n = 0; + + ret = kstrtouint(val, 10, &n); + if (ret != 0 || n == 0) + return -EINVAL; + + return param_set_uint(val, kp); +} + /* * iscsi_iser_recv() - Process a successful recv completion * @conn: iscsi connection @@ -1009,11 +1027,6 @@ static int __init iser_init(void) iser_dbg("Starting iSER datamover...\n"); - if (iscsi_max_lun < 1) { - iser_err("Invalid max_lun value of %u\n", iscsi_max_lun); - return -EINVAL; - } - memset(&ig, 0, sizeof(struct iser_global)); ig.desc_cache = kmem_cache_create("iser_descriptors", From 6bd898baf2bb37968c909a5f057e1ccca90dbb3c Mon Sep 17 00:00:00 2001 From: Max Gurtovoy Date: Mon, 11 Jan 2021 14:57:53 +0000 Subject: [PATCH 068/414] IB/iser: Enforce iser_max_sectors to be greater than 0 A value of 0 will casue the driver to fail establishing a valid connection to remote target. The following can be seen in the log in this case: iser: iser_connect: connecting to: 1.1.1.88:3260 iser: iser_cma_handler: address resolved (0): status 0 conn 00000000090aa4de id 00000000167d3b5a iser: iser_cma_handler: route resolved (2): status 0 conn 00000000090aa4de id 00000000167d3b5a iser: iscsi_iser_ep_poll: iser conn 00000000090aa4de rc = 0 iser: iser_create_ib_conn_res: setting conn 00000000090aa4de cma_id 00000000167d3b5a qp 00000000efa80660 max_send_wr 4619 iser_cma_handler: established (9): status 0 conn 00000000090aa4de id 00000000167d3b5a iser: iser_connected_handler: remote qpn:1c7 my qpn:1c6 iser: iser_connected_handler: conn 00000000090aa4de: negotiated remote invalidation iser: iscsi_iser_ep_poll: iser conn 00000000090aa4de rc = 1 scsi host10: iSCSI Initiator over iSER mlx5_core 0000:07:00.0: mlx5_cmd_check:769:(pid 616473): CREATE_MKEY(0x200) op_mod(0x0) failed, status bad parameter(0x3), syndrome (0x3bf6f) iser: iser_create_fastreg_desc: Failed to allocate ib_fast_reg_mr err=-22 iser: iser_alloc_rx_descriptors: failed allocating rx descriptors / data buffers iser: iscsi_iser_ep_disconnect: ep 00000000d2040785 iser conn 00000000090aa4de iser: iser_conn_terminate: iser_conn 00000000090aa4de state 3 iser: iser_free_ib_conn_res: freeing conn 00000000090aa4de cma_id 00000000167d3b5a qp 00000000efa80660 iser: iser_device_try_release: device 00000000dc871b1b refcount 0 Link: https://lore.kernel.org/r/20210111145754.56727-4-mgurtovoy@nvidia.com Reviewed-by: Israel Rukshin Signed-off-by: Max Gurtovoy Reviewed-by: Sagi Grimberg Signed-off-by: Jason Gunthorpe --- drivers/infiniband/ulp/iser/iscsi_iser.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/drivers/infiniband/ulp/iser/iscsi_iser.c b/drivers/infiniband/ulp/iser/iscsi_iser.c index 906f52873d63..fcfdeb5dea42 100644 --- a/drivers/infiniband/ulp/iser/iscsi_iser.c +++ b/drivers/infiniband/ulp/iser/iscsi_iser.c @@ -100,8 +100,9 @@ module_param_cb(max_lun, &iscsi_iser_size_ops, &iscsi_max_lun, S_IRUGO); MODULE_PARM_DESC(max_lun, "Max LUNs to allow per session, should > 0 (default:512)"); unsigned int iser_max_sectors = ISER_DEF_MAX_SECTORS; -module_param_named(max_sectors, iser_max_sectors, uint, S_IRUGO | S_IWUSR); -MODULE_PARM_DESC(max_sectors, "Max number of sectors in a single scsi command (default:1024"); +module_param_cb(max_sectors, &iscsi_iser_size_ops, &iser_max_sectors, + S_IRUGO | S_IWUSR); +MODULE_PARM_DESC(max_sectors, "Max number of sectors in a single scsi command, should > 0 (default:1024)"); bool iser_always_reg = true; module_param_named(always_register, iser_always_reg, bool, S_IRUGO); From 877745b47783869a3149cf691ebff56e2a557f53 Mon Sep 17 00:00:00 2001 From: Max Gurtovoy Date: Mon, 11 Jan 2021 14:57:54 +0000 Subject: [PATCH 069/414] IB/iser: Simplify prot_caps setting Reduce the number of instructions made for setting protection caps. No need to do bitwise OR with 0 since we can zero the return value in the beginning of the function. Link: https://lore.kernel.org/r/20210111145754.56727-5-mgurtovoy@nvidia.com Reviewed-by: Israel Rukshin Signed-off-by: Max Gurtovoy Reviewed-by: Sagi Grimberg Signed-off-by: Jason Gunthorpe --- drivers/infiniband/ulp/iser/iscsi_iser.c | 21 ++++++++++++++------- 1 file changed, 14 insertions(+), 7 deletions(-) diff --git a/drivers/infiniband/ulp/iser/iscsi_iser.c b/drivers/infiniband/ulp/iser/iscsi_iser.c index fcfdeb5dea42..8fcaa1136f2c 100644 --- a/drivers/infiniband/ulp/iser/iscsi_iser.c +++ b/drivers/infiniband/ulp/iser/iscsi_iser.c @@ -590,13 +590,20 @@ iscsi_iser_session_destroy(struct iscsi_cls_session *cls_session) static inline unsigned int iser_dif_prot_caps(int prot_caps) { - return ((prot_caps & IB_PROT_T10DIF_TYPE_1) ? - SHOST_DIF_TYPE1_PROTECTION | SHOST_DIX_TYPE0_PROTECTION | - SHOST_DIX_TYPE1_PROTECTION : 0) | - ((prot_caps & IB_PROT_T10DIF_TYPE_2) ? - SHOST_DIF_TYPE2_PROTECTION | SHOST_DIX_TYPE2_PROTECTION : 0) | - ((prot_caps & IB_PROT_T10DIF_TYPE_3) ? - SHOST_DIF_TYPE3_PROTECTION | SHOST_DIX_TYPE3_PROTECTION : 0); + int ret = 0; + + if (prot_caps & IB_PROT_T10DIF_TYPE_1) + ret |= SHOST_DIF_TYPE1_PROTECTION | + SHOST_DIX_TYPE0_PROTECTION | + SHOST_DIX_TYPE1_PROTECTION; + if (prot_caps & IB_PROT_T10DIF_TYPE_2) + ret |= SHOST_DIF_TYPE2_PROTECTION | + SHOST_DIX_TYPE2_PROTECTION; + if (prot_caps & IB_PROT_T10DIF_TYPE_3) + ret |= SHOST_DIF_TYPE3_PROTECTION | + SHOST_DIX_TYPE3_PROTECTION; + + return ret; } /** From ab40530a2e0a7aca9a5187824c4fb072f3916e85 Mon Sep 17 00:00:00 2001 From: Parav Pandit Date: Wed, 13 Jan 2021 14:17:01 +0200 Subject: [PATCH 070/414] IB/mlx5: Add mutex destroy call to cap_mask_mutex mutex mutex_destroy() call for device's cap_mask_mutex mutex is missing, let's add it to annotate destruction. Fixes: e126ba97dba9 ("mlx5: Add driver for Mellanox Connect-IB adapters") Link: https://lore.kernel.org/r/20210113121703.559778-4-leon@kernel.org Signed-off-by: Parav Pandit Signed-off-by: Leon Romanovsky Signed-off-by: Jason Gunthorpe --- drivers/infiniband/hw/mlx5/main.c | 11 +++++------ 1 file changed, 5 insertions(+), 6 deletions(-) diff --git a/drivers/infiniband/hw/mlx5/main.c b/drivers/infiniband/hw/mlx5/main.c index d26f3f3e0462..4747cc16b391 100644 --- a/drivers/infiniband/hw/mlx5/main.c +++ b/drivers/infiniband/hw/mlx5/main.c @@ -3929,7 +3929,7 @@ static void mlx5_ib_stage_init_cleanup(struct mlx5_ib_dev *dev) mlx5_ib_cleanup_multiport_master(dev); WARN_ON(!xa_empty(&dev->odp_mkeys)); cleanup_srcu_struct(&dev->odp_srcu); - + mutex_destroy(&dev->cap_mask_mutex); WARN_ON(!xa_empty(&dev->sig_mrs)); WARN_ON(!bitmap_empty(dev->dm.memic_alloc_pages, MLX5_MAX_MEMIC_PAGES)); } @@ -3980,6 +3980,10 @@ static int mlx5_ib_stage_init_init(struct mlx5_ib_dev *dev) dev->ib_dev.dev.parent = mdev->device; dev->ib_dev.lag_flags = RDMA_LAG_FLAGS_HASH_ALL_SLAVES; + err = init_srcu_struct(&dev->odp_srcu); + if (err) + goto err_mp; + mutex_init(&dev->cap_mask_mutex); INIT_LIST_HEAD(&dev->qp_list); spin_lock_init(&dev->reset_flow_resource_lock); @@ -3989,11 +3993,6 @@ static int mlx5_ib_stage_init_init(struct mlx5_ib_dev *dev) spin_lock_init(&dev->dm.lock); dev->dm.dev = mdev; - - err = init_srcu_struct(&dev->odp_srcu); - if (err) - goto err_mp; - return 0; err_mp: From 559a3eacc4e5f17317e77774462317f785b0cc23 Mon Sep 17 00:00:00 2001 From: Parav Pandit Date: Wed, 13 Jan 2021 14:17:02 +0200 Subject: [PATCH 071/414] IB/mlx5: Make function static mlx5_query_mad_ifc_smp_attr_node_info() is internal to mad.c Hence, make it static. Link: https://lore.kernel.org/r/20210113121703.559778-5-leon@kernel.org Signed-off-by: Parav Pandit Signed-off-by: Leon Romanovsky Signed-off-by: Jason Gunthorpe --- drivers/infiniband/hw/mlx5/mad.c | 4 ++-- drivers/infiniband/hw/mlx5/mlx5_ib.h | 2 -- 2 files changed, 2 insertions(+), 4 deletions(-) diff --git a/drivers/infiniband/hw/mlx5/mad.c b/drivers/infiniband/hw/mlx5/mad.c index 9bb9bb058932..53dec6063245 100644 --- a/drivers/infiniband/hw/mlx5/mad.c +++ b/drivers/infiniband/hw/mlx5/mad.c @@ -308,8 +308,8 @@ int mlx5_query_ext_port_caps(struct mlx5_ib_dev *dev, u8 port) return err; } -int mlx5_query_mad_ifc_smp_attr_node_info(struct ib_device *ibdev, - struct ib_smp *out_mad) +static int mlx5_query_mad_ifc_smp_attr_node_info(struct ib_device *ibdev, + struct ib_smp *out_mad) { struct ib_smp *in_mad = NULL; int err = -ENOMEM; diff --git a/drivers/infiniband/hw/mlx5/mlx5_ib.h b/drivers/infiniband/hw/mlx5/mlx5_ib.h index b0fdc1b08e06..acc56eaa7356 100644 --- a/drivers/infiniband/hw/mlx5/mlx5_ib.h +++ b/drivers/infiniband/hw/mlx5/mlx5_ib.h @@ -1280,8 +1280,6 @@ int mlx5_ib_process_mad(struct ib_device *ibdev, int mad_flags, u8 port_num, int mlx5_ib_alloc_xrcd(struct ib_xrcd *xrcd, struct ib_udata *udata); int mlx5_ib_dealloc_xrcd(struct ib_xrcd *xrcd, struct ib_udata *udata); int mlx5_query_ext_port_caps(struct mlx5_ib_dev *dev, u8 port); -int mlx5_query_mad_ifc_smp_attr_node_info(struct ib_device *ibdev, - struct ib_smp *out_mad); int mlx5_query_mad_ifc_system_image_guid(struct ib_device *ibdev, __be64 *sys_image_guid); int mlx5_query_mad_ifc_max_pkeys(struct ib_device *ibdev, From 390e9ab4632f22eed601c6a4ee078e940660242c Mon Sep 17 00:00:00 2001 From: Lee Jones Date: Mon, 18 Jan 2021 22:39:10 +0000 Subject: [PATCH 072/414] RDMA/hw: i40iw_hmc: Fix misspellings of '*idx' args Fixes the following W=1 kernel build warning(s): drivers/infiniband/hw/i40iw/i40iw_hmc.c:64: warning: Function parameter or member 'idx' not described in 'i40iw_find_sd_index_limit' drivers/infiniband/hw/i40iw/i40iw_hmc.c:64: warning: Excess function parameter 'index' description in 'i40iw_find_sd_index_limit' drivers/infiniband/hw/i40iw/i40iw_hmc.c:94: warning: Function parameter or member 'pd_idx' not described in 'i40iw_find_pd_index_limit' drivers/infiniband/hw/i40iw/i40iw_hmc.c:94: warning: Excess function parameter 'pd_index' description in 'i40iw_find_pd_index_limit' Link: https://lore.kernel.org/r/20210118223929.512175-2-lee.jones@linaro.org Cc: Faisal Latif Cc: Shiraz Saleem Cc: Doug Ledford Cc: Jason Gunthorpe Cc: linux-rdma@vger.kernel.org Signed-off-by: Lee Jones Signed-off-by: Jason Gunthorpe --- drivers/infiniband/hw/i40iw/i40iw_hmc.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/infiniband/hw/i40iw/i40iw_hmc.c b/drivers/infiniband/hw/i40iw/i40iw_hmc.c index 5484cbf55f0f..8bd72af9e099 100644 --- a/drivers/infiniband/hw/i40iw/i40iw_hmc.c +++ b/drivers/infiniband/hw/i40iw/i40iw_hmc.c @@ -46,7 +46,7 @@ * i40iw_find_sd_index_limit - finds segment descriptor index limit * @hmc_info: pointer to the HMC configuration information structure * @type: type of HMC resources we're searching - * @index: starting index for the object + * @idx: starting index for the object * @cnt: number of objects we're trying to create * @sd_idx: pointer to return index of the segment descriptor in question * @sd_limit: pointer to return the maximum number of segment descriptors @@ -78,7 +78,7 @@ static inline void i40iw_find_sd_index_limit(struct i40iw_hmc_info *hmc_info, * @type: HMC resource type we're examining * @idx: starting index for the object * @cnt: number of objects we're trying to create - * @pd_index: pointer to return page descriptor index + * @pd_idx: pointer to return page descriptor index * @pd_limit: pointer to return page descriptor index limit * * Calculates the page descriptor index and index limit for the resource From 4c3b53e14cdf29af2a565bfda2e3dbeede5f39a3 Mon Sep 17 00:00:00 2001 From: Lee Jones Date: Mon, 18 Jan 2021 22:39:11 +0000 Subject: [PATCH 073/414] RDMA/core: device: Fix formatting in worthy kernel-doc header and demote another Fixes the following W=1 kernel build warning(s): drivers/infiniband/core/device.c:1896: warning: Function parameter or member 'ibdev' not described in 'ib_get_client_nl_info' drivers/infiniband/core/device.c:1896: warning: Function parameter or member 'client_name' not described in 'ib_get_client_nl_info' drivers/infiniband/core/device.c:1896: warning: Function parameter or member 'res' not described in 'ib_get_client_nl_info' drivers/infiniband/core/device.c:2328: warning: Function parameter or member 'nldev_cb' not described in 'ib_enum_all_devs' drivers/infiniband/core/device.c:2328: warning: Function parameter or member 'skb' not described in 'ib_enum_all_devs' Link: https://lore.kernel.org/r/20210118223929.512175-3-lee.jones@linaro.org Cc: Doug Ledford Cc: Jason Gunthorpe Cc: linux-rdma@vger.kernel.org Signed-off-by: Lee Jones Signed-off-by: Jason Gunthorpe --- drivers/infiniband/core/device.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/drivers/infiniband/core/device.c b/drivers/infiniband/core/device.c index e96f979e6d52..3d08373c7797 100644 --- a/drivers/infiniband/core/device.c +++ b/drivers/infiniband/core/device.c @@ -1887,9 +1887,9 @@ static int __ib_get_client_nl_info(struct ib_device *ibdev, /** * ib_get_client_nl_info - Fetch the nl_info from a client - * @device - IB device - * @client_name - Name of the client - * @res - Result of the query + * @ibdev: IB device + * @client_name: Name of the client + * @res: Result of the query */ int ib_get_client_nl_info(struct ib_device *ibdev, const char *client_name, struct ib_client_nl_info *res) @@ -2317,7 +2317,7 @@ void ib_enum_all_roce_netdevs(roce_netdev_filter filter, up_read(&devices_rwsem); } -/** +/* * ib_enum_all_devs - enumerate all ib_devices * @cb: Callback to call for each found ib_device * From 14fa9fe5a612555064d4c402dc0e315b8568a501 Mon Sep 17 00:00:00 2001 From: Lee Jones Date: Mon, 18 Jan 2021 22:39:12 +0000 Subject: [PATCH 074/414] RDMA/hw/i40iw/i40iw_ctrl: Fix a bunch of misspellings and formatting issues Fixes the following W=1 kernel build warning(s): drivers/infiniband/hw/i40iw/i40iw_ctrl.c:193: warning: Function parameter or member 'obj_info' not described in 'i40iw_sc_decode_fpm_query' drivers/infiniband/hw/i40iw/i40iw_ctrl.c:193: warning: Excess function parameter 'info' description in 'i40iw_sc_decode_fpm_query' drivers/infiniband/hw/i40iw/i40iw_ctrl.c:218: warning: Function parameter or member 'hmc_info' not described in 'i40iw_sc_parse_fpm_query_buf' drivers/infiniband/hw/i40iw/i40iw_ctrl.c:218: warning: Excess function parameter 'info' description in 'i40iw_sc_parse_fpm_query_buf' drivers/infiniband/hw/i40iw/i40iw_ctrl.c:784: warning: Function parameter or member 'compl_info' not described in 'i40iw_sc_poll_for_cqp_op_done' drivers/infiniband/hw/i40iw/i40iw_ctrl.c:784: warning: Excess function parameter 'info' description in 'i40iw_sc_poll_for_cqp_op_done' drivers/infiniband/hw/i40iw/i40iw_ctrl.c:947: warning: Function parameter or member 'commit_fpm_mem' not described in 'i40iw_sc_commit_fpm_values' drivers/infiniband/hw/i40iw/i40iw_ctrl.c:1032: warning: Function parameter or member 'dev' not described in 'i40iw_get_rdma_features' drivers/infiniband/hw/i40iw/i40iw_ctrl.c:1468: warning: Function parameter or member 'ignore_ref_count' not described in 'i40iw_sc_del_local_mac_ipaddr_entry' drivers/infiniband/hw/i40iw/i40iw_ctrl.c:2314: warning: Function parameter or member 'scratch' not described in 'i40iw_sc_cq_modify' drivers/infiniband/hw/i40iw/i40iw_ctrl.c:3682: warning: Function parameter or member 'info' not described in 'cqp_sds_wqe_fill' drivers/infiniband/hw/i40iw/i40iw_ctrl.c:4894: warning: Function parameter or member 'stats' not described in 'i40iw_hw_stats_read_32' drivers/infiniband/hw/i40iw/i40iw_ctrl.c:4894: warning: Excess function parameter 'stat' description in 'i40iw_hw_stats_read_32' Link: https://lore.kernel.org/r/20210118223929.512175-4-lee.jones@linaro.org Cc: Faisal Latif Cc: Shiraz Saleem Cc: Doug Ledford Cc: Jason Gunthorpe Cc: linux-rdma@vger.kernel.org Signed-off-by: Lee Jones Signed-off-by: Jason Gunthorpe --- drivers/infiniband/hw/i40iw/i40iw_ctrl.c | 18 +++++++++--------- 1 file changed, 9 insertions(+), 9 deletions(-) diff --git a/drivers/infiniband/hw/i40iw/i40iw_ctrl.c b/drivers/infiniband/hw/i40iw/i40iw_ctrl.c index c943d491b72b..eaea5d545eb8 100644 --- a/drivers/infiniband/hw/i40iw/i40iw_ctrl.c +++ b/drivers/infiniband/hw/i40iw/i40iw_ctrl.c @@ -181,7 +181,7 @@ static enum i40iw_status_code i40iw_sc_parse_fpm_commit_buf( * i40iw_sc_decode_fpm_query() - Decode a 64 bit value into max count and size * @buf: ptr to fpm query buffer * @buf_idx: index into buf - * @info: ptr to i40iw_hmc_obj_info struct + * @obj_info: ptr to i40iw_hmc_obj_info struct * @rsrc_idx: resource index into info * * Decode a 64 bit value from fpm query buffer into max count and size @@ -205,7 +205,7 @@ static u64 i40iw_sc_decode_fpm_query(u64 *buf, /** * i40iw_sc_parse_fpm_query_buf() - parses fpm query buffer * @buf: ptr to fpm query buffer - * @info: ptr to i40iw_hmc_obj_info struct + * @hmc_info: ptr to i40iw_hmc_obj_info struct * @hmc_fpm_misc: ptr to fpm data * * parses fpm query buffer and copy max_cnt and @@ -775,7 +775,7 @@ static enum i40iw_status_code i40iw_sc_ccq_get_cqe_info( * i40iw_sc_poll_for_cqp_op_done - Waits for last write to complete in CQP SQ * @cqp: struct for cqp hw * @op_code: cqp opcode for completion - * @info: completion q entry to return + * @compl_info: completion q entry to return */ static enum i40iw_status_code i40iw_sc_poll_for_cqp_op_done( struct i40iw_sc_cqp *cqp, @@ -933,7 +933,7 @@ static enum i40iw_status_code i40iw_sc_commit_fpm_values_done(struct i40iw_sc_cq * @cqp: struct for cqp hw * @scratch: u64 saved to be used during cqp completion * @hmc_fn_id: hmc function id - * @commit_fpm_mem; Memory for fpm values + * @commit_fpm_mem: Memory for fpm values * @post_sq: flag for cqp db to ring * @wait_type: poll ccq or cqp registers for cqp completion */ @@ -1026,7 +1026,7 @@ i40iw_sc_query_rdma_features(struct i40iw_sc_cqp *cqp, /** * i40iw_get_rdma_features - get RDMA features - * @dev - sc device struct + * @dev: sc device struct */ enum i40iw_status_code i40iw_get_rdma_features(struct i40iw_sc_dev *dev) { @@ -1456,7 +1456,7 @@ static enum i40iw_status_code i40iw_sc_add_local_mac_ipaddr_entry( * @cqp: struct for cqp hw * @scratch: u64 saved to be used during cqp completion * @entry_idx: index of mac entry - * @ ignore_ref_count: to force mac adde delete + * @ignore_ref_count: to force mac adde delete * @post_sq: flag for cqp db to ring */ static enum i40iw_status_code i40iw_sc_del_local_mac_ipaddr_entry( @@ -2304,7 +2304,7 @@ static enum i40iw_status_code i40iw_sc_cq_destroy(struct i40iw_sc_cq *cq, * i40iw_sc_cq_modify - modify a Completion Queue * @cq: cq struct * @info: modification info struct - * @scratch: + * @scratch: u64 saved to be used during cqp completion * @post_sq: flag to post to sq */ static enum i40iw_status_code i40iw_sc_cq_modify(struct i40iw_sc_cq *cq, @@ -3673,7 +3673,7 @@ static enum i40iw_status_code i40iw_sc_configure_iw_fpm(struct i40iw_sc_dev *dev /** * cqp_sds_wqe_fill - fill cqp wqe doe sd * @cqp: struct for cqp hw - * @info; sd info for wqe + * @info: sd info for wqe * @scratch: u64 saved to be used during cqp completion */ static enum i40iw_status_code cqp_sds_wqe_fill(struct i40iw_sc_cqp *cqp, @@ -4884,7 +4884,7 @@ void i40iw_hw_stats_init(struct i40iw_vsi_pestat *stats, u8 fcn_idx, bool is_pf) /** * i40iw_hw_stats_read_32 - Read 32-bit HW stats counters and accommodates for roll-overs. - * @stat: pestat struct + * @stats: pestat struct * @index: index in HW stats table which contains offset reg-addr * @value: hw stats value */ From 03e2dbbb3e819e89f6b44ee6ac604b55281c0417 Mon Sep 17 00:00:00 2001 From: Lee Jones Date: Mon, 18 Jan 2021 22:39:13 +0000 Subject: [PATCH 075/414] RDMA/hw/i40iw/i40iw_cm: Fix a bunch of function documentation issues Fixes the following W=1 kernel build warning(s): drivers/infiniband/hw/i40iw/i40iw_cm.c:76: warning: Function parameter or member 'bufp' not described in 'i40iw_free_sqbuf' drivers/infiniband/hw/i40iw/i40iw_cm.c:76: warning: Excess function parameter 'buf' description in 'i40iw_free_sqbuf' drivers/infiniband/hw/i40iw/i40iw_cm.c:737: warning: Function parameter or member 'start_addr' not described in 'i40iw_build_mpa_v1' drivers/infiniband/hw/i40iw/i40iw_cm.c:1059: warning: Function parameter or member 'cm_node' not described in 'i40iw_schedule_cm_timer' drivers/infiniband/hw/i40iw/i40iw_cm.c:1211: warning: Function parameter or member 't' not described in 'i40iw_cm_timer_tick' drivers/infiniband/hw/i40iw/i40iw_cm.c:1211: warning: Excess function parameter 'pass' description in 'i40iw_cm_timer_tick' drivers/infiniband/hw/i40iw/i40iw_cm.c:1475: warning: Function parameter or member 'vlan_id' not described in 'i40iw_find_listener' drivers/infiniband/hw/i40iw/i40iw_cm.c:1527: warning: Function parameter or member 'port' not described in 'i40iw_find_port' drivers/infiniband/hw/i40iw/i40iw_cm.c:1527: warning: Excess function parameter 'accelerated_list' description in 'i40iw_find_port' drivers/infiniband/hw/i40iw/i40iw_cm.c:1843: warning: Function parameter or member 'listener' not described in 'i40iw_dec_refcnt_listen' drivers/infiniband/hw/i40iw/i40iw_cm.c:2037: warning: Function parameter or member 'src_addr' not described in 'i40iw_get_dst_ipv6' drivers/infiniband/hw/i40iw/i40iw_cm.c:2037: warning: Function parameter or member 'dst_addr' not described in 'i40iw_get_dst_ipv6' drivers/infiniband/hw/i40iw/i40iw_cm.c:2061: warning: Function parameter or member 'src' not described in 'i40iw_addr_resolve_neigh_ipv6' drivers/infiniband/hw/i40iw/i40iw_cm.c:2061: warning: Function parameter or member 'dest' not described in 'i40iw_addr_resolve_neigh_ipv6' drivers/infiniband/hw/i40iw/i40iw_cm.c:2061: warning: Excess function parameter 'dst_ip' description in 'i40iw_addr_resolve_neigh_ipv6' drivers/infiniband/hw/i40iw/i40iw_cm.c:3011: warning: Function parameter or member 'pdata' not described in 'i40iw_cm_reject' drivers/infiniband/hw/i40iw/i40iw_cm.c:3011: warning: Excess function parameter 'pdate' description in 'i40iw_cm_reject' drivers/infiniband/hw/i40iw/i40iw_cm.c:4312: warning: Function parameter or member 'nfo' not described in 'i40iw_cm_teardown_connections' drivers/infiniband/hw/i40iw/i40iw_cm.c:4312: warning: Excess function parameter 'ipv4' description in 'i40iw_cm_teardown_connections' drivers/infiniband/hw/i40iw/i40iw_cm.c:4367: warning: Function parameter or member 'netdev' not described in 'i40iw_if_notify' Link: https://lore.kernel.org/r/20210118223929.512175-5-lee.jones@linaro.org Cc: Faisal Latif Cc: Shiraz Saleem Cc: Doug Ledford Cc: Jason Gunthorpe Cc: linux-rdma@vger.kernel.org Signed-off-by: Lee Jones Signed-off-by: Jason Gunthorpe --- drivers/infiniband/hw/i40iw/i40iw_cm.c | 21 +++++++++++++-------- 1 file changed, 13 insertions(+), 8 deletions(-) diff --git a/drivers/infiniband/hw/i40iw/i40iw_cm.c b/drivers/infiniband/hw/i40iw/i40iw_cm.c index 9acc0ecc9a43..ac65c8237b2e 100644 --- a/drivers/infiniband/hw/i40iw/i40iw_cm.c +++ b/drivers/infiniband/hw/i40iw/i40iw_cm.c @@ -70,7 +70,7 @@ static void i40iw_disconnect_worker(struct work_struct *work); /** * i40iw_free_sqbuf - put back puda buffer if refcount = 0 * @vsi: pointer to vsi structure - * @buf: puda buffer to free + * @bufp: puda buffer to free */ void i40iw_free_sqbuf(struct i40iw_sc_vsi *vsi, void *bufp) { @@ -729,6 +729,7 @@ static int i40iw_handle_tcp_options(struct i40iw_cm_node *cm_node, /** * i40iw_build_mpa_v1 - build a MPA V1 frame * @cm_node: connection's node + * @start_addr: MPA frame start address * @mpa_key: to do read0 or write0 */ static void i40iw_build_mpa_v1(struct i40iw_cm_node *cm_node, @@ -1040,7 +1041,7 @@ static int i40iw_parse_mpa(struct i40iw_cm_node *cm_node, u8 *buffer, u32 *type, /** * i40iw_schedule_cm_timer - * @@cm_node: connection's node + * @cm_node: connection's node * @sqbuf: buffer to send * @type: if it is send or close * @send_retrans: if rexmits to be done @@ -1205,7 +1206,7 @@ static void i40iw_build_timer_list(struct list_head *timer_list, /** * i40iw_cm_timer_tick - system's timer expired callback - * @pass: Pointing to cm_core + * @t: Timer instance to fetch the cm_core pointer from */ static void i40iw_cm_timer_tick(struct timer_list *t) { @@ -1463,6 +1464,7 @@ struct i40iw_cm_node *i40iw_find_node(struct i40iw_cm_core *cm_core, * @cm_core: cm's core * @dst_port: listener tcp port num * @dst_addr: listener ip addr + * @vlan_id: vlan id for the given address * @listener_state: state to match with listen node's */ static struct i40iw_cm_listener *i40iw_find_listener( @@ -1521,7 +1523,7 @@ static void i40iw_add_hte_node(struct i40iw_cm_core *cm_core, /** * i40iw_find_port - find port that matches reference port * @hte: ptr to accelerated or non-accelerated list - * @accelerated_list: flag for accelerated vs non-accelerated list + * @port: port number to locate */ static bool i40iw_find_port(struct list_head *hte, u16 port) { @@ -1834,6 +1836,7 @@ static enum i40iw_status_code i40iw_add_mqh_4( /** * i40iw_dec_refcnt_listen - delete listener and associated cm nodes * @cm_core: cm's core + * @listener: passive connection's listener * @free_hanging_nodes: to free associated cm_nodes * @apbvt_del: flag to delete the apbvt */ @@ -2029,7 +2032,7 @@ static int i40iw_addr_resolve_neigh(struct i40iw_device *iwdev, return rc; } -/** +/* * i40iw_get_dst_ipv6 */ static struct dst_entry *i40iw_get_dst_ipv6(struct sockaddr_in6 *src_addr, @@ -2051,7 +2054,8 @@ static struct dst_entry *i40iw_get_dst_ipv6(struct sockaddr_in6 *src_addr, /** * i40iw_addr_resolve_neigh_ipv6 - resolve neighbor ipv6 address * @iwdev: iwarp device structure - * @dst_ip: remote ip address + * @src: source ip address + * @dest: remote ip address * @arpindex: if there is an arp entry */ static int i40iw_addr_resolve_neigh_ipv6(struct i40iw_device *iwdev, @@ -3004,7 +3008,7 @@ static struct i40iw_cm_node *i40iw_create_cm_node( /** * i40iw_cm_reject - reject and teardown a connection * @cm_node: connection's node - * @pdate: ptr to private data for reject + * @pdata: ptr to private data for reject * @plen: size of private data */ static int i40iw_cm_reject(struct i40iw_cm_node *cm_node, const void *pdata, u8 plen) @@ -4302,7 +4306,7 @@ static void i40iw_qhash_ctrl(struct i40iw_device *iwdev, * i40iw_cm_teardown_connections - teardown QPs * @iwdev: device pointer * @ipaddr: Pointer to IPv4 or IPv6 address - * @ipv4: flag indicating IPv4 when true + * @nfo: cm info node * @disconnect_all: flag indicating disconnect all QPs * teardown QPs where source or destination addr matches ip addr */ @@ -4358,6 +4362,7 @@ void i40iw_cm_teardown_connections(struct i40iw_device *iwdev, u32 *ipaddr, /** * i40iw_ifdown_notify - process an ifdown on an interface * @iwdev: device pointer + * @netdev: network interface device structure * @ipaddr: Pointer to IPv4 or IPv6 address * @ipv4: flag indicating IPv4 when true * @ifup: flag indicating interface up when true From 3c976761ad049777cacd53e5171f0cdeffa1ff60 Mon Sep 17 00:00:00 2001 From: Lee Jones Date: Mon, 18 Jan 2021 22:39:14 +0000 Subject: [PATCH 076/414] RDMA/core/cache: Fix some misspellings, missing and superfluous param descriptions Fixes the following W=1 kernel build warning(s): drivers/infiniband/core/cache.c:688: warning: Function parameter or member 'ib_dev' not described in 'rdma_find_gid_by_port' drivers/infiniband/core/cache.c:688: warning: Function parameter or member 'port' not described in 'rdma_find_gid_by_port' drivers/infiniband/core/cache.c:688: warning: Excess function parameter 'device' description in 'rdma_find_gid_by_port' drivers/infiniband/core/cache.c:688: warning: Excess function parameter 'port_num' description in 'rdma_find_gid_by_port' drivers/infiniband/core/cache.c:741: warning: Function parameter or member 'ib_dev' not described in 'rdma_find_gid_by_filter' drivers/infiniband/core/cache.c:741: warning: Function parameter or member 'context' not described in 'rdma_find_gid_by_filter' drivers/infiniband/core/cache.c:741: warning: Excess function parameter 'device' description in 'rdma_find_gid_by_filter' drivers/infiniband/core/cache.c:1263: warning: Excess function parameter 'num_entries' description in 'rdma_query_gid_table' Link: https://lore.kernel.org/r/20210118223929.512175-6-lee.jones@linaro.org Cc: Doug Ledford Cc: Jason Gunthorpe Cc: linux-rdma@vger.kernel.org Signed-off-by: Lee Jones Signed-off-by: Jason Gunthorpe --- drivers/infiniband/core/cache.c | 9 ++++----- 1 file changed, 4 insertions(+), 5 deletions(-) diff --git a/drivers/infiniband/core/cache.c b/drivers/infiniband/core/cache.c index 7989b7e1d1c0..5c9fac7cf420 100644 --- a/drivers/infiniband/core/cache.c +++ b/drivers/infiniband/core/cache.c @@ -669,11 +669,10 @@ int ib_cache_gid_del_all_netdev_gids(struct ib_device *ib_dev, u8 port, * rdma_find_gid_by_port - Returns the GID entry attributes when it finds * a valid GID entry for given search parameters. It searches for the specified * GID value in the local software cache. - * @device: The device to query. + * @ib_dev: The device to query. * @gid: The GID value to search for. * @gid_type: The GID type to search for. - * @port_num: The port number of the device where the GID value should be - * searched. + * @port: The port number of the device where the GID value should be searched. * @ndev: In RoCE, the net device of the device. NULL means ignore. * * Returns sgid attributes if the GID is found with valid reference or @@ -719,7 +718,7 @@ EXPORT_SYMBOL(rdma_find_gid_by_port); /** * rdma_find_gid_by_filter - Returns the GID table attribute where a * specified GID value occurs - * @device: The device to query. + * @ib_dev: The device to query. * @gid: The GID value to search for. * @port: The port number of the device where the GID value could be * searched. @@ -728,6 +727,7 @@ EXPORT_SYMBOL(rdma_find_gid_by_port); * otherwise, we continue searching the GID table. It's guaranteed that * while filter is executed, ndev field is valid and the structure won't * change. filter is executed in an atomic context. filter must not be NULL. + * @context: Private data to pass into the call-back. * * rdma_find_gid_by_filter() searches for the specified GID value * of which the filter function returns true in the port's GID table. @@ -1253,7 +1253,6 @@ EXPORT_SYMBOL(rdma_get_gid_attr); * @entries: Entries where GID entries are returned. * @max_entries: Maximum number of entries that can be returned. * Entries array must be allocated to hold max_entries number of entries. - * @num_entries: Updated to the number of entries that were successfully read. * * Returns number of entries on success or appropriate error code. */ From 263520f2dd271e2f11a68b9c47670804c6d645f6 Mon Sep 17 00:00:00 2001 From: Lee Jones Date: Mon, 18 Jan 2021 22:39:15 +0000 Subject: [PATCH 077/414] RDMA/hw/i40iw/i40iw_hw: Provide description for 'ipv4', remove 'user_pri' and fix 'iwcq' Fixes the following W=1 kernel build warning(s): drivers/infiniband/hw/i40iw/i40iw_hw.c:172: warning: Function parameter or member 'iwcq' not described in 'i40iw_iwarp_ce_handler' drivers/infiniband/hw/i40iw/i40iw_hw.c:172: warning: Excess function parameter 'iwcp' description in 'i40iw_iwarp_ce_handler' drivers/infiniband/hw/i40iw/i40iw_hw.c:529: warning: Function parameter or member 'ipv4' not described in 'i40iw_manage_arp_cache' drivers/infiniband/hw/i40iw/i40iw_hw.c:592: warning: Excess function parameter 'user_pri' description in 'i40iw_manage_qhash' Link: https://lore.kernel.org/r/20210118223929.512175-7-lee.jones@linaro.org Cc: Faisal Latif Cc: Shiraz Saleem Cc: Doug Ledford Cc: Jason Gunthorpe Cc: linux-rdma@vger.kernel.org Signed-off-by: Lee Jones Signed-off-by: Jason Gunthorpe --- drivers/infiniband/hw/i40iw/i40iw_hw.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/infiniband/hw/i40iw/i40iw_hw.c b/drivers/infiniband/hw/i40iw/i40iw_hw.c index 56fdc161f6f8..d167ac10c751 100644 --- a/drivers/infiniband/hw/i40iw/i40iw_hw.c +++ b/drivers/infiniband/hw/i40iw/i40iw_hw.c @@ -165,7 +165,7 @@ static void i40iw_cqp_ce_handler(struct i40iw_device *iwdev, struct i40iw_sc_cq /** * i40iw_iwarp_ce_handler - handle iwarp completions * @iwdev: iwarp device - * @iwcp: iwarp cq receiving event + * @iwcq: iwarp cq receiving event */ static void i40iw_iwarp_ce_handler(struct i40iw_device *iwdev, struct i40iw_sc_cq *iwcq) @@ -519,6 +519,7 @@ enum i40iw_status_code i40iw_manage_apbvt(struct i40iw_device *iwdev, * @iwdev: iwarp device * @mac_addr: mac address ptr * @ip_addr: ip addr for arp cache + * @ipv4: flag indicating IPv4 when true * @action: add, delete or modify */ void i40iw_manage_arp_cache(struct i40iw_device *iwdev, @@ -581,7 +582,6 @@ static void i40iw_send_syn_cqp_callback(struct i40iw_cqp_request *cqp_request, u * @mtype: type of qhash * @cmnode: cmnode associated with connection * @wait: wait for completion - * @user_pri:user pri of the connection */ enum i40iw_status_code i40iw_manage_qhash(struct i40iw_device *iwdev, struct i40iw_cm_info *cminfo, From c5e2ee410ba80296259e4466e7a6b1a297ffac62 Mon Sep 17 00:00:00 2001 From: Lee Jones Date: Mon, 18 Jan 2021 22:39:16 +0000 Subject: [PATCH 078/414] RDMA/hw/i40iw/i40iw_main: Rectify some kernel-doc misdemeanours Fixes the following W=1 kernel build warning(s): drivers/infiniband/hw/i40iw/i40iw_main.c:192: warning: Function parameter or member 't' not described in 'i40iw_dpc' drivers/infiniband/hw/i40iw/i40iw_main.c:192: warning: Excess function parameter 'data' description in 'i40iw_dpc' drivers/infiniband/hw/i40iw/i40iw_main.c:206: warning: Function parameter or member 't' not described in 'i40iw_ceq_dpc' drivers/infiniband/hw/i40iw/i40iw_main.c:206: warning: Excess function parameter 'data' description in 'i40iw_ceq_dpc' drivers/infiniband/hw/i40iw/i40iw_main.c:236: warning: Function parameter or member 'free_hwcqp' not described in 'i40iw_destroy_cqp' drivers/infiniband/hw/i40iw/i40iw_main.c:236: warning: Excess function parameter 'create_done' description in 'i40iw_destroy_cqp' drivers/infiniband/hw/i40iw/i40iw_main.c:264: warning: Function parameter or member 'msix_vec' not described in 'i40iw_disable_irq' drivers/infiniband/hw/i40iw/i40iw_main.c:264: warning: Excess function parameter 'msic_vec' description in 'i40iw_disable_irq' drivers/infiniband/hw/i40iw/i40iw_main.c:407: warning: Function parameter or member 'dev' not described in 'i40iw_close_hmc_objects_type' drivers/infiniband/hw/i40iw/i40iw_main.c:407: warning: Function parameter or member 'hmc_info' not described in 'i40iw_close_hmc_objects_type' drivers/infiniband/hw/i40iw/i40iw_main.c:407: warning: Excess function parameter 'iwdev' description in 'i40iw_close_hmc_objects_type' drivers/infiniband/hw/i40iw/i40iw_main.c:443: warning: Function parameter or member 'irq' not described in 'i40iw_ceq_handler' drivers/infiniband/hw/i40iw/i40iw_main.c:1786: warning: Function parameter or member 'reset' not described in 'i40iw_close' Link: https://lore.kernel.org/r/20210118223929.512175-8-lee.jones@linaro.org Cc: Faisal Latif Cc: Shiraz Saleem Cc: Doug Ledford Cc: Jason Gunthorpe Cc: Intel Corporation Cc: linux-rdma@vger.kernel.org Signed-off-by: Lee Jones Signed-off-by: Jason Gunthorpe --- drivers/infiniband/hw/i40iw/i40iw_main.c | 13 ++++++++----- 1 file changed, 8 insertions(+), 5 deletions(-) diff --git a/drivers/infiniband/hw/i40iw/i40iw_main.c b/drivers/infiniband/hw/i40iw/i40iw_main.c index 584932d3cc44..ab4cb11950dc 100644 --- a/drivers/infiniband/hw/i40iw/i40iw_main.c +++ b/drivers/infiniband/hw/i40iw/i40iw_main.c @@ -186,7 +186,7 @@ static void i40iw_enable_intr(struct i40iw_sc_dev *dev, u32 msix_id) /** * i40iw_dpc - tasklet for aeq and ceq 0 - * @data: iwarp device + * @t: Timer context to fetch pointer to iwarp device */ static void i40iw_dpc(struct tasklet_struct *t) { @@ -200,7 +200,7 @@ static void i40iw_dpc(struct tasklet_struct *t) /** * i40iw_ceq_dpc - dpc handler for CEQ - * @data: data points to CEQ + * @t: Timer context to fetch pointer to CEQ data */ static void i40iw_ceq_dpc(struct tasklet_struct *t) { @@ -227,7 +227,7 @@ static irqreturn_t i40iw_irq_handler(int irq, void *data) /** * i40iw_destroy_cqp - destroy control qp * @iwdev: iwarp device - * @create_done: 1 if cqp create poll was success + * @free_hwcqp: 1 if CQP should be destroyed * * Issue destroy cqp request and * free the resources associated with the cqp @@ -253,7 +253,7 @@ static void i40iw_destroy_cqp(struct i40iw_device *iwdev, bool free_hwcqp) /** * i40iw_disable_irqs - disable device interrupts * @dev: hardware control device structure - * @msic_vec: msix vector to disable irq + * @msix_vec: msix vector to disable irq * @dev_id: parameter to pass to free_irq (used during irq setup) * * The function is called when destroying aeq/ceq @@ -394,8 +394,9 @@ static enum i40iw_hmc_rsrc_type iw_hmc_obj_types[] = { /** * i40iw_close_hmc_objects_type - delete hmc objects of a given type - * @iwdev: iwarp device + * @dev: iwarp device * @obj_type: the hmc object type to be deleted + * @hmc_info: pointer to the HMC configuration information * @is_pf: true if the function is PF otherwise false * @reset: true if called before reset */ @@ -437,6 +438,7 @@ static void i40iw_del_hmc_objects(struct i40iw_sc_dev *dev, /** * i40iw_ceq_handler - interrupt handler for ceq + * @irq: interrupt request number * @data: ceq pointer */ static irqreturn_t i40iw_ceq_handler(int irq, void *data) @@ -1777,6 +1779,7 @@ static void i40iw_l2param_change(struct i40e_info *ldev, struct i40e_client *cli /** * i40iw_close - client interface operation close for iwarp/uda device * @ldev: lan device information + * @reset: true if called before reset * @client: client to close * * Called by the lan driver during the processing of client unregister From d71f5fa2f5f28ab1037105c7ec1f4bba028fea40 Mon Sep 17 00:00:00 2001 From: Lee Jones Date: Mon, 18 Jan 2021 22:39:17 +0000 Subject: [PATCH 079/414] RDMA/core/roce_gid_mgmt: Fix misnaming of 'rdma_roce_rescan_device()'s param 'ib_dev' Fixes the following W=1 kernel build warning(s): drivers/infiniband/core/roce_gid_mgmt.c:511: warning: Function parameter or member 'ib_dev' not described in 'rdma_roce_rescan_device' drivers/infiniband/core/roce_gid_mgmt.c:511: warning: Excess function parameter 'device' description in 'rdma_roce_rescan_device' Link: https://lore.kernel.org/r/20210118223929.512175-9-lee.jones@linaro.org Cc: Doug Ledford Cc: Jason Gunthorpe Cc: Taehee Yoo Cc: "David S. Miller" Cc: linux-rdma@vger.kernel.org Signed-off-by: Lee Jones Signed-off-by: Jason Gunthorpe --- drivers/infiniband/core/roce_gid_mgmt.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/infiniband/core/roce_gid_mgmt.c b/drivers/infiniband/core/roce_gid_mgmt.c index 6b8364bb032d..34fff94eaa38 100644 --- a/drivers/infiniband/core/roce_gid_mgmt.c +++ b/drivers/infiniband/core/roce_gid_mgmt.c @@ -505,7 +505,7 @@ static void enum_all_gids_of_dev_cb(struct ib_device *ib_dev, * rdma_roce_rescan_device - Rescan all of the network devices in the system * and add their gids, as needed, to the relevant RoCE devices. * - * @device: the rdma device + * @ib_dev: the rdma device */ void rdma_roce_rescan_device(struct ib_device *ib_dev) { From fa2e3a72a28e947d8ab3cfd58a9f41c48d7b2cdb Mon Sep 17 00:00:00 2001 From: Lee Jones Date: Mon, 18 Jan 2021 22:39:18 +0000 Subject: [PATCH 080/414] RDMA/hw/i40iw/i40iw_pble: Provide description for 'dev' and fix formatting issues Fixes the following W=1 kernel build warning(s): drivers/infiniband/hw/i40iw/i40iw_pble.c:60: warning: Function parameter or member 'dev' not described in 'i40iw_destroy_pble_pool' drivers/infiniband/hw/i40iw/i40iw_pble.c:120: warning: Function parameter or member 'pble_rsrc' not described in 'get_sd_pd_idx' drivers/infiniband/hw/i40iw/i40iw_pble.c:120: warning: Function parameter or member 'idx' not described in 'get_sd_pd_idx' Link: https://lore.kernel.org/r/20210118223929.512175-10-lee.jones@linaro.org Cc: Faisal Latif Cc: Shiraz Saleem Cc: Doug Ledford Cc: Jason Gunthorpe Cc: linux-rdma@vger.kernel.org Signed-off-by: Lee Jones Signed-off-by: Jason Gunthorpe --- drivers/infiniband/hw/i40iw/i40iw_pble.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/drivers/infiniband/hw/i40iw/i40iw_pble.c b/drivers/infiniband/hw/i40iw/i40iw_pble.c index 5f97643e22e5..53e5cd1a2bd6 100644 --- a/drivers/infiniband/hw/i40iw/i40iw_pble.c +++ b/drivers/infiniband/hw/i40iw/i40iw_pble.c @@ -54,6 +54,7 @@ static void i40iw_free_vmalloc_mem(struct i40iw_hw *hw, struct i40iw_chunk *chun /** * i40iw_destroy_pble_pool - destroy pool during module unload + * @dev: i40iw_sc_dev struct * @pble_rsrc: pble resources */ void i40iw_destroy_pble_pool(struct i40iw_sc_dev *dev, struct i40iw_hmc_pble_rsrc *pble_rsrc) @@ -112,8 +113,8 @@ enum i40iw_status_code i40iw_hmc_init_pble(struct i40iw_sc_dev *dev, /** * get_sd_pd_idx - Returns sd index, pd index and rel_pd_idx from fpm address - * @ pble_rsrc: structure containing fpm address - * @ idx: where to return indexes + * @pble_rsrc: structure containing fpm address + * @idx: where to return indexes */ static inline void get_sd_pd_idx(struct i40iw_hmc_pble_rsrc *pble_rsrc, struct sd_pd_idx *idx) From 0c962472d67eb30afafdac1125e3da951c07c564 Mon Sep 17 00:00:00 2001 From: Lee Jones Date: Mon, 18 Jan 2021 22:39:19 +0000 Subject: [PATCH 081/414] RDMA/hw/i40iw/i40iw_puda: Fix some misspellings and provide missing descriptions Fixes the following W=1 kernel build warning(s): drivers/infiniband/hw/i40iw/i40iw_puda.c:517: warning: Function parameter or member 'dev' not described in 'i40iw_puda_qp_wqe' drivers/infiniband/hw/i40iw/i40iw_puda.c:517: warning: Function parameter or member 'qp' not described in 'i40iw_puda_qp_wqe' drivers/infiniband/hw/i40iw/i40iw_puda.c:517: warning: Excess function parameter 'rsrc' description in 'i40iw_puda_qp_wqe' drivers/infiniband/hw/i40iw/i40iw_puda.c:629: warning: Function parameter or member 'dev' not described in 'i40iw_puda_cq_wqe' drivers/infiniband/hw/i40iw/i40iw_puda.c:629: warning: Function parameter or member 'cq' not described in 'i40iw_puda_cq_wqe' drivers/infiniband/hw/i40iw/i40iw_puda.c:629: warning: Excess function parameter 'rsrc' description in 'i40iw_puda_cq_wqe' drivers/infiniband/hw/i40iw/i40iw_puda.c:792: warning: Function parameter or member 'vsi' not described in 'i40iw_puda_dele_resources' drivers/infiniband/hw/i40iw/i40iw_puda.c:792: warning: Excess function parameter 'dev' description in 'i40iw_puda_dele_resources' drivers/infiniband/hw/i40iw/i40iw_puda.c:884: warning: Function parameter or member 'vsi' not described in 'i40iw_puda_create_rsrc' drivers/infiniband/hw/i40iw/i40iw_puda.c:884: warning: Excess function parameter 'dev' description in 'i40iw_puda_create_rsrc' drivers/infiniband/hw/i40iw/i40iw_puda.c:1135: warning: Function parameter or member 'pfpdu' not described in 'i40iw_ieq_create_pbufl' drivers/infiniband/hw/i40iw/i40iw_puda.c:1442: warning: Function parameter or member 'vsi' not described in 'i40iw_ieq_receive' drivers/infiniband/hw/i40iw/i40iw_puda.c:1442: warning: Excess function parameter 'dev' description in 'i40iw_ieq_receive' Link: https://lore.kernel.org/r/20210118223929.512175-11-lee.jones@linaro.org Cc: Faisal Latif Cc: Shiraz Saleem Cc: Doug Ledford Cc: Jason Gunthorpe Cc: linux-rdma@vger.kernel.org Signed-off-by: Lee Jones Signed-off-by: Jason Gunthorpe --- drivers/infiniband/hw/i40iw/i40iw_puda.c | 13 ++++++++----- 1 file changed, 8 insertions(+), 5 deletions(-) diff --git a/drivers/infiniband/hw/i40iw/i40iw_puda.c b/drivers/infiniband/hw/i40iw/i40iw_puda.c index 924be4b03c9a..d1c8cc0a6236 100644 --- a/drivers/infiniband/hw/i40iw/i40iw_puda.c +++ b/drivers/infiniband/hw/i40iw/i40iw_puda.c @@ -511,7 +511,8 @@ static void i40iw_puda_qp_setctx(struct i40iw_puda_rsrc *rsrc) /** * i40iw_puda_qp_wqe - setup wqe for qp create - * @rsrc: resource for qp + * @dev: iwarp device + * @qp: resource for qp */ static enum i40iw_status_code i40iw_puda_qp_wqe(struct i40iw_sc_dev *dev, struct i40iw_sc_qp *qp) { @@ -623,7 +624,8 @@ static enum i40iw_status_code i40iw_puda_qp_create(struct i40iw_puda_rsrc *rsrc) /** * i40iw_puda_cq_wqe - setup wqe for cq create - * @rsrc: resource for cq + * @dev: iwarp device + * @cq: cq to setup */ static enum i40iw_status_code i40iw_puda_cq_wqe(struct i40iw_sc_dev *dev, struct i40iw_sc_cq *cq) { @@ -782,7 +784,7 @@ static void i40iw_puda_free_cq(struct i40iw_puda_rsrc *rsrc) /** * i40iw_puda_dele_resources - delete all resources during close - * @dev: iwarp device + * @vsi: pointer to vsi structure * @type: type of resource to dele * @reset: true if reset chip */ @@ -876,7 +878,7 @@ static enum i40iw_status_code i40iw_puda_allocbufs(struct i40iw_puda_rsrc *rsrc, /** * i40iw_puda_create_rsrc - create resouce (ilq or ieq) - * @dev: iwarp device + * @vsi: pointer to vsi structure * @info: resource information */ enum i40iw_status_code i40iw_puda_create_rsrc(struct i40iw_sc_vsi *vsi, @@ -1121,6 +1123,7 @@ static void i40iw_ieq_compl_pfpdu(struct i40iw_puda_rsrc *ieq, /** * i40iw_ieq_create_pbufl - create buffer list for single fpdu + * @pfpdu: partial management per user qp * @rxlist: resource list for receive ieq buffes * @pbufl: temp. list for buffers for fpddu * @buf: first receive buffer @@ -1434,7 +1437,7 @@ static void i40iw_ieq_handle_exception(struct i40iw_puda_rsrc *ieq, /** * i40iw_ieq_receive - received exception buffer - * @dev: iwarp device + * @vsi: pointer to vsi structure * @buf: exception buffer received */ static void i40iw_ieq_receive(struct i40iw_sc_vsi *vsi, From 1d3194f1d0a4890e8ddcacfdfcb1b3aa08a7c154 Mon Sep 17 00:00:00 2001 From: Lee Jones Date: Mon, 18 Jan 2021 22:39:20 +0000 Subject: [PATCH 082/414] RDMA/core/multicast: Provide description for 'ib_init_ah_from_mcmember()'s 'rec' param Fixes the following W=1 kernel build warning(s): drivers/infiniband/core/multicast.c:739: warning: Function parameter or member 'rec' not described in 'ib_init_ah_from_mcmember' Link: https://lore.kernel.org/r/20210118223929.512175-12-lee.jones@linaro.org Cc: Doug Ledford Cc: Jason Gunthorpe Cc: linux-rdma@vger.kernel.org Signed-off-by: Lee Jones Signed-off-by: Jason Gunthorpe --- drivers/infiniband/core/multicast.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/infiniband/core/multicast.c b/drivers/infiniband/core/multicast.c index 740f03ecc05d..57519ca6cd2c 100644 --- a/drivers/infiniband/core/multicast.c +++ b/drivers/infiniband/core/multicast.c @@ -721,6 +721,7 @@ EXPORT_SYMBOL(ib_sa_get_mcmember_rec); * member record and gid of the device. * @device: RDMA device * @port_num: Port of the rdma device to consider + * @rec: Multicast member record to use * @ndev: Optional netdevice, applicable only for RoCE * @gid_type: GID type to consider * @ah_attr: AH attribute to fillup on successful completion From cf5dd4a891b75cb380baad0948f5825f44492441 Mon Sep 17 00:00:00 2001 From: Lee Jones Date: Mon, 18 Jan 2021 22:39:21 +0000 Subject: [PATCH 083/414] RDMA/core/sa_query: Demote non-conformant kernel-doc header Fixes the following W=1 kernel build warning(s): drivers/infiniband/core/sa_query.c:1449: warning: Function parameter or member 'client' not described in 'opa_pr_query_possible' drivers/infiniband/core/sa_query.c:1449: warning: Function parameter or member 'sa_dev' not described in 'opa_pr_query_possible' drivers/infiniband/core/sa_query.c:1449: warning: Function parameter or member 'device' not described in 'opa_pr_query_possible' drivers/infiniband/core/sa_query.c:1449: warning: Function parameter or member 'port_num' not described in 'opa_pr_query_possible' drivers/infiniband/core/sa_query.c:1449: warning: Function parameter or member 'rec' not described in 'opa_pr_query_possible' Link: https://lore.kernel.org/r/20210118223929.512175-13-lee.jones@linaro.org Cc: Doug Ledford Cc: Jason Gunthorpe Cc: linux-rdma@vger.kernel.org Signed-off-by: Lee Jones Signed-off-by: Jason Gunthorpe --- drivers/infiniband/core/sa_query.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/infiniband/core/sa_query.c b/drivers/infiniband/core/sa_query.c index 89a831fa1885..5bd047042e68 100644 --- a/drivers/infiniband/core/sa_query.c +++ b/drivers/infiniband/core/sa_query.c @@ -1434,7 +1434,7 @@ enum opa_pr_supported { PR_IB_SUPPORTED }; -/** +/* * opa_pr_query_possible - Check if current PR query can be an OPA query. * * Retuns PR_NOT_SUPPORTED if a path record query is not From d246bbdcc757b2d1b0c9987b26d9849997d587e7 Mon Sep 17 00:00:00 2001 From: Lee Jones Date: Mon, 18 Jan 2021 22:39:22 +0000 Subject: [PATCH 084/414] RDMA/hw/i40iw/i40iw_uk: Clean-up some function documentation headers Fixes the following W=1 kernel build warning(s): drivers/infiniband/hw/i40iw/i40iw_uk.c:129: warning: Function parameter or member 'total_size' not described in 'i40iw_qp_get_next_send_wqe' drivers/infiniband/hw/i40iw/i40iw_uk.c:129: warning: Function parameter or member 'wr_id' not described in 'i40iw_qp_get_next_send_wqe' drivers/infiniband/hw/i40iw/i40iw_uk.c:724: warning: Excess function parameter 'post_cq' description in 'i40iw_cq_poll_completion' drivers/infiniband/hw/i40iw/i40iw_uk.c:1058: warning: Function parameter or member 'queue' not described in 'i40iw_clean_cq' Link: https://lore.kernel.org/r/20210118223929.512175-14-lee.jones@linaro.org Cc: Faisal Latif Cc: Shiraz Saleem Cc: Doug Ledford Cc: Jason Gunthorpe Cc: linux-rdma@vger.kernel.org Signed-off-by: Lee Jones Signed-off-by: Jason Gunthorpe --- drivers/infiniband/hw/i40iw/i40iw_uk.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/drivers/infiniband/hw/i40iw/i40iw_uk.c b/drivers/infiniband/hw/i40iw/i40iw_uk.c index c3633c9944db..f521be16bf31 100644 --- a/drivers/infiniband/hw/i40iw/i40iw_uk.c +++ b/drivers/infiniband/hw/i40iw/i40iw_uk.c @@ -119,6 +119,8 @@ void i40iw_qp_post_wr(struct i40iw_qp_uk *qp) * @qp: hw qp ptr * @wqe_idx: return wqe index * @wqe_size: size of sq wqe + * @total_size: work request length + * @wr_id: work request id */ u64 *i40iw_qp_get_next_send_wqe(struct i40iw_qp_uk *qp, u32 *wqe_idx, @@ -717,7 +719,6 @@ static enum i40iw_status_code i40iw_cq_post_entries(struct i40iw_cq_uk *cq, * i40iw_cq_poll_completion - get cq completion info * @cq: hw cq * @info: cq poll information returned - * @post_cq: update cq tail */ static enum i40iw_status_code i40iw_cq_poll_completion(struct i40iw_cq_uk *cq, struct i40iw_cq_poll_info *info) @@ -1051,7 +1052,7 @@ void i40iw_device_init_uk(struct i40iw_dev_uk *dev) /** * i40iw_clean_cq - clean cq entries - * @ queue completion context + * @queue: completion context * @cq: cq to clean */ void i40iw_clean_cq(void *queue, struct i40iw_cq_uk *cq) From dcef82fad6daf5f318277cc1c4042589e975a0ad Mon Sep 17 00:00:00 2001 From: Lee Jones Date: Mon, 18 Jan 2021 22:39:23 +0000 Subject: [PATCH 085/414] RDMA/hw/i40iw/i40iw_virtchnl: Fix a bunch of kernel-doc issues Fixes the following W=1 kernel build warning(s): drivers/infiniband/hw/i40iw/i40iw_virtchnl.c:132: warning: Function parameter or member 'rsrc_type' not described in 'vchnl_vf_send_add_hmc_objs_req' drivers/infiniband/hw/i40iw/i40iw_virtchnl.c:132: warning: Function parameter or member 'start_index' not described in 'vchnl_vf_send_add_hmc_objs_req' drivers/infiniband/hw/i40iw/i40iw_virtchnl.c:132: warning: Function parameter or member 'rsrc_count' not described in 'vchnl_vf_send_add_hmc_objs_req' drivers/infiniband/hw/i40iw/i40iw_virtchnl.c:170: warning: Function parameter or member 'rsrc_type' not described in 'vchnl_vf_send_del_hmc_objs_req' drivers/infiniband/hw/i40iw/i40iw_virtchnl.c:170: warning: Function parameter or member 'start_index' not described in 'vchnl_vf_send_del_hmc_objs_req' drivers/infiniband/hw/i40iw/i40iw_virtchnl.c:170: warning: Function parameter or member 'rsrc_count' not described in 'vchnl_vf_send_del_hmc_objs_req' drivers/infiniband/hw/i40iw/i40iw_virtchnl.c:230: warning: Function parameter or member 'hmc_fcn' not described in 'vchnl_pf_send_get_hmc_fcn_resp' drivers/infiniband/hw/i40iw/i40iw_virtchnl.c:283: warning: Function parameter or member 'op_ret_code' not described in 'vchnl_pf_send_error_resp' drivers/infiniband/hw/i40iw/i40iw_virtchnl.c:305: warning: Function parameter or member 'dev' not described in 'pf_cqp_get_hmc_fcn_callback' drivers/infiniband/hw/i40iw/i40iw_virtchnl.c:305: warning: Function parameter or member 'callback_param' not described in 'pf_cqp_get_hmc_fcn_callback' drivers/infiniband/hw/i40iw/i40iw_virtchnl.c:305: warning: Function parameter or member 'cqe_info' not described in 'pf_cqp_get_hmc_fcn_callback' drivers/infiniband/hw/i40iw/i40iw_virtchnl.c:305: warning: Excess function parameter 'cqp_req_param' description in 'pf_cqp_get_hmc_fcn_callback' drivers/infiniband/hw/i40iw/i40iw_virtchnl.c:305: warning: Excess function parameter 'not_used' description in 'pf_cqp_get_hmc_fcn_callback' drivers/infiniband/hw/i40iw/i40iw_virtchnl.c:337: warning: Function parameter or member 'work_vf_dev' not described in 'pf_add_hmc_obj_callback' drivers/infiniband/hw/i40iw/i40iw_virtchnl.c:337: warning: Excess function parameter 'vf_dev' description in 'pf_add_hmc_obj_callback' drivers/infiniband/hw/i40iw/i40iw_virtchnl.c:412: warning: Function parameter or member 'dev' not described in 'i40iw_vf_init_pestat' Link: https://lore.kernel.org/r/20210118223929.512175-15-lee.jones@linaro.org Cc: Faisal Latif Cc: Shiraz Saleem Cc: Doug Ledford Cc: Jason Gunthorpe Cc: linux-rdma@vger.kernel.org Signed-off-by: Lee Jones Signed-off-by: Jason Gunthorpe --- drivers/infiniband/hw/i40iw/i40iw_virtchnl.c | 19 +++++++++++-------- 1 file changed, 11 insertions(+), 8 deletions(-) diff --git a/drivers/infiniband/hw/i40iw/i40iw_virtchnl.c b/drivers/infiniband/hw/i40iw/i40iw_virtchnl.c index 48fd327f876b..aca9061688ae 100644 --- a/drivers/infiniband/hw/i40iw/i40iw_virtchnl.c +++ b/drivers/infiniband/hw/i40iw/i40iw_virtchnl.c @@ -119,7 +119,7 @@ static enum i40iw_status_code vchnl_vf_send_get_pe_stats_req(struct i40iw_sc_dev return ret_code; } -/** +/* * vchnl_vf_send_add_hmc_objs_req - Add HMC objects * @dev: IWARP device pointer * @vchnl_req: Virtual channel message request pointer @@ -158,9 +158,9 @@ static enum i40iw_status_code vchnl_vf_send_add_hmc_objs_req(struct i40iw_sc_dev * vchnl_vf_send_del_hmc_objs_req - del HMC objects * @dev: IWARP device pointer * @vchnl_req: Virtual channel message request pointer - * @ rsrc_type - resource type to delete - * @ start_index - starting index for resource - * @ rsrc_count - number of resource type to delete + * @rsrc_type: resource type to delete + * @start_index: starting index for resource + * @rsrc_count: number of resource type to delete */ static enum i40iw_status_code vchnl_vf_send_del_hmc_objs_req(struct i40iw_sc_dev *dev, struct i40iw_virtchnl_req *vchnl_req, @@ -222,6 +222,7 @@ static void vchnl_pf_send_get_ver_resp(struct i40iw_sc_dev *dev, * @dev: IWARP device pointer * @vf_id: Virtual function ID associated with the message * @vchnl_msg: Virtual channel message buffer pointer + * @hmc_fcn: HMC function index pointer */ static void vchnl_pf_send_get_hmc_fcn_resp(struct i40iw_sc_dev *dev, u32 vf_id, @@ -276,6 +277,7 @@ static void vchnl_pf_send_get_pe_stats_resp(struct i40iw_sc_dev *dev, * @dev: IWARP device pointer * @vf_id: Virtual function ID associated with the message * @vchnl_msg: Virtual channel message buffer pointer + * @op_ret_code: I40IW_ERR_* status code */ static void vchnl_pf_send_error_resp(struct i40iw_sc_dev *dev, u32 vf_id, struct i40iw_virtchnl_op_buf *vchnl_msg, @@ -297,8 +299,9 @@ static void vchnl_pf_send_error_resp(struct i40iw_sc_dev *dev, u32 vf_id, /** * pf_cqp_get_hmc_fcn_callback - Callback for Get HMC Fcn - * @cqp_req_param: CQP Request param value - * @not_used: unused CQP callback parameter + * @dev: IWARP device pointer + * @callback_param: unused CQP callback parameter + * @cqe_info: CQE information pointer */ static void pf_cqp_get_hmc_fcn_callback(struct i40iw_sc_dev *dev, void *callback_param, struct i40iw_ccq_cqe_info *cqe_info) @@ -331,7 +334,7 @@ static void pf_cqp_get_hmc_fcn_callback(struct i40iw_sc_dev *dev, void *callback /** * pf_add_hmc_obj - Callback for Add HMC Object - * @vf_dev: pointer to the VF Device + * @work_vf_dev: pointer to the VF Device */ static void pf_add_hmc_obj_callback(void *work_vf_dev) { @@ -404,7 +407,7 @@ static void pf_del_hmc_obj_callback(void *work_vf_dev) /** * i40iw_vf_init_pestat - Initialize stats for VF - * @devL pointer to the VF Device + * @dev: pointer to the VF Device * @stats: Statistics structure pointer * @index: Stats index */ From 737db46d29c0e80327bb0d2f46e5fa6ef27025a9 Mon Sep 17 00:00:00 2001 From: Lee Jones Date: Mon, 18 Jan 2021 22:39:24 +0000 Subject: [PATCH 086/414] RDMA/hw/i40iw/i40iw_utils: Fix some misspellings and missing param descriptions Fixes the following W=1 kernel build warning(s): drivers/infiniband/hw/i40iw/i40iw_utils.c:66: warning: Function parameter or member 'ipv4' not described in 'i40iw_arp_table' drivers/infiniband/hw/i40iw/i40iw_utils.c:148: warning: Function parameter or member 'notifier' not described in 'i40iw_inetaddr_event' drivers/infiniband/hw/i40iw/i40iw_utils.c:148: warning: Excess function parameter 'notfier' description in 'i40iw_inetaddr_event' drivers/infiniband/hw/i40iw/i40iw_utils.c:224: warning: Function parameter or member 'notifier' not described in 'i40iw_inet6addr_event' drivers/infiniband/hw/i40iw/i40iw_utils.c:224: warning: Excess function parameter 'notfier' description in 'i40iw_inet6addr_event' drivers/infiniband/hw/i40iw/i40iw_utils.c:273: warning: Function parameter or member 'notifier' not described in 'i40iw_net_event' drivers/infiniband/hw/i40iw/i40iw_utils.c:273: warning: Excess function parameter 'notfier' description in 'i40iw_net_event' drivers/infiniband/hw/i40iw/i40iw_utils.c:320: warning: Function parameter or member 'notifier' not described in 'i40iw_netdevice_event' drivers/infiniband/hw/i40iw/i40iw_utils.c:320: warning: Excess function parameter 'notfier' description in 'i40iw_netdevice_event' drivers/infiniband/hw/i40iw/i40iw_utils.c:663: warning: Function parameter or member 'desc' not described in 'i40iw_debug_buf' drivers/infiniband/hw/i40iw/i40iw_utils.c:792: warning: Function parameter or member 'sdinfo' not described in 'i40iw_cqp_sds_cmd' drivers/infiniband/hw/i40iw/i40iw_utils.c:792: warning: Excess function parameter 'sd_info' description in 'i40iw_cqp_sds_cmd' drivers/infiniband/hw/i40iw/i40iw_utils.c:895: warning: Function parameter or member 't' not described in 'i40iw_terminate_timeout' drivers/infiniband/hw/i40iw/i40iw_utils.c:895: warning: Excess function parameter 'context' description in 'i40iw_terminate_timeout' drivers/infiniband/hw/i40iw/i40iw_utils.c:953: warning: Function parameter or member 'dev' not described in 'i40iw_cqp_spawn_worker' drivers/infiniband/hw/i40iw/i40iw_utils.c:953: warning: Excess function parameter 'iwdev' description in 'i40iw_cqp_spawn_worker' drivers/infiniband/hw/i40iw/i40iw_utils.c:1058: warning: Function parameter or member 'dev' not described in 'i40iw_cqp_query_fpm_values_cmd' drivers/infiniband/hw/i40iw/i40iw_utils.c:1058: warning: Excess function parameter 'iwdev' description in 'i40iw_cqp_query_fpm_values_cmd' drivers/infiniband/hw/i40iw/i40iw_utils.c:1120: warning: Function parameter or member 'dev' not described in 'i40iw_vf_wait_vchnl_resp' drivers/infiniband/hw/i40iw/i40iw_utils.c:1120: warning: Excess function parameter 'iwdev' description in 'i40iw_vf_wait_vchnl_resp' drivers/infiniband/hw/i40iw/i40iw_utils.c:1467: warning: Function parameter or member 't' not described in 'i40iw_hw_stats_timeout' drivers/infiniband/hw/i40iw/i40iw_utils.c:1467: warning: Excess function parameter 'vsi' description in 'i40iw_hw_stats_timeout' Link: https://lore.kernel.org/r/20210118223929.512175-16-lee.jones@linaro.org Cc: Faisal Latif Cc: Shiraz Saleem Cc: Doug Ledford Cc: Jason Gunthorpe Cc: linux-rdma@vger.kernel.org Signed-off-by: Lee Jones Signed-off-by: Jason Gunthorpe --- drivers/infiniband/hw/i40iw/i40iw_utils.c | 22 ++++++++++++---------- 1 file changed, 12 insertions(+), 10 deletions(-) diff --git a/drivers/infiniband/hw/i40iw/i40iw_utils.c b/drivers/infiniband/hw/i40iw/i40iw_utils.c index 644f8c641aa0..76f052b12c14 100644 --- a/drivers/infiniband/hw/i40iw/i40iw_utils.c +++ b/drivers/infiniband/hw/i40iw/i40iw_utils.c @@ -55,6 +55,7 @@ * i40iw_arp_table - manage arp table * @iwdev: iwarp device * @ip_addr: ip address for device + * @ipv4: flag indicating IPv4 when true * @mac_addr: mac address ptr * @action: modify, delete or add */ @@ -138,7 +139,7 @@ inline u32 i40iw_rd32(struct i40iw_hw *hw, u32 reg) /** * i40iw_inetaddr_event - system notifier for ipv4 addr events - * @notfier: not used + * @notifier: not used * @event: event for notifier * @ptr: if address */ @@ -214,7 +215,7 @@ int i40iw_inetaddr_event(struct notifier_block *notifier, /** * i40iw_inet6addr_event - system notifier for ipv6 addr events - * @notfier: not used + * @notifier: not used * @event: event for notifier * @ptr: if address */ @@ -265,7 +266,7 @@ int i40iw_inet6addr_event(struct notifier_block *notifier, /** * i40iw_net_event - system notifier for netevents - * @notfier: not used + * @notifier: not used * @event: event for notifier * @ptr: neighbor */ @@ -310,7 +311,7 @@ int i40iw_net_event(struct notifier_block *notifier, unsigned long event, void * /** * i40iw_netdevice_event - system notifier for netdev events - * @notfier: not used + * @notifier: not used * @event: event for notifier * @ptr: netdev */ @@ -652,6 +653,7 @@ struct ib_qp *i40iw_get_qp(struct ib_device *device, int qpn) * i40iw_debug_buf - print debug msg and buffer is mask set * @dev: hardware control device structure * @mask: mask to compare if to print debug buffer + * @desc: identifying string * @buf: points buffer addr * @size: saize of buffer to print */ @@ -784,7 +786,7 @@ enum i40iw_status_code i40iw_free_virt_mem(struct i40iw_hw *hw, /** * i40iw_cqp_sds_cmd - create cqp command for sd * @dev: hardware control device structure - * @sd_info: information for sd cqp + * @sdinfo: information for sd cqp * */ enum i40iw_status_code i40iw_cqp_sds_cmd(struct i40iw_sc_dev *dev, @@ -889,7 +891,7 @@ void i40iw_terminate_done(struct i40iw_sc_qp *qp, int timeout_occurred) /** * i40iw_terminate_imeout - timeout happened - * @context: points to iwarp qp + * @t: points to iwarp qp */ static void i40iw_terminate_timeout(struct timer_list *t) { @@ -943,7 +945,7 @@ static void i40iw_cqp_generic_worker(struct work_struct *work) /** * i40iw_cqp_spawn_worker - spawn worket thread - * @iwdev: device struct pointer + * @dev: device struct pointer * @work_info: work request info * @iw_vf_idx: virtual function index */ @@ -1048,7 +1050,7 @@ enum i40iw_status_code i40iw_cqp_manage_hmc_fcn_cmd(struct i40iw_sc_dev *dev, /** * i40iw_cqp_query_fpm_values_cmd - send cqp command for fpm - * @iwdev: function device struct + * @dev: function device struct * @values_mem: buffer for fpm * @hmc_fn_id: function id for fpm */ @@ -1114,7 +1116,7 @@ enum i40iw_status_code i40iw_cqp_commit_fpm_values_cmd(struct i40iw_sc_dev *dev, /** * i40iw_vf_wait_vchnl_resp - wait for channel msg - * @iwdev: function's device struct + * @dev: function's device struct */ enum i40iw_status_code i40iw_vf_wait_vchnl_resp(struct i40iw_sc_dev *dev) { @@ -1461,7 +1463,7 @@ enum i40iw_status_code i40iw_puda_get_tcpip_info(struct i40iw_puda_completion_in /** * i40iw_hw_stats_timeout - Stats timer-handler which updates all HW stats - * @vsi: pointer to the vsi structure + * @t: Timer context containing pointer to the vsi structure */ static void i40iw_hw_stats_timeout(struct timer_list *t) { From 554c3b0b8079ea7367d4715c259569bba2237008 Mon Sep 17 00:00:00 2001 From: Lee Jones Date: Mon, 18 Jan 2021 22:39:25 +0000 Subject: [PATCH 087/414] RDMA/core/restrack: Fix kernel-doc formatting issue Fixes the following W=1 kernel build warning(s): drivers/infiniband/core/restrack.c:209: warning: Function parameter or member 'res' not described in 'rdma_restrack_new' drivers/infiniband/core/restrack.c:209: warning: Function parameter or member 'type' not described in 'rdma_restrack_new' Link: https://lore.kernel.org/r/20210118223929.512175-17-lee.jones@linaro.org Cc: Doug Ledford Cc: Jason Gunthorpe Cc: Leon Romanovsky Cc: linux-rdma@vger.kernel.org Signed-off-by: Lee Jones Signed-off-by: Jason Gunthorpe --- drivers/infiniband/core/restrack.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/infiniband/core/restrack.c b/drivers/infiniband/core/restrack.c index ff1551b3cf61..ffabaf327242 100644 --- a/drivers/infiniband/core/restrack.c +++ b/drivers/infiniband/core/restrack.c @@ -201,8 +201,8 @@ EXPORT_SYMBOL(rdma_restrack_parent_name); /** * rdma_restrack_new() - Initializes new restrack entry to allow _put() interface * to release memory in fully automatic way. - * @res - Entry to initialize - * @type - REstrack type + * @res: Entry to initialize + * @type: REstrack type */ void rdma_restrack_new(struct rdma_restrack_entry *res, enum rdma_restrack_type type) From 1896e5240883cb77035da463d05c7e67f5b6da4c Mon Sep 17 00:00:00 2001 From: Lee Jones Date: Mon, 18 Jan 2021 22:39:26 +0000 Subject: [PATCH 088/414] RDMA/hw/i40iw/i40iw_verbs: Fix worthy function headers and demote some others Fixes the following W=1 kernel build warning(s): drivers/infiniband/hw/i40iw/i40iw_verbs.c:273: warning: Excess function parameter 'iwdev' description in 'i40iw_free_qp_resources' drivers/infiniband/hw/i40iw/i40iw_verbs.c:273: warning: Excess function parameter 'qp_num' description in 'i40iw_free_qp_resources' drivers/infiniband/hw/i40iw/i40iw_verbs.c:307: warning: Function parameter or member 'udata' not described in 'i40iw_destroy_qp' drivers/infiniband/hw/i40iw/i40iw_verbs.c:348: warning: Function parameter or member 'iwdev' not described in 'i40iw_setup_virt_qp' drivers/infiniband/hw/i40iw/i40iw_verbs.c:348: warning: Function parameter or member 'iwqp' not described in 'i40iw_setup_virt_qp' drivers/infiniband/hw/i40iw/i40iw_verbs.c:348: warning: Excess function parameter 'dev' description in 'i40iw_setup_virt_qp' drivers/infiniband/hw/i40iw/i40iw_verbs.c:348: warning: Excess function parameter 'qp' description in 'i40iw_setup_virt_qp' drivers/infiniband/hw/i40iw/i40iw_verbs.c:1248: warning: Function parameter or member 'pg_size' not described in 'i40iw_check_mem_contiguous' drivers/infiniband/hw/i40iw/i40iw_verbs.c:1264: warning: Function parameter or member 'pg_size' not described in 'i40iw_check_mr_contiguous' drivers/infiniband/hw/i40iw/i40iw_verbs.c:1539: warning: Function parameter or member 'sg_offset' not described in 'i40iw_map_mr_sg' drivers/infiniband/hw/i40iw/i40iw_verbs.c:1886: warning: Function parameter or member 'udata' not described in 'i40iw_dereg_mr' drivers/infiniband/hw/i40iw/i40iw_verbs.c:1953: warning: Function parameter or member 'dev' not described in 'hw_rev_show' drivers/infiniband/hw/i40iw/i40iw_verbs.c:1953: warning: Function parameter or member 'attr' not described in 'hw_rev_show' drivers/infiniband/hw/i40iw/i40iw_verbs.c:1953: warning: Function parameter or member 'buf' not described in 'hw_rev_show' drivers/infiniband/hw/i40iw/i40iw_verbs.c:1967: warning: Function parameter or member 'dev' not described in 'hca_type_show' drivers/infiniband/hw/i40iw/i40iw_verbs.c:1967: warning: Function parameter or member 'attr' not described in 'hca_type_show' drivers/infiniband/hw/i40iw/i40iw_verbs.c:1967: warning: Function parameter or member 'buf' not described in 'hca_type_show' drivers/infiniband/hw/i40iw/i40iw_verbs.c:1977: warning: Function parameter or member 'dev' not described in 'board_id_show' drivers/infiniband/hw/i40iw/i40iw_verbs.c:1977: warning: Function parameter or member 'attr' not described in 'board_id_show' drivers/infiniband/hw/i40iw/i40iw_verbs.c:1977: warning: Function parameter or member 'buf' not described in 'board_id_show' Link: https://lore.kernel.org/r/20210118223929.512175-18-lee.jones@linaro.org Cc: Faisal Latif Cc: Shiraz Saleem Cc: Doug Ledford Cc: Jason Gunthorpe Cc: linux-rdma@vger.kernel.org Signed-off-by: Lee Jones Signed-off-by: Jason Gunthorpe --- drivers/infiniband/hw/i40iw/i40iw_verbs.c | 19 ++++++++++--------- 1 file changed, 10 insertions(+), 9 deletions(-) diff --git a/drivers/infiniband/hw/i40iw/i40iw_verbs.c b/drivers/infiniband/hw/i40iw/i40iw_verbs.c index 65aedfe57e77..f18d146a6079 100644 --- a/drivers/infiniband/hw/i40iw/i40iw_verbs.c +++ b/drivers/infiniband/hw/i40iw/i40iw_verbs.c @@ -265,9 +265,7 @@ static struct i40iw_pbl *i40iw_get_pbl(unsigned long va, /** * i40iw_free_qp_resources - free up memory resources for qp - * @iwdev: iwarp device * @iwqp: qp ptr (user or kernel) - * @qp_num: qp number assigned */ void i40iw_free_qp_resources(struct i40iw_qp *iwqp) { @@ -302,6 +300,7 @@ static void i40iw_clean_cqes(struct i40iw_qp *iwqp, struct i40iw_cq *iwcq) /** * i40iw_destroy_qp - destroy qp * @ibqp: qp's ib pointer also to get to device's qp address + * @udata: user data */ static int i40iw_destroy_qp(struct ib_qp *ibqp, struct ib_udata *udata) { @@ -338,8 +337,8 @@ static int i40iw_destroy_qp(struct ib_qp *ibqp, struct ib_udata *udata) /** * i40iw_setup_virt_qp - setup for allocation of virtual qp - * @dev: iwarp device - * @qp: qp ptr + * @iwdev: iwarp device + * @iwqp: qp ptr * @init_info: initialize info to return */ static int i40iw_setup_virt_qp(struct i40iw_device *iwdev, @@ -1241,7 +1240,7 @@ static void i40iw_copy_user_pgaddrs(struct i40iw_mr *iwmr, * i40iw_check_mem_contiguous - check if pbls stored in arr are contiguous * @arr: lvl1 pbl array * @npages: page count - * pg_size: page size + * @pg_size: page size * */ static bool i40iw_check_mem_contiguous(u64 *arr, u32 npages, u32 pg_size) @@ -1258,7 +1257,7 @@ static bool i40iw_check_mem_contiguous(u64 *arr, u32 npages, u32 pg_size) /** * i40iw_check_mr_contiguous - check if MR is physically contiguous * @palloc: pbl allocation struct - * pg_size: page size + * @pg_size: page size */ static bool i40iw_check_mr_contiguous(struct i40iw_pble_alloc *palloc, u32 pg_size) { @@ -1533,6 +1532,7 @@ static int i40iw_set_page(struct ib_mr *ibmr, u64 addr) * @ibmr: ib mem to access iwarp mr pointer * @sg: scatter gather list for fmr * @sg_nents: number of sg pages + * @sg_offset: scatter gather offset */ static int i40iw_map_mr_sg(struct ib_mr *ibmr, struct scatterlist *sg, int sg_nents, unsigned int *sg_offset) @@ -1881,6 +1881,7 @@ static void i40iw_del_memlist(struct i40iw_mr *iwmr, /** * i40iw_dereg_mr - deregister mr * @ib_mr: mr ptr for dereg + * @udata: user data */ static int i40iw_dereg_mr(struct ib_mr *ib_mr, struct ib_udata *udata) { @@ -1945,7 +1946,7 @@ static int i40iw_dereg_mr(struct ib_mr *ib_mr, struct ib_udata *udata) return 0; } -/** +/* * hw_rev_show */ static ssize_t hw_rev_show(struct device *dev, @@ -1959,7 +1960,7 @@ static ssize_t hw_rev_show(struct device *dev, } static DEVICE_ATTR_RO(hw_rev); -/** +/* * hca_type_show */ static ssize_t hca_type_show(struct device *dev, @@ -1969,7 +1970,7 @@ static ssize_t hca_type_show(struct device *dev, } static DEVICE_ATTR_RO(hca_type); -/** +/* * board_id_show */ static ssize_t board_id_show(struct device *dev, From 78f20653531eaa53e3c56a8dd5b34350dbb54780 Mon Sep 17 00:00:00 2001 From: Lee Jones Date: Mon, 18 Jan 2021 22:39:27 +0000 Subject: [PATCH 089/414] RDMA/core/counters: Demote non-conformant kernel-doc headers Fixes the following W=1 kernel build warning(s): drivers/infiniband/core/counters.c:36: warning: Function parameter or member 'dev' not described in 'rdma_counter_set_auto_mode' drivers/infiniband/core/counters.c:36: warning: Function parameter or member 'port' not described in 'rdma_counter_set_auto_mode' drivers/infiniband/core/counters.c:36: warning: Function parameter or member 'on' not described in 'rdma_counter_set_auto_mode' drivers/infiniband/core/counters.c:36: warning: Function parameter or member 'mask' not described in 'rdma_counter_set_auto_mode' drivers/infiniband/core/counters.c:238: warning: Function parameter or member 'qp' not described in 'rdma_get_counter_auto_mode' drivers/infiniband/core/counters.c:238: warning: Function parameter or member 'port' not described in 'rdma_get_counter_auto_mode' drivers/infiniband/core/counters.c:282: warning: Function parameter or member 'qp' not described in 'rdma_counter_bind_qp_auto' drivers/infiniband/core/counters.c:282: warning: Function parameter or member 'port' not described in 'rdma_counter_bind_qp_auto' drivers/infiniband/core/counters.c:320: warning: Function parameter or member 'qp' not described in 'rdma_counter_unbind_qp' drivers/infiniband/core/counters.c:388: warning: Function parameter or member 'dev' not described in 'rdma_counter_get_hwstat_value' drivers/infiniband/core/counters.c:388: warning: Function parameter or member 'port' not described in 'rdma_counter_get_hwstat_value' drivers/infiniband/core/counters.c:388: warning: Function parameter or member 'index' not described in 'rdma_counter_get_hwstat_value' drivers/infiniband/core/counters.c:444: warning: Function parameter or member 'dev' not described in 'rdma_counter_bind_qpn' drivers/infiniband/core/counters.c:444: warning: Function parameter or member 'port' not described in 'rdma_counter_bind_qpn' drivers/infiniband/core/counters.c:444: warning: Function parameter or member 'qp_num' not described in 'rdma_counter_bind_qpn' drivers/infiniband/core/counters.c:444: warning: Function parameter or member 'counter_id' not described in 'rdma_counter_bind_qpn' drivers/infiniband/core/counters.c:494: warning: Function parameter or member 'dev' not described in 'rdma_counter_bind_qpn_alloc' drivers/infiniband/core/counters.c:494: warning: Function parameter or member 'port' not described in 'rdma_counter_bind_qpn_alloc' drivers/infiniband/core/counters.c:494: warning: Function parameter or member 'qp_num' not described in 'rdma_counter_bind_qpn_alloc' drivers/infiniband/core/counters.c:494: warning: Function parameter or member 'counter_id' not described in 'rdma_counter_bind_qpn_alloc' drivers/infiniband/core/counters.c:541: warning: Function parameter or member 'dev' not described in 'rdma_counter_unbind_qpn' drivers/infiniband/core/counters.c:541: warning: Function parameter or member 'port' not described in 'rdma_counter_unbind_qpn' drivers/infiniband/core/counters.c:541: warning: Function parameter or member 'qp_num' not described in 'rdma_counter_unbind_qpn' drivers/infiniband/core/counters.c:541: warning: Function parameter or member 'counter_id' not described in 'rdma_counter_unbind_qpn' Link: https://lore.kernel.org/r/20210118223929.512175-19-lee.jones@linaro.org Cc: Doug Ledford Cc: Jason Gunthorpe Cc: Leon Romanovsky Cc: linux-rdma@vger.kernel.org Signed-off-by: Lee Jones Signed-off-by: Jason Gunthorpe --- drivers/infiniband/core/counters.c | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/drivers/infiniband/core/counters.c b/drivers/infiniband/core/counters.c index 2b9a1ee7a160..f3a7c1f404af 100644 --- a/drivers/infiniband/core/counters.c +++ b/drivers/infiniband/core/counters.c @@ -24,7 +24,7 @@ static int __counter_set_mode(struct rdma_port_counter *port_counter, return 0; } -/** +/* * rdma_counter_set_auto_mode() - Turn on/off per-port auto mode * * @dev: Device to operate @@ -231,7 +231,7 @@ static void counter_history_stat_update(struct rdma_counter *counter) port_counter->hstats->value[i] += counter->stats->value[i]; } -/** +/* * rdma_get_counter_auto_mode - Find the counter that @qp should be bound * with in auto mode * @@ -278,7 +278,7 @@ static void counter_release(struct kref *kref) rdma_counter_free(counter); } -/** +/* * rdma_counter_bind_qp_auto - Check and bind the QP to a counter base on * the auto-mode rule */ @@ -315,7 +315,7 @@ int rdma_counter_bind_qp_auto(struct ib_qp *qp, u8 port) return 0; } -/** +/* * rdma_counter_unbind_qp - Unbind a qp from a counter * @force: * true - Decrease the counter ref-count anyway (e.g., qp destroy) @@ -384,7 +384,7 @@ static u64 get_running_counters_hwstat_sum(struct ib_device *dev, return sum; } -/** +/* * rdma_counter_get_hwstat_value() - Get the sum value of all counters on a * specific port, including the running ones and history data */ @@ -440,7 +440,7 @@ static struct rdma_counter *rdma_get_counter_by_id(struct ib_device *dev, return counter; } -/** +/* * rdma_counter_bind_qpn() - Bind QP @qp_num to counter @counter_id */ int rdma_counter_bind_qpn(struct ib_device *dev, u8 port, @@ -489,7 +489,7 @@ int rdma_counter_bind_qpn(struct ib_device *dev, u8 port, return ret; } -/** +/* * rdma_counter_bind_qpn_alloc() - Alloc a counter and bind QP @qp_num to it * The id of new counter is returned in @counter_id */ @@ -537,7 +537,7 @@ int rdma_counter_bind_qpn_alloc(struct ib_device *dev, u8 port, return ret; } -/** +/* * rdma_counter_unbind_qpn() - Unbind QP @qp_num from a counter */ int rdma_counter_unbind_qpn(struct ib_device *dev, u8 port, From db038e70f82ee912ebb5564588d665f3d60eca32 Mon Sep 17 00:00:00 2001 From: Lee Jones Date: Mon, 18 Jan 2021 22:39:28 +0000 Subject: [PATCH 090/414] RDMA/core/iwpm_util: Fix some param description misspellings Fixes the following W=1 kernel build warning(s): drivers/infiniband/core/iwpm_util.c:138: warning: Function parameter or member 'local_sockaddr' not described in 'iwpm_create_mapinfo' drivers/infiniband/core/iwpm_util.c:138: warning: Function parameter or member 'mapped_sockaddr' not described in 'iwpm_create_mapinfo' drivers/infiniband/core/iwpm_util.c:138: warning: Excess function parameter 'local_addr' description in 'iwpm_create_mapinfo' drivers/infiniband/core/iwpm_util.c:138: warning: Excess function parameter 'mapped_addr' description in 'iwpm_create_mapinfo' drivers/infiniband/core/iwpm_util.c:185: warning: Function parameter or member 'local_sockaddr' not described in 'iwpm_remove_mapinfo' drivers/infiniband/core/iwpm_util.c:185: warning: Excess function parameter 'local_addr' description in 'iwpm_remove_mapinfo' Link: https://lore.kernel.org/r/20210118223929.512175-20-lee.jones@linaro.org Cc: Doug Ledford Cc: Jason Gunthorpe Cc: linux-rdma@vger.kernel.org Signed-off-by: Lee Jones Signed-off-by: Jason Gunthorpe --- drivers/infiniband/core/iwpm_util.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/drivers/infiniband/core/iwpm_util.c b/drivers/infiniband/core/iwpm_util.c index 13495b43dbc1..f80e5550b51f 100644 --- a/drivers/infiniband/core/iwpm_util.c +++ b/drivers/infiniband/core/iwpm_util.c @@ -127,8 +127,8 @@ static struct hlist_head *get_mapinfo_hash_bucket(struct sockaddr_storage *, /** * iwpm_create_mapinfo - Store local and mapped IPv4/IPv6 address * info in a hash table - * @local_addr: Local ip/tcp address - * @mapped_addr: Mapped local ip/tcp address + * @local_sockaddr: Local ip/tcp address + * @mapped_sockaddr: Mapped local ip/tcp address * @nl_client: The index of the netlink client * @map_flags: IWPM mapping flags */ @@ -174,7 +174,7 @@ int iwpm_create_mapinfo(struct sockaddr_storage *local_sockaddr, /** * iwpm_remove_mapinfo - Remove local and mapped IPv4/IPv6 address * info from the hash table - * @local_addr: Local ip/tcp address + * @local_sockaddr: Local ip/tcp address * @mapped_local_addr: Mapped local ip/tcp address * * Returns err code if mapping info is not found in the hash table, From abfa456555f4c2bde191cf12064168a05a2865a1 Mon Sep 17 00:00:00 2001 From: Lee Jones Date: Mon, 18 Jan 2021 22:39:29 +0000 Subject: [PATCH 091/414] RDMA/core/iwpm_msg: Add proper descriptions for 'skb' param Fixes the following W=1 kernel build warning(s): drivers/infiniband/core/iwpm_msg.c:402: warning: Function parameter or member 'skb' not described in 'iwpm_register_pid_cb' drivers/infiniband/core/iwpm_msg.c:475: warning: Function parameter or member 'skb' not described in 'iwpm_add_mapping_cb' drivers/infiniband/core/iwpm_msg.c:553: warning: Function parameter or member 'skb' not described in 'iwpm_add_and_query_mapping_cb' drivers/infiniband/core/iwpm_msg.c:636: warning: Function parameter or member 'skb' not described in 'iwpm_remote_info_cb' drivers/infiniband/core/iwpm_msg.c:716: warning: Function parameter or member 'skb' not described in 'iwpm_mapping_info_cb' drivers/infiniband/core/iwpm_msg.c:773: warning: Function parameter or member 'skb' not described in 'iwpm_ack_mapping_info_cb' drivers/infiniband/core/iwpm_msg.c:803: warning: Function parameter or member 'skb' not described in 'iwpm_mapping_error_cb' drivers/infiniband/core/iwpm_msg.c:851: warning: Function parameter or member 'skb' not described in 'iwpm_hello_cb' Link: https://lore.kernel.org/r/20210118223929.512175-21-lee.jones@linaro.org Cc: Doug Ledford Cc: Jason Gunthorpe Cc: linux-rdma@vger.kernel.org Signed-off-by: Lee Jones Signed-off-by: Jason Gunthorpe --- drivers/infiniband/core/iwpm_msg.c | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/drivers/infiniband/core/iwpm_msg.c b/drivers/infiniband/core/iwpm_msg.c index 46686990a827..30a0ff76b332 100644 --- a/drivers/infiniband/core/iwpm_msg.c +++ b/drivers/infiniband/core/iwpm_msg.c @@ -392,7 +392,7 @@ static const struct nla_policy resp_reg_policy[IWPM_NLA_RREG_PID_MAX] = { /** * iwpm_register_pid_cb - Process the port mapper response to * iwpm_register_pid query - * @skb: + * @skb: The socket buffer * @cb: Contains the received message (payload and netlink header) * * If successful, the function receives the userspace port mapper pid @@ -468,7 +468,7 @@ static const struct nla_policy resp_add_policy[IWPM_NLA_RMANAGE_MAPPING_MAX] = { /** * iwpm_add_mapping_cb - Process the port mapper response to * iwpm_add_mapping request - * @skb: + * @skb: The socket buffer * @cb: Contains the received message (payload and netlink header) */ int iwpm_add_mapping_cb(struct sk_buff *skb, struct netlink_callback *cb) @@ -545,7 +545,7 @@ static const struct nla_policy resp_query_policy[IWPM_NLA_RQUERY_MAPPING_MAX] = /** * iwpm_add_and_query_mapping_cb - Process the port mapper response to * iwpm_add_and_query_mapping request - * @skb: + * @skb: The socket buffer * @cb: Contains the received message (payload and netlink header) */ int iwpm_add_and_query_mapping_cb(struct sk_buff *skb, @@ -627,7 +627,7 @@ int iwpm_add_and_query_mapping_cb(struct sk_buff *skb, /** * iwpm_remote_info_cb - Process remote connecting peer address info, which * the port mapper has received from the connecting peer - * @skb: + * @skb: The socket buffer * @cb: Contains the received message (payload and netlink header) * * Stores the IPv4/IPv6 address info in a hash table @@ -706,7 +706,7 @@ static const struct nla_policy resp_mapinfo_policy[IWPM_NLA_MAPINFO_REQ_MAX] = { /** * iwpm_mapping_info_cb - Process a notification that the userspace * port mapper daemon is started - * @skb: + * @skb: The socket buffer * @cb: Contains the received message (payload and netlink header) * * Using the received port mapper pid, send all the local mapping @@ -766,7 +766,7 @@ static const struct nla_policy ack_mapinfo_policy[IWPM_NLA_MAPINFO_NUM_MAX] = { /** * iwpm_ack_mapping_info_cb - Process the port mapper ack for * the provided local mapping info records - * @skb: + * @skb: The socket buffer * @cb: Contains the received message (payload and netlink header) */ int iwpm_ack_mapping_info_cb(struct sk_buff *skb, struct netlink_callback *cb) @@ -796,7 +796,7 @@ static const struct nla_policy map_error_policy[IWPM_NLA_ERR_MAX] = { /** * iwpm_mapping_error_cb - Process port mapper notification for error * - * @skb: + * @skb: The socket buffer * @cb: Contains the received message (payload and netlink header) */ int iwpm_mapping_error_cb(struct sk_buff *skb, struct netlink_callback *cb) @@ -841,7 +841,7 @@ static const struct nla_policy hello_policy[IWPM_NLA_HELLO_MAX] = { /** * iwpm_hello_cb - Process a hello message from iwpmd * - * @skb: + * @skb: The socket buffer * @cb: Contains the received message (payload and netlink header) * * Using the received port mapper pid, send the kernel's abi_version From 368c0159d492d7fbdb5791b40c9263ec4e97a10f Mon Sep 17 00:00:00 2001 From: Jianxin Xiong Date: Tue, 15 Dec 2020 13:27:13 -0800 Subject: [PATCH 092/414] RDMA/umem: Support importing dma-buf as user memory region Dma-buf is a standard cross-driver buffer sharing mechanism that can be used to support peer-to-peer access from RDMA devices. Device memory exported via dma-buf is associated with a file descriptor. This is passed to the user space as a property associated with the buffer allocation. When the buffer is registered as a memory region, the file descriptor is passed to the RDMA driver along with other parameters. Implement the common code for importing dma-buf object and mapping dma-buf pages. Link: https://lore.kernel.org/r/1608067636-98073-2-git-send-email-jianxin.xiong@intel.com Signed-off-by: Jianxin Xiong Reviewed-by: Sean Hefty Acked-by: Michael J. Ruhl Acked-by: Christian Koenig Acked-by: Daniel Vetter Signed-off-by: Jason Gunthorpe --- drivers/infiniband/Kconfig | 1 + drivers/infiniband/core/Makefile | 2 +- drivers/infiniband/core/umem.c | 3 + drivers/infiniband/core/umem_dmabuf.c | 174 ++++++++++++++++++++++++++ include/rdma/ib_umem.h | 48 ++++++- 5 files changed, 224 insertions(+), 4 deletions(-) create mode 100644 drivers/infiniband/core/umem_dmabuf.c diff --git a/drivers/infiniband/Kconfig b/drivers/infiniband/Kconfig index 9325e189a215..04a78d9f8fe3 100644 --- a/drivers/infiniband/Kconfig +++ b/drivers/infiniband/Kconfig @@ -41,6 +41,7 @@ config INFINIBAND_USER_MEM bool depends on INFINIBAND_USER_ACCESS != n depends on MMU + select DMA_SHARED_BUFFER default y config INFINIBAND_ON_DEMAND_PAGING diff --git a/drivers/infiniband/core/Makefile b/drivers/infiniband/core/Makefile index ccf2670ef45e..8ab4eea5a0a5 100644 --- a/drivers/infiniband/core/Makefile +++ b/drivers/infiniband/core/Makefile @@ -40,5 +40,5 @@ ib_uverbs-y := uverbs_main.o uverbs_cmd.o uverbs_marshall.o \ uverbs_std_types_srq.o \ uverbs_std_types_wq.o \ uverbs_std_types_qp.o -ib_uverbs-$(CONFIG_INFINIBAND_USER_MEM) += umem.o +ib_uverbs-$(CONFIG_INFINIBAND_USER_MEM) += umem.o umem_dmabuf.o ib_uverbs-$(CONFIG_INFINIBAND_ON_DEMAND_PAGING) += umem_odp.o diff --git a/drivers/infiniband/core/umem.c b/drivers/infiniband/core/umem.c index 917338db7ac1..2dde99a9ba07 100644 --- a/drivers/infiniband/core/umem.c +++ b/drivers/infiniband/core/umem.c @@ -2,6 +2,7 @@ * Copyright (c) 2005 Topspin Communications. All rights reserved. * Copyright (c) 2005 Cisco Systems. All rights reserved. * Copyright (c) 2005 Mellanox Technologies. All rights reserved. + * Copyright (c) 2020 Intel Corporation. All rights reserved. * * This software is available to you under a choice of one of two * licenses. You may choose to be licensed under the terms of the GNU @@ -278,6 +279,8 @@ void ib_umem_release(struct ib_umem *umem) { if (!umem) return; + if (umem->is_dmabuf) + return ib_umem_dmabuf_release(to_ib_umem_dmabuf(umem)); if (umem->is_odp) return ib_umem_odp_release(to_ib_umem_odp(umem)); diff --git a/drivers/infiniband/core/umem_dmabuf.c b/drivers/infiniband/core/umem_dmabuf.c new file mode 100644 index 000000000000..f9b5162d9260 --- /dev/null +++ b/drivers/infiniband/core/umem_dmabuf.c @@ -0,0 +1,174 @@ +// SPDX-License-Identifier: (GPL-2.0 OR BSD-3-Clause) +/* + * Copyright (c) 2020 Intel Corporation. All rights reserved. + */ + +#include +#include +#include + +#include "uverbs.h" + +int ib_umem_dmabuf_map_pages(struct ib_umem_dmabuf *umem_dmabuf) +{ + struct sg_table *sgt; + struct scatterlist *sg; + struct dma_fence *fence; + unsigned long start, end, cur = 0; + unsigned int nmap = 0; + int i; + + dma_resv_assert_held(umem_dmabuf->attach->dmabuf->resv); + + if (umem_dmabuf->sgt) + goto wait_fence; + + sgt = dma_buf_map_attachment(umem_dmabuf->attach, DMA_BIDIRECTIONAL); + if (IS_ERR(sgt)) + return PTR_ERR(sgt); + + /* modify the sg list in-place to match umem address and length */ + + start = ALIGN_DOWN(umem_dmabuf->umem.address, PAGE_SIZE); + end = ALIGN(umem_dmabuf->umem.address + umem_dmabuf->umem.length, + PAGE_SIZE); + for_each_sgtable_dma_sg(sgt, sg, i) { + if (start < cur + sg_dma_len(sg) && cur < end) + nmap++; + if (cur <= start && start < cur + sg_dma_len(sg)) { + unsigned long offset = start - cur; + + umem_dmabuf->first_sg = sg; + umem_dmabuf->first_sg_offset = offset; + sg_dma_address(sg) += offset; + sg_dma_len(sg) -= offset; + cur += offset; + } + if (cur < end && end <= cur + sg_dma_len(sg)) { + unsigned long trim = cur + sg_dma_len(sg) - end; + + umem_dmabuf->last_sg = sg; + umem_dmabuf->last_sg_trim = trim; + sg_dma_len(sg) -= trim; + break; + } + cur += sg_dma_len(sg); + } + + umem_dmabuf->umem.sg_head.sgl = umem_dmabuf->first_sg; + umem_dmabuf->umem.sg_head.nents = nmap; + umem_dmabuf->umem.nmap = nmap; + umem_dmabuf->sgt = sgt; + +wait_fence: + /* + * Although the sg list is valid now, the content of the pages + * may be not up-to-date. Wait for the exporter to finish + * the migration. + */ + fence = dma_resv_get_excl(umem_dmabuf->attach->dmabuf->resv); + if (fence) + return dma_fence_wait(fence, false); + + return 0; +} +EXPORT_SYMBOL(ib_umem_dmabuf_map_pages); + +void ib_umem_dmabuf_unmap_pages(struct ib_umem_dmabuf *umem_dmabuf) +{ + dma_resv_assert_held(umem_dmabuf->attach->dmabuf->resv); + + if (!umem_dmabuf->sgt) + return; + + /* retore the original sg list */ + if (umem_dmabuf->first_sg) { + sg_dma_address(umem_dmabuf->first_sg) -= + umem_dmabuf->first_sg_offset; + sg_dma_len(umem_dmabuf->first_sg) += + umem_dmabuf->first_sg_offset; + umem_dmabuf->first_sg = NULL; + umem_dmabuf->first_sg_offset = 0; + } + if (umem_dmabuf->last_sg) { + sg_dma_len(umem_dmabuf->last_sg) += + umem_dmabuf->last_sg_trim; + umem_dmabuf->last_sg = NULL; + umem_dmabuf->last_sg_trim = 0; + } + + dma_buf_unmap_attachment(umem_dmabuf->attach, umem_dmabuf->sgt, + DMA_BIDIRECTIONAL); + + umem_dmabuf->sgt = NULL; +} +EXPORT_SYMBOL(ib_umem_dmabuf_unmap_pages); + +struct ib_umem_dmabuf *ib_umem_dmabuf_get(struct ib_device *device, + unsigned long offset, size_t size, + int fd, int access, + const struct dma_buf_attach_ops *ops) +{ + struct dma_buf *dmabuf; + struct ib_umem_dmabuf *umem_dmabuf; + struct ib_umem *umem; + unsigned long end; + struct ib_umem_dmabuf *ret = ERR_PTR(-EINVAL); + + if (check_add_overflow(offset, (unsigned long)size, &end)) + return ret; + + if (unlikely(!ops || !ops->move_notify)) + return ret; + + dmabuf = dma_buf_get(fd); + if (IS_ERR(dmabuf)) + return ERR_CAST(dmabuf); + + if (dmabuf->size < end) + goto out_release_dmabuf; + + umem_dmabuf = kzalloc(sizeof(*umem_dmabuf), GFP_KERNEL); + if (!umem_dmabuf) { + ret = ERR_PTR(-ENOMEM); + goto out_release_dmabuf; + } + + umem = &umem_dmabuf->umem; + umem->ibdev = device; + umem->length = size; + umem->address = offset; + umem->writable = ib_access_writable(access); + umem->is_dmabuf = 1; + + if (!ib_umem_num_pages(umem)) + goto out_free_umem; + + umem_dmabuf->attach = dma_buf_dynamic_attach( + dmabuf, + device->dma_device, + ops, + umem_dmabuf); + if (IS_ERR(umem_dmabuf->attach)) { + ret = ERR_CAST(umem_dmabuf->attach); + goto out_free_umem; + } + return umem_dmabuf; + +out_free_umem: + kfree(umem_dmabuf); + +out_release_dmabuf: + dma_buf_put(dmabuf); + return ret; +} +EXPORT_SYMBOL(ib_umem_dmabuf_get); + +void ib_umem_dmabuf_release(struct ib_umem_dmabuf *umem_dmabuf) +{ + struct dma_buf *dmabuf = umem_dmabuf->attach->dmabuf; + + dma_buf_detach(dmabuf, umem_dmabuf->attach); + dma_buf_put(dmabuf); + kfree(umem_dmabuf); +} diff --git a/include/rdma/ib_umem.h b/include/rdma/ib_umem.h index 7752211c9638..676c57f5ca80 100644 --- a/include/rdma/ib_umem.h +++ b/include/rdma/ib_umem.h @@ -1,6 +1,7 @@ /* SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB */ /* * Copyright (c) 2007 Cisco Systems. All rights reserved. + * Copyright (c) 2020 Intel Corporation. All rights reserved. */ #ifndef IB_UMEM_H @@ -13,6 +14,7 @@ struct ib_ucontext; struct ib_umem_odp; +struct dma_buf_attach_ops; struct ib_umem { struct ib_device *ibdev; @@ -22,12 +24,29 @@ struct ib_umem { unsigned long address; u32 writable : 1; u32 is_odp : 1; + u32 is_dmabuf : 1; struct work_struct work; struct sg_table sg_head; int nmap; unsigned int sg_nents; }; +struct ib_umem_dmabuf { + struct ib_umem umem; + struct dma_buf_attachment *attach; + struct sg_table *sgt; + struct scatterlist *first_sg; + struct scatterlist *last_sg; + unsigned long first_sg_offset; + unsigned long last_sg_trim; + void *private; +}; + +static inline struct ib_umem_dmabuf *to_ib_umem_dmabuf(struct ib_umem *umem) +{ + return container_of(umem, struct ib_umem_dmabuf, umem); +} + /* Returns the offset of the umem start relative to the first page. */ static inline int ib_umem_offset(struct ib_umem *umem) { @@ -86,6 +105,7 @@ int ib_umem_copy_from(void *dst, struct ib_umem *umem, size_t offset, unsigned long ib_umem_find_best_pgsz(struct ib_umem *umem, unsigned long pgsz_bitmap, unsigned long virt); + /** * ib_umem_find_best_pgoff - Find best HW page size * @@ -116,6 +136,14 @@ static inline unsigned long ib_umem_find_best_pgoff(struct ib_umem *umem, dma_addr & pgoff_bitmask); } +struct ib_umem_dmabuf *ib_umem_dmabuf_get(struct ib_device *device, + unsigned long offset, size_t size, + int fd, int access, + const struct dma_buf_attach_ops *ops); +int ib_umem_dmabuf_map_pages(struct ib_umem_dmabuf *umem_dmabuf); +void ib_umem_dmabuf_unmap_pages(struct ib_umem_dmabuf *umem_dmabuf); +void ib_umem_dmabuf_release(struct ib_umem_dmabuf *umem_dmabuf); + #else /* CONFIG_INFINIBAND_USER_MEM */ #include @@ -124,12 +152,12 @@ static inline struct ib_umem *ib_umem_get(struct ib_device *device, unsigned long addr, size_t size, int access) { - return ERR_PTR(-EINVAL); + return ERR_PTR(-EOPNOTSUPP); } static inline void ib_umem_release(struct ib_umem *umem) { } static inline int ib_umem_copy_from(void *dst, struct ib_umem *umem, size_t offset, size_t length) { - return -EINVAL; + return -EOPNOTSUPP; } static inline unsigned long ib_umem_find_best_pgsz(struct ib_umem *umem, unsigned long pgsz_bitmap, @@ -143,7 +171,21 @@ static inline unsigned long ib_umem_find_best_pgoff(struct ib_umem *umem, { return 0; } +static inline +struct ib_umem_dmabuf *ib_umem_dmabuf_get(struct ib_device *device, + unsigned long offset, + size_t size, int fd, + int access, + struct dma_buf_attach_ops *ops) +{ + return ERR_PTR(-EOPNOTSUPP); +} +static inline int ib_umem_dmabuf_map_pages(struct ib_umem_dmabuf *umem_dmabuf) +{ + return -EOPNOTSUPP; +} +static inline void ib_umem_dmabuf_unmap_pages(struct ib_umem_dmabuf *umem_dmabuf) { } +static inline void ib_umem_dmabuf_release(struct ib_umem_dmabuf *umem_dmabuf) { } #endif /* CONFIG_INFINIBAND_USER_MEM */ - #endif /* IB_UMEM_H */ From 3bc489e8827a93b14d27211ae0576b3c1de85000 Mon Sep 17 00:00:00 2001 From: Jianxin Xiong Date: Tue, 15 Dec 2020 13:27:14 -0800 Subject: [PATCH 093/414] RDMA/core: Add device method for registering dma-buf based memory region Dma-buf based memory region requires one extra parameter and is processed quite differently. Adding a separate method allows clean separation from regular memory regions. Link: https://lore.kernel.org/r/1608067636-98073-3-git-send-email-jianxin.xiong@intel.com Signed-off-by: Jianxin Xiong Reviewed-by: Sean Hefty Acked-by: Michael J. Ruhl Acked-by: Christian Koenig Acked-by: Daniel Vetter Reviewed-by: Leon Romanovsky Signed-off-by: Jason Gunthorpe --- drivers/infiniband/core/device.c | 1 + include/rdma/ib_verbs.h | 6 +++++- 2 files changed, 6 insertions(+), 1 deletion(-) diff --git a/drivers/infiniband/core/device.c b/drivers/infiniband/core/device.c index 3d08373c7797..c895d7bfa512 100644 --- a/drivers/infiniband/core/device.c +++ b/drivers/infiniband/core/device.c @@ -2681,6 +2681,7 @@ void ib_set_device_ops(struct ib_device *dev, const struct ib_device_ops *ops) SET_DEVICE_OP(dev_ops, read_counters); SET_DEVICE_OP(dev_ops, reg_dm_mr); SET_DEVICE_OP(dev_ops, reg_user_mr); + SET_DEVICE_OP(dev_ops, reg_user_mr_dmabuf); SET_DEVICE_OP(dev_ops, req_ncomp_notif); SET_DEVICE_OP(dev_ops, req_notify_cq); SET_DEVICE_OP(dev_ops, rereg_user_mr); diff --git a/include/rdma/ib_verbs.h b/include/rdma/ib_verbs.h index 9fed65bf9279..62e574c50555 100644 --- a/include/rdma/ib_verbs.h +++ b/include/rdma/ib_verbs.h @@ -2,7 +2,7 @@ /* * Copyright (c) 2004 Mellanox Technologies Ltd. All rights reserved. * Copyright (c) 2004 Infinicon Corporation. All rights reserved. - * Copyright (c) 2004 Intel Corporation. All rights reserved. + * Copyright (c) 2004, 2020 Intel Corporation. All rights reserved. * Copyright (c) 2004 Topspin Corporation. All rights reserved. * Copyright (c) 2004 Voltaire Corporation. All rights reserved. * Copyright (c) 2005 Sun Microsystems, Inc. All rights reserved. @@ -2434,6 +2434,10 @@ struct ib_device_ops { struct ib_mr *(*reg_user_mr)(struct ib_pd *pd, u64 start, u64 length, u64 virt_addr, int mr_access_flags, struct ib_udata *udata); + struct ib_mr *(*reg_user_mr_dmabuf)(struct ib_pd *pd, u64 offset, + u64 length, u64 virt_addr, int fd, + int mr_access_flags, + struct ib_udata *udata); struct ib_mr *(*rereg_user_mr)(struct ib_mr *mr, int flags, u64 start, u64 length, u64 virt_addr, int mr_access_flags, struct ib_pd *pd, From bfe0cc6eb2491aeae2bba02a305fcce13cd90624 Mon Sep 17 00:00:00 2001 From: Jianxin Xiong Date: Tue, 15 Dec 2020 13:27:15 -0800 Subject: [PATCH 094/414] RDMA/uverbs: Add uverbs command for dma-buf based MR registration Implement a new uverbs ioctl method for memory registration with file descriptor as an extra parameter. Link: https://lore.kernel.org/r/1608067636-98073-4-git-send-email-jianxin.xiong@intel.com Signed-off-by: Jianxin Xiong Reviewed-by: Sean Hefty Acked-by: Michael J. Ruhl Acked-by: Christian Koenig Acked-by: Daniel Vetter Reviewed-by: Leon Romanovsky Signed-off-by: Jason Gunthorpe --- drivers/infiniband/core/uverbs_std_types_mr.c | 117 +++++++++++++++++- include/uapi/rdma/ib_user_ioctl_cmds.h | 14 +++ 2 files changed, 129 insertions(+), 2 deletions(-) diff --git a/drivers/infiniband/core/uverbs_std_types_mr.c b/drivers/infiniband/core/uverbs_std_types_mr.c index dd4e76b26c74..f782d5e1aa25 100644 --- a/drivers/infiniband/core/uverbs_std_types_mr.c +++ b/drivers/infiniband/core/uverbs_std_types_mr.c @@ -1,5 +1,6 @@ /* * Copyright (c) 2018, Mellanox Technologies inc. All rights reserved. + * Copyright (c) 2020, Intel Corporation. All rights reserved. * * This software is available to you under a choice of one of two * licenses. You may choose to be licensed under the terms of the GNU @@ -182,6 +183,86 @@ static int UVERBS_HANDLER(UVERBS_METHOD_QUERY_MR)( return IS_UVERBS_COPY_ERR(ret) ? ret : 0; } +static int UVERBS_HANDLER(UVERBS_METHOD_REG_DMABUF_MR)( + struct uverbs_attr_bundle *attrs) +{ + struct ib_uobject *uobj = + uverbs_attr_get_uobject(attrs, UVERBS_ATTR_REG_DMABUF_MR_HANDLE); + struct ib_pd *pd = + uverbs_attr_get_obj(attrs, UVERBS_ATTR_REG_DMABUF_MR_PD_HANDLE); + struct ib_device *ib_dev = pd->device; + + u64 offset, length, iova; + u32 fd, access_flags; + struct ib_mr *mr; + int ret; + + if (!ib_dev->ops.reg_user_mr_dmabuf) + return -EOPNOTSUPP; + + ret = uverbs_copy_from(&offset, attrs, + UVERBS_ATTR_REG_DMABUF_MR_OFFSET); + if (ret) + return ret; + + ret = uverbs_copy_from(&length, attrs, + UVERBS_ATTR_REG_DMABUF_MR_LENGTH); + if (ret) + return ret; + + ret = uverbs_copy_from(&iova, attrs, + UVERBS_ATTR_REG_DMABUF_MR_IOVA); + if (ret) + return ret; + + if ((offset & ~PAGE_MASK) != (iova & ~PAGE_MASK)) + return -EINVAL; + + ret = uverbs_copy_from(&fd, attrs, + UVERBS_ATTR_REG_DMABUF_MR_FD); + if (ret) + return ret; + + ret = uverbs_get_flags32(&access_flags, attrs, + UVERBS_ATTR_REG_DMABUF_MR_ACCESS_FLAGS, + IB_ACCESS_LOCAL_WRITE | + IB_ACCESS_REMOTE_READ | + IB_ACCESS_REMOTE_WRITE | + IB_ACCESS_REMOTE_ATOMIC | + IB_ACCESS_RELAXED_ORDERING); + if (ret) + return ret; + + ret = ib_check_mr_access(ib_dev, access_flags); + if (ret) + return ret; + + mr = pd->device->ops.reg_user_mr_dmabuf(pd, offset, length, iova, fd, + access_flags, + &attrs->driver_udata); + if (IS_ERR(mr)) + return PTR_ERR(mr); + + mr->device = pd->device; + mr->pd = pd; + mr->type = IB_MR_TYPE_USER; + mr->uobject = uobj; + atomic_inc(&pd->usecnt); + + uobj->object = mr; + + uverbs_finalize_uobj_create(attrs, UVERBS_ATTR_REG_DMABUF_MR_HANDLE); + + ret = uverbs_copy_to(attrs, UVERBS_ATTR_REG_DMABUF_MR_RESP_LKEY, + &mr->lkey, sizeof(mr->lkey)); + if (ret) + return ret; + + ret = uverbs_copy_to(attrs, UVERBS_ATTR_REG_DMABUF_MR_RESP_RKEY, + &mr->rkey, sizeof(mr->rkey)); + return ret; +} + DECLARE_UVERBS_NAMED_METHOD( UVERBS_METHOD_ADVISE_MR, UVERBS_ATTR_IDR(UVERBS_ATTR_ADVISE_MR_PD_HANDLE, @@ -247,6 +328,37 @@ DECLARE_UVERBS_NAMED_METHOD( UVERBS_ATTR_TYPE(u32), UA_MANDATORY)); +DECLARE_UVERBS_NAMED_METHOD( + UVERBS_METHOD_REG_DMABUF_MR, + UVERBS_ATTR_IDR(UVERBS_ATTR_REG_DMABUF_MR_HANDLE, + UVERBS_OBJECT_MR, + UVERBS_ACCESS_NEW, + UA_MANDATORY), + UVERBS_ATTR_IDR(UVERBS_ATTR_REG_DMABUF_MR_PD_HANDLE, + UVERBS_OBJECT_PD, + UVERBS_ACCESS_READ, + UA_MANDATORY), + UVERBS_ATTR_PTR_IN(UVERBS_ATTR_REG_DMABUF_MR_OFFSET, + UVERBS_ATTR_TYPE(u64), + UA_MANDATORY), + UVERBS_ATTR_PTR_IN(UVERBS_ATTR_REG_DMABUF_MR_LENGTH, + UVERBS_ATTR_TYPE(u64), + UA_MANDATORY), + UVERBS_ATTR_PTR_IN(UVERBS_ATTR_REG_DMABUF_MR_IOVA, + UVERBS_ATTR_TYPE(u64), + UA_MANDATORY), + UVERBS_ATTR_PTR_IN(UVERBS_ATTR_REG_DMABUF_MR_FD, + UVERBS_ATTR_TYPE(u32), + UA_MANDATORY), + UVERBS_ATTR_FLAGS_IN(UVERBS_ATTR_REG_DMABUF_MR_ACCESS_FLAGS, + enum ib_access_flags), + UVERBS_ATTR_PTR_OUT(UVERBS_ATTR_REG_DMABUF_MR_RESP_LKEY, + UVERBS_ATTR_TYPE(u32), + UA_MANDATORY), + UVERBS_ATTR_PTR_OUT(UVERBS_ATTR_REG_DMABUF_MR_RESP_RKEY, + UVERBS_ATTR_TYPE(u32), + UA_MANDATORY)); + DECLARE_UVERBS_NAMED_METHOD_DESTROY( UVERBS_METHOD_MR_DESTROY, UVERBS_ATTR_IDR(UVERBS_ATTR_DESTROY_MR_HANDLE, @@ -257,10 +369,11 @@ DECLARE_UVERBS_NAMED_METHOD_DESTROY( DECLARE_UVERBS_NAMED_OBJECT( UVERBS_OBJECT_MR, UVERBS_TYPE_ALLOC_IDR(uverbs_free_mr), + &UVERBS_METHOD(UVERBS_METHOD_ADVISE_MR), &UVERBS_METHOD(UVERBS_METHOD_DM_MR_REG), &UVERBS_METHOD(UVERBS_METHOD_MR_DESTROY), - &UVERBS_METHOD(UVERBS_METHOD_ADVISE_MR), - &UVERBS_METHOD(UVERBS_METHOD_QUERY_MR)); + &UVERBS_METHOD(UVERBS_METHOD_QUERY_MR), + &UVERBS_METHOD(UVERBS_METHOD_REG_DMABUF_MR)); const struct uapi_definition uverbs_def_obj_mr[] = { UAPI_DEF_CHAIN_OBJ_TREE_NAMED(UVERBS_OBJECT_MR, diff --git a/include/uapi/rdma/ib_user_ioctl_cmds.h b/include/uapi/rdma/ib_user_ioctl_cmds.h index 7968a1845355..dafc7ebe545b 100644 --- a/include/uapi/rdma/ib_user_ioctl_cmds.h +++ b/include/uapi/rdma/ib_user_ioctl_cmds.h @@ -1,5 +1,6 @@ /* * Copyright (c) 2018, Mellanox Technologies inc. All rights reserved. + * Copyright (c) 2020, Intel Corporation. All rights reserved. * * This software is available to you under a choice of one of two * licenses. You may choose to be licensed under the terms of the GNU @@ -251,6 +252,7 @@ enum uverbs_methods_mr { UVERBS_METHOD_MR_DESTROY, UVERBS_METHOD_ADVISE_MR, UVERBS_METHOD_QUERY_MR, + UVERBS_METHOD_REG_DMABUF_MR, }; enum uverbs_attrs_mr_destroy_ids { @@ -272,6 +274,18 @@ enum uverbs_attrs_query_mr_cmd_attr_ids { UVERBS_ATTR_QUERY_MR_RESP_IOVA, }; +enum uverbs_attrs_reg_dmabuf_mr_cmd_attr_ids { + UVERBS_ATTR_REG_DMABUF_MR_HANDLE, + UVERBS_ATTR_REG_DMABUF_MR_PD_HANDLE, + UVERBS_ATTR_REG_DMABUF_MR_OFFSET, + UVERBS_ATTR_REG_DMABUF_MR_LENGTH, + UVERBS_ATTR_REG_DMABUF_MR_IOVA, + UVERBS_ATTR_REG_DMABUF_MR_FD, + UVERBS_ATTR_REG_DMABUF_MR_ACCESS_FLAGS, + UVERBS_ATTR_REG_DMABUF_MR_RESP_LKEY, + UVERBS_ATTR_REG_DMABUF_MR_RESP_RKEY, +}; + enum uverbs_attrs_create_counters_cmd_attr_ids { UVERBS_ATTR_CREATE_COUNTERS_HANDLE, }; From 90da7dc8206a5999a23af719733f1cda26b5f815 Mon Sep 17 00:00:00 2001 From: Jianxin Xiong Date: Tue, 15 Dec 2020 13:27:16 -0800 Subject: [PATCH 095/414] RDMA/mlx5: Support dma-buf based userspace memory region Implement the new driver method 'reg_user_mr_dmabuf'. Utilize the core functions to import dma-buf based memory region and update the mappings. Add code to handle dma-buf related page fault. Link: https://lore.kernel.org/r/1608067636-98073-5-git-send-email-jianxin.xiong@intel.com Signed-off-by: Jianxin Xiong Reviewed-by: Sean Hefty Acked-by: Michael J. Ruhl Acked-by: Christian Koenig Acked-by: Daniel Vetter Signed-off-by: Jason Gunthorpe --- drivers/infiniband/hw/mlx5/main.c | 2 + drivers/infiniband/hw/mlx5/mlx5_ib.h | 18 ++++ drivers/infiniband/hw/mlx5/mr.c | 119 +++++++++++++++++++++++++-- drivers/infiniband/hw/mlx5/odp.c | 89 +++++++++++++++++++- 4 files changed, 216 insertions(+), 12 deletions(-) diff --git a/drivers/infiniband/hw/mlx5/main.c b/drivers/infiniband/hw/mlx5/main.c index 4747cc16b391..ecbb8443b0ec 100644 --- a/drivers/infiniband/hw/mlx5/main.c +++ b/drivers/infiniband/hw/mlx5/main.c @@ -1,6 +1,7 @@ // SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB /* * Copyright (c) 2013-2020, Mellanox Technologies inc. All rights reserved. + * Copyright (c) 2020, Intel Corporation. All rights reserved. */ #include @@ -4068,6 +4069,7 @@ static const struct ib_device_ops mlx5_ib_dev_ops = { .query_srq = mlx5_ib_query_srq, .query_ucontext = mlx5_ib_query_ucontext, .reg_user_mr = mlx5_ib_reg_user_mr, + .reg_user_mr_dmabuf = mlx5_ib_reg_user_mr_dmabuf, .req_notify_cq = mlx5_ib_arm_cq, .rereg_user_mr = mlx5_ib_rereg_user_mr, .resize_cq = mlx5_ib_resize_cq, diff --git a/drivers/infiniband/hw/mlx5/mlx5_ib.h b/drivers/infiniband/hw/mlx5/mlx5_ib.h index acc56eaa7356..2fd2927abe45 100644 --- a/drivers/infiniband/hw/mlx5/mlx5_ib.h +++ b/drivers/infiniband/hw/mlx5/mlx5_ib.h @@ -1,6 +1,7 @@ /* SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB */ /* * Copyright (c) 2013-2020, Mellanox Technologies inc. All rights reserved. + * Copyright (c) 2020, Intel Corporation. All rights reserved. */ #ifndef MLX5_IB_H @@ -703,6 +704,12 @@ static inline bool is_odp_mr(struct mlx5_ib_mr *mr) mr->umem->is_odp; } +static inline bool is_dmabuf_mr(struct mlx5_ib_mr *mr) +{ + return IS_ENABLED(CONFIG_INFINIBAND_ON_DEMAND_PAGING) && mr->umem && + mr->umem->is_dmabuf; +} + struct mlx5_ib_mw { struct ib_mw ibmw; struct mlx5_core_mkey mmkey; @@ -1243,6 +1250,10 @@ struct ib_mr *mlx5_ib_get_dma_mr(struct ib_pd *pd, int acc); struct ib_mr *mlx5_ib_reg_user_mr(struct ib_pd *pd, u64 start, u64 length, u64 virt_addr, int access_flags, struct ib_udata *udata); +struct ib_mr *mlx5_ib_reg_user_mr_dmabuf(struct ib_pd *pd, u64 start, + u64 length, u64 virt_addr, + int fd, int access_flags, + struct ib_udata *udata); int mlx5_ib_advise_mr(struct ib_pd *pd, enum ib_uverbs_advise_mr_advice advice, u32 flags, @@ -1253,11 +1264,13 @@ int mlx5_ib_alloc_mw(struct ib_mw *mw, struct ib_udata *udata); int mlx5_ib_dealloc_mw(struct ib_mw *mw); int mlx5_ib_update_xlt(struct mlx5_ib_mr *mr, u64 idx, int npages, int page_shift, int flags); +int mlx5_ib_update_mr_pas(struct mlx5_ib_mr *mr, unsigned int flags); struct mlx5_ib_mr *mlx5_ib_alloc_implicit_mr(struct mlx5_ib_pd *pd, struct ib_udata *udata, int access_flags); void mlx5_ib_free_implicit_mr(struct mlx5_ib_mr *mr); void mlx5_ib_fence_odp_mr(struct mlx5_ib_mr *mr); +void mlx5_ib_fence_dmabuf_mr(struct mlx5_ib_mr *mr); struct ib_mr *mlx5_ib_rereg_user_mr(struct ib_mr *ib_mr, int flags, u64 start, u64 length, u64 virt_addr, int access_flags, struct ib_pd *pd, struct ib_udata *udata); @@ -1343,6 +1356,7 @@ int mlx5_ib_advise_mr_prefetch(struct ib_pd *pd, enum ib_uverbs_advise_mr_advice advice, u32 flags, struct ib_sge *sg_list, u32 num_sge); int mlx5_ib_init_odp_mr(struct mlx5_ib_mr *mr); +int mlx5_ib_init_dmabuf_mr(struct mlx5_ib_mr *mr); #else /* CONFIG_INFINIBAND_ON_DEMAND_PAGING */ static inline void mlx5_ib_internal_fill_odp_caps(struct mlx5_ib_dev *dev) { @@ -1368,6 +1382,10 @@ static inline int mlx5_ib_init_odp_mr(struct mlx5_ib_mr *mr) { return -EOPNOTSUPP; } +static inline int mlx5_ib_init_dmabuf_mr(struct mlx5_ib_mr *mr) +{ + return -EOPNOTSUPP; +} #endif /* CONFIG_INFINIBAND_ON_DEMAND_PAGING */ extern const struct mmu_interval_notifier_ops mlx5_mn_ops; diff --git a/drivers/infiniband/hw/mlx5/mr.c b/drivers/infiniband/hw/mlx5/mr.c index 24f8d59a42ea..a63ef7c66e38 100644 --- a/drivers/infiniband/hw/mlx5/mr.c +++ b/drivers/infiniband/hw/mlx5/mr.c @@ -1,5 +1,6 @@ /* * Copyright (c) 2013-2015, Mellanox Technologies. All rights reserved. + * Copyright (c) 2020, Intel Corporation. All rights reserved. * * This software is available to you under a choice of one of two * licenses. You may choose to be licensed under the terms of the GNU @@ -36,6 +37,8 @@ #include #include #include +#include +#include #include #include #include @@ -935,6 +938,17 @@ static void set_mr_fields(struct mlx5_ib_dev *dev, struct mlx5_ib_mr *mr, mr->access_flags = access_flags; } +static unsigned int mlx5_umem_dmabuf_default_pgsz(struct ib_umem *umem, + u64 iova) +{ + /* + * The alignment of iova has already been checked upon entering + * UVERBS_METHOD_REG_DMABUF_MR + */ + umem->iova = iova; + return PAGE_SIZE; +} + static struct mlx5_ib_mr *alloc_cacheable_mr(struct ib_pd *pd, struct ib_umem *umem, u64 iova, int access_flags) @@ -944,7 +958,11 @@ static struct mlx5_ib_mr *alloc_cacheable_mr(struct ib_pd *pd, struct mlx5_ib_mr *mr; unsigned int page_size; - page_size = mlx5_umem_find_best_pgsz(umem, mkc, log_page_size, 0, iova); + if (umem->is_dmabuf) + page_size = mlx5_umem_dmabuf_default_pgsz(umem, iova); + else + page_size = mlx5_umem_find_best_pgsz(umem, mkc, log_page_size, + 0, iova); if (WARN_ON(!page_size)) return ERR_PTR(-EINVAL); ent = mr_cache_ent_from_order( @@ -980,7 +998,6 @@ static struct mlx5_ib_mr *alloc_cacheable_mr(struct ib_pd *pd, mr->mmkey.size = umem->length; mr->mmkey.pd = to_mpd(pd)->pdn; mr->page_shift = order_base_2(page_size); - mr->umem = umem; set_mr_fields(dev, mr, umem->length, access_flags); return mr; @@ -1201,8 +1218,10 @@ int mlx5_ib_update_xlt(struct mlx5_ib_mr *mr, u64 idx, int npages, /* * Send the DMA list to the HW for a normal MR using UMR. + * Dmabuf MR is handled in a similar way, except that the MLX5_IB_UPD_XLT_ZAP + * flag may be used. */ -static int mlx5_ib_update_mr_pas(struct mlx5_ib_mr *mr, unsigned int flags) +int mlx5_ib_update_mr_pas(struct mlx5_ib_mr *mr, unsigned int flags) { struct mlx5_ib_dev *dev = mr_to_mdev(mr); struct device *ddev = &dev->mdev->pdev->dev; @@ -1244,6 +1263,10 @@ static int mlx5_ib_update_mr_pas(struct mlx5_ib_mr *mr, unsigned int flags) cur_mtt->ptag = cpu_to_be64(rdma_block_iter_dma_address(&biter) | MLX5_IB_MTT_PRESENT); + + if (mr->umem->is_dmabuf && (flags & MLX5_IB_UPD_XLT_ZAP)) + cur_mtt->ptag = 0; + cur_mtt++; } @@ -1567,6 +1590,84 @@ struct ib_mr *mlx5_ib_reg_user_mr(struct ib_pd *pd, u64 start, u64 length, return create_real_mr(pd, umem, iova, access_flags); } +static void mlx5_ib_dmabuf_invalidate_cb(struct dma_buf_attachment *attach) +{ + struct ib_umem_dmabuf *umem_dmabuf = attach->importer_priv; + struct mlx5_ib_mr *mr = umem_dmabuf->private; + + dma_resv_assert_held(umem_dmabuf->attach->dmabuf->resv); + + if (!umem_dmabuf->sgt) + return; + + mlx5_ib_update_mr_pas(mr, MLX5_IB_UPD_XLT_ZAP); + ib_umem_dmabuf_unmap_pages(umem_dmabuf); +} + +static struct dma_buf_attach_ops mlx5_ib_dmabuf_attach_ops = { + .allow_peer2peer = 1, + .move_notify = mlx5_ib_dmabuf_invalidate_cb, +}; + +struct ib_mr *mlx5_ib_reg_user_mr_dmabuf(struct ib_pd *pd, u64 offset, + u64 length, u64 virt_addr, + int fd, int access_flags, + struct ib_udata *udata) +{ + struct mlx5_ib_dev *dev = to_mdev(pd->device); + struct mlx5_ib_mr *mr = NULL; + struct ib_umem_dmabuf *umem_dmabuf; + int err; + + if (!IS_ENABLED(CONFIG_INFINIBAND_USER_MEM) || + !IS_ENABLED(CONFIG_INFINIBAND_ON_DEMAND_PAGING)) + return ERR_PTR(-EOPNOTSUPP); + + mlx5_ib_dbg(dev, + "offset 0x%llx, virt_addr 0x%llx, length 0x%llx, fd %d, access_flags 0x%x\n", + offset, virt_addr, length, fd, access_flags); + + /* dmabuf requires xlt update via umr to work. */ + if (!mlx5_ib_can_load_pas_with_umr(dev, length)) + return ERR_PTR(-EINVAL); + + umem_dmabuf = ib_umem_dmabuf_get(&dev->ib_dev, offset, length, fd, + access_flags, + &mlx5_ib_dmabuf_attach_ops); + if (IS_ERR(umem_dmabuf)) { + mlx5_ib_dbg(dev, "umem_dmabuf get failed (%ld)\n", + PTR_ERR(umem_dmabuf)); + return ERR_CAST(umem_dmabuf); + } + + mr = alloc_cacheable_mr(pd, &umem_dmabuf->umem, virt_addr, + access_flags); + if (IS_ERR(mr)) { + ib_umem_release(&umem_dmabuf->umem); + return ERR_CAST(mr); + } + + mlx5_ib_dbg(dev, "mkey 0x%x\n", mr->mmkey.key); + + atomic_add(ib_umem_num_pages(mr->umem), &dev->mdev->priv.reg_pages); + umem_dmabuf->private = mr; + init_waitqueue_head(&mr->q_deferred_work); + atomic_set(&mr->num_deferred_work, 0); + err = xa_err(xa_store(&dev->odp_mkeys, mlx5_base_mkey(mr->mmkey.key), + &mr->mmkey, GFP_KERNEL)); + if (err) + goto err_dereg_mr; + + err = mlx5_ib_init_dmabuf_mr(mr); + if (err) + goto err_dereg_mr; + return &mr->ibmr; + +err_dereg_mr: + dereg_mr(dev, mr); + return ERR_PTR(err); +} + /** * mlx5_mr_cache_invalidate - Fence all DMA on the MR * @mr: The MR to fence @@ -1740,8 +1841,8 @@ struct ib_mr *mlx5_ib_rereg_user_mr(struct ib_mr *ib_mr, int flags, u64 start, return ERR_PTR(err); return NULL; } - /* DM or ODP MR's don't have a umem so we can't re-use it */ - if (!mr->umem || is_odp_mr(mr)) + /* DM or ODP MR's don't have a normal umem so we can't re-use it */ + if (!mr->umem || is_odp_mr(mr) || is_dmabuf_mr(mr)) goto recreate; /* @@ -1760,10 +1861,10 @@ struct ib_mr *mlx5_ib_rereg_user_mr(struct ib_mr *ib_mr, int flags, u64 start, } /* - * DM doesn't have a PAS list so we can't re-use it, odp does but the - * logic around releasing the umem is different + * DM doesn't have a PAS list so we can't re-use it, odp/dmabuf does + * but the logic around releasing the umem is different */ - if (!mr->umem || is_odp_mr(mr)) + if (!mr->umem || is_odp_mr(mr) || is_dmabuf_mr(mr)) goto recreate; if (!(new_access_flags & IB_ACCESS_ON_DEMAND) && @@ -1876,6 +1977,8 @@ static void dereg_mr(struct mlx5_ib_dev *dev, struct mlx5_ib_mr *mr) /* Stop all DMA */ if (is_odp_mr(mr)) mlx5_ib_fence_odp_mr(mr); + else if (is_dmabuf_mr(mr)) + mlx5_ib_fence_dmabuf_mr(mr); else clean_mr(dev, mr); diff --git a/drivers/infiniband/hw/mlx5/odp.c b/drivers/infiniband/hw/mlx5/odp.c index aa2413b50adc..440fbf7b66e4 100644 --- a/drivers/infiniband/hw/mlx5/odp.c +++ b/drivers/infiniband/hw/mlx5/odp.c @@ -33,6 +33,8 @@ #include #include #include +#include +#include #include "mlx5_ib.h" #include "cmd.h" @@ -670,6 +672,37 @@ void mlx5_ib_fence_odp_mr(struct mlx5_ib_mr *mr) dma_fence_odp_mr(mr); } +/** + * mlx5_ib_fence_dmabuf_mr - Stop all access to the dmabuf MR + * @mr: to fence + * + * On return no parallel threads will be touching this MR and no DMA will be + * active. + */ +void mlx5_ib_fence_dmabuf_mr(struct mlx5_ib_mr *mr) +{ + struct ib_umem_dmabuf *umem_dmabuf = to_ib_umem_dmabuf(mr->umem); + + /* Prevent new page faults and prefetch requests from succeeding */ + xa_erase(&mr_to_mdev(mr)->odp_mkeys, mlx5_base_mkey(mr->mmkey.key)); + + /* Wait for all running page-fault handlers to finish. */ + synchronize_srcu(&mr_to_mdev(mr)->odp_srcu); + + wait_event(mr->q_deferred_work, !atomic_read(&mr->num_deferred_work)); + + dma_resv_lock(umem_dmabuf->attach->dmabuf->resv, NULL); + mlx5_mr_cache_invalidate(mr); + umem_dmabuf->private = NULL; + ib_umem_dmabuf_unmap_pages(umem_dmabuf); + dma_resv_unlock(umem_dmabuf->attach->dmabuf->resv); + + if (!mr->cache_ent) { + mlx5_core_destroy_mkey(mr_to_mdev(mr)->mdev, &mr->mmkey); + WARN_ON(mr->descs); + } +} + #define MLX5_PF_FLAGS_DOWNGRADE BIT(1) #define MLX5_PF_FLAGS_SNAPSHOT BIT(2) #define MLX5_PF_FLAGS_ENABLE BIT(3) @@ -803,6 +836,44 @@ static int pagefault_implicit_mr(struct mlx5_ib_mr *imr, return ret; } +static int pagefault_dmabuf_mr(struct mlx5_ib_mr *mr, size_t bcnt, + u32 *bytes_mapped, u32 flags) +{ + struct ib_umem_dmabuf *umem_dmabuf = to_ib_umem_dmabuf(mr->umem); + u32 xlt_flags = 0; + int err; + unsigned int page_size; + + if (flags & MLX5_PF_FLAGS_ENABLE) + xlt_flags |= MLX5_IB_UPD_XLT_ENABLE; + + dma_resv_lock(umem_dmabuf->attach->dmabuf->resv, NULL); + err = ib_umem_dmabuf_map_pages(umem_dmabuf); + if (err) { + dma_resv_unlock(umem_dmabuf->attach->dmabuf->resv); + return err; + } + + page_size = mlx5_umem_find_best_pgsz(&umem_dmabuf->umem, mkc, + log_page_size, 0, + umem_dmabuf->umem.iova); + if (unlikely(page_size < PAGE_SIZE)) { + ib_umem_dmabuf_unmap_pages(umem_dmabuf); + err = -EINVAL; + } else { + err = mlx5_ib_update_mr_pas(mr, xlt_flags); + } + dma_resv_unlock(umem_dmabuf->attach->dmabuf->resv); + + if (err) + return err; + + if (bytes_mapped) + *bytes_mapped += bcnt; + + return ib_umem_num_pages(mr->umem); +} + /* * Returns: * -EFAULT: The io_virt->bcnt is not within the MR, it covers pages that are @@ -821,6 +892,9 @@ static int pagefault_mr(struct mlx5_ib_mr *mr, u64 io_virt, size_t bcnt, if (unlikely(io_virt < mr->mmkey.iova)) return -EFAULT; + if (mr->umem->is_dmabuf) + return pagefault_dmabuf_mr(mr, bcnt, bytes_mapped, flags); + if (!odp->is_implicit_odp) { u64 user_va; @@ -847,6 +921,16 @@ int mlx5_ib_init_odp_mr(struct mlx5_ib_mr *mr) return ret >= 0 ? 0 : ret; } +int mlx5_ib_init_dmabuf_mr(struct mlx5_ib_mr *mr) +{ + int ret; + + ret = pagefault_dmabuf_mr(mr, mr->umem->length, NULL, + MLX5_PF_FLAGS_ENABLE); + + return ret >= 0 ? 0 : ret; +} + struct pf_frame { struct pf_frame *next; u32 key; @@ -1749,7 +1833,6 @@ get_prefetchable_mr(struct ib_pd *pd, enum ib_uverbs_advise_mr_advice advice, { struct mlx5_ib_dev *dev = to_mdev(pd->device); struct mlx5_core_mkey *mmkey; - struct ib_umem_odp *odp; struct mlx5_ib_mr *mr; lockdep_assert_held(&dev->odp_srcu); @@ -1763,11 +1846,9 @@ get_prefetchable_mr(struct ib_pd *pd, enum ib_uverbs_advise_mr_advice advice, if (mr->ibmr.pd != pd) return NULL; - odp = to_ib_umem_odp(mr->umem); - /* prefetch with write-access must be supported by the MR */ if (advice == IB_UVERBS_ADVISE_MR_ADVICE_PREFETCH_WRITE && - !odp->umem.writable) + !mr->umem->writable) return NULL; return mr; From efeb973ffce7f3460d814ab91f22cbbc1cce7af8 Mon Sep 17 00:00:00 2001 From: Xiao Yang Date: Wed, 16 Dec 2020 15:17:54 +0800 Subject: [PATCH 096/414] RDMA/uverbs: Don't set rcq for a QP if qp_type is IB_QPT_XRC_INI An INI QP doesn't require receive CQ, the creation flow sets the recv counts to zero: if (cmd->qp_type == IB_QPT_XRC_INI) { cmd->max_recv_wr = 0; cmd->max_recv_sge = 0; The new IOCTL path also does not set the rcq, so make things the same. Link: https://lore.kernel.org/r/20201216071755.149449-1-yangx.jy@cn.fujitsu.com Signed-off-by: Xiao Yang Signed-off-by: Jason Gunthorpe --- drivers/infiniband/core/uverbs_cmd.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/infiniband/core/uverbs_cmd.c b/drivers/infiniband/core/uverbs_cmd.c index 98a5d36813ff..f5b8be3bedde 100644 --- a/drivers/infiniband/core/uverbs_cmd.c +++ b/drivers/infiniband/core/uverbs_cmd.c @@ -1382,7 +1382,7 @@ static int create_qp(struct uverbs_attr_bundle *attrs, if (has_sq) scq = uobj_get_obj_read(cq, UVERBS_OBJECT_CQ, cmd->send_cq_handle, attrs); - if (!ind_tbl) + if (!ind_tbl && cmd->qp_type != IB_QPT_XRC_INI) rcq = rcq ?: scq; pd = uobj_get_obj_read(pd, UVERBS_OBJECT_PD, cmd->pd_handle, attrs); From 7ebc7dc87103652c552ecd973b9e7bf24ee7c7c2 Mon Sep 17 00:00:00 2001 From: Kir Kolyshkin Date: Tue, 19 Jan 2021 16:18:15 -0800 Subject: [PATCH 097/414] docs/scheduler/sched-bwc: formatting fix Since commit d6a3b247627a3 these three lines are merged into one by the RST processor, making it hard to read. Use bullet points to separate the entries, like it's done in other similar places. Signed-off-by: Kir Kolyshkin Acked-by: Tejun Heo Link: https://lore.kernel.org/r/20210120001824.385168-2-kolyshkin@gmail.com Signed-off-by: Jonathan Corbet --- Documentation/scheduler/sched-bwc.rst | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/Documentation/scheduler/sched-bwc.rst b/Documentation/scheduler/sched-bwc.rst index 9801d6b284b1..4af4ee1f3600 100644 --- a/Documentation/scheduler/sched-bwc.rst +++ b/Documentation/scheduler/sched-bwc.rst @@ -25,9 +25,10 @@ Management ---------- Quota and period are managed within the cpu subsystem via cgroupfs. -cpu.cfs_quota_us: the total available run-time within a period (in microseconds) -cpu.cfs_period_us: the length of a period (in microseconds) -cpu.stat: exports throttling statistics [explained further below] +- cpu.cfs_quota_us: the total available run-time within a period (in + microseconds) +- cpu.cfs_period_us: the length of a period (in microseconds) +- cpu.stat: exports throttling statistics [explained further below] The default values are:: From f1779d13edf3157d8636b45bd756f9439ebe7d85 Mon Sep 17 00:00:00 2001 From: Kir Kolyshkin Date: Tue, 19 Jan 2021 16:18:16 -0800 Subject: [PATCH 098/414] docs/scheduler/sched-design-CFS: formatting fix Fix the rendering of the paragraph. Before the fix, the first line is rendered in bold (I'm not quite sure why) and is also separated from the rest of the paragraph, which is rendered with an indent. Signed-off-by: Kir Kolyshkin Acked-by: Tejun Heo Link: https://lore.kernel.org/r/20210120001824.385168-3-kolyshkin@gmail.com Signed-off-by: Jonathan Corbet --- Documentation/scheduler/sched-design-CFS.rst | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/Documentation/scheduler/sched-design-CFS.rst b/Documentation/scheduler/sched-design-CFS.rst index a96c72651877..59b2d1fb4dc4 100644 --- a/Documentation/scheduler/sched-design-CFS.rst +++ b/Documentation/scheduler/sched-design-CFS.rst @@ -34,9 +34,9 @@ In CFS the virtual runtime is expressed and tracked via the per-task p->se.vruntime (nanosec-unit) value. This way, it's possible to accurately timestamp and measure the "expected CPU time" a task should have gotten. -[ small detail: on "ideal" hardware, at any time all tasks would have the same - p->se.vruntime value --- i.e., tasks would execute simultaneously and no task - would ever get "out of balance" from the "ideal" share of CPU time. ] + Small detail: on "ideal" hardware, at any time all tasks would have the same + p->se.vruntime value --- i.e., tasks would execute simultaneously and no task + would ever get "out of balance" from the "ideal" share of CPU time. CFS's task picking logic is based on this p->se.vruntime value and it is thus very simple: it always tries to run the task with the smallest p->se.vruntime From 6c57c12d0f74a1f76dee5b6f7b93a321d57dfd7a Mon Sep 17 00:00:00 2001 From: Kir Kolyshkin Date: Tue, 19 Jan 2021 16:18:17 -0800 Subject: [PATCH 099/414] docs/scheduler/sched-bwc: fix note rendering Signed-off-by: Kir Kolyshkin Acked-by: Tejun Heo Link: https://lore.kernel.org/r/20210120001824.385168-4-kolyshkin@gmail.com Signed-off-by: Jonathan Corbet --- Documentation/scheduler/sched-bwc.rst | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/Documentation/scheduler/sched-bwc.rst b/Documentation/scheduler/sched-bwc.rst index 4af4ee1f3600..a44860d33ffc 100644 --- a/Documentation/scheduler/sched-bwc.rst +++ b/Documentation/scheduler/sched-bwc.rst @@ -2,8 +2,9 @@ CFS Bandwidth Control ===================== -[ This document only discusses CPU bandwidth control for SCHED_NORMAL. - The SCHED_RT case is covered in Documentation/scheduler/sched-rt-group.rst ] +.. note:: + This document only discusses CPU bandwidth control for SCHED_NORMAL. + The SCHED_RT case is covered in Documentation/scheduler/sched-rt-group.rst CFS bandwidth control is a CONFIG_FAIR_GROUP_SCHED extension which allows the specification of the maximum CPU bandwidth available to a group or hierarchy. From e5ba9ea634501b6820f263a405ae465fee28abb8 Mon Sep 17 00:00:00 2001 From: Kir Kolyshkin Date: Tue, 19 Jan 2021 16:18:19 -0800 Subject: [PATCH 100/414] docs/scheduler/sched-bwc: note/link cgroup v2 Signed-off-by: Kir Kolyshkin Acked-by: Tejun Heo Link: https://lore.kernel.org/r/20210120001824.385168-6-kolyshkin@gmail.com Signed-off-by: Jonathan Corbet --- Documentation/admin-guide/cgroup-v2.rst | 4 ++++ Documentation/scheduler/sched-bwc.rst | 5 +++++ 2 files changed, 9 insertions(+) diff --git a/Documentation/admin-guide/cgroup-v2.rst b/Documentation/admin-guide/cgroup-v2.rst index e0f6ff7cfa93..b55362454886 100644 --- a/Documentation/admin-guide/cgroup-v2.rst +++ b/Documentation/admin-guide/cgroup-v2.rst @@ -1,3 +1,5 @@ +.. _cgroup-v2: + ================ Control Group v2 ================ @@ -954,6 +956,8 @@ All cgroup core files are prefixed with "cgroup." Controllers =========== +.. _cgroup-v2-cpu: + CPU --- diff --git a/Documentation/scheduler/sched-bwc.rst b/Documentation/scheduler/sched-bwc.rst index a44860d33ffc..845eee659199 100644 --- a/Documentation/scheduler/sched-bwc.rst +++ b/Documentation/scheduler/sched-bwc.rst @@ -26,6 +26,11 @@ Management ---------- Quota and period are managed within the cpu subsystem via cgroupfs. +.. note:: + The cgroupfs files described in this section are only applicable + to cgroup v1. For cgroup v2, see + :ref:`Documentation/admin-guide/cgroupv2.rst `. + - cpu.cfs_quota_us: the total available run-time within a period (in microseconds) - cpu.cfs_period_us: the length of a period (in microseconds) From a21e7bb3d6d954f1db819b5423b885eb2122bffb Mon Sep 17 00:00:00 2001 From: Kir Kolyshkin Date: Tue, 19 Jan 2021 16:18:20 -0800 Subject: [PATCH 101/414] docs/admin-guide: cgroup-v2: typos and spaces - fix a typo (mempry -> memory) in a file name; - add space before "(" where appropriate. Signed-off-by: Kir Kolyshkin Acked-by: Tejun Heo Link: https://lore.kernel.org/r/20210120001824.385168-7-kolyshkin@gmail.com Signed-off-by: Jonathan Corbet --- Documentation/admin-guide/cgroup-v2.rst | 34 ++++++++++++------------- 1 file changed, 17 insertions(+), 17 deletions(-) diff --git a/Documentation/admin-guide/cgroup-v2.rst b/Documentation/admin-guide/cgroup-v2.rst index b55362454886..073f976d44ea 100644 --- a/Documentation/admin-guide/cgroup-v2.rst +++ b/Documentation/admin-guide/cgroup-v2.rst @@ -1263,9 +1263,9 @@ PAGE_SIZE multiple when read back. can show up in the middle. Don't rely on items remaining in a fixed position; use the keys to look up specific values! - If the entry has no per-node counter(or not show in the - mempry.numa_stat). We use 'npn'(non-per-node) as the tag - to indicate that it will not show in the mempry.numa_stat. + If the entry has no per-node counter (or not show in the + memory.numa_stat). We use 'npn' (non-per-node) as the tag + to indicate that it will not show in the memory.numa_stat. anon Amount of memory used in anonymous mappings such as @@ -1281,11 +1281,11 @@ PAGE_SIZE multiple when read back. pagetables Amount of memory allocated for page tables. - percpu(npn) + percpu (npn) Amount of memory used for storing per-cpu kernel data structures. - sock(npn) + sock (npn) Amount of memory used in network transmission buffers shmem @@ -1333,7 +1333,7 @@ PAGE_SIZE multiple when read back. Part of "slab" that cannot be reclaimed on memory pressure. - slab(npn) + slab (npn) Amount of memory used for storing in-kernel data structures. @@ -1361,39 +1361,39 @@ PAGE_SIZE multiple when read back. workingset_nodereclaim Number of times a shadow node has been reclaimed - pgfault(npn) + pgfault (npn) Total number of page faults incurred - pgmajfault(npn) + pgmajfault (npn) Number of major page faults incurred - pgrefill(npn) + pgrefill (npn) Amount of scanned pages (in an active LRU list) - pgscan(npn) + pgscan (npn) Amount of scanned pages (in an inactive LRU list) - pgsteal(npn) + pgsteal (npn) Amount of reclaimed pages - pgactivate(npn) + pgactivate (npn) Amount of pages moved to the active LRU list - pgdeactivate(npn) + pgdeactivate (npn) Amount of pages moved to the inactive LRU list - pglazyfree(npn) + pglazyfree (npn) Amount of pages postponed to be freed under memory pressure - pglazyfreed(npn) + pglazyfreed (npn) Amount of reclaimed lazyfree pages - thp_fault_alloc(npn) + thp_fault_alloc (npn) Number of transparent hugepages which were allocated to satisfy a page fault. This counter is not present when CONFIG_TRANSPARENT_HUGEPAGE is not set. - thp_collapse_alloc(npn) + thp_collapse_alloc (npn) Number of transparent hugepages which were allocated to allow collapsing an existing range of pages. This counter is not present when CONFIG_TRANSPARENT_HUGEPAGE is not set. From 0d17d017fd090b3ec434373adb800cfc0cb6e7f6 Mon Sep 17 00:00:00 2001 From: Kir Kolyshkin Date: Tue, 19 Jan 2021 16:18:21 -0800 Subject: [PATCH 102/414] docs/admin-guide: cgroup-v2: fix cgroup.type rendering Due to an extra vertical whitespace, this was not recognised as a definition list entry, and thus was not rendered like the rest of cgroupfs files. Signed-off-by: Kir Kolyshkin Acked-by: Tejun Heo Link: https://lore.kernel.org/r/20210120001824.385168-8-kolyshkin@gmail.com Signed-off-by: Jonathan Corbet --- Documentation/admin-guide/cgroup-v2.rst | 1 - 1 file changed, 1 deletion(-) diff --git a/Documentation/admin-guide/cgroup-v2.rst b/Documentation/admin-guide/cgroup-v2.rst index 073f976d44ea..c1b6ffc286cf 100644 --- a/Documentation/admin-guide/cgroup-v2.rst +++ b/Documentation/admin-guide/cgroup-v2.rst @@ -788,7 +788,6 @@ Core Interface Files All cgroup core files are prefixed with "cgroup." cgroup.type - A read-write single value file which exists on non-root cgroups. From 8a32d0fee43d9f4bcaacb3234301a8f1fea8925b Mon Sep 17 00:00:00 2001 From: Kir Kolyshkin Date: Tue, 19 Jan 2021 16:18:22 -0800 Subject: [PATCH 103/414] doc/admin-guide/cgroup-v2: use tables MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit These two places are rendered like a table in the source (rst) code, but they are seen as plain text by formatters, and thus are joined together into a single line, e.g.: > “root” - a partition root “member” - a non-root member of a partition This is definitely not what was intended. To fix, use table formatting, like in other places. Signed-off-by: Kir Kolyshkin Acked-by: Tejun Heo Link: https://lore.kernel.org/r/20210120001824.385168-9-kolyshkin@gmail.com Signed-off-by: Jonathan Corbet --- Documentation/admin-guide/cgroup-v2.rst | 16 ++++++++++------ 1 file changed, 10 insertions(+), 6 deletions(-) diff --git a/Documentation/admin-guide/cgroup-v2.rst b/Documentation/admin-guide/cgroup-v2.rst index c1b6ffc286cf..6f59f13f28d0 100644 --- a/Documentation/admin-guide/cgroup-v2.rst +++ b/Documentation/admin-guide/cgroup-v2.rst @@ -2003,10 +2003,12 @@ Cpuset Interface Files cpuset-enabled cgroups. This flag is owned by the parent cgroup and is not delegatable. - It accepts only the following input values when written to. + It accepts only the following input values when written to. - "root" - a partition root - "member" - a non-root member of a partition + ======== ================================ + "root" a partition root + "member" a non-root member of a partition + ======== ================================ When set to be a partition root, the current cgroup is the root of a new partition or scheduling domain that comprises @@ -2047,9 +2049,11 @@ Cpuset Interface Files root to change. On read, the "cpuset.sched.partition" file can show the following values. - "member" Non-root member of a partition - "root" Partition root - "root invalid" Invalid partition root + ============== ============================== + "member" Non-root member of a partition + "root" Partition root + "root invalid" Invalid partition root + ============== ============================== It is a partition root if the first 2 partition root conditions above are true and at least one CPU from "cpuset.cpus" is From 7361ec680c32d43053fc8a5570b182ff3cda8b1b Mon Sep 17 00:00:00 2001 From: Kir Kolyshkin Date: Tue, 19 Jan 2021 16:18:23 -0800 Subject: [PATCH 104/414] docs/admin-guide/cgroup-v2: nit Improper Capitalization. Signed-off-by: Kir Kolyshkin Acked-by: Tejun Heo Link: https://lore.kernel.org/r/20210120001824.385168-10-kolyshkin@gmail.com Signed-off-by: Jonathan Corbet --- Documentation/admin-guide/cgroup-v2.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Documentation/admin-guide/cgroup-v2.rst b/Documentation/admin-guide/cgroup-v2.rst index 6f59f13f28d0..136902cd0e98 100644 --- a/Documentation/admin-guide/cgroup-v2.rst +++ b/Documentation/admin-guide/cgroup-v2.rst @@ -2226,7 +2226,7 @@ Without cgroup namespace, the "/proc/$PID/cgroup" file shows the complete path of the cgroup of a process. In a container setup where a set of cgroups and namespaces are intended to isolate processes the "/proc/$PID/cgroup" file may leak potential system level information -to the isolated processes. For Example:: +to the isolated processes. For example:: # cat /proc/self/cgroup 0::/batchjobs/container_id1 From ffcc972a88aa23c5aa728a96f8362a01b7480510 Mon Sep 17 00:00:00 2001 From: Kir Kolyshkin Date: Tue, 19 Jan 2021 16:18:24 -0800 Subject: [PATCH 105/414] docs/admin-guide/cgroup-v2: fix mount opt rendering Due to an extra empty line between the option and its description it is rendered not like in other places. Remove the empty lines to fix. Signed-off-by: Kir Kolyshkin Acked-by: Tejun Heo Link: https://lore.kernel.org/r/20210120001824.385168-11-kolyshkin@gmail.com Signed-off-by: Jonathan Corbet --- Documentation/admin-guide/cgroup-v2.rst | 3 --- 1 file changed, 3 deletions(-) diff --git a/Documentation/admin-guide/cgroup-v2.rst b/Documentation/admin-guide/cgroup-v2.rst index 136902cd0e98..14a7523c46a4 100644 --- a/Documentation/admin-guide/cgroup-v2.rst +++ b/Documentation/admin-guide/cgroup-v2.rst @@ -174,7 +174,6 @@ disabling controllers in v1 and make them always available in v2. cgroup v2 currently supports the following mount options. nsdelegate - Consider cgroup namespaces as delegation boundaries. This option is system wide and can only be set on mount or modified through remount from the init namespace. The mount option is @@ -182,7 +181,6 @@ cgroup v2 currently supports the following mount options. Delegation section for details. memory_localevents - Only populate memory.events with data for the current cgroup, and not any subtrees. This is legacy behaviour, the default behaviour without this option is to include subtree counts. @@ -191,7 +189,6 @@ cgroup v2 currently supports the following mount options. option is ignored on non-init namespace mounts. memory_recursiveprot - Recursively apply memory.min and memory.low protection to entire subtrees, without requiring explicit downward propagation into leaf cgroups. This allows protecting entire From ba1a297d78f47a23b138fb5cad953ae4a3fa4929 Mon Sep 17 00:00:00 2001 From: Lukas Bulwahn Date: Tue, 19 Jan 2021 10:53:26 +0100 Subject: [PATCH 106/414] arch/Kconfig: update a broken file reference Commit adab66b71abf ("Revert: "ring-buffer: Remove HAVE_64BIT_ALIGNED_ACCESS"") added the config HAVE_64BIT_ALIGNED_ACCESS back into arch/Kconfig with this revert. In the meantime, commit c9b54d6f362c ("docs: move other kAPI documents to core-api") changed ./Documentation/unaligned-memory-access.txt to ./Documentation/core-api/unaligned-memory-access.rst. Fortunately, ./scripts/documentation-file-ref-check detects this and warns about this broken reference. Update the file reference in arch/Kconfig. Fixes: adab66b71abf ("Revert: "ring-buffer: Remove HAVE_64BIT_ALIGNED_ACCESS"") Signed-off-by: Lukas Bulwahn Link: https://lore.kernel.org/r/20210119095326.13896-1-lukas.bulwahn@gmail.com Signed-off-by: Jonathan Corbet --- arch/Kconfig | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/arch/Kconfig b/arch/Kconfig index 24862d15f3a3..dc104b8270c0 100644 --- a/arch/Kconfig +++ b/arch/Kconfig @@ -156,8 +156,8 @@ config HAVE_64BIT_ALIGNED_ACCESS accesses are required to be 64 bit aligned in this way even though it is not a 64 bit architecture. - See Documentation/unaligned-memory-access.txt for more - information on the topic of unaligned memory accesses. + See Documentation/core-api/unaligned-memory-access.rst for + more information on the topic of unaligned memory accesses. config HAVE_EFFICIENT_UNALIGNED_ACCESS bool From 1008bfd8e3510f5d7ca1216677e2daec4b9d2add Mon Sep 17 00:00:00 2001 From: Yanteng Si Date: Tue, 19 Jan 2021 11:03:20 +0800 Subject: [PATCH 107/414] docs: iio: Correct a typo There are two EP9312, one of them should be 9315 Signed-off-by: Yanteng Si Link: https://lore.kernel.org/r/20210119030320.2860870-1-siyanteng@loongson.cn Signed-off-by: Jonathan Corbet --- Documentation/iio/ep93xx_adc.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Documentation/iio/ep93xx_adc.rst b/Documentation/iio/ep93xx_adc.rst index 4fd8dea3f6b8..0af0e9040457 100644 --- a/Documentation/iio/ep93xx_adc.rst +++ b/Documentation/iio/ep93xx_adc.rst @@ -13,7 +13,7 @@ touchscreen/ADC module. ==================== Numbering scheme for channels 0..4 is defined in EP9301 and EP9302 datasheets. -EP9307, EP9312 and EP9312 have 3 channels more (total 8), but the numbering is +EP9307, EP9312 and EP9315 have 3 channels more (total 8), but the numbering is not defined. So the last three are numbered randomly, let's say. Assuming ep93xx_adc is IIO device0, you'd find the following entries under From 047a4aba71e90c4d4f4d9c25b3591130db75315d Mon Sep 17 00:00:00 2001 From: Alex Shi Date: Thu, 21 Jan 2021 10:41:13 +0800 Subject: [PATCH 108/414] docs/zh_CN: remove cn_index tag in mips It's a unused tag with a incorrect big name but just for mips arch. So remove it. Signed-off-by: Alex Shi Cc: Yanteng Si Cc: Jonathan Corbet Cc: linux-doc@vger.kernel.org Link: https://lore.kernel.org/r/20210121024113.16344-1-alex.shi@linux.alibaba.com Signed-off-by: Jonathan Corbet --- Documentation/translations/zh_CN/mips/index.rst | 3 --- 1 file changed, 3 deletions(-) diff --git a/Documentation/translations/zh_CN/mips/index.rst b/Documentation/translations/zh_CN/mips/index.rst index 27a2eae8484a..b85033f9d67c 100644 --- a/Documentation/translations/zh_CN/mips/index.rst +++ b/Documentation/translations/zh_CN/mips/index.rst @@ -5,9 +5,6 @@ :Original: :doc:`../../../mips/index` :Translator: Yanteng Si -.. _cn_index: - - =========================== MIPS特性文档 =========================== From f7775c20847c7c46ffdd1b03a9238bc791b7efaa Mon Sep 17 00:00:00 2001 From: Randy Dunlap Date: Sun, 17 Jan 2021 13:33:51 -0800 Subject: [PATCH 109/414] AFS: Documentation: fix a few typos in afs.rst Fix typos (punctuation, grammar, spelling) in afs.rst. Signed-off-by: Randy Dunlap Cc: David Howells Cc: linux-afs@lists.infradead.org Cc: Jonathan Corbet Cc: linux-doc@vger.kernel.org Link: https://lore.kernel.org/r/20210117213351.1075-1-rdunlap@infradead.org Signed-off-by: Jonathan Corbet --- Documentation/filesystems/afs.rst | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/Documentation/filesystems/afs.rst b/Documentation/filesystems/afs.rst index 0abb155ac666..ca062a7f8ee2 100644 --- a/Documentation/filesystems/afs.rst +++ b/Documentation/filesystems/afs.rst @@ -109,7 +109,7 @@ Mountpoints AFS has a concept of mountpoints. In AFS terms, these are specially formatted symbolic links (of the same form as the "device name" passed to mount). kAFS presents these to the user as directories that have a follow-link capability -(ie: symbolic link semantics). If anyone attempts to access them, they will +(i.e.: symbolic link semantics). If anyone attempts to access them, they will automatically cause the target volume to be mounted (if possible) on that site. Automatically mounted filesystems will be automatically unmounted approximately @@ -144,7 +144,7 @@ looks up a cell of the same name, for example:: Proc Filesystem =============== -The AFS modules creates a "/proc/fs/afs/" directory and populates it: +The AFS module creates a "/proc/fs/afs/" directory and populates it: (*) A "cells" file that lists cells currently known to the afs module and their usage counts:: @@ -201,7 +201,7 @@ And then run as:: ./klog Assuming it's successful, this adds a key of type RxRPC, named for the service -and cell, eg: "afs@". This can be viewed with the keyctl program or +and cell, e.g.: "afs@". This can be viewed with the keyctl program or by cat'ing /proc/keys:: [root@andromeda ~]# keyctl show @@ -211,7 +211,7 @@ by cat'ing /proc/keys:: 111416553 --als--v 0 0 \_ rxrpc: afs@CAMBRIDGE.REDHAT.COM Currently the username, realm, password and proposed ticket lifetime are -compiled in to the program. +compiled into the program. It is not required to acquire a key before using AFS facilities, but if one is not acquired then all operations will be governed by the anonymous user parts From 4c9a3a6c9c5478ce6b4b9db6c964244474676bcb Mon Sep 17 00:00:00 2001 From: Mauro Carvalho Chehab Date: Thu, 14 Jan 2021 09:04:37 +0100 Subject: [PATCH 110/414] parport: fix a kernel-doc markup The kernel-doc markup inside share.c is actually for __parport_register_driver. The actual goal seems to be to document parport_register_driver(). So, fix the existing markup and add a new one. Signed-off-by: Mauro Carvalho Chehab Link: https://lore.kernel.org/r/dc0778af8c466cc667409ead05876a5cfd3cbece.1610610937.git.mchehab+huawei@kernel.org Signed-off-by: Jonathan Corbet --- drivers/parport/share.c | 2 +- include/linux/parport.h | 31 +++++++++++++++++++++++++++++++ 2 files changed, 32 insertions(+), 1 deletion(-) diff --git a/drivers/parport/share.c b/drivers/parport/share.c index 7fec4fefe151..62f8407923d4 100644 --- a/drivers/parport/share.c +++ b/drivers/parport/share.c @@ -243,7 +243,7 @@ static int port_detect(struct device *dev, void *dev_drv) } /** - * parport_register_driver - register a parallel port device driver + * __parport_register_driver - register a parallel port device driver * @drv: structure describing the driver * @owner: owner module of drv * @mod_name: module name string diff --git a/include/linux/parport.h b/include/linux/parport.h index 1fb508c19e83..f981f794c850 100644 --- a/include/linux/parport.h +++ b/include/linux/parport.h @@ -297,6 +297,37 @@ int __must_check __parport_register_driver(struct parport_driver *, * parport_register_driver must be a macro so that KBUILD_MODNAME can * be expanded */ + +/** + * parport_register_driver - register a parallel port device driver + * @driver: structure describing the driver + * + * This can be called by a parallel port device driver in order + * to receive notifications about ports being found in the + * system, as well as ports no longer available. + * + * If devmodel is true then the new device model is used + * for registration. + * + * The @driver structure is allocated by the caller and must not be + * deallocated until after calling parport_unregister_driver(). + * + * If using the non device model: + * The driver's attach() function may block. The port that + * attach() is given will be valid for the duration of the + * callback, but if the driver wants to take a copy of the + * pointer it must call parport_get_port() to do so. Calling + * parport_register_device() on that port will do this for you. + * + * The driver's detach() function may block. The port that + * detach() is given will be valid for the duration of the + * callback, but if the driver wants to take a copy of the + * pointer it must call parport_get_port() to do so. + * + * + * Returns 0 on success. The non device model will always succeeds. + * but the new device model can fail and will return the error code. + **/ #define parport_register_driver(driver) \ __parport_register_driver(driver, THIS_MODULE, KBUILD_MODNAME) From 3de990b09a03828ed1195e1a973004e863a67209 Mon Sep 17 00:00:00 2001 From: Mauro Carvalho Chehab Date: Thu, 14 Jan 2021 09:04:38 +0100 Subject: [PATCH 111/414] rapidio: fix kernel-doc a markup Probaly this was due to a cut and paste issue. Signed-off-by: Mauro Carvalho Chehab Link: https://lore.kernel.org/r/e23e64983788a51dd9099e2b0d881e1f64ecbc5b.1610610937.git.mchehab+huawei@kernel.org Signed-off-by: Jonathan Corbet --- drivers/rapidio/rio.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/rapidio/rio.c b/drivers/rapidio/rio.c index c2b79736a92b..e74cf09eeff0 100644 --- a/drivers/rapidio/rio.c +++ b/drivers/rapidio/rio.c @@ -749,7 +749,7 @@ int rio_map_outb_region(struct rio_mport *mport, u16 destid, u64 rbase, EXPORT_SYMBOL_GPL(rio_map_outb_region); /** - * rio_unmap_inb_region -- Unmap the inbound memory region + * rio_unmap_outb_region -- Unmap the inbound memory region * @mport: Master port * @destid: destination id mapping points to * @rstart: RIO base address window translates to From 961f3c898e86de9b530648b6b0319f9240508f16 Mon Sep 17 00:00:00 2001 From: Mauro Carvalho Chehab Date: Thu, 14 Jan 2021 09:04:39 +0100 Subject: [PATCH 112/414] fs: fix kernel-doc markups Two markups are at the wrong place. Kernel-doc only support having the comment just before the identifier. Also, some identifiers have different names between their prototypes and the kernel-doc markup. Signed-off-by: Mauro Carvalho Chehab Link: https://lore.kernel.org/r/96b1e1b388600ab092331f6c4e88ff8e8779ce6c.1610610937.git.mchehab+huawei@kernel.org Signed-off-by: Jonathan Corbet --- fs/dcache.c | 63 ++++++++++++++++++++++++++------------------------- fs/inode.c | 4 ++-- fs/seq_file.c | 5 ++-- fs/super.c | 12 +++++----- 4 files changed, 43 insertions(+), 41 deletions(-) diff --git a/fs/dcache.c b/fs/dcache.c index 97e81a844a96..c17fd15b01d4 100644 --- a/fs/dcache.c +++ b/fs/dcache.c @@ -456,23 +456,6 @@ static void d_lru_shrink_move(struct list_lru_one *lru, struct dentry *dentry, list_lru_isolate_move(lru, &dentry->d_lru, list); } -/** - * d_drop - drop a dentry - * @dentry: dentry to drop - * - * d_drop() unhashes the entry from the parent dentry hashes, so that it won't - * be found through a VFS lookup any more. Note that this is different from - * deleting the dentry - d_delete will try to mark the dentry negative if - * possible, giving a successful _negative_ lookup, while d_drop will - * just make the cache lookup fail. - * - * d_drop() is used mainly for stuff that wants to invalidate a dentry for some - * reason (NFS timeouts or autofs deletes). - * - * __d_drop requires dentry->d_lock - * ___d_drop doesn't mark dentry as "unhashed" - * (dentry->d_hash.pprev will be LIST_POISON2, not NULL). - */ static void ___d_drop(struct dentry *dentry) { struct hlist_bl_head *b; @@ -501,6 +484,24 @@ void __d_drop(struct dentry *dentry) } EXPORT_SYMBOL(__d_drop); +/** + * d_drop - drop a dentry + * @dentry: dentry to drop + * + * d_drop() unhashes the entry from the parent dentry hashes, so that it won't + * be found through a VFS lookup any more. Note that this is different from + * deleting the dentry - d_delete will try to mark the dentry negative if + * possible, giving a successful _negative_ lookup, while d_drop will + * just make the cache lookup fail. + * + * d_drop() is used mainly for stuff that wants to invalidate a dentry for some + * reason (NFS timeouts or autofs deletes). + * + * __d_drop requires dentry->d_lock + * + * ___d_drop doesn't mark dentry as "unhashed" + * (dentry->d_hash.pprev will be LIST_POISON2, not NULL). + */ void d_drop(struct dentry *dentry) { spin_lock(&dentry->d_lock); @@ -996,20 +997,6 @@ struct dentry *d_find_any_alias(struct inode *inode) } EXPORT_SYMBOL(d_find_any_alias); -/** - * d_find_alias - grab a hashed alias of inode - * @inode: inode in question - * - * If inode has a hashed alias, or is a directory and has any alias, - * acquire the reference to alias and return it. Otherwise return NULL. - * Notice that if inode is a directory there can be only one alias and - * it can be unhashed only if it has no children, or if it is the root - * of a filesystem, or if the directory was renamed and d_revalidate - * was the first vfs operation to notice. - * - * If the inode has an IS_ROOT, DCACHE_DISCONNECTED alias, then prefer - * any other hashed alias over that one. - */ static struct dentry *__d_find_alias(struct inode *inode) { struct dentry *alias; @@ -1029,6 +1016,20 @@ static struct dentry *__d_find_alias(struct inode *inode) return NULL; } +/** + * d_find_alias - grab a hashed alias of inode + * @inode: inode in question + * + * If inode has a hashed alias, or is a directory and has any alias, + * acquire the reference to alias and return it. Otherwise return NULL. + * Notice that if inode is a directory there can be only one alias and + * it can be unhashed only if it has no children, or if it is the root + * of a filesystem, or if the directory was renamed and d_revalidate + * was the first vfs operation to notice. + * + * If the inode has an IS_ROOT, DCACHE_DISCONNECTED alias, then prefer + * any other hashed alias over that one. + */ struct dentry *d_find_alias(struct inode *inode) { struct dentry *de = NULL; diff --git a/fs/inode.c b/fs/inode.c index 6442d97d9a4a..1dc9e032f659 100644 --- a/fs/inode.c +++ b/fs/inode.c @@ -1493,7 +1493,7 @@ struct inode *find_inode_rcu(struct super_block *sb, unsigned long hashval, EXPORT_SYMBOL(find_inode_rcu); /** - * find_inode_by_rcu - Find an inode in the inode cache + * find_inode_by_ino_rcu - Find an inode in the inode cache * @sb: Super block of file system to search * @ino: The inode number to match * @@ -1777,7 +1777,7 @@ static int update_time(struct inode *inode, struct timespec64 *time, int flags) } /** - * touch_atime - update the access time + * atime_needs_update - update the access time * @path: the &struct path to update * @inode: inode to update * diff --git a/fs/seq_file.c b/fs/seq_file.c index 03a369ccd28c..cb11a34fb871 100644 --- a/fs/seq_file.c +++ b/fs/seq_file.c @@ -669,7 +669,8 @@ void seq_puts(struct seq_file *m, const char *s) EXPORT_SYMBOL(seq_puts); /** - * A helper routine for putting decimal numbers without rich format of printf(). + * seq_put_decimal_ull_width - A helper routine for putting decimal numbers + * without rich format of printf(). * only 'unsigned long long' is supported. * @m: seq_file identifying the buffer to which data should be written * @delimiter: a string which is printed before the number @@ -1044,7 +1045,7 @@ struct hlist_node *seq_hlist_next_rcu(void *v, EXPORT_SYMBOL(seq_hlist_next_rcu); /** - * seq_hlist_start_precpu - start an iteration of a percpu hlist array + * seq_hlist_start_percpu - start an iteration of a percpu hlist array * @head: pointer to percpu array of struct hlist_heads * @cpu: pointer to cpu "cursor" * @pos: start position of sequence diff --git a/fs/super.c b/fs/super.c index 2c6cdea2ab2d..18a3de0b93f5 100644 --- a/fs/super.c +++ b/fs/super.c @@ -1718,12 +1718,6 @@ int freeze_super(struct super_block *sb) } EXPORT_SYMBOL(freeze_super); -/** - * thaw_super -- unlock filesystem - * @sb: the super to thaw - * - * Unlocks the filesystem and marks it writeable again after freeze_super(). - */ static int thaw_super_locked(struct super_block *sb) { int error; @@ -1759,6 +1753,12 @@ static int thaw_super_locked(struct super_block *sb) return 0; } +/** + * thaw_super -- unlock filesystem + * @sb: the super to thaw + * + * Unlocks the filesystem and marks it writeable again after freeze_super(). + */ int thaw_super(struct super_block *sb) { down_write(&sb->s_umount); From 4fd15eaf429a1e7de9d653f0ff2e8a50733495b8 Mon Sep 17 00:00:00 2001 From: Mauro Carvalho Chehab Date: Thu, 14 Jan 2021 09:04:40 +0100 Subject: [PATCH 113/414] pstore/zone: fix a kernel-doc markup The documented struct is psz_head and not psz_buffer. Reviewed-by: Kees Cook Signed-off-by: Mauro Carvalho Chehab Link: https://lore.kernel.org/r/a67ca4d12c3ef277dadb9e0d0df8450158e637cc.1610610937.git.mchehab+huawei@kernel.org Signed-off-by: Jonathan Corbet --- fs/pstore/zone.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fs/pstore/zone.c b/fs/pstore/zone.c index 5266ccbec007..7c8f8feac6c3 100644 --- a/fs/pstore/zone.c +++ b/fs/pstore/zone.c @@ -23,7 +23,7 @@ #include "internal.h" /** - * struct psz_head - header of zone to flush to storage + * struct psz_buffer - header of zone to flush to storage * * @sig: signature to indicate header (PSZ_SIG xor PSZONE-type value) * @datalen: length of data in @data From e23bd83368af41ce420bd0eacb32817ccf7d3ca2 Mon Sep 17 00:00:00 2001 From: Mauro Carvalho Chehab Date: Thu, 14 Jan 2021 09:04:41 +0100 Subject: [PATCH 114/414] firmware: stratix10-svc: fix kernel-doc markups There are some common comments marked, instead, with kernel-doc notation, which won't work. While here, rename an identifier, in order to match the function prototype below kernel-doc markup. Acked-by: Richard Gong Signed-off-by: Mauro Carvalho Chehab Link: https://lore.kernel.org/r/02a1eb47767e01e875d8840805b8b2d4f3c6bdee.1610610937.git.mchehab+huawei@kernel.org Signed-off-by: Jonathan Corbet --- include/linux/firmware/intel/stratix10-svc-client.h | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/include/linux/firmware/intel/stratix10-svc-client.h b/include/linux/firmware/intel/stratix10-svc-client.h index a93d85932eb9..ebc295647581 100644 --- a/include/linux/firmware/intel/stratix10-svc-client.h +++ b/include/linux/firmware/intel/stratix10-svc-client.h @@ -6,7 +6,7 @@ #ifndef __STRATIX10_SVC_CLIENT_H #define __STRATIX10_SVC_CLIENT_H -/** +/* * Service layer driver supports client names * * fpga: for FPGA configuration @@ -15,7 +15,7 @@ #define SVC_CLIENT_FPGA "fpga" #define SVC_CLIENT_RSU "rsu" -/** +/* * Status of the sent command, in bit number * * SVC_STATUS_OK: @@ -50,7 +50,7 @@ #define SVC_STATUS_ERROR 5 #define SVC_STATUS_NO_SUPPORT 6 -/** +/* * Flag bit for COMMAND_RECONFIG * * COMMAND_RECONFIG_FLAG_PARTIAL: @@ -58,7 +58,7 @@ */ #define COMMAND_RECONFIG_FLAG_PARTIAL 1 -/** +/* * Timeout settings for service clients: * timeout value used in Stratix10 FPGA manager driver. * timeout value used in RSU driver @@ -218,7 +218,7 @@ void stratix10_svc_free_memory(struct stratix10_svc_chan *chan, void *kaddr); int stratix10_svc_send(struct stratix10_svc_chan *chan, void *msg); /** - * intel_svc_done() - complete service request + * stratix10_svc_done() - complete service request * @chan: service channel assigned to the client * * This function is used by service client to inform service layer that From 3aa1141f9916af3258e2bc69e294e2cc07d2ddf1 Mon Sep 17 00:00:00 2001 From: Mauro Carvalho Chehab Date: Thu, 14 Jan 2021 09:04:42 +0100 Subject: [PATCH 115/414] connector: fix a kernel-doc markup A function has a different name between their prototype and its kernel-doc markup. Signed-off-by: Mauro Carvalho Chehab Link: https://lore.kernel.org/r/889cfb141a98ae06d5bc79b744786ec2e8f92d93.1610610937.git.mchehab+huawei@kernel.org Signed-off-by: Jonathan Corbet --- include/linux/connector.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/include/linux/connector.h b/include/linux/connector.h index 8ea860efea37..487350bb19c3 100644 --- a/include/linux/connector.h +++ b/include/linux/connector.h @@ -99,7 +99,7 @@ void cn_del_callback(const struct cb_id *id); int cn_netlink_send_mult(struct cn_msg *msg, u16 len, u32 portid, u32 group, gfp_t gfp_mask); /** - * cn_netlink_send_mult - Sends message to the specified groups. + * cn_netlink_send - Sends message to the specified groups. * * @msg: message header(with attached data). * @portid: destination port. From 8276d3b4398d1b407cc8199edd506207ad0ac1cf Mon Sep 17 00:00:00 2001 From: Mauro Carvalho Chehab Date: Thu, 14 Jan 2021 09:04:43 +0100 Subject: [PATCH 116/414] lib/crc7: fix a kernel-doc markup A function has a different name between their prototype and its kernel-doc markup. Signed-off-by: Mauro Carvalho Chehab Link: https://lore.kernel.org/r/1963266e84c6fb0154602f276b04887c85f80bd3.1610610937.git.mchehab+huawei@kernel.org Signed-off-by: Jonathan Corbet --- lib/crc7.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/lib/crc7.c b/lib/crc7.c index 6a848d73e804..3848e313b722 100644 --- a/lib/crc7.c +++ b/lib/crc7.c @@ -51,7 +51,7 @@ const u8 crc7_be_syndrome_table[256] = { EXPORT_SYMBOL(crc7_be_syndrome_table); /** - * crc7 - update the CRC7 for the data buffer + * crc7_be - update the CRC7 for the data buffer * @crc: previous CRC7 value * @buffer: data pointer * @len: number of bytes in the buffer From 909782ad0a36e4e5f875a431e280cba7b9b046fa Mon Sep 17 00:00:00 2001 From: Mauro Carvalho Chehab Date: Thu, 14 Jan 2021 09:04:44 +0100 Subject: [PATCH 117/414] memblock: fix kernel-doc markups Some identifiers have different names between their prototypes and the kernel-doc markup. Acked-by: Mike Rapoport Signed-off-by: Mauro Carvalho Chehab Link: https://lore.kernel.org/r/f3c65f61367993a607f9daf9dc1a3bdab1f0a040.1610610937.git.mchehab+huawei@kernel.org Signed-off-by: Jonathan Corbet --- include/linux/memblock.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/include/linux/memblock.h b/include/linux/memblock.h index b93c44b9121e..9cc6da7b513e 100644 --- a/include/linux/memblock.h +++ b/include/linux/memblock.h @@ -272,7 +272,7 @@ void __next_mem_pfn_range_in_zone(u64 *idx, struct zone *zone, unsigned long *out_spfn, unsigned long *out_epfn); /** - * for_each_free_mem_range_in_zone - iterate through zone specific free + * for_each_free_mem_pfn_range_in_zone - iterate through zone specific free * memblock areas * @i: u64 used as loop variable * @zone: zone in which all of the memory blocks reside @@ -292,7 +292,7 @@ void __next_mem_pfn_range_in_zone(u64 *idx, struct zone *zone, __next_mem_pfn_range_in_zone(&i, zone, p_start, p_end)) /** - * for_each_free_mem_range_in_zone_from - iterate through zone specific + * for_each_free_mem_pfn_range_in_zone_from - iterate through zone specific * free memblock areas from a given point * @i: u64 used as loop variable * @zone: zone in which all of the memory blocks reside From 484cac791015e786f857c1cd98b7696aaf1e8a7a Mon Sep 17 00:00:00 2001 From: Mauro Carvalho Chehab Date: Thu, 14 Jan 2021 09:04:45 +0100 Subject: [PATCH 118/414] w1: fix a kernel-doc markup A function has a different name between their prototype and its kernel-doc markup. Signed-off-by: Mauro Carvalho Chehab Link: https://lore.kernel.org/r/2dc136ff6290d7c8919599d21bee244f31647c8c.1610610937.git.mchehab+huawei@kernel.org Signed-off-by: Jonathan Corbet --- include/linux/w1.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/include/linux/w1.h b/include/linux/w1.h index 949d3b10e531..9a2a0ef39018 100644 --- a/include/linux/w1.h +++ b/include/linux/w1.h @@ -280,7 +280,7 @@ int w1_register_family(struct w1_family *family); void w1_unregister_family(struct w1_family *family); /** - * module_w1_driver() - Helper macro for registering a 1-Wire families + * module_w1_family() - Helper macro for registering a 1-Wire families * @__w1_family: w1_family struct * * Helper macro for 1-Wire families which do not do anything special in module From 3950b92f96912cf2578c94a31961296c9680a0f2 Mon Sep 17 00:00:00 2001 From: Mauro Carvalho Chehab Date: Thu, 14 Jan 2021 09:04:46 +0100 Subject: [PATCH 119/414] selftests: kselftest_harness.h: partially fix kernel-doc markups The kernel-doc markups on this file are weird: they don't follow what's specified at: Documentation/doc-guide/kernel-doc.rst In particular, markups should use this format: identifier - description and not this: identifier(args) The way the definitions are inside this file cause the parser to completely miss the identifier name of each function. This prevents improving the script to do some needed validation tests. Address this part. Yet, furter changes are needed in order for it to fully follow the specs. Acked-by: Kees Cook Signed-off-by: Mauro Carvalho Chehab Link: https://lore.kernel.org/r/8383758160fdb4fcbb2ac56beeb874ca6dffc6b9.1610610937.git.mchehab+huawei@kernel.org Signed-off-by: Jonathan Corbet --- tools/testing/selftests/kselftest_harness.h | 26 ++++++++++++--------- 1 file changed, 15 insertions(+), 11 deletions(-) diff --git a/tools/testing/selftests/kselftest_harness.h b/tools/testing/selftests/kselftest_harness.h index edce85420d19..ae0f0f33b2a6 100644 --- a/tools/testing/selftests/kselftest_harness.h +++ b/tools/testing/selftests/kselftest_harness.h @@ -79,7 +79,7 @@ #endif /** - * TH_LOG(fmt, ...) + * TH_LOG() * * @fmt: format string * @...: optional arguments @@ -113,12 +113,16 @@ __FILE__, __LINE__, _metadata->name, ##__VA_ARGS__) /** - * SKIP(statement, fmt, ...) + * SKIP() * * @statement: statement to run after reporting SKIP * @fmt: format string * @...: optional arguments * + * .. code-block:: c + * + * SKIP(statement, fmt, ...); + * * This forces a "pass" after reporting why something is being skipped * and runs "statement", which is usually "return" or "goto skip". */ @@ -136,7 +140,7 @@ } while (0) /** - * TEST(test_name) - Defines the test function and creates the registration + * TEST() - Defines the test function and creates the registration * stub * * @test_name: test name @@ -155,7 +159,7 @@ #define TEST(test_name) __TEST_IMPL(test_name, -1) /** - * TEST_SIGNAL(test_name, signal) + * TEST_SIGNAL() * * @test_name: test name * @signal: signal number @@ -195,7 +199,7 @@ struct __test_metadata __attribute__((unused)) *_metadata) /** - * FIXTURE_DATA(datatype_name) - Wraps the struct name so we have one less + * FIXTURE_DATA() - Wraps the struct name so we have one less * argument to pass around * * @datatype_name: datatype name @@ -212,7 +216,7 @@ #define FIXTURE_DATA(datatype_name) struct _test_data_##datatype_name /** - * FIXTURE(fixture_name) - Called once per fixture to setup the data and + * FIXTURE() - Called once per fixture to setup the data and * register * * @fixture_name: fixture name @@ -239,7 +243,7 @@ FIXTURE_DATA(fixture_name) /** - * FIXTURE_SETUP(fixture_name) - Prepares the setup function for the fixture. + * FIXTURE_SETUP() - Prepares the setup function for the fixture. * *_metadata* is included so that EXPECT_* and ASSERT_* work correctly. * * @fixture_name: fixture name @@ -265,7 +269,7 @@ __attribute__((unused)) *variant) /** - * FIXTURE_TEARDOWN(fixture_name) + * FIXTURE_TEARDOWN() * *_metadata* is included so that EXPECT_* and ASSERT_* work correctly. * * @fixture_name: fixture name @@ -286,7 +290,7 @@ FIXTURE_DATA(fixture_name) __attribute__((unused)) *self) /** - * FIXTURE_VARIANT(fixture_name) - Optionally called once per fixture + * FIXTURE_VARIANT() - Optionally called once per fixture * to declare fixture variant * * @fixture_name: fixture name @@ -305,7 +309,7 @@ #define FIXTURE_VARIANT(fixture_name) struct _fixture_variant_##fixture_name /** - * FIXTURE_VARIANT_ADD(fixture_name, variant_name) - Called once per fixture + * FIXTURE_VARIANT_ADD() - Called once per fixture * variant to setup and register the data * * @fixture_name: fixture name @@ -339,7 +343,7 @@ _##fixture_name##_##variant_name##_variant = /** - * TEST_F(fixture_name, test_name) - Emits test registration and helpers for + * TEST_F() - Emits test registration and helpers for * fixture-based test cases * * @fixture_name: fixture name From 9cde12ba07ce71cdc420b9eb8dbd5b19f0bbc730 Mon Sep 17 00:00:00 2001 From: Bjorn Andersson Date: Wed, 20 Jan 2021 17:40:04 -0800 Subject: [PATCH 120/414] dt-bindings: arm-smmu-qcom: Add Qualcomm SC8180X compatible Add compatible for the ARM SMMU found in the Qualcomm SC8180x platform. Signed-off-by: Bjorn Andersson Link: https://lore.kernel.org/r/20210121014005.1612382-1-bjorn.andersson@linaro.org Signed-off-by: Will Deacon --- Documentation/devicetree/bindings/iommu/arm,smmu.yaml | 1 + 1 file changed, 1 insertion(+) diff --git a/Documentation/devicetree/bindings/iommu/arm,smmu.yaml b/Documentation/devicetree/bindings/iommu/arm,smmu.yaml index 3b63f2ae24db..c50198e17d52 100644 --- a/Documentation/devicetree/bindings/iommu/arm,smmu.yaml +++ b/Documentation/devicetree/bindings/iommu/arm,smmu.yaml @@ -34,6 +34,7 @@ properties: items: - enum: - qcom,sc7180-smmu-500 + - qcom,sc8180x-smmu-500 - qcom,sdm845-smmu-500 - qcom,sm8150-smmu-500 - qcom,sm8250-smmu-500 From 1a7180ff81aadd21556a5b5df7d0a3280f7d744f Mon Sep 17 00:00:00 2001 From: Bjorn Andersson Date: Wed, 20 Jan 2021 17:40:05 -0800 Subject: [PATCH 121/414] iommu/arm-smmu-qcom: Add Qualcomm SC8180X impl The primary SMMU found in Qualcomm SC8180X platform needs to use the Qualcomm implementation, so add a specific compatible for this. Signed-off-by: Bjorn Andersson Link: https://lore.kernel.org/r/20210121014005.1612382-2-bjorn.andersson@linaro.org Signed-off-by: Will Deacon --- drivers/iommu/arm/arm-smmu/arm-smmu-qcom.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/iommu/arm/arm-smmu/arm-smmu-qcom.c b/drivers/iommu/arm/arm-smmu/arm-smmu-qcom.c index bcda17012aee..82c7edc6e025 100644 --- a/drivers/iommu/arm/arm-smmu/arm-smmu-qcom.c +++ b/drivers/iommu/arm/arm-smmu/arm-smmu-qcom.c @@ -166,6 +166,7 @@ static const struct of_device_id qcom_smmu_client_of_match[] __maybe_unused = { { .compatible = "qcom,mdss" }, { .compatible = "qcom,sc7180-mdss" }, { .compatible = "qcom,sc7180-mss-pil" }, + { .compatible = "qcom,sc8180x-mdss" }, { .compatible = "qcom,sdm845-mdss" }, { .compatible = "qcom,sdm845-mss-pil" }, { } @@ -327,6 +328,7 @@ static struct arm_smmu_device *qcom_smmu_create(struct arm_smmu_device *smmu, static const struct of_device_id __maybe_unused qcom_smmu_impl_of_match[] = { { .compatible = "qcom,msm8998-smmu-v2" }, { .compatible = "qcom,sc7180-smmu-500" }, + { .compatible = "qcom,sc8180x-smmu-500" }, { .compatible = "qcom,sdm630-smmu-v2" }, { .compatible = "qcom,sdm845-smmu-500" }, { .compatible = "qcom,sm8150-smmu-500" }, From 70b5b6a6daeac0ce8ae57bfec7e1dcf295b42336 Mon Sep 17 00:00:00 2001 From: Vinod Koul Date: Fri, 15 Jan 2021 14:33:21 +0530 Subject: [PATCH 122/414] dt-bindings: arm-smmu: Add sm8350 compatible string Add compatible string for sm8350 iommu to documentation. Signed-off-by: Vinod Koul Link: https://lore.kernel.org/r/20210115090322.2287538-1-vkoul@kernel.org Signed-off-by: Will Deacon --- Documentation/devicetree/bindings/iommu/arm,smmu.yaml | 1 + 1 file changed, 1 insertion(+) diff --git a/Documentation/devicetree/bindings/iommu/arm,smmu.yaml b/Documentation/devicetree/bindings/iommu/arm,smmu.yaml index c50198e17d52..6ba161dea4d8 100644 --- a/Documentation/devicetree/bindings/iommu/arm,smmu.yaml +++ b/Documentation/devicetree/bindings/iommu/arm,smmu.yaml @@ -38,6 +38,7 @@ properties: - qcom,sdm845-smmu-500 - qcom,sm8150-smmu-500 - qcom,sm8250-smmu-500 + - qcom,sm8350-smmu-500 - const: arm,mmu-500 - description: Qcom Adreno GPUs implementing "arm,smmu-v2" items: From d8498b1e4ecc75d023c59acb30e15e3d8d8f07ea Mon Sep 17 00:00:00 2001 From: Vinod Koul Date: Fri, 15 Jan 2021 14:33:22 +0530 Subject: [PATCH 123/414] iommu: arm-smmu-impl: Add SM8350 qcom iommu implementation Add SM8350 qcom iommu implementation to the table of qcom_smmu_impl_of_match table which brings in iommu support for SM8350 SoC Signed-off-by: Vinod Koul Link: https://lore.kernel.org/r/20210115090322.2287538-2-vkoul@kernel.org Signed-off-by: Will Deacon --- drivers/iommu/arm/arm-smmu/arm-smmu-qcom.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/iommu/arm/arm-smmu/arm-smmu-qcom.c b/drivers/iommu/arm/arm-smmu/arm-smmu-qcom.c index 82c7edc6e025..3b6fdbb6c916 100644 --- a/drivers/iommu/arm/arm-smmu/arm-smmu-qcom.c +++ b/drivers/iommu/arm/arm-smmu/arm-smmu-qcom.c @@ -333,6 +333,7 @@ static const struct of_device_id __maybe_unused qcom_smmu_impl_of_match[] = { { .compatible = "qcom,sdm845-smmu-500" }, { .compatible = "qcom,sm8150-smmu-500" }, { .compatible = "qcom,sm8250-smmu-500" }, + { .compatible = "qcom,sm8350-smmu-500" }, { } }; From 86d2d9214880c1879cb0ff466cce2fcd9c853e44 Mon Sep 17 00:00:00 2001 From: Robin Murphy Date: Thu, 21 Jan 2021 14:09:42 +0000 Subject: [PATCH 124/414] iommu/arm-smmu-v3: Remove the page 1 fixup Since we now keep track of page 1 via a separate pointer that already encapsulates aliasing to page 0 as necessary, we can remove the clunky fixup routine and simply use the relevant bases directly. The current architecture spec (IHI0070D.a) defines SMMU_{EVENTQ,PRIQ}_{PROD,CONS} as offsets relative to page 1, so the cleanup represents a little bit of convergence as well as just lines of code saved. Signed-off-by: Robin Murphy Link: https://lore.kernel.org/r/08d9bda570bb5681f11a2f250a31be9ef763b8c5.1611238182.git.robin.murphy@arm.com Signed-off-by: Will Deacon --- drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c | 42 ++++++++------------- drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.h | 8 ++-- 2 files changed, 20 insertions(+), 30 deletions(-) diff --git a/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c index 8ca7415d785d..bca458c00e48 100644 --- a/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c +++ b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c @@ -88,15 +88,6 @@ static struct arm_smmu_option_prop arm_smmu_options[] = { { 0, NULL}, }; -static inline void __iomem *arm_smmu_page1_fixup(unsigned long offset, - struct arm_smmu_device *smmu) -{ - if (offset > SZ_64K) - return smmu->page1 + offset - SZ_64K; - - return smmu->base + offset; -} - static void parse_driver_options(struct arm_smmu_device *smmu) { int i = 0; @@ -2611,6 +2602,7 @@ static struct iommu_ops arm_smmu_ops = { /* Probing and initialisation functions */ static int arm_smmu_init_one_queue(struct arm_smmu_device *smmu, struct arm_smmu_queue *q, + void __iomem *page, unsigned long prod_off, unsigned long cons_off, size_t dwords, const char *name) @@ -2639,8 +2631,8 @@ static int arm_smmu_init_one_queue(struct arm_smmu_device *smmu, 1 << q->llq.max_n_shift, name); } - q->prod_reg = arm_smmu_page1_fixup(prod_off, smmu); - q->cons_reg = arm_smmu_page1_fixup(cons_off, smmu); + q->prod_reg = page + prod_off; + q->cons_reg = page + cons_off; q->ent_dwords = dwords; q->q_base = Q_BASE_RWA; @@ -2684,9 +2676,9 @@ static int arm_smmu_init_queues(struct arm_smmu_device *smmu) int ret; /* cmdq */ - ret = arm_smmu_init_one_queue(smmu, &smmu->cmdq.q, ARM_SMMU_CMDQ_PROD, - ARM_SMMU_CMDQ_CONS, CMDQ_ENT_DWORDS, - "cmdq"); + ret = arm_smmu_init_one_queue(smmu, &smmu->cmdq.q, smmu->base, + ARM_SMMU_CMDQ_PROD, ARM_SMMU_CMDQ_CONS, + CMDQ_ENT_DWORDS, "cmdq"); if (ret) return ret; @@ -2695,9 +2687,9 @@ static int arm_smmu_init_queues(struct arm_smmu_device *smmu) return ret; /* evtq */ - ret = arm_smmu_init_one_queue(smmu, &smmu->evtq.q, ARM_SMMU_EVTQ_PROD, - ARM_SMMU_EVTQ_CONS, EVTQ_ENT_DWORDS, - "evtq"); + ret = arm_smmu_init_one_queue(smmu, &smmu->evtq.q, smmu->page1, + ARM_SMMU_EVTQ_PROD, ARM_SMMU_EVTQ_CONS, + EVTQ_ENT_DWORDS, "evtq"); if (ret) return ret; @@ -2705,9 +2697,9 @@ static int arm_smmu_init_queues(struct arm_smmu_device *smmu) if (!(smmu->features & ARM_SMMU_FEAT_PRI)) return 0; - return arm_smmu_init_one_queue(smmu, &smmu->priq.q, ARM_SMMU_PRIQ_PROD, - ARM_SMMU_PRIQ_CONS, PRIQ_ENT_DWORDS, - "priq"); + return arm_smmu_init_one_queue(smmu, &smmu->priq.q, smmu->page1, + ARM_SMMU_PRIQ_PROD, ARM_SMMU_PRIQ_CONS, + PRIQ_ENT_DWORDS, "priq"); } static int arm_smmu_init_l1_strtab(struct arm_smmu_device *smmu) @@ -3099,10 +3091,8 @@ static int arm_smmu_device_reset(struct arm_smmu_device *smmu, bool bypass) /* Event queue */ writeq_relaxed(smmu->evtq.q.q_base, smmu->base + ARM_SMMU_EVTQ_BASE); - writel_relaxed(smmu->evtq.q.llq.prod, - arm_smmu_page1_fixup(ARM_SMMU_EVTQ_PROD, smmu)); - writel_relaxed(smmu->evtq.q.llq.cons, - arm_smmu_page1_fixup(ARM_SMMU_EVTQ_CONS, smmu)); + writel_relaxed(smmu->evtq.q.llq.prod, smmu->page1 + ARM_SMMU_EVTQ_PROD); + writel_relaxed(smmu->evtq.q.llq.cons, smmu->page1 + ARM_SMMU_EVTQ_CONS); enables |= CR0_EVTQEN; ret = arm_smmu_write_reg_sync(smmu, enables, ARM_SMMU_CR0, @@ -3117,9 +3107,9 @@ static int arm_smmu_device_reset(struct arm_smmu_device *smmu, bool bypass) writeq_relaxed(smmu->priq.q.q_base, smmu->base + ARM_SMMU_PRIQ_BASE); writel_relaxed(smmu->priq.q.llq.prod, - arm_smmu_page1_fixup(ARM_SMMU_PRIQ_PROD, smmu)); + smmu->page1 + ARM_SMMU_PRIQ_PROD); writel_relaxed(smmu->priq.q.llq.cons, - arm_smmu_page1_fixup(ARM_SMMU_PRIQ_CONS, smmu)); + smmu->page1 + ARM_SMMU_PRIQ_CONS); enables |= CR0_PRIQEN; ret = arm_smmu_write_reg_sync(smmu, enables, ARM_SMMU_CR0, diff --git a/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.h b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.h index 96c2e9565e00..da525f46dab4 100644 --- a/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.h +++ b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.h @@ -139,15 +139,15 @@ #define ARM_SMMU_CMDQ_CONS 0x9c #define ARM_SMMU_EVTQ_BASE 0xa0 -#define ARM_SMMU_EVTQ_PROD 0x100a8 -#define ARM_SMMU_EVTQ_CONS 0x100ac +#define ARM_SMMU_EVTQ_PROD 0xa8 +#define ARM_SMMU_EVTQ_CONS 0xac #define ARM_SMMU_EVTQ_IRQ_CFG0 0xb0 #define ARM_SMMU_EVTQ_IRQ_CFG1 0xb8 #define ARM_SMMU_EVTQ_IRQ_CFG2 0xbc #define ARM_SMMU_PRIQ_BASE 0xc0 -#define ARM_SMMU_PRIQ_PROD 0x100c8 -#define ARM_SMMU_PRIQ_CONS 0x100cc +#define ARM_SMMU_PRIQ_PROD 0xc8 +#define ARM_SMMU_PRIQ_CONS 0xcc #define ARM_SMMU_PRIQ_IRQ_CFG0 0xd0 #define ARM_SMMU_PRIQ_IRQ_CFG1 0xd8 #define ARM_SMMU_PRIQ_IRQ_CFG2 0xdc From 932bc8c7d742c30ce61f2add6cd1340795b0051c Mon Sep 17 00:00:00 2001 From: Zhen Lei Date: Fri, 22 Jan 2021 21:14:48 +0800 Subject: [PATCH 125/414] iommu/arm-smmu-v3: Use DEFINE_RES_MEM() to simplify code No functional change. Signed-off-by: Zhen Lei Link: https://lore.kernel.org/r/20210122131448.1167-1-thunder.leizhen@huawei.com Signed-off-by: Will Deacon --- drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c | 6 +----- 1 file changed, 1 insertion(+), 5 deletions(-) diff --git a/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c index bca458c00e48..f04c55a7503c 100644 --- a/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c +++ b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c @@ -3479,11 +3479,7 @@ err_reset_pci_ops: __maybe_unused; static void __iomem *arm_smmu_ioremap(struct device *dev, resource_size_t start, resource_size_t size) { - struct resource res = { - .flags = IORESOURCE_MEM, - .start = start, - .end = start + size - 1, - }; + struct resource res = DEFINE_RES_MEM(start, size); return devm_ioremap_resource(dev, &res); } From eba8d2f8f80315094b61eaf2bc6cd481741d6d93 Mon Sep 17 00:00:00 2001 From: Jean-Philippe Brucker Date: Fri, 22 Jan 2021 16:10:54 +0100 Subject: [PATCH 126/414] iommu/arm-smmu-v3: Split arm_smmu_tlb_inv_range() Extract some of the cmd initialization and the ATC invalidation from arm_smmu_tlb_inv_range(), to allow an MMU notifier to invalidate a VA range by ASID. Reviewed-by: Jonathan Cameron Signed-off-by: Jean-Philippe Brucker Link: https://lore.kernel.org/r/20210122151054.2833521-2-jean-philippe@linaro.org Signed-off-by: Will Deacon --- drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c | 63 ++++++++++++--------- 1 file changed, 36 insertions(+), 27 deletions(-) diff --git a/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c index f04c55a7503c..86cbac77c941 100644 --- a/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c +++ b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c @@ -1658,40 +1658,28 @@ static void arm_smmu_tlb_inv_context(void *cookie) arm_smmu_atc_inv_domain(smmu_domain, 0, 0, 0); } -static void arm_smmu_tlb_inv_range(unsigned long iova, size_t size, - size_t granule, bool leaf, - struct arm_smmu_domain *smmu_domain) +static void __arm_smmu_tlb_inv_range(struct arm_smmu_cmdq_ent *cmd, + unsigned long iova, size_t size, + size_t granule, + struct arm_smmu_domain *smmu_domain) { struct arm_smmu_device *smmu = smmu_domain->smmu; - unsigned long start = iova, end = iova + size, num_pages = 0, tg = 0; + unsigned long end = iova + size, num_pages = 0, tg = 0; size_t inv_range = granule; struct arm_smmu_cmdq_batch cmds = {}; - struct arm_smmu_cmdq_ent cmd = { - .tlbi = { - .leaf = leaf, - }, - }; if (!size) return; - if (smmu_domain->stage == ARM_SMMU_DOMAIN_S1) { - cmd.opcode = CMDQ_OP_TLBI_NH_VA; - cmd.tlbi.asid = smmu_domain->s1_cfg.cd.asid; - } else { - cmd.opcode = CMDQ_OP_TLBI_S2_IPA; - cmd.tlbi.vmid = smmu_domain->s2_cfg.vmid; - } - if (smmu->features & ARM_SMMU_FEAT_RANGE_INV) { /* Get the leaf page size */ tg = __ffs(smmu_domain->domain.pgsize_bitmap); /* Convert page size of 12,14,16 (log2) to 1,2,3 */ - cmd.tlbi.tg = (tg - 10) / 2; + cmd->tlbi.tg = (tg - 10) / 2; /* Determine what level the granule is at */ - cmd.tlbi.ttl = 4 - ((ilog2(granule) - 3) / (tg - 3)); + cmd->tlbi.ttl = 4 - ((ilog2(granule) - 3) / (tg - 3)); num_pages = size >> tg; } @@ -1709,11 +1697,11 @@ static void arm_smmu_tlb_inv_range(unsigned long iova, size_t size, /* Determine the power of 2 multiple number of pages */ scale = __ffs(num_pages); - cmd.tlbi.scale = scale; + cmd->tlbi.scale = scale; /* Determine how many chunks of 2^scale size we have */ num = (num_pages >> scale) & CMDQ_TLBI_RANGE_NUM_MAX; - cmd.tlbi.num = num - 1; + cmd->tlbi.num = num - 1; /* range is num * 2^scale * pgsize */ inv_range = num << (scale + tg); @@ -1722,17 +1710,37 @@ static void arm_smmu_tlb_inv_range(unsigned long iova, size_t size, num_pages -= num << scale; } - cmd.tlbi.addr = iova; - arm_smmu_cmdq_batch_add(smmu, &cmds, &cmd); + cmd->tlbi.addr = iova; + arm_smmu_cmdq_batch_add(smmu, &cmds, cmd); iova += inv_range; } arm_smmu_cmdq_batch_submit(smmu, &cmds); +} + +static void arm_smmu_tlb_inv_range_domain(unsigned long iova, size_t size, + size_t granule, bool leaf, + struct arm_smmu_domain *smmu_domain) +{ + struct arm_smmu_cmdq_ent cmd = { + .tlbi = { + .leaf = leaf, + }, + }; + + if (smmu_domain->stage == ARM_SMMU_DOMAIN_S1) { + cmd.opcode = CMDQ_OP_TLBI_NH_VA; + cmd.tlbi.asid = smmu_domain->s1_cfg.cd.asid; + } else { + cmd.opcode = CMDQ_OP_TLBI_S2_IPA; + cmd.tlbi.vmid = smmu_domain->s2_cfg.vmid; + } + __arm_smmu_tlb_inv_range(&cmd, iova, size, granule, smmu_domain); /* * Unfortunately, this can't be leaf-only since we may have * zapped an entire table. */ - arm_smmu_atc_inv_domain(smmu_domain, 0, start, size); + arm_smmu_atc_inv_domain(smmu_domain, 0, iova, size); } static void arm_smmu_tlb_inv_page_nosync(struct iommu_iotlb_gather *gather, @@ -1748,7 +1756,7 @@ static void arm_smmu_tlb_inv_page_nosync(struct iommu_iotlb_gather *gather, static void arm_smmu_tlb_inv_walk(unsigned long iova, size_t size, size_t granule, void *cookie) { - arm_smmu_tlb_inv_range(iova, size, granule, false, cookie); + arm_smmu_tlb_inv_range_domain(iova, size, granule, false, cookie); } static const struct iommu_flush_ops arm_smmu_flush_ops = { @@ -2271,8 +2279,9 @@ static void arm_smmu_iotlb_sync(struct iommu_domain *domain, { struct arm_smmu_domain *smmu_domain = to_smmu_domain(domain); - arm_smmu_tlb_inv_range(gather->start, gather->end - gather->start, - gather->pgsize, true, smmu_domain); + arm_smmu_tlb_inv_range_domain(gather->start, + gather->end - gather->start, + gather->pgsize, true, smmu_domain); } static phys_addr_t From 51d113c3be09ff5b916576b1109daf413549cacc Mon Sep 17 00:00:00 2001 From: Jean-Philippe Brucker Date: Fri, 22 Jan 2021 16:10:55 +0100 Subject: [PATCH 127/414] iommu/arm-smmu-v3: Make BTM optional for SVA When BTM isn't supported by the SMMU, send invalidations on the command queue. Reviewed-by: Jonathan Cameron Signed-off-by: Jean-Philippe Brucker Link: https://lore.kernel.org/r/20210122151054.2833521-3-jean-philippe@linaro.org Signed-off-by: Will Deacon --- drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3-sva.c | 10 +++++++--- drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c | 15 +++++++++++++++ drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.h | 3 +++ 3 files changed, 25 insertions(+), 3 deletions(-) diff --git a/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3-sva.c b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3-sva.c index e13b092e6004..bb251cab61f3 100644 --- a/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3-sva.c +++ b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3-sva.c @@ -182,9 +182,13 @@ static void arm_smmu_mm_invalidate_range(struct mmu_notifier *mn, unsigned long start, unsigned long end) { struct arm_smmu_mmu_notifier *smmu_mn = mn_to_smmu(mn); + struct arm_smmu_domain *smmu_domain = smmu_mn->domain; + size_t size = end - start + 1; - arm_smmu_atc_inv_domain(smmu_mn->domain, mm->pasid, start, - end - start + 1); + if (!(smmu_domain->smmu->features & ARM_SMMU_FEAT_BTM)) + arm_smmu_tlb_inv_range_asid(start, size, smmu_mn->cd->asid, + PAGE_SIZE, false, smmu_domain); + arm_smmu_atc_inv_domain(smmu_domain, mm->pasid, start, size); } static void arm_smmu_mm_release(struct mmu_notifier *mn, struct mm_struct *mm) @@ -391,7 +395,7 @@ bool arm_smmu_sva_supported(struct arm_smmu_device *smmu) unsigned long reg, fld; unsigned long oas; unsigned long asid_bits; - u32 feat_mask = ARM_SMMU_FEAT_BTM | ARM_SMMU_FEAT_COHERENCY; + u32 feat_mask = ARM_SMMU_FEAT_COHERENCY; if (vabits_actual == 52) feat_mask |= ARM_SMMU_FEAT_VAX; diff --git a/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c index 86cbac77c941..111467888e88 100644 --- a/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c +++ b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c @@ -1743,6 +1743,21 @@ static void arm_smmu_tlb_inv_range_domain(unsigned long iova, size_t size, arm_smmu_atc_inv_domain(smmu_domain, 0, iova, size); } +void arm_smmu_tlb_inv_range_asid(unsigned long iova, size_t size, int asid, + size_t granule, bool leaf, + struct arm_smmu_domain *smmu_domain) +{ + struct arm_smmu_cmdq_ent cmd = { + .opcode = CMDQ_OP_TLBI_NH_VA, + .tlbi = { + .asid = asid, + .leaf = leaf, + }, + }; + + __arm_smmu_tlb_inv_range(&cmd, iova, size, granule, smmu_domain); +} + static void arm_smmu_tlb_inv_page_nosync(struct iommu_iotlb_gather *gather, unsigned long iova, size_t granule, void *cookie) diff --git a/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.h b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.h index da525f46dab4..56bc0c3d4f4a 100644 --- a/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.h +++ b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.h @@ -694,6 +694,9 @@ extern struct arm_smmu_ctx_desc quiet_cd; int arm_smmu_write_ctx_desc(struct arm_smmu_domain *smmu_domain, int ssid, struct arm_smmu_ctx_desc *cd); void arm_smmu_tlb_inv_asid(struct arm_smmu_device *smmu, u16 asid); +void arm_smmu_tlb_inv_range_asid(unsigned long iova, size_t size, int asid, + size_t granule, bool leaf, + struct arm_smmu_domain *smmu_domain); bool arm_smmu_free_asid(struct arm_smmu_ctx_desc *cd); int arm_smmu_atc_inv_domain(struct arm_smmu_domain *smmu_domain, int ssid, unsigned long iova, size_t size); From 9111aebf770d6a6c9fcfd6301da4a0025f2d610f Mon Sep 17 00:00:00 2001 From: Jean-Philippe Brucker Date: Fri, 22 Jan 2021 16:10:56 +0100 Subject: [PATCH 128/414] iommu/arm-smmu-v3: Add support for VHE ARMv8.1 extensions added Virtualization Host Extensions (VHE), which allow to run a host kernel at EL2. When using normal DMA, Device and CPU address spaces are dissociated, and do not need to implement the same capabilities, so VHE hasn't been used in the SMMU until now. With shared address spaces however, ASIDs are shared between MMU and SMMU, and broadcast TLB invalidations issued by a CPU are taken into account by the SMMU. TLB entries on both sides need to have identical exception level in order to be cleared with a single invalidation. When the CPU is using VHE, enable VHE in the SMMU for all STEs. Normal DMA mappings will need to use TLBI_EL2 commands instead of TLBI_NH, but shouldn't be otherwise affected by this change. Acked-by: Will Deacon Reviewed-by: Jonathan Cameron Signed-off-by: Jean-Philippe Brucker Link: https://lore.kernel.org/r/20210122151054.2833521-4-jean-philippe@linaro.org Signed-off-by: Will Deacon --- drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c | 32 ++++++++++++++++----- drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.h | 3 ++ 2 files changed, 28 insertions(+), 7 deletions(-) diff --git a/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c index 111467888e88..baebaac34a83 100644 --- a/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c +++ b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c @@ -263,9 +263,11 @@ static int arm_smmu_cmdq_build_cmd(u64 *cmd, struct arm_smmu_cmdq_ent *ent) cmd[1] |= FIELD_PREP(CMDQ_CFGI_1_RANGE, 31); break; case CMDQ_OP_TLBI_NH_VA: + cmd[0] |= FIELD_PREP(CMDQ_TLBI_0_VMID, ent->tlbi.vmid); + fallthrough; + case CMDQ_OP_TLBI_EL2_VA: cmd[0] |= FIELD_PREP(CMDQ_TLBI_0_NUM, ent->tlbi.num); cmd[0] |= FIELD_PREP(CMDQ_TLBI_0_SCALE, ent->tlbi.scale); - cmd[0] |= FIELD_PREP(CMDQ_TLBI_0_VMID, ent->tlbi.vmid); cmd[0] |= FIELD_PREP(CMDQ_TLBI_0_ASID, ent->tlbi.asid); cmd[1] |= FIELD_PREP(CMDQ_TLBI_1_LEAF, ent->tlbi.leaf); cmd[1] |= FIELD_PREP(CMDQ_TLBI_1_TTL, ent->tlbi.ttl); @@ -287,6 +289,9 @@ static int arm_smmu_cmdq_build_cmd(u64 *cmd, struct arm_smmu_cmdq_ent *ent) case CMDQ_OP_TLBI_S12_VMALL: cmd[0] |= FIELD_PREP(CMDQ_TLBI_0_VMID, ent->tlbi.vmid); break; + case CMDQ_OP_TLBI_EL2_ASID: + cmd[0] |= FIELD_PREP(CMDQ_TLBI_0_ASID, ent->tlbi.asid); + break; case CMDQ_OP_ATC_INV: cmd[0] |= FIELD_PREP(CMDQ_0_SSV, ent->substream_valid); cmd[0] |= FIELD_PREP(CMDQ_ATC_0_GLOBAL, ent->atc.global); @@ -877,7 +882,8 @@ static int arm_smmu_cmdq_batch_submit(struct arm_smmu_device *smmu, void arm_smmu_tlb_inv_asid(struct arm_smmu_device *smmu, u16 asid) { struct arm_smmu_cmdq_ent cmd = { - .opcode = CMDQ_OP_TLBI_NH_ASID, + .opcode = smmu->features & ARM_SMMU_FEAT_E2H ? + CMDQ_OP_TLBI_EL2_ASID : CMDQ_OP_TLBI_NH_ASID, .tlbi.asid = asid, }; @@ -1260,13 +1266,16 @@ static void arm_smmu_write_strtab_ent(struct arm_smmu_master *master, u32 sid, } if (s1_cfg) { + u64 strw = smmu->features & ARM_SMMU_FEAT_E2H ? + STRTAB_STE_1_STRW_EL2 : STRTAB_STE_1_STRW_NSEL1; + BUG_ON(ste_live); dst[1] = cpu_to_le64( FIELD_PREP(STRTAB_STE_1_S1DSS, STRTAB_STE_1_S1DSS_SSID0) | FIELD_PREP(STRTAB_STE_1_S1CIR, STRTAB_STE_1_S1C_CACHE_WBRA) | FIELD_PREP(STRTAB_STE_1_S1COR, STRTAB_STE_1_S1C_CACHE_WBRA) | FIELD_PREP(STRTAB_STE_1_S1CSH, ARM_SMMU_SH_ISH) | - FIELD_PREP(STRTAB_STE_1_STRW, STRTAB_STE_1_STRW_NSEL1)); + FIELD_PREP(STRTAB_STE_1_STRW, strw)); if (smmu->features & ARM_SMMU_FEAT_STALLS && !(smmu->features & ARM_SMMU_FEAT_STALL_FORCE)) @@ -1728,7 +1737,8 @@ static void arm_smmu_tlb_inv_range_domain(unsigned long iova, size_t size, }; if (smmu_domain->stage == ARM_SMMU_DOMAIN_S1) { - cmd.opcode = CMDQ_OP_TLBI_NH_VA; + cmd.opcode = smmu_domain->smmu->features & ARM_SMMU_FEAT_E2H ? + CMDQ_OP_TLBI_EL2_VA : CMDQ_OP_TLBI_NH_VA; cmd.tlbi.asid = smmu_domain->s1_cfg.cd.asid; } else { cmd.opcode = CMDQ_OP_TLBI_S2_IPA; @@ -1748,7 +1758,8 @@ void arm_smmu_tlb_inv_range_asid(unsigned long iova, size_t size, int asid, struct arm_smmu_domain *smmu_domain) { struct arm_smmu_cmdq_ent cmd = { - .opcode = CMDQ_OP_TLBI_NH_VA, + .opcode = smmu_domain->smmu->features & ARM_SMMU_FEAT_E2H ? + CMDQ_OP_TLBI_EL2_VA : CMDQ_OP_TLBI_NH_VA, .tlbi = { .asid = asid, .leaf = leaf, @@ -3076,7 +3087,11 @@ static int arm_smmu_device_reset(struct arm_smmu_device *smmu, bool bypass) writel_relaxed(reg, smmu->base + ARM_SMMU_CR1); /* CR2 (random crap) */ - reg = CR2_PTM | CR2_RECINVSID | CR2_E2H; + reg = CR2_PTM | CR2_RECINVSID; + + if (smmu->features & ARM_SMMU_FEAT_E2H) + reg |= CR2_E2H; + writel_relaxed(reg, smmu->base + ARM_SMMU_CR2); /* Stream table */ @@ -3235,8 +3250,11 @@ static int arm_smmu_device_hw_probe(struct arm_smmu_device *smmu) smmu->options |= ARM_SMMU_OPT_MSIPOLL; } - if (reg & IDR0_HYP) + if (reg & IDR0_HYP) { smmu->features |= ARM_SMMU_FEAT_HYP; + if (cpus_have_cap(ARM64_HAS_VIRT_HOST_EXTN)) + smmu->features |= ARM_SMMU_FEAT_E2H; + } /* * The coherency feature as set by FW is used in preference to the ID diff --git a/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.h b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.h index 56bc0c3d4f4a..f985817c967a 100644 --- a/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.h +++ b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.h @@ -430,6 +430,8 @@ struct arm_smmu_cmdq_ent { #define CMDQ_OP_TLBI_NH_ASID 0x11 #define CMDQ_OP_TLBI_NH_VA 0x12 #define CMDQ_OP_TLBI_EL2_ALL 0x20 + #define CMDQ_OP_TLBI_EL2_ASID 0x21 + #define CMDQ_OP_TLBI_EL2_VA 0x22 #define CMDQ_OP_TLBI_S12_VMALL 0x28 #define CMDQ_OP_TLBI_S2_IPA 0x2a #define CMDQ_OP_TLBI_NSNH_ALL 0x30 @@ -604,6 +606,7 @@ struct arm_smmu_device { #define ARM_SMMU_FEAT_RANGE_INV (1 << 15) #define ARM_SMMU_FEAT_BTM (1 << 16) #define ARM_SMMU_FEAT_SVA (1 << 17) +#define ARM_SMMU_FEAT_E2H (1 << 18) u32 features; #define ARM_SMMU_OPT_SKIP_PREFETCH (1 << 0) From 5e769e444d262f7202a6bb88dba98bf5fd6c8f3c Mon Sep 17 00:00:00 2001 From: Lee Jones Date: Thu, 21 Jan 2021 09:44:50 +0000 Subject: [PATCH 129/414] RDMA/hw/mlx5/odp: Fix formatting and add missing descriptions in 'pagefault_data_segments()' Fixes the following W=1 kernel build warning(s): drivers/infiniband/hw/mlx5/odp.c:1062: warning: Function parameter or member 'dev' not described in 'pagefault_data_segments' drivers/infiniband/hw/mlx5/odp.c:1062: warning: Function parameter or member 'pfault' not described in 'pagefault_data_segments' drivers/infiniband/hw/mlx5/odp.c:1062: warning: Function parameter or member 'wqe' not described in 'pagefault_data_segments' drivers/infiniband/hw/mlx5/odp.c:1062: warning: Function parameter or member 'wqe_end' not described in 'pagefault_data_segments' drivers/infiniband/hw/mlx5/odp.c:1062: warning: Function parameter or member 'bytes_mapped' not described in 'pagefault_data_segments' drivers/infiniband/hw/mlx5/odp.c:1062: warning: Function parameter or member 'total_wqe_bytes' not described in 'pagefault_data_segments' drivers/infiniband/hw/mlx5/odp.c:1062: warning: Function parameter or member 'receive_queue' not described in 'pagefault_data_segments' Link: https://lore.kernel.org/r/20210121094519.2044049-2-lee.jones@linaro.org Cc: Leon Romanovsky Cc: Doug Ledford Cc: Jason Gunthorpe Cc: linux-rdma@vger.kernel.org Signed-off-by: Lee Jones Signed-off-by: Jason Gunthorpe --- drivers/infiniband/hw/mlx5/odp.c | 22 ++++++++++++---------- 1 file changed, 12 insertions(+), 10 deletions(-) diff --git a/drivers/infiniband/hw/mlx5/odp.c b/drivers/infiniband/hw/mlx5/odp.c index 440fbf7b66e4..e77b1db73893 100644 --- a/drivers/infiniband/hw/mlx5/odp.c +++ b/drivers/infiniband/hw/mlx5/odp.c @@ -1124,16 +1124,18 @@ static int pagefault_single_data_segment(struct mlx5_ib_dev *dev, /** * Parse a series of data segments for page fault handling. * - * @pfault contains page fault information. - * @wqe points at the first data segment in the WQE. - * @wqe_end points after the end of the WQE. - * @bytes_mapped receives the number of bytes that the function was able to - * map. This allows the caller to decide intelligently whether - * enough memory was mapped to resolve the page fault - * successfully (e.g. enough for the next MTU, or the entire - * WQE). - * @total_wqe_bytes receives the total data size of this WQE in bytes (minus - * the committed bytes). + * @dev: Pointer to mlx5 IB device + * @pfault: contains page fault information. + * @wqe: points at the first data segment in the WQE. + * @wqe_end: points after the end of the WQE. + * @bytes_mapped: receives the number of bytes that the function was able to + * map. This allows the caller to decide intelligently whether + * enough memory was mapped to resolve the page fault + * successfully (e.g. enough for the next MTU, or the entire + * WQE). + * @total_wqe_bytes: receives the total data size of this WQE in bytes (minus + * the committed bytes). + * @receive_queue: receive WQE end of sg list * * Returns the number of pages loaded if positive, zero for an empty WQE, or a * negative error code. From 30cd9fc5e734327d5daf10abed93fda451f1b463 Mon Sep 17 00:00:00 2001 From: Lee Jones Date: Thu, 21 Jan 2021 09:44:51 +0000 Subject: [PATCH 130/414] RDMA/hw/mlx5/qp: Demote non-conformant kernel-doc header Fixes the following W=1 kernel build warning(s): drivers/infiniband/hw/mlx5/qp.c:5384: warning: Function parameter or member 'qp' not described in 'mlx5_ib_qp_set_counter' drivers/infiniband/hw/mlx5/qp.c:5384: warning: Function parameter or member 'counter' not described in 'mlx5_ib_qp_set_counter' Link: https://lore.kernel.org/r/20210121094519.2044049-3-lee.jones@linaro.org Cc: Leon Romanovsky Cc: Doug Ledford Cc: Jason Gunthorpe Cc: linux-rdma@vger.kernel.org Signed-off-by: Lee Jones Acked-by: Leon Romanovsky Signed-off-by: Jason Gunthorpe --- drivers/infiniband/hw/mlx5/qp.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/infiniband/hw/mlx5/qp.c b/drivers/infiniband/hw/mlx5/qp.c index 0cb7cc642d87..c916e48b2e52 100644 --- a/drivers/infiniband/hw/mlx5/qp.c +++ b/drivers/infiniband/hw/mlx5/qp.c @@ -5376,7 +5376,7 @@ void mlx5_ib_drain_rq(struct ib_qp *qp) handle_drain_completion(cq, &rdrain, dev); } -/** +/* * Bind a qp to a counter. If @counter is NULL then bind the qp to * the default counter */ From 9b3ae173e6c48824a928e1f6aa252cc52b036210 Mon Sep 17 00:00:00 2001 From: Lee Jones Date: Thu, 21 Jan 2021 09:44:52 +0000 Subject: [PATCH 131/414] RDMA/hw/efa/efa_com: Stop using param description notation for non-params Fixes the following W=1 kernel build warning(s): drivers/infiniband/hw/efa/efa_com.c:801: warning: Excess function parameter 'note' description in 'efa_com_admin_q_comp_intr_handler' Link: https://lore.kernel.org/r/20210121094519.2044049-4-lee.jones@linaro.org Cc: Gal Pressman Cc: Yossi Leybovich Cc: Doug Ledford Cc: Jason Gunthorpe Cc: linux-rdma@vger.kernel.org Signed-off-by: Lee Jones Acked-by: Gal Pressman Signed-off-by: Jason Gunthorpe --- drivers/infiniband/hw/efa/efa_com.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/infiniband/hw/efa/efa_com.c b/drivers/infiniband/hw/efa/efa_com.c index 336bc2c57bb1..f7242188a843 100644 --- a/drivers/infiniband/hw/efa/efa_com.c +++ b/drivers/infiniband/hw/efa/efa_com.c @@ -795,7 +795,7 @@ int efa_com_admin_init(struct efa_com_dev *edev, * This method goes over the admin completion queue and wakes up * all the pending threads that wait on the commands wait event. * - * @note: Should be called after MSI-X interrupt. + * Note: Should be called after MSI-X interrupt. */ void efa_com_admin_q_comp_intr_handler(struct efa_com_dev *edev) { From 006110eef7150fb9e2bda4ab94cf18c492cd489a Mon Sep 17 00:00:00 2001 From: Lee Jones Date: Thu, 21 Jan 2021 09:44:53 +0000 Subject: [PATCH 132/414] RDMA/hw/hns/hns_roce_hw_v1: Fix doc-rot issue relating to 'rereset' Fixes the following W=1 kernel build warning(s): drivers/infiniband/hw/hns/hns_roce_hw_v1.c:1398: warning: Function parameter or member 'dereset' not described in 'hns_roce_v1_reset' drivers/infiniband/hw/hns/hns_roce_hw_v1.c:1398: warning: Excess function parameter 'enable' description in 'hns_roce_v1_reset' Link: https://lore.kernel.org/r/20210121094519.2044049-5-lee.jones@linaro.org Cc: Lijun Ou Cc: Weihang Li Cc: Doug Ledford Cc: Jason Gunthorpe Cc: Wei Hu Cc: Nenglong Zhao Cc: linux-rdma@vger.kernel.org Signed-off-by: Lee Jones Signed-off-by: Jason Gunthorpe --- drivers/infiniband/hw/hns/hns_roce_hw_v1.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/infiniband/hw/hns/hns_roce_hw_v1.c b/drivers/infiniband/hw/hns/hns_roce_hw_v1.c index f68585ff8e8a..23fe8e9f61da 100644 --- a/drivers/infiniband/hw/hns/hns_roce_hw_v1.c +++ b/drivers/infiniband/hw/hns/hns_roce_hw_v1.c @@ -1391,7 +1391,7 @@ static void hns_roce_free_mr_free(struct hns_roce_dev *hr_dev) /** * hns_roce_v1_reset - reset RoCE * @hr_dev: RoCE device struct pointer - * @enable: true -- drop reset, false -- reset + * @dereset: true -- drop reset, false -- reset * return 0 - success , negative --fail */ static int hns_roce_v1_reset(struct hns_roce_dev *hr_dev, bool dereset) From 779f2f5e7d2d598abd23882fdc0236aefca6c79a Mon Sep 17 00:00:00 2001 From: Lee Jones Date: Thu, 21 Jan 2021 09:44:54 +0000 Subject: [PATCH 133/414] RDMA/hw/hns/hns_roce_mr: Add missing description for 'hr_dev' param Fixes the following W=1 kernel build warning(s): drivers/infiniband/hw/hns/hns_roce_mr.c:1003: warning: Function parameter or member 'hr_dev' not described in 'hns_roce_mtr_create' Link: https://lore.kernel.org/r/20210121094519.2044049-6-lee.jones@linaro.org Cc: Lijun Ou Cc: Weihang Li Cc: Doug Ledford Cc: Jason Gunthorpe Cc: linux-rdma@vger.kernel.org Signed-off-by: Lee Jones Signed-off-by: Jason Gunthorpe --- drivers/infiniband/hw/hns/hns_roce_mr.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/infiniband/hw/hns/hns_roce_mr.c b/drivers/infiniband/hw/hns/hns_roce_mr.c index 1bcffd93ff3e..1fbfa3a37545 100644 --- a/drivers/infiniband/hw/hns/hns_roce_mr.c +++ b/drivers/infiniband/hw/hns/hns_roce_mr.c @@ -990,6 +990,7 @@ static int mtr_init_buf_cfg(struct hns_roce_dev *hr_dev, /** * hns_roce_mtr_create - Create hns memory translate region. * + * @hr_dev: RoCE device struct pointer * @mtr: memory translate region * @buf_attr: buffer attribute for creating mtr * @ba_page_shift: page shift for multi-hop base address table From 65a62ec08ca46a2d96092617ff3e4c9599edf331 Mon Sep 17 00:00:00 2001 From: Lee Jones Date: Thu, 21 Jan 2021 09:44:55 +0000 Subject: [PATCH 134/414] RDMA/hw/qib/qib_driver: Fix misspelling in 'ppd's param description Fixes the following W=1 kernel build warning(s): drivers/infiniband/hw/qib/qib_driver.c:165: warning: Function parameter or member 'ppd' not described in 'qib_wait_linkstate' drivers/infiniband/hw/qib/qib_driver.c:165: warning: Excess function parameter 'dd' description in 'qib_wait_linkstate' Link: https://lore.kernel.org/r/20210121094519.2044049-7-lee.jones@linaro.org Cc: Dennis Dalessandro Cc: Mike Marciniszyn Cc: Doug Ledford Cc: Jason Gunthorpe Cc: Intel Cc: linux-rdma@vger.kernel.org Signed-off-by: Lee Jones Signed-off-by: Jason Gunthorpe --- drivers/infiniband/hw/qib/qib_driver.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/infiniband/hw/qib/qib_driver.c b/drivers/infiniband/hw/qib/qib_driver.c index 92eeea5679e2..84fc4dcc5399 100644 --- a/drivers/infiniband/hw/qib/qib_driver.c +++ b/drivers/infiniband/hw/qib/qib_driver.c @@ -151,7 +151,7 @@ int qib_count_units(int *npresentp, int *nupp) /** * qib_wait_linkstate - wait for an IB link state change to occur - * @dd: the qlogic_ib device + * @ppd: the qlogic_ib device * @state: the state to wait for * @msecs: the number of milliseconds to wait * From 39412461fc3c8e9d08ceb20f26b433ef45f9f4ab Mon Sep 17 00:00:00 2001 From: Lee Jones Date: Thu, 21 Jan 2021 09:44:56 +0000 Subject: [PATCH 135/414] RDMA/sw/rdmavt/vt: Fix formatting issue and update description for 'context' Fixes the following W=1 kernel build warning(s): drivers/infiniband/sw/rdmavt/vt.c:300: warning: Function parameter or member 'context' not described in 'rvt_dealloc_ucontext' Link: https://lore.kernel.org/r/20210121094519.2044049-8-lee.jones@linaro.org Cc: Dennis Dalessandro Cc: Mike Marciniszyn Cc: Doug Ledford Cc: Jason Gunthorpe Cc: linux-rdma@vger.kernel.org Signed-off-by: Lee Jones Signed-off-by: Jason Gunthorpe --- drivers/infiniband/sw/rdmavt/vt.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/infiniband/sw/rdmavt/vt.c b/drivers/infiniband/sw/rdmavt/vt.c index 49cec85a372a..8fd0128a9336 100644 --- a/drivers/infiniband/sw/rdmavt/vt.c +++ b/drivers/infiniband/sw/rdmavt/vt.c @@ -294,7 +294,7 @@ static int rvt_alloc_ucontext(struct ib_ucontext *uctx, struct ib_udata *udata) /** * rvt_dealloc_ucontext - Free a user context - * @context - Free this + * @context: Unused */ static void rvt_dealloc_ucontext(struct ib_ucontext *context) { From aab5a7e909f6ec27ba28c03b01cbc950ede3938b Mon Sep 17 00:00:00 2001 From: Lee Jones Date: Thu, 21 Jan 2021 09:44:57 +0000 Subject: [PATCH 136/414] RDMA/hw/qib/qib_eeprom: Fix misspelling of 'buff' in 'qib_eeprom_{read,write}()' Fixes the following W=1 kernel build warning(s): drivers/infiniband/hw/qib/qib_eeprom.c:55: warning: Function parameter or member 'buff' not described in 'qib_eeprom_read' drivers/infiniband/hw/qib/qib_eeprom.c:55: warning: Excess function parameter 'buffer' description in 'qib_eeprom_read' drivers/infiniband/hw/qib/qib_eeprom.c:102: warning: Function parameter or member 'buff' not described in 'qib_eeprom_write' drivers/infiniband/hw/qib/qib_eeprom.c:102: warning: Excess function parameter 'buffer' description in 'qib_eeprom_write' Link: https://lore.kernel.org/r/20210121094519.2044049-9-lee.jones@linaro.org Cc: Dennis Dalessandro Cc: Mike Marciniszyn Cc: Doug Ledford Cc: Jason Gunthorpe Cc: linux-rdma@vger.kernel.org Signed-off-by: Lee Jones Signed-off-by: Jason Gunthorpe --- drivers/infiniband/hw/qib/qib_eeprom.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/infiniband/hw/qib/qib_eeprom.c b/drivers/infiniband/hw/qib/qib_eeprom.c index 5838b3bf34b9..bf660c001b6d 100644 --- a/drivers/infiniband/hw/qib/qib_eeprom.c +++ b/drivers/infiniband/hw/qib/qib_eeprom.c @@ -47,7 +47,7 @@ * qib_eeprom_read - receives bytes from the eeprom via I2C * @dd: the qlogic_ib device * @eeprom_offset: address to read from - * @buffer: where to store result + * @buff: where to store result * @len: number of bytes to receive */ int qib_eeprom_read(struct qib_devdata *dd, u8 eeprom_offset, @@ -94,7 +94,7 @@ static int eeprom_write_with_enable(struct qib_devdata *dd, u8 offset, * qib_eeprom_write - writes data to the eeprom via I2C * @dd: the qlogic_ib device * @eeprom_offset: where to place data - * @buffer: data to write + * @buff: data to write * @len: number of bytes to write */ int qib_eeprom_write(struct qib_devdata *dd, u8 eeprom_offset, From 7040557ca997611191ad75a7111821bb80c6d49b Mon Sep 17 00:00:00 2001 From: Lee Jones Date: Thu, 21 Jan 2021 09:44:58 +0000 Subject: [PATCH 137/414] RDMA/hw/qib/qib_mad: Fix a few misspellings and supply missing descriptions Fixes the following W=1 kernel build warning(s): drivers/infiniband/hw/qib/qib_mad.c:896: warning: Function parameter or member 'ppd' not described in 'rm_pkey' drivers/infiniband/hw/qib/qib_mad.c:896: warning: Excess function parameter 'dd' description in 'rm_pkey' drivers/infiniband/hw/qib/qib_mad.c:926: warning: Function parameter or member 'ppd' not described in 'add_pkey' drivers/infiniband/hw/qib/qib_mad.c:926: warning: Excess function parameter 'dd' description in 'add_pkey' drivers/infiniband/hw/qib/qib_mad.c:2365: warning: Function parameter or member 'in' not described in 'qib_process_mad' drivers/infiniband/hw/qib/qib_mad.c:2365: warning: Function parameter or member 'out' not described in 'qib_process_mad' drivers/infiniband/hw/qib/qib_mad.c:2365: warning: Function parameter or member 'out_mad_size' not described in 'qib_process_mad' drivers/infiniband/hw/qib/qib_mad.c:2365: warning: Function parameter or member 'out_mad_pkey_index' not described in 'qib_process_mad' drivers/infiniband/hw/qib/qib_mad.c:2365: warning: Excess function parameter 'in_mad' description in 'qib_process_mad' drivers/infiniband/hw/qib/qib_mad.c:2365: warning: Excess function parameter 'out_mad' description in 'qib_process_mad' Link: https://lore.kernel.org/r/20210121094519.2044049-10-lee.jones@linaro.org Cc: Dennis Dalessandro Cc: Mike Marciniszyn Cc: Doug Ledford Cc: Jason Gunthorpe Cc: linux-rdma@vger.kernel.org Signed-off-by: Lee Jones Signed-off-by: Jason Gunthorpe --- drivers/infiniband/hw/qib/qib_mad.c | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) diff --git a/drivers/infiniband/hw/qib/qib_mad.c b/drivers/infiniband/hw/qib/qib_mad.c index f83e331977f8..44e2f813024a 100644 --- a/drivers/infiniband/hw/qib/qib_mad.c +++ b/drivers/infiniband/hw/qib/qib_mad.c @@ -886,7 +886,7 @@ static int subn_set_portinfo(struct ib_smp *smp, struct ib_device *ibdev, /** * rm_pkey - decrecment the reference count for the given PKEY - * @dd: the qlogic_ib device + * @ppd: the qlogic_ib device * @key: the PKEY index * * Return true if this was the last reference and the hardware table entry @@ -916,7 +916,7 @@ static int rm_pkey(struct qib_pportdata *ppd, u16 key) /** * add_pkey - add the given PKEY to the hardware table - * @dd: the qlogic_ib device + * @ppd: the qlogic_ib device * @key: the PKEY * * Return an error code if unable to add the entry, zero if no change, @@ -2346,8 +2346,10 @@ static int process_cc(struct ib_device *ibdev, int mad_flags, * @port: the port number this packet came in on * @in_wc: the work completion entry for this packet * @in_grh: the global route header for this packet - * @in_mad: the incoming MAD - * @out_mad: any outgoing MAD reply + * @in: the incoming MAD + * @out: any outgoing MAD reply + * @out_mad_size: size of the outgoing MAD reply + * @out_mad_pkey_index: unused * * Returns IB_MAD_RESULT_SUCCESS if this is a MAD that we are not * interested in processing. From 305f2261f574a99c5da969885915f68e6084b59b Mon Sep 17 00:00:00 2001 From: Lee Jones Date: Thu, 21 Jan 2021 09:44:59 +0000 Subject: [PATCH 138/414] RDMA/hw/qib/qib_intr: Fix a bunch of formatting issues Fixes the following W=1 kernel build warning(s): drivers/infiniband/hw/qib/qib_intr.c:48: warning: Function parameter or member 'msg' not described in 'qib_format_hwmsg' drivers/infiniband/hw/qib/qib_intr.c:48: warning: Function parameter or member 'msgl' not described in 'qib_format_hwmsg' drivers/infiniband/hw/qib/qib_intr.c:48: warning: Function parameter or member 'hwmsg' not described in 'qib_format_hwmsg' drivers/infiniband/hw/qib/qib_intr.c:64: warning: Function parameter or member 'hwerrs' not described in 'qib_format_hwerrors' drivers/infiniband/hw/qib/qib_intr.c:64: warning: Function parameter or member 'hwerrmsgs' not described in 'qib_format_hwerrors' drivers/infiniband/hw/qib/qib_intr.c:64: warning: Function parameter or member 'nhwerrmsgs' not described in 'qib_format_hwerrors' drivers/infiniband/hw/qib/qib_intr.c:64: warning: Function parameter or member 'msg' not described in 'qib_format_hwerrors' drivers/infiniband/hw/qib/qib_intr.c:64: warning: Function parameter or member 'msgl' not described in 'qib_format_hwerrors' Link: https://lore.kernel.org/r/20210121094519.2044049-11-lee.jones@linaro.org Cc: Dennis Dalessandro Cc: Mike Marciniszyn Cc: Doug Ledford Cc: Jason Gunthorpe Cc: linux-rdma@vger.kernel.org Signed-off-by: Lee Jones Signed-off-by: Jason Gunthorpe --- drivers/infiniband/hw/qib/qib_intr.c | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/drivers/infiniband/hw/qib/qib_intr.c b/drivers/infiniband/hw/qib/qib_intr.c index 65c3b964ad1b..85c3187d796d 100644 --- a/drivers/infiniband/hw/qib/qib_intr.c +++ b/drivers/infiniband/hw/qib/qib_intr.c @@ -40,9 +40,9 @@ /** * qib_format_hwmsg - format a single hwerror message - * @msg message buffer - * @msgl length of message buffer - * @hwmsg message to add to message buffer + * @msg: message buffer + * @msgl: length of message buffer + * @hwmsg: message to add to message buffer */ static void qib_format_hwmsg(char *msg, size_t msgl, const char *hwmsg) { @@ -53,11 +53,11 @@ static void qib_format_hwmsg(char *msg, size_t msgl, const char *hwmsg) /** * qib_format_hwerrors - format hardware error messages for display - * @hwerrs hardware errors bit vector - * @hwerrmsgs hardware error descriptions - * @nhwerrmsgs number of hwerrmsgs - * @msg message buffer - * @msgl message buffer length + * @hwerrs: hardware errors bit vector + * @hwerrmsgs: hardware error descriptions + * @nhwerrmsgs: number of hwerrmsgs + * @msg: message buffer + * @msgl: message buffer length */ void qib_format_hwerrors(u64 hwerrs, const struct qib_hwerror_msgs *hwerrmsgs, size_t nhwerrmsgs, char *msg, size_t msgl) From 63f774c512ceafc44e8bdd3ed64fc7c8ef325823 Mon Sep 17 00:00:00 2001 From: Lee Jones Date: Thu, 21 Jan 2021 09:45:00 +0000 Subject: [PATCH 139/414] RDMA/hw/qib/qib_pcie: Demote obvious kernel-doc abuse Fixes the following W=1 kernel build warning(s): drivers/infiniband/hw/qib/qib_pcie.c:190: warning: Function parameter or member 'dd' not described in 'qib_cache_msi_info' drivers/infiniband/hw/qib/qib_pcie.c:190: warning: Function parameter or member 'pos' not described in 'qib_cache_msi_info' Link: https://lore.kernel.org/r/20210121094519.2044049-12-lee.jones@linaro.org Cc: Dennis Dalessandro Cc: Mike Marciniszyn Cc: Doug Ledford Cc: Jason Gunthorpe Cc: linux-rdma@vger.kernel.org Signed-off-by: Lee Jones Signed-off-by: Jason Gunthorpe --- drivers/infiniband/hw/qib/qib_pcie.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/infiniband/hw/qib/qib_pcie.c b/drivers/infiniband/hw/qib/qib_pcie.c index 2e07b3749b88..cb2a02d671e2 100644 --- a/drivers/infiniband/hw/qib/qib_pcie.c +++ b/drivers/infiniband/hw/qib/qib_pcie.c @@ -181,7 +181,7 @@ void qib_pcie_ddcleanup(struct qib_devdata *dd) pci_set_drvdata(dd->pcidev, NULL); } -/** +/* * We save the msi lo and hi values, so we can restore them after * chip reset (the kernel PCI infrastructure doesn't yet handle that * correctly. From 0bbc2aea2b8adf7599cd1391452258a6fdca8d1f Mon Sep 17 00:00:00 2001 From: Lee Jones Date: Thu, 21 Jan 2021 09:45:01 +0000 Subject: [PATCH 140/414] RDMA/hw/qib/qib_qp: Fix some issues in worthy kernel-doc headers and demote another Fixes the following W=1 kernel build warning(s): drivers/infiniband/hw/qib/qib_qp.c:214: warning: Function parameter or member 'rdi' not described in 'qib_free_all_qps' drivers/infiniband/hw/qib/qib_qp.c:387: warning: Function parameter or member 'qp' not described in 'qib_check_send_wqe' drivers/infiniband/hw/qib/qib_qp.c:387: warning: Function parameter or member 'wqe' not described in 'qib_check_send_wqe' drivers/infiniband/hw/qib/qib_qp.c:387: warning: Function parameter or member 'call_send' not described in 'qib_check_send_wqe' drivers/infiniband/hw/qib/qib_qp.c:425: warning: Function parameter or member 's' not described in 'qib_qp_iter_print' drivers/infiniband/hw/qib/qib_qp.c:425: warning: Function parameter or member 'iter' not described in 'qib_qp_iter_print' Link: https://lore.kernel.org/r/20210121094519.2044049-13-lee.jones@linaro.org Cc: Dennis Dalessandro Cc: Mike Marciniszyn Cc: Doug Ledford Cc: Jason Gunthorpe Cc: linux-rdma@vger.kernel.org Signed-off-by: Lee Jones Signed-off-by: Jason Gunthorpe --- drivers/infiniband/hw/qib/qib_qp.c | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/drivers/infiniband/hw/qib/qib_qp.c b/drivers/infiniband/hw/qib/qib_qp.c index 8d0563ef5be1..ca39a029e4af 100644 --- a/drivers/infiniband/hw/qib/qib_qp.c +++ b/drivers/infiniband/hw/qib/qib_qp.c @@ -207,7 +207,7 @@ int qib_alloc_qpn(struct rvt_dev_info *rdi, struct rvt_qpn_table *qpt, return ret; } -/** +/* * qib_free_all_qps - check for QPs still in use */ unsigned qib_free_all_qps(struct rvt_dev_info *rdi) @@ -376,9 +376,9 @@ void qib_flush_qp_waiters(struct rvt_qp *qp) /** * qib_check_send_wqe - validate wr/wqe - * @qp - The qp - * @wqe - The built wqe - * @call_send - Determine if the send should be posted or scheduled + * @qp: The qp + * @wqe: The built wqe + * @call_send: Determine if the send should be posted or scheduled * * Returns 0 on success, -EINVAL on failure */ @@ -418,8 +418,8 @@ static const char * const qp_type_str[] = { /** * qib_qp_iter_print - print information to seq_file - * @s - the seq_file - * @iter - the iterator + * @s: the seq_file + * @iter: the iterator */ void qib_qp_iter_print(struct seq_file *s, struct rvt_qp_iter *iter) { From 8effbe0563cee9a3ad49e5123445363849296ec8 Mon Sep 17 00:00:00 2001 From: Lee Jones Date: Thu, 21 Jan 2021 09:45:02 +0000 Subject: [PATCH 141/414] RDMA/sw/rdmavt/cq: Demote hardly complete kernel-doc header Fixes the following W=1 kernel build warning(s): drivers/infiniband/sw/rdmavt/cq.c:381: warning: Function parameter or member 'cqe' not described in 'rvt_resize_cq' drivers/infiniband/sw/rdmavt/cq.c:381: warning: Function parameter or member 'udata' not described in 'rvt_resize_cq' Link: https://lore.kernel.org/r/20210121094519.2044049-14-lee.jones@linaro.org Cc: Dennis Dalessandro Cc: Mike Marciniszyn Cc: Doug Ledford Cc: Jason Gunthorpe Cc: linux-rdma@vger.kernel.org Signed-off-by: Lee Jones Signed-off-by: Jason Gunthorpe --- drivers/infiniband/sw/rdmavt/cq.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/infiniband/sw/rdmavt/cq.c b/drivers/infiniband/sw/rdmavt/cq.c index 20cc0799ac4b..5138afca067f 100644 --- a/drivers/infiniband/sw/rdmavt/cq.c +++ b/drivers/infiniband/sw/rdmavt/cq.c @@ -371,7 +371,7 @@ int rvt_req_notify_cq(struct ib_cq *ibcq, enum ib_cq_notify_flags notify_flags) return ret; } -/** +/* * rvt_resize_cq - change the size of the CQ * @ibcq: the completion queue * From 24d02e04943a6f1b1d83fe1fc84db35c7f50172a Mon Sep 17 00:00:00 2001 From: Lee Jones Date: Thu, 21 Jan 2021 09:45:03 +0000 Subject: [PATCH 142/414] RDMA/hw/qib/qib_rc: Fix some worthy kernel-docs demote hardly complete one Fixes the following W=1 kernel build warning(s): drivers/infiniband/hw/qib/qib_rc.c:216: warning: Function parameter or member 'flags' not described in 'qib_make_rc_req' drivers/infiniband/hw/qib/qib_rc.c:1008: warning: Function parameter or member 'aeth' not described in 'do_rc_ack' drivers/infiniband/hw/qib/qib_rc.c:1008: warning: Function parameter or member 'val' not described in 'do_rc_ack' drivers/infiniband/hw/qib/qib_rc.c:1008: warning: Function parameter or member 'rcd' not described in 'do_rc_ack' drivers/infiniband/hw/qib/qib_rc.c:1274: warning: Function parameter or member 'rcd' not described in 'qib_rc_rcv_resp' drivers/infiniband/hw/qib/qib_rc.c:1497: warning: Function parameter or member 'rcd' not described in 'qib_rc_rcv_error' Link: https://lore.kernel.org/r/20210121094519.2044049-15-lee.jones@linaro.org Cc: Dennis Dalessandro Cc: Mike Marciniszyn Cc: Doug Ledford Cc: Jason Gunthorpe Cc: linux-rdma@vger.kernel.org Signed-off-by: Lee Jones Signed-off-by: Jason Gunthorpe --- drivers/infiniband/hw/qib/qib_rc.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/drivers/infiniband/hw/qib/qib_rc.c b/drivers/infiniband/hw/qib/qib_rc.c index 3915e5b4a9bc..a1c20ffb4490 100644 --- a/drivers/infiniband/hw/qib/qib_rc.c +++ b/drivers/infiniband/hw/qib/qib_rc.c @@ -207,6 +207,7 @@ static int qib_make_rc_ack(struct qib_ibdev *dev, struct rvt_qp *qp, /** * qib_make_rc_req - construct a request packet (SEND, RDMA r/w, ATOMIC) * @qp: a pointer to the QP + * @flags: unused * * Assumes the s_lock is held. * @@ -992,7 +993,7 @@ static struct rvt_swqe *do_rc_completion(struct rvt_qp *qp, return wqe; } -/** +/* * do_rc_ack - process an incoming RC ACK * @qp: the QP the ACK came in on * @psn: the packet sequence number of the ACK @@ -1259,6 +1260,7 @@ static void rdma_seq_err(struct rvt_qp *qp, struct qib_ibport *ibp, u32 psn, * @psn: the packet sequence number for this packet * @hdrsize: the header length * @pmtu: the path MTU + * @rcd: the context pointer * * This is called from qib_rc_rcv() to process an incoming RC response * packet for the given QP. @@ -1480,6 +1482,7 @@ static void qib_rc_rcv_resp(struct qib_ibport *ibp, * @opcode: the opcode for this packet * @psn: the packet sequence number for this packet * @diff: the difference between the PSN and the expected PSN + * @rcd: the context pointer * * This is called from qib_rc_rcv() to process an unexpected * incoming RC packet for the given QP. From da0940e5a586646788c6d0de46cdd305dd46e9f5 Mon Sep 17 00:00:00 2001 From: Lee Jones Date: Thu, 21 Jan 2021 09:45:04 +0000 Subject: [PATCH 143/414] RDMA/hw/hfi1/chip: Fix a bunch of kernel-doc formatting and spelling issues Fixes the following W=1 kernel build warning(s): drivers/infiniband/hw/hfi1/chip.c:1335: warning: Function parameter or member 'dd' not described in 'hfi1_addr_from_offset' drivers/infiniband/hw/hfi1/chip.c:1335: warning: Function parameter or member 'offset' not described in 'hfi1_addr_from_offset' drivers/infiniband/hw/hfi1/chip.c:1350: warning: Function parameter or member 'dd' not described in 'read_csr' drivers/infiniband/hw/hfi1/chip.c:1350: warning: Function parameter or member 'offset' not described in 'read_csr' drivers/infiniband/hw/hfi1/chip.c:1363: warning: Function parameter or member 'dd' not described in 'write_csr' drivers/infiniband/hw/hfi1/chip.c:1363: warning: Function parameter or member 'offset' not described in 'write_csr' drivers/infiniband/hw/hfi1/chip.c:1363: warning: Function parameter or member 'value' not described in 'write_csr' drivers/infiniband/hw/hfi1/chip.c:1385: warning: Function parameter or member 'dd' not described in 'get_csr_addr' drivers/infiniband/hw/hfi1/chip.c:1385: warning: Function parameter or member 'offset' not described in 'get_csr_addr' drivers/infiniband/hw/hfi1/chip.c:8442: warning: Function parameter or member 'rcd' not described in 'receive_interrupt_common' drivers/infiniband/hw/hfi1/chip.c:8491: warning: Function parameter or member 'napi' not described in 'hfi1_netdev_rx_napi' drivers/infiniband/hw/hfi1/chip.c:8491: warning: Function parameter or member 'budget' not described in 'hfi1_netdev_rx_napi' drivers/infiniband/hw/hfi1/chip.c:14221: warning: Function parameter or member 'dd' not described in 'init_qpmap_table' drivers/infiniband/hw/hfi1/chip.c:14221: warning: Function parameter or member 'first_ctxt' not described in 'init_qpmap_table' drivers/infiniband/hw/hfi1/chip.c:14221: warning: Function parameter or member 'last_ctxt' not described in 'init_qpmap_table' drivers/infiniband/hw/hfi1/chip.c:14399: warning: Function parameter or member 'dd' not described in 'init_qos' drivers/infiniband/hw/hfi1/chip.c:14399: warning: Function parameter or member 'rmt' not described in 'init_qos' drivers/infiniband/hw/hfi1/chip.c:15032: warning: Function parameter or member 'dd' not described in 'hfi1_init_dd' drivers/infiniband/hw/hfi1/chip.c:15032: warning: Excess function parameter 'dev' description in 'hfi1_init_dd' drivers/infiniband/hw/hfi1/chip.c:15032: warning: Excess function parameter 'ent' description in 'hfi1_init_dd' drivers/infiniband/hw/hfi1/chip.c:15395: warning: Function parameter or member 'ppd' not described in 'create_pbc' drivers/infiniband/hw/hfi1/chip.c:15395: warning: Function parameter or member 'srate_mbs' not described in 'create_pbc' drivers/infiniband/hw/hfi1/chip.c:15395: warning: Function parameter or member 'dw_len' not described in 'create_pbc' drivers/infiniband/hw/hfi1/chip.c:15395: warning: Excess function parameter 'srate' description in 'create_pbc' drivers/infiniband/hw/hfi1/chip.c:15395: warning: Excess function parameter 'dwlen' description in 'create_pbc' Link: https://lore.kernel.org/r/20210121094519.2044049-16-lee.jones@linaro.org Cc: Mike Marciniszyn Cc: Dennis Dalessandro Cc: Doug Ledford Cc: Jason Gunthorpe Cc: linux-rdma@vger.kernel.org Signed-off-by: Lee Jones Signed-off-by: Jason Gunthorpe --- drivers/infiniband/hw/hfi1/chip.c | 46 +++++++++++++++---------------- 1 file changed, 23 insertions(+), 23 deletions(-) diff --git a/drivers/infiniband/hw/hfi1/chip.c b/drivers/infiniband/hw/hfi1/chip.c index c87b94ea2939..993cbf37e0b9 100644 --- a/drivers/infiniband/hw/hfi1/chip.c +++ b/drivers/infiniband/hw/hfi1/chip.c @@ -1323,8 +1323,8 @@ CNTR_ELEM(#name, \ /** * hfi_addr_from_offset - return addr for readq/writeq - * @dd - the dd device - * @offset - the offset of the CSR within bar0 + * @dd: the dd device + * @offset: the offset of the CSR within bar0 * * This routine selects the appropriate base address * based on the indicated offset. @@ -1340,8 +1340,8 @@ static inline void __iomem *hfi1_addr_from_offset( /** * read_csr - read CSR at the indicated offset - * @dd - the dd device - * @offset - the offset of the CSR within bar0 + * @dd: the dd device + * @offset: the offset of the CSR within bar0 * * Return: the value read or all FF's if there * is no mapping @@ -1355,9 +1355,9 @@ u64 read_csr(const struct hfi1_devdata *dd, u32 offset) /** * write_csr - write CSR at the indicated offset - * @dd - the dd device - * @offset - the offset of the CSR within bar0 - * @value - value to write + * @dd: the dd device + * @offset: the offset of the CSR within bar0 + * @value: value to write */ void write_csr(const struct hfi1_devdata *dd, u32 offset, u64 value) { @@ -1373,8 +1373,8 @@ void write_csr(const struct hfi1_devdata *dd, u32 offset, u64 value) /** * get_csr_addr - return te iomem address for offset - * @dd - the dd device - * @offset - the offset of the CSR within bar0 + * @dd: the dd device + * @offset: the offset of the CSR within bar0 * * Return: The iomem address to use in subsequent * writeq/readq operations. @@ -8433,7 +8433,7 @@ static inline int check_packet_present(struct hfi1_ctxtdata *rcd) return hfi1_rcd_head(rcd) != tail; } -/** +/* * Common code for receive contexts interrupt handlers. * Update traces, increment kernel IRQ counter and * setup ASPM when needed. @@ -8447,7 +8447,7 @@ static void receive_interrupt_common(struct hfi1_ctxtdata *rcd) aspm_ctx_disable(rcd); } -/** +/* * __hfi1_rcd_eoi_intr() - Make HW issue receive interrupt * when there are packets present in the queue. When calling * with interrupts enabled please use hfi1_rcd_eoi_intr. @@ -8484,8 +8484,8 @@ static void hfi1_rcd_eoi_intr(struct hfi1_ctxtdata *rcd) /** * hfi1_netdev_rx_napi - napi poll function to move eoi inline - * @napi - pointer to napi object - * @budget - netdev budget + * @napi: pointer to napi object + * @budget: netdev budget */ int hfi1_netdev_rx_napi(struct napi_struct *napi, int budget) { @@ -10142,7 +10142,7 @@ u32 lrh_max_header_bytes(struct hfi1_devdata *dd) /* * Set Send Length - * @ppd - per port data + * @ppd: per port data * * Set the MTU by limiting how many DWs may be sent. The SendLenCheck* * registers compare against LRH.PktLen, so use the max bytes included @@ -14200,9 +14200,9 @@ u8 hfi1_get_qp_map(struct hfi1_devdata *dd, u8 idx) /** * init_qpmap_table - * @dd - device data - * @first_ctxt - first context - * @last_ctxt - first context + * @dd: device data + * @first_ctxt: first context + * @last_ctxt: first context * * This return sets the qpn mapping table that * is indexed by qpn[8:1]. @@ -14383,8 +14383,8 @@ static int qos_rmt_entries(struct hfi1_devdata *dd, unsigned int *mp, /** * init_qos - init RX qos - * @dd - device data - * @rmt - RSM map table + * @dd: device data + * @rmt: RSM map table * * This routine initializes Rule 0 and the RSM map table to implement * quality of service (qos). @@ -15022,8 +15022,7 @@ static int check_int_registers(struct hfi1_devdata *dd) /** * hfi1_init_dd() - Initialize most of the dd structure. - * @dev: the pci_dev for hfi1_ib device - * @ent: pci_device_id struct for this dev + * @dd: the dd device * * This is global, and is called directly at init to set up the * chip-specific function pointers for later use. @@ -15378,10 +15377,11 @@ static u16 delay_cycles(struct hfi1_pportdata *ppd, u32 desired_egress_rate, /** * create_pbc - build a pbc for transmission + * @ppd: info of physical Hfi port * @flags: special case flags or-ed in built pbc - * @srate: static rate + * @srate_mbs: static rate * @vl: vl - * @dwlen: dword length (header words + data words + pbc words) + * @dw_len: dword length (header words + data words + pbc words) * * Create a PBC with the given flags, rate, VL, and length. * From 04dccf5d0933b0a8be937f43c53ba478194716f1 Mon Sep 17 00:00:00 2001 From: Lee Jones Date: Thu, 21 Jan 2021 09:45:05 +0000 Subject: [PATCH 144/414] RDMA/hw/qib/qib_twsi: Provide description for missing param 'last' Fixes the following W=1 kernel build warning(s): drivers/infiniband/hw/qib/qib_twsi.c:175: warning: Function parameter or member 'last' not described in 'rd_byte' Link: https://lore.kernel.org/r/20210121094519.2044049-17-lee.jones@linaro.org Cc: Dennis Dalessandro Cc: Mike Marciniszyn Cc: Doug Ledford Cc: Jason Gunthorpe Cc: linux-rdma@vger.kernel.org Signed-off-by: Lee Jones Signed-off-by: Jason Gunthorpe --- drivers/infiniband/hw/qib/qib_twsi.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/infiniband/hw/qib/qib_twsi.c b/drivers/infiniband/hw/qib/qib_twsi.c index f5698664419b..97b8a2bf5c69 100644 --- a/drivers/infiniband/hw/qib/qib_twsi.c +++ b/drivers/infiniband/hw/qib/qib_twsi.c @@ -168,6 +168,7 @@ static void stop_cmd(struct qib_devdata *dd); /** * rd_byte - read a byte, sending STOP on last, else ACK * @dd: the qlogic_ib device + * @last: identifies the last read * * Returns byte shifted out of device */ From 6b3137fe71291d6c0c4090b7a7708944f8caf61f Mon Sep 17 00:00:00 2001 From: Lee Jones Date: Thu, 21 Jan 2021 09:45:06 +0000 Subject: [PATCH 145/414] RDMA/hw/qib/qib_tx: Provide description for 'qib_chg_pioavailkernel()'s 'rcd' param Fixes the following W=1 kernel build warning(s): drivers/infiniband/hw/qib/qib_tx.c:383: warning: Function parameter or member 'rcd' not described in 'qib_chg_pioavailkernel' Link: https://lore.kernel.org/r/20210121094519.2044049-18-lee.jones@linaro.org Cc: Dennis Dalessandro Cc: Mike Marciniszyn Cc: Doug Ledford Cc: Jason Gunthorpe Cc: linux-rdma@vger.kernel.org Signed-off-by: Lee Jones Signed-off-by: Jason Gunthorpe --- drivers/infiniband/hw/qib/qib_tx.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/infiniband/hw/qib/qib_tx.c b/drivers/infiniband/hw/qib/qib_tx.c index 29785eb84646..6a8148851f21 100644 --- a/drivers/infiniband/hw/qib/qib_tx.c +++ b/drivers/infiniband/hw/qib/qib_tx.c @@ -377,6 +377,7 @@ void qib_sendbuf_done(struct qib_devdata *dd, unsigned n) * @start: the starting send buffer number * @len: the number of send buffers * @avail: true if the buffers are available for kernel use, false otherwise + * @rcd: the context pointer */ void qib_chg_pioavailkernel(struct qib_devdata *dd, unsigned start, unsigned len, u32 avail, struct qib_ctxtdata *rcd) From f8005fa870ff38b6c7c2675bfd564cee05b99474 Mon Sep 17 00:00:00 2001 From: Lee Jones Date: Thu, 21 Jan 2021 09:45:07 +0000 Subject: [PATCH 146/414] RDMA/hw/qib/qib_uc: Provide description for missing 'flags' param Fixes the following W=1 kernel build warning(s): drivers/infiniband/hw/qib/qib_uc.c:49: warning: Function parameter or member 'flags' not described in 'qib_make_uc_req' Link: https://lore.kernel.org/r/20210121094519.2044049-19-lee.jones@linaro.org Cc: Dennis Dalessandro Cc: Mike Marciniszyn Cc: Doug Ledford Cc: Jason Gunthorpe Cc: linux-rdma@vger.kernel.org Signed-off-by: Lee Jones Signed-off-by: Jason Gunthorpe --- drivers/infiniband/hw/qib/qib_uc.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/infiniband/hw/qib/qib_uc.c b/drivers/infiniband/hw/qib/qib_uc.c index 554af4273a13..8e2bda77d8b9 100644 --- a/drivers/infiniband/hw/qib/qib_uc.c +++ b/drivers/infiniband/hw/qib/qib_uc.c @@ -40,6 +40,7 @@ /** * qib_make_uc_req - construct a request packet (SEND, RDMA write) * @qp: a pointer to the QP + * @flags: unused * * Assumes the s_lock is held. * From 5209201585fb7f2102be8287001f5761a7f31dc2 Mon Sep 17 00:00:00 2001 From: Lee Jones Date: Thu, 21 Jan 2021 09:45:08 +0000 Subject: [PATCH 147/414] RDMA/hw/qib/qib_ud: Provide description for 'qib_make_ud_req's 'flags' param Fixes the following W=1 kernel build warning(s): drivers/infiniband/hw/qib/qib_ud.c:231: warning: Function parameter or member 'flags' not described in 'qib_make_ud_req' Link: https://lore.kernel.org/r/20210121094519.2044049-20-lee.jones@linaro.org Cc: Dennis Dalessandro Cc: Mike Marciniszyn Cc: Doug Ledford Cc: Jason Gunthorpe Cc: linux-rdma@vger.kernel.org Signed-off-by: Lee Jones Signed-off-by: Jason Gunthorpe --- drivers/infiniband/hw/qib/qib_ud.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/infiniband/hw/qib/qib_ud.c b/drivers/infiniband/hw/qib/qib_ud.c index 93ca21347959..81eda94bd279 100644 --- a/drivers/infiniband/hw/qib/qib_ud.c +++ b/drivers/infiniband/hw/qib/qib_ud.c @@ -222,6 +222,7 @@ static void qib_ud_loopback(struct rvt_qp *sqp, struct rvt_swqe *swqe) /** * qib_make_ud_req - construct a UD request packet * @qp: the QP + * @flags: flags to modify and pass back to caller * * Assumes the s_lock is held. * From e54e3db30ee7a64e0a83dcdff4db79a06c91102b Mon Sep 17 00:00:00 2001 From: Lee Jones Date: Thu, 21 Jan 2021 09:45:09 +0000 Subject: [PATCH 148/414] RDMA/sw/rdmavt/mad: Fix 'rvt_process_mad()'s documentation header Fixes the following W=1 kernel build warning(s): drivers/infiniband/sw/rdmavt/mad.c:75: warning: Function parameter or member 'in' not described in 'rvt_process_mad' drivers/infiniband/sw/rdmavt/mad.c:75: warning: Function parameter or member 'in_mad_size' not described in 'rvt_process_mad' drivers/infiniband/sw/rdmavt/mad.c:75: warning: Function parameter or member 'out' not described in 'rvt_process_mad' drivers/infiniband/sw/rdmavt/mad.c:75: warning: Function parameter or member 'out_mad_size' not described in 'rvt_process_mad' drivers/infiniband/sw/rdmavt/mad.c:75: warning: Function parameter or member 'out_mad_pkey_index' not described in 'rvt_process_mad' drivers/infiniband/sw/rdmavt/mad.c:75: warning: Excess function parameter 'in_mad' description in 'rvt_process_mad' drivers/infiniband/sw/rdmavt/mad.c:75: warning: Excess function parameter 'out_mad' description in 'rvt_process_mad' Link: https://lore.kernel.org/r/20210121094519.2044049-21-lee.jones@linaro.org Cc: Dennis Dalessandro Cc: Mike Marciniszyn Cc: Doug Ledford Cc: Jason Gunthorpe Cc: linux-rdma@vger.kernel.org Signed-off-by: Lee Jones Signed-off-by: Jason Gunthorpe --- drivers/infiniband/sw/rdmavt/mad.c | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/drivers/infiniband/sw/rdmavt/mad.c b/drivers/infiniband/sw/rdmavt/mad.c index 108c71e3ac23..8cc4de9aa664 100644 --- a/drivers/infiniband/sw/rdmavt/mad.c +++ b/drivers/infiniband/sw/rdmavt/mad.c @@ -56,8 +56,11 @@ * @port_num: the port number this packet came in on, 1 based from ib core * @in_wc: the work completion entry for this packet * @in_grh: the global route header for this packet - * @in_mad: the incoming MAD - * @out_mad: any outgoing MAD reply + * @in: the incoming MAD + * @out_mad_size: size of the incoming MAD reply + * @out: any outgoing MAD reply + * @out_mad_size: size of the outgoing MAD reply + * @out_mad_pkey_index: unused * * Note that the verbs framework has already done the MAD sanity checks, * and hop count/pointer updating for IB_MGMT_CLASS_SUBN_DIRECTED_ROUTE From 888bf760700a0c9b8e1180fbf205091b00819a54 Mon Sep 17 00:00:00 2001 From: Lee Jones Date: Thu, 21 Jan 2021 09:45:10 +0000 Subject: [PATCH 149/414] RDMA/hw/qib/qib_user_pages: Demote non-conformant documentation header Fixes the following W=1 kernel build warning(s): drivers/infiniband/hw/qib/qib_user_pages.c:60: warning: Function parameter or member 'hwdev' not described in 'qib_map_page' drivers/infiniband/hw/qib/qib_user_pages.c:60: warning: Function parameter or member 'page' not described in 'qib_map_page' drivers/infiniband/hw/qib/qib_user_pages.c:60: warning: Function parameter or member 'daddr' not described in 'qib_map_page' Link: https://lore.kernel.org/r/20210121094519.2044049-22-lee.jones@linaro.org Cc: Dennis Dalessandro Cc: Mike Marciniszyn Cc: Doug Ledford Cc: Jason Gunthorpe Cc: linux-rdma@vger.kernel.org Signed-off-by: Lee Jones Signed-off-by: Jason Gunthorpe --- drivers/infiniband/hw/qib/qib_user_pages.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/infiniband/hw/qib/qib_user_pages.c b/drivers/infiniband/hw/qib/qib_user_pages.c index 4c24e83f3175..5d6cf7427431 100644 --- a/drivers/infiniband/hw/qib/qib_user_pages.c +++ b/drivers/infiniband/hw/qib/qib_user_pages.c @@ -43,7 +43,7 @@ static void __qib_release_user_pages(struct page **p, size_t num_pages, unpin_user_pages_dirty_lock(p, num_pages, dirty); } -/** +/* * qib_map_page - a safety wrapper around pci_map_page() * * A dma_addr of all 0's is interpreted by the chip as "disabled". From 00d25ff67913ffa8370d5705af2f3c6fe5591fb1 Mon Sep 17 00:00:00 2001 From: Lee Jones Date: Thu, 21 Jan 2021 09:45:11 +0000 Subject: [PATCH 150/414] RDMA/sw/rdmavt/mcast: Demote incomplete kernel-doc header Fixes the following W=1 kernel build warning(s): drivers/infiniband/sw/rdmavt/mcast.c:195: warning: Function parameter or member 'rdi' not described in 'rvt_mcast_add' drivers/infiniband/sw/rdmavt/mcast.c:195: warning: Function parameter or member 'ibp' not described in 'rvt_mcast_add' Link: https://lore.kernel.org/r/20210121094519.2044049-23-lee.jones@linaro.org Cc: Dennis Dalessandro Cc: Mike Marciniszyn Cc: Doug Ledford Cc: Jason Gunthorpe Cc: linux-rdma@vger.kernel.org Signed-off-by: Lee Jones Signed-off-by: Jason Gunthorpe --- drivers/infiniband/sw/rdmavt/mcast.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/infiniband/sw/rdmavt/mcast.c b/drivers/infiniband/sw/rdmavt/mcast.c index 5233a63d99a6..951abac13dbb 100644 --- a/drivers/infiniband/sw/rdmavt/mcast.c +++ b/drivers/infiniband/sw/rdmavt/mcast.c @@ -180,7 +180,7 @@ struct rvt_mcast *rvt_mcast_find(struct rvt_ibport *ibp, union ib_gid *mgid, } EXPORT_SYMBOL(rvt_mcast_find); -/** +/* * rvt_mcast_add - insert mcast GID into table and attach QP struct * @mcast: the mcast GID table * @mqp: the QP to attach From 9d2338ca5b5f0d141690dd580fbeb776fd22437b Mon Sep 17 00:00:00 2001 From: Lee Jones Date: Thu, 21 Jan 2021 09:45:12 +0000 Subject: [PATCH 151/414] RDMA/hw/hfi1/exp_rcv: Fix some kernel-doc formatting issues Fixes the following W=1 kernel build warning(s): drivers/infiniband/hw/hfi1/exp_rcv.c:56: warning: Function parameter or member 'set' not described in 'hfi1_exp_tid_set_init' drivers/infiniband/hw/hfi1/exp_rcv.c:66: warning: Function parameter or member 'rcd' not described in 'hfi1_exp_tid_group_init' drivers/infiniband/hw/hfi1/exp_rcv.c:77: warning: Function parameter or member 'rcd' not described in 'hfi1_alloc_ctxt_rcv_groups' drivers/infiniband/hw/hfi1/exp_rcv.c:114: warning: Function parameter or member 'rcd' not described in 'hfi1_free_ctxt_rcv_groups' Link: https://lore.kernel.org/r/20210121094519.2044049-24-lee.jones@linaro.org Cc: Mike Marciniszyn Cc: Dennis Dalessandro Cc: Doug Ledford Cc: Jason Gunthorpe Cc: linux-rdma@vger.kernel.org Signed-off-by: Lee Jones Signed-off-by: Jason Gunthorpe --- drivers/infiniband/hw/hfi1/exp_rcv.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/drivers/infiniband/hw/hfi1/exp_rcv.c b/drivers/infiniband/hw/hfi1/exp_rcv.c index e9d5cc8b771a..91f13140ddf2 100644 --- a/drivers/infiniband/hw/hfi1/exp_rcv.c +++ b/drivers/infiniband/hw/hfi1/exp_rcv.c @@ -50,7 +50,7 @@ /** * exp_tid_group_init - initialize exp_tid_set - * @set - the set + * @set: the set */ static void hfi1_exp_tid_set_init(struct exp_tid_set *set) { @@ -60,7 +60,7 @@ static void hfi1_exp_tid_set_init(struct exp_tid_set *set) /** * hfi1_exp_tid_group_init - initialize rcd expected receive - * @rcd - the rcd + * @rcd: the rcd */ void hfi1_exp_tid_group_init(struct hfi1_ctxtdata *rcd) { @@ -71,7 +71,7 @@ void hfi1_exp_tid_group_init(struct hfi1_ctxtdata *rcd) /** * alloc_ctxt_rcv_groups - initialize expected receive groups - * @rcd - the context to add the groupings to + * @rcd: the context to add the groupings to */ int hfi1_alloc_ctxt_rcv_groups(struct hfi1_ctxtdata *rcd) { @@ -101,7 +101,7 @@ int hfi1_alloc_ctxt_rcv_groups(struct hfi1_ctxtdata *rcd) /** * free_ctxt_rcv_groups - free expected receive groups - * @rcd - the context to free + * @rcd: the context to free * * The routine dismantles the expect receive linked * list and clears any tids associated with the receive From f40164f964d414211e4560e50fc53cef9513d034 Mon Sep 17 00:00:00 2001 From: Lee Jones Date: Thu, 21 Jan 2021 09:45:13 +0000 Subject: [PATCH 152/414] RDMA/hw/qib/qib_iba7220: Fix some kernel-doc issues Fixes the following W=1 kernel build warning(s): drivers/infiniband/hw/qib/qib_iba7220.c:1726: warning: Function parameter or member 'ppd' not described in 'qib_setup_7220_setextled' drivers/infiniband/hw/qib/qib_iba7220.c:1726: warning: Excess function parameter 'dd' description in 'qib_setup_7220_setextled' drivers/infiniband/hw/qib/qib_iba7220.c:2154: warning: Function parameter or member 'type' not described in 'qib_7220_put_tid' drivers/infiniband/hw/qib/qib_iba7220.c:2154: warning: Excess function parameter 'tidtype' description in 'qib_7220_put_tid' drivers/infiniband/hw/qib/qib_iba7220.c:2192: warning: Function parameter or member 'rcd' not described in 'qib_7220_clear_tids' drivers/infiniband/hw/qib/qib_iba7220.c:2192: warning: Excess function parameter 'ctxt' description in 'qib_7220_clear_tids' drivers/infiniband/hw/qib/qib_iba7220.c:2248: warning: Function parameter or member 'kinfo' not described in 'qib_7220_get_base_info' drivers/infiniband/hw/qib/qib_iba7220.c:2248: warning: Excess function parameter 'kbase' description in 'qib_7220_get_base_info' drivers/infiniband/hw/qib/qib_iba7220.c:2903: warning: Function parameter or member 'ppd' not described in 'qib_portcntr_7220' drivers/infiniband/hw/qib/qib_iba7220.c:2903: warning: Function parameter or member 'reg' not described in 'qib_portcntr_7220' drivers/infiniband/hw/qib/qib_iba7220.c:2903: warning: Excess function parameter 'dd' description in 'qib_portcntr_7220' drivers/infiniband/hw/qib/qib_iba7220.c:2903: warning: Excess function parameter 'creg' description in 'qib_portcntr_7220' drivers/infiniband/hw/qib/qib_iba7220.c:3242: warning: Function parameter or member 't' not described in 'qib_get_7220_faststats' drivers/infiniband/hw/qib/qib_iba7220.c:4479: warning: Function parameter or member 'pdev' not described in 'qib_init_iba7220_funcs' drivers/infiniband/hw/qib/qib_iba7220.c:4479: warning: Excess function parameter 'dev' description in 'qib_init_iba7220_funcs' Link: https://lore.kernel.org/r/20210121094519.2044049-25-lee.jones@linaro.org Cc: Dennis Dalessandro Cc: Mike Marciniszyn Cc: Doug Ledford Cc: Jason Gunthorpe Cc: linux-rdma@vger.kernel.org Signed-off-by: Lee Jones Signed-off-by: Jason Gunthorpe --- drivers/infiniband/hw/qib/qib_iba7220.c | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/drivers/infiniband/hw/qib/qib_iba7220.c b/drivers/infiniband/hw/qib/qib_iba7220.c index 0a6f26d4cb31..229dcd6ead95 100644 --- a/drivers/infiniband/hw/qib/qib_iba7220.c +++ b/drivers/infiniband/hw/qib/qib_iba7220.c @@ -1701,7 +1701,7 @@ static void qib_7220_quiet_serdes(struct qib_pportdata *ppd) /** * qib_setup_7220_setextled - set the state of the two external LEDs - * @dd: the qlogic_ib device + * @ppd: the qlogic_ib device * @on: whether the link is up or not * * The exact combo of LEDs if on is true is determined by looking @@ -2146,7 +2146,7 @@ static int qib_setup_7220_reset(struct qib_devdata *dd) * qib_7220_put_tid - write a TID to the chip * @dd: the qlogic_ib device * @tidptr: pointer to the expected TID (in chip) to update - * @tidtype: 0 for eager, 1 for expected + * @type: 0 for eager, 1 for expected * @pa: physical address of in memory buffer; tidinvalid if freeing */ static void qib_7220_put_tid(struct qib_devdata *dd, u64 __iomem *tidptr, @@ -2180,7 +2180,7 @@ static void qib_7220_put_tid(struct qib_devdata *dd, u64 __iomem *tidptr, /** * qib_7220_clear_tids - clear all TID entries for a ctxt, expected and eager * @dd: the qlogic_ib device - * @ctxt: the ctxt + * @rcd: the ctxt * * clear all TID entries for a ctxt, expected and eager. * Used from qib_close(). On this chip, TIDs are only 32 bits, @@ -2238,7 +2238,7 @@ static void qib_7220_tidtemplate(struct qib_devdata *dd) /** * qib_init_7220_get_base_info - set chip-specific flags for user code * @rcd: the qlogic_ib ctxt - * @kbase: qib_base_info pointer + * @kinfo: qib_base_info pointer * * We set the PCIE flag because the lower bandwidth on PCIe vs * HyperTransport can affect some user packet algorithims. @@ -2896,8 +2896,8 @@ static void sendctrl_7220_mod(struct qib_pportdata *ppd, u32 op) /** * qib_portcntr_7220 - read a per-port counter - * @dd: the qlogic_ib device - * @creg: the counter to snapshot + * @ppd: the qlogic_ib device + * @reg: the counter to snapshot */ static u64 qib_portcntr_7220(struct qib_pportdata *ppd, u32 reg) { @@ -3232,7 +3232,7 @@ static u32 qib_read_7220portcntrs(struct qib_devdata *dd, loff_t pos, u32 port, /** * qib_get_7220_faststats - get word counters from chip before they overflow - * @opaque - contains a pointer to the qlogic_ib device qib_devdata + * @t: contains a pointer to the qlogic_ib device qib_devdata * * This needs more work; in particular, decision on whether we really * need traffic_wds done the way it is @@ -4468,7 +4468,7 @@ static int qib_7220_eeprom_wen(struct qib_devdata *dd, int wen) /** * qib_init_iba7220_funcs - set up the chip-specific function pointers - * @dev: the pci_dev for qlogic_ib device + * @pdev: the pci_dev for qlogic_ib device * @ent: pci_device_id struct for this dev * * This is global, and is called directly at init to set up the From 8e9a197e8457fb25126b3399fef878f8ed961648 Mon Sep 17 00:00:00 2001 From: Lee Jones Date: Thu, 21 Jan 2021 09:45:14 +0000 Subject: [PATCH 153/414] RDMA/hw/hfi1/file_ops: Fix' manage_rcvq()'s 'arg' param Fixes the following W=1 kernel build warning(s): drivers/infiniband/hw/hfi1/file_ops.c:1533: warning: Function parameter or member 'arg' not described in 'manage_rcvq' drivers/infiniband/hw/hfi1/file_ops.c:1533: warning: Excess function parameter 'start_stop' description in 'manage_rcvq' Link: https://lore.kernel.org/r/20210121094519.2044049-26-lee.jones@linaro.org Cc: Mike Marciniszyn Cc: Dennis Dalessandro Cc: Doug Ledford Cc: Jason Gunthorpe Cc: linux-rdma@vger.kernel.org Signed-off-by: Lee Jones Signed-off-by: Jason Gunthorpe --- drivers/infiniband/hw/hfi1/file_ops.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/infiniband/hw/hfi1/file_ops.c b/drivers/infiniband/hw/hfi1/file_ops.c index 329ee4f48d95..3b7bbc7b9d10 100644 --- a/drivers/infiniband/hw/hfi1/file_ops.c +++ b/drivers/infiniband/hw/hfi1/file_ops.c @@ -1522,7 +1522,7 @@ int hfi1_set_uevent_bits(struct hfi1_pportdata *ppd, const int evtbit) * manage_rcvq - manage a context's receive queue * @uctxt: the context * @subctxt: the sub-context - * @start_stop: action to carry out + * @arg: start/stop action to carry out * * start_stop == 0 disables receive on the context, for use in queue * overflow conditions. start_stop==1 re-enables, to be used to From cf8f5cea24d5612b29ccc28a102d441dffde27c8 Mon Sep 17 00:00:00 2001 From: Lee Jones Date: Thu, 21 Jan 2021 09:45:15 +0000 Subject: [PATCH 154/414] RDMA/sw/rdmavt/mr: Fix some issues related to formatting and missing descriptions Fixes the following W=1 kernel build warning(s): drivers/infiniband/sw/rdmavt/mr.c:380: warning: Function parameter or member 'virt_addr' not described in 'rvt_reg_user_mr' drivers/infiniband/sw/rdmavt/mr.c:449: warning: Function parameter or member 'qp' not described in 'rvt_dereg_clean_qp_cb' drivers/infiniband/sw/rdmavt/mr.c:449: warning: Function parameter or member 'v' not described in 'rvt_dereg_clean_qp_cb' drivers/infiniband/sw/rdmavt/mr.c:466: warning: Function parameter or member 'mr' not described in 'rvt_dereg_clean_qps' drivers/infiniband/sw/rdmavt/mr.c:484: warning: Function parameter or member 'mr' not described in 'rvt_check_refs' drivers/infiniband/sw/rdmavt/mr.c:484: warning: Function parameter or member 't' not described in 'rvt_check_refs' drivers/infiniband/sw/rdmavt/mr.c:513: warning: Function parameter or member 'mr' not described in 'rvt_mr_has_lkey' drivers/infiniband/sw/rdmavt/mr.c:513: warning: Function parameter or member 'lkey' not described in 'rvt_mr_has_lkey' drivers/infiniband/sw/rdmavt/mr.c:526: warning: Function parameter or member 'ss' not described in 'rvt_ss_has_lkey' drivers/infiniband/sw/rdmavt/mr.c:526: warning: Function parameter or member 'lkey' not described in 'rvt_ss_has_lkey' drivers/infiniband/sw/rdmavt/mr.c:551: warning: Function parameter or member 'udata' not described in 'rvt_dereg_mr' Link: https://lore.kernel.org/r/20210121094519.2044049-27-lee.jones@linaro.org Cc: Dennis Dalessandro Cc: Mike Marciniszyn Cc: Doug Ledford Cc: Jason Gunthorpe Cc: linux-rdma@vger.kernel.org Signed-off-by: Lee Jones Signed-off-by: Jason Gunthorpe --- drivers/infiniband/sw/rdmavt/mr.c | 21 +++++++++++---------- 1 file changed, 11 insertions(+), 10 deletions(-) diff --git a/drivers/infiniband/sw/rdmavt/mr.c b/drivers/infiniband/sw/rdmavt/mr.c index 90fc234f489a..601d18dda1f5 100644 --- a/drivers/infiniband/sw/rdmavt/mr.c +++ b/drivers/infiniband/sw/rdmavt/mr.c @@ -369,6 +369,7 @@ struct ib_mr *rvt_get_dma_mr(struct ib_pd *pd, int acc) * @pd: protection domain for this memory region * @start: starting userspace address * @length: length of region to register + * @virt_addr: associated virtual address * @mr_access_flags: access flags for this memory region * @udata: unused by the driver * @@ -438,8 +439,8 @@ struct ib_mr *rvt_reg_user_mr(struct ib_pd *pd, u64 start, u64 length, /** * rvt_dereg_clean_qp_cb - callback from iterator - * @qp - the qp - * @v - the mregion (as u64) + * @qp: the qp + * @v: the mregion (as u64) * * This routine fields the callback for all QPs and * for QPs in the same PD as the MR will call the @@ -457,7 +458,7 @@ static void rvt_dereg_clean_qp_cb(struct rvt_qp *qp, u64 v) /** * rvt_dereg_clean_qps - find QPs for reference cleanup - * @mr - the MR that is being deregistered + * @mr: the MR that is being deregistered * * This routine iterates RC QPs looking for references * to the lkey noted in mr. @@ -471,8 +472,8 @@ static void rvt_dereg_clean_qps(struct rvt_mregion *mr) /** * rvt_check_refs - check references - * @mr - the megion - * @t - the caller identification + * @mr: the megion + * @t: the caller identification * * This routine checks MRs holding a reference during * when being de-registered. @@ -506,8 +507,8 @@ static int rvt_check_refs(struct rvt_mregion *mr, const char *t) /** * rvt_mr_has_lkey - is MR - * @mr - the mregion - * @lkey - the lkey + * @mr: the mregion + * @lkey: the lkey */ bool rvt_mr_has_lkey(struct rvt_mregion *mr, u32 lkey) { @@ -516,8 +517,8 @@ bool rvt_mr_has_lkey(struct rvt_mregion *mr, u32 lkey) /** * rvt_ss_has_lkey - is mr in sge tests - * @ss - the sge state - * @lkey + * @ss: the sge state + * @lkey: the lkey * * This code tests for an MR in the indicated * sge state. @@ -540,7 +541,7 @@ bool rvt_ss_has_lkey(struct rvt_sge_state *ss, u32 lkey) /** * rvt_dereg_mr - unregister and free a memory region * @ibmr: the memory region to free - * + * @udata: unused by the driver * * Note that this is called to free MRs created by rvt_get_dma_mr() * or rvt_reg_user_mr(). From a8a47b169b8dbffb16bad8cf911fc9de71a87787 Mon Sep 17 00:00:00 2001 From: Lee Jones Date: Thu, 21 Jan 2021 09:45:16 +0000 Subject: [PATCH 155/414] RDMA/hw/qib/qib_iba7322: Fix a bunch of copy/paste issues Fixes the following W=1 kernel build warning(s): drivers/infiniband/hw/qib/qib_iba7322.c:2521: warning: Function parameter or member 'ppd' not described in 'qib_7322_mini_quiet_serdes' drivers/infiniband/hw/qib/qib_iba7322.c:2521: warning: Excess function parameter 'dd' description in 'qib_7322_mini_quiet_serdes' drivers/infiniband/hw/qib/qib_iba7322.c:3768: warning: Function parameter or member 'type' not described in 'qib_7322_put_tid' drivers/infiniband/hw/qib/qib_iba7322.c:3768: warning: Excess function parameter 'tidtype' description in 'qib_7322_put_tid' drivers/infiniband/hw/qib/qib_iba7322.c:3806: warning: Function parameter or member 'rcd' not described in 'qib_7322_clear_tids' drivers/infiniband/hw/qib/qib_iba7322.c:3806: warning: Excess function parameter 'ctxt' description in 'qib_7322_clear_tids' drivers/infiniband/hw/qib/qib_iba7322.c:3872: warning: Function parameter or member 'kinfo' not described in 'qib_7322_get_base_info' drivers/infiniband/hw/qib/qib_iba7322.c:3872: warning: Excess function parameter 'kbase' description in 'qib_7322_get_base_info' drivers/infiniband/hw/qib/qib_iba7322.c:4730: warning: Function parameter or member 'reg' not described in 'qib_portcntr_7322' drivers/infiniband/hw/qib/qib_iba7322.c:4730: warning: Excess function parameter 'creg' description in 'qib_portcntr_7322' drivers/infiniband/hw/qib/qib_iba7322.c:5109: warning: Function parameter or member 't' not described in 'qib_get_7322_faststats' drivers/infiniband/hw/qib/qib_iba7322.c:7189: warning: Function parameter or member 'pdev' not described in 'qib_init_iba7322_funcs' drivers/infiniband/hw/qib/qib_iba7322.c:7189: warning: Excess function parameter 'dev' description in 'qib_init_iba7322_funcs' Link: https://lore.kernel.org/r/20210121094519.2044049-28-lee.jones@linaro.org Cc: Dennis Dalessandro Cc: Mike Marciniszyn Cc: Doug Ledford Cc: Jason Gunthorpe Cc: linux-rdma@vger.kernel.org Signed-off-by: Lee Jones Signed-off-by: Jason Gunthorpe --- drivers/infiniband/hw/qib/qib_iba7322.c | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/drivers/infiniband/hw/qib/qib_iba7322.c b/drivers/infiniband/hw/qib/qib_iba7322.c index 189a0ce6056a..9fe6ea75b45e 100644 --- a/drivers/infiniband/hw/qib/qib_iba7322.c +++ b/drivers/infiniband/hw/qib/qib_iba7322.c @@ -2514,7 +2514,7 @@ static int qib_7322_bringup_serdes(struct qib_pportdata *ppd) /** * qib_7322_quiet_serdes - set serdes to txidle - * @dd: the qlogic_ib device + * @ppd: the qlogic_ib device * Called when driver is being unloaded */ static void qib_7322_mini_quiet_serdes(struct qib_pportdata *ppd) @@ -3760,7 +3760,7 @@ static int qib_do_7322_reset(struct qib_devdata *dd) * qib_7322_put_tid - write a TID to the chip * @dd: the qlogic_ib device * @tidptr: pointer to the expected TID (in chip) to update - * @tidtype: 0 for eager, 1 for expected + * @type: 0 for eager, 1 for expected * @pa: physical address of in memory buffer; tidinvalid if freeing */ static void qib_7322_put_tid(struct qib_devdata *dd, u64 __iomem *tidptr, @@ -3796,7 +3796,7 @@ static void qib_7322_put_tid(struct qib_devdata *dd, u64 __iomem *tidptr, /** * qib_7322_clear_tids - clear all TID entries for a ctxt, expected and eager * @dd: the qlogic_ib device - * @ctxt: the ctxt + * @rcd: the ctxt * * clear all TID entries for a ctxt, expected and eager. * Used from qib_close(). @@ -3861,7 +3861,7 @@ static void qib_7322_tidtemplate(struct qib_devdata *dd) /** * qib_init_7322_get_base_info - set chip-specific flags for user code * @rcd: the qlogic_ib ctxt - * @kbase: qib_base_info pointer + * @kinfo: qib_base_info pointer * * We set the PCIE flag because the lower bandwidth on PCIe vs * HyperTransport can affect some user packet algorithims. @@ -4724,7 +4724,7 @@ static void sendctrl_7322_mod(struct qib_pportdata *ppd, u32 op) /** * qib_portcntr_7322 - read a per-port chip counter * @ppd: the qlogic_ib pport - * @creg: the counter to read (not a chip offset) + * @reg: the counter to read (not a chip offset) */ static u64 qib_portcntr_7322(struct qib_pportdata *ppd, u32 reg) { @@ -5096,7 +5096,7 @@ static u32 qib_read_7322portcntrs(struct qib_devdata *dd, loff_t pos, u32 port, /** * qib_get_7322_faststats - get word counters from chip before they overflow - * @opaque - contains a pointer to the qlogic_ib device qib_devdata + * @t: contains a pointer to the qlogic_ib device qib_devdata * * VESTIGIAL IBA7322 has no "small fast counters", so the only * real purpose of this function is to maintain the notion of @@ -7175,7 +7175,7 @@ static int qib_7322_tempsense_rd(struct qib_devdata *dd, int regnum) /** * qib_init_iba7322_funcs - set up the chip-specific function pointers - * @dev: the pci_dev for qlogic_ib device + * @pdev: the pci_dev for qlogic_ib device * @ent: pci_device_id struct for this dev * * Also allocates, inits, and returns the devdata struct for this From 3c2504be7e6066bf5057323fb54255cfde62e675 Mon Sep 17 00:00:00 2001 From: Lee Jones Date: Thu, 21 Jan 2021 09:45:17 +0000 Subject: [PATCH 156/414] RDMA/hw/qib/qib_verbs: Repair some formatting problems Fixes the following W=1 kernel build warning(s): drivers/infiniband/hw/qib/qib_verbs.c:1077: warning: Function parameter or member 'ppd' not described in 'qib_get_counters' drivers/infiniband/hw/qib/qib_verbs.c:1077: warning: Excess function parameter 'dd' description in 'qib_get_counters' drivers/infiniband/hw/qib/qib_verbs.c:1686: warning: Function parameter or member 'qp' not described in '_qib_schedule_send' drivers/infiniband/hw/qib/qib_verbs.c:1703: warning: Function parameter or member 'qp' not described in 'qib_schedule_send' Link: https://lore.kernel.org/r/20210121094519.2044049-29-lee.jones@linaro.org Cc: Dennis Dalessandro Cc: Mike Marciniszyn Cc: Doug Ledford Cc: Jason Gunthorpe Cc: linux-rdma@vger.kernel.org Signed-off-by: Lee Jones Signed-off-by: Jason Gunthorpe --- drivers/infiniband/hw/qib/qib_verbs.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/drivers/infiniband/hw/qib/qib_verbs.c b/drivers/infiniband/hw/qib/qib_verbs.c index f6c01bad5a74..8e0de265ad57 100644 --- a/drivers/infiniband/hw/qib/qib_verbs.c +++ b/drivers/infiniband/hw/qib/qib_verbs.c @@ -1067,7 +1067,7 @@ int qib_snapshot_counters(struct qib_pportdata *ppd, u64 *swords, /** * qib_get_counters - get various chip counters - * @dd: the qlogic_ib device + * @ppd: the qlogic_ib device * @cntrs: counters are placed here * * Return the counters needed by recv_pma_get_portcounters(). @@ -1675,7 +1675,7 @@ void qib_unregister_ib_device(struct qib_devdata *dd) /** * _qib_schedule_send - schedule progress - * @qp - the qp + * @qp: the qp * * This schedules progress w/o regard to the s_flags. * @@ -1694,7 +1694,7 @@ bool _qib_schedule_send(struct rvt_qp *qp) /** * qib_schedule_send - schedule progress - * @qp - the qp + * @qp: the qp * * This schedules qp progress. The s_lock * should be held. From 71f964c3a653d0272ee3fea18ccba4154d2e8d36 Mon Sep 17 00:00:00 2001 From: Lee Jones Date: Thu, 21 Jan 2021 09:45:18 +0000 Subject: [PATCH 157/414] RDMA/hw/qib/qib_iba6120: Fix some repeated (copy/paste) kernel-doc issues Fixes the following W=1 kernel build warning(s): drivers/infiniband/hw/qib/qib_iba6120.c:1229: warning: Function parameter or member 'ppd' not described in 'qib_6120_bringup_serdes' drivers/infiniband/hw/qib/qib_iba6120.c:1229: warning: Excess function parameter 'dd' description in 'qib_6120_bringup_serdes' drivers/infiniband/hw/qib/qib_iba6120.c:1436: warning: Function parameter or member 'ppd' not described in 'qib_6120_setup_setextled' drivers/infiniband/hw/qib/qib_iba6120.c:1436: warning: Excess function parameter 'dd' description in 'qib_6120_setup_setextled' drivers/infiniband/hw/qib/qib_iba6120.c:1836: warning: Function parameter or member 'type' not described in 'qib_6120_put_tid' drivers/infiniband/hw/qib/qib_iba6120.c:1836: warning: Excess function parameter 'tidtype' description in 'qib_6120_put_tid' drivers/infiniband/hw/qib/qib_iba6120.c:1903: warning: Function parameter or member 'type' not described in 'qib_6120_put_tid_2' drivers/infiniband/hw/qib/qib_iba6120.c:1903: warning: Excess function parameter 'tidtype' description in 'qib_6120_put_tid_2' drivers/infiniband/hw/qib/qib_iba6120.c:1944: warning: Function parameter or member 'rcd' not described in 'qib_6120_clear_tids' drivers/infiniband/hw/qib/qib_iba6120.c:1944: warning: Excess function parameter 'ctxt' description in 'qib_6120_clear_tids' drivers/infiniband/hw/qib/qib_iba6120.c:2018: warning: Function parameter or member 'kinfo' not described in 'qib_6120_get_base_info' drivers/infiniband/hw/qib/qib_iba6120.c:2018: warning: Excess function parameter 'kbase' description in 'qib_6120_get_base_info' drivers/infiniband/hw/qib/qib_iba6120.c:2277: warning: Function parameter or member 'ppd' not described in 'qib_portcntr_6120' drivers/infiniband/hw/qib/qib_iba6120.c:2277: warning: Function parameter or member 'reg' not described in 'qib_portcntr_6120' drivers/infiniband/hw/qib/qib_iba6120.c:2277: warning: Excess function parameter 'dd' description in 'qib_portcntr_6120' drivers/infiniband/hw/qib/qib_iba6120.c:2277: warning: Excess function parameter 'creg' description in 'qib_portcntr_6120' drivers/infiniband/hw/qib/qib_iba6120.c:2620: warning: Function parameter or member 't' not described in 'qib_get_6120_faststats' Link: https://lore.kernel.org/r/20210121094519.2044049-30-lee.jones@linaro.org Cc: Dennis Dalessandro Cc: Mike Marciniszyn Cc: Doug Ledford Cc: Jason Gunthorpe Cc: linux-rdma@vger.kernel.org Signed-off-by: Lee Jones Signed-off-by: Jason Gunthorpe --- drivers/infiniband/hw/qib/qib_iba6120.c | 18 +++++++++--------- 1 file changed, 9 insertions(+), 9 deletions(-) diff --git a/drivers/infiniband/hw/qib/qib_iba6120.c b/drivers/infiniband/hw/qib/qib_iba6120.c index 44150be215bf..b35e1174be22 100644 --- a/drivers/infiniband/hw/qib/qib_iba6120.c +++ b/drivers/infiniband/hw/qib/qib_iba6120.c @@ -1223,7 +1223,7 @@ static void qib_set_ib_6120_lstate(struct qib_pportdata *ppd, u16 linkcmd, /** * qib_6120_bringup_serdes - bring up the serdes - * @dd: the qlogic_ib device + * @ppd: the qlogic_ib device */ static int qib_6120_bringup_serdes(struct qib_pportdata *ppd) { @@ -1412,7 +1412,7 @@ static void qib_6120_quiet_serdes(struct qib_pportdata *ppd) /** * qib_6120_setup_setextled - set the state of the two external LEDs - * @dd: the qlogic_ib device + * @ppd: the qlogic_ib device * @on: whether the link is up or not * * The exact combo of LEDs if on is true is determined by looking @@ -1823,7 +1823,7 @@ static int qib_6120_setup_reset(struct qib_devdata *dd) * qib_6120_put_tid - write a TID in chip * @dd: the qlogic_ib device * @tidptr: pointer to the expected TID (in chip) to update - * @tidtype: RCVHQ_RCV_TYPE_EAGER (1) for eager, RCVHQ_RCV_TYPE_EXPECTED (0) + * @type: RCVHQ_RCV_TYPE_EAGER (1) for eager, RCVHQ_RCV_TYPE_EXPECTED (0) * for expected * @pa: physical address of in memory buffer; tidinvalid if freeing * @@ -1890,7 +1890,7 @@ static void qib_6120_put_tid(struct qib_devdata *dd, u64 __iomem *tidptr, * qib_6120_put_tid_2 - write a TID in chip, Revision 2 or higher * @dd: the qlogic_ib device * @tidptr: pointer to the expected TID (in chip) to update - * @tidtype: RCVHQ_RCV_TYPE_EAGER (1) for eager, RCVHQ_RCV_TYPE_EXPECTED (0) + * @type: RCVHQ_RCV_TYPE_EAGER (1) for eager, RCVHQ_RCV_TYPE_EXPECTED (0) * for expected * @pa: physical address of in memory buffer; tidinvalid if freeing * @@ -1932,7 +1932,7 @@ static void qib_6120_put_tid_2(struct qib_devdata *dd, u64 __iomem *tidptr, /** * qib_6120_clear_tids - clear all TID entries for a context, expected and eager * @dd: the qlogic_ib device - * @ctxt: the context + * @rcd: the context * * clear all TID entries for a context, expected and eager. * Used from qib_close(). On this chip, TIDs are only 32 bits, @@ -2008,7 +2008,7 @@ int __attribute__((weak)) qib_unordered_wc(void) /** * qib_6120_get_base_info - set chip-specific flags for user code * @rcd: the qlogic_ib ctxt - * @kbase: qib_base_info pointer + * @kinfo: qib_base_info pointer * * We set the PCIE flag because the lower bandwidth on PCIe vs * HyperTransport can affect some user packet algorithms. @@ -2270,8 +2270,8 @@ static void sendctrl_6120_mod(struct qib_pportdata *ppd, u32 op) /** * qib_portcntr_6120 - read a per-port counter - * @dd: the qlogic_ib device - * @creg: the counter to snapshot + * @ppd: the qlogic_ib device + * @reg: the counter to snapshot */ static u64 qib_portcntr_6120(struct qib_pportdata *ppd, u32 reg) { @@ -2610,7 +2610,7 @@ static void qib_chk_6120_errormask(struct qib_devdata *dd) /** * qib_get_faststats - get word counters from chip before they overflow - * @opaque - contains a pointer to the qlogic_ib device qib_devdata + * @t: contains a pointer to the qlogic_ib device qib_devdata * * This needs more work; in particular, decision on whether we really * need traffic_wds done the way it is From f8e9a970159c7bd30429b86710397e9914fefbca Mon Sep 17 00:00:00 2001 From: Lee Jones Date: Thu, 21 Jan 2021 09:45:19 +0000 Subject: [PATCH 158/414] RDMA/sw/rdmavt/qp: Fix a bunch of kernel-doc misdemeanours Fixes the following W=1 kernel build warning(s): drivers/infiniband/sw/rdmavt/qp.c:165: warning: Function parameter or member 'rdi' not described in 'rvt_wss_init' drivers/infiniband/sw/rdmavt/qp.c:329: warning: Function parameter or member 'rdi' not described in 'init_qpn_table' drivers/infiniband/sw/rdmavt/qp.c:534: warning: Function parameter or member 'type' not described in 'alloc_qpn' drivers/infiniband/sw/rdmavt/qp.c:664: warning: Function parameter or member 'wqe' not described in 'rvt_swqe_has_lkey' drivers/infiniband/sw/rdmavt/qp.c:664: warning: Function parameter or member 'lkey' not described in 'rvt_swqe_has_lkey' drivers/infiniband/sw/rdmavt/qp.c:682: warning: Function parameter or member 'qp' not described in 'rvt_qp_sends_has_lkey' drivers/infiniband/sw/rdmavt/qp.c:682: warning: Function parameter or member 'lkey' not described in 'rvt_qp_sends_has_lkey' drivers/infiniband/sw/rdmavt/qp.c:706: warning: Function parameter or member 'qp' not described in 'rvt_qp_acks_has_lkey' drivers/infiniband/sw/rdmavt/qp.c:706: warning: Function parameter or member 'lkey' not described in 'rvt_qp_acks_has_lkey' drivers/infiniband/sw/rdmavt/qp.c:866: warning: Function parameter or member 'rdi' not described in 'rvt_init_qp' drivers/infiniband/sw/rdmavt/qp.c:920: warning: Function parameter or member 'rdi' not described in '_rvt_reset_qp' drivers/infiniband/sw/rdmavt/qp.c:1736: warning: Function parameter or member 'udata' not described in 'rvt_destroy_qp' drivers/infiniband/sw/rdmavt/qp.c:1924: warning: Function parameter or member 'qp' not described in 'rvt_qp_valid_operation' drivers/infiniband/sw/rdmavt/qp.c:1924: warning: Function parameter or member 'post_parms' not described in 'rvt_qp_valid_operation' drivers/infiniband/sw/rdmavt/qp.c:1924: warning: Function parameter or member 'wr' not described in 'rvt_qp_valid_operation' drivers/infiniband/sw/rdmavt/qp.c:2020: warning: Function parameter or member 'call_send' not described in 'rvt_post_one_wr' drivers/infiniband/sw/rdmavt/qp.c:2621: warning: Function parameter or member 'qp' not described in 'rvt_stop_rnr_timer' Link: https://lore.kernel.org/r/20210121094519.2044049-31-lee.jones@linaro.org Cc: Dennis Dalessandro Cc: Mike Marciniszyn Cc: Doug Ledford Cc: Jason Gunthorpe Cc: linux-rdma@vger.kernel.org Signed-off-by: Lee Jones Signed-off-by: Jason Gunthorpe --- drivers/infiniband/sw/rdmavt/qp.c | 34 ++++++++++++++++++------------- 1 file changed, 20 insertions(+), 14 deletions(-) diff --git a/drivers/infiniband/sw/rdmavt/qp.c b/drivers/infiniband/sw/rdmavt/qp.c index 22fa9bde5419..76d6bbfbec50 100644 --- a/drivers/infiniband/sw/rdmavt/qp.c +++ b/drivers/infiniband/sw/rdmavt/qp.c @@ -156,7 +156,7 @@ void rvt_wss_exit(struct rvt_dev_info *rdi) rdi->wss = NULL; } -/** +/* * rvt_wss_init - Init wss data structures * * Return: 0 on success @@ -323,6 +323,7 @@ static void get_map_page(struct rvt_qpn_table *qpt, /** * init_qpn_table - initialize the QP number table for a device + * @rdi: rvt dev struct * @qpt: the QPN table */ static int init_qpn_table(struct rvt_dev_info *rdi, struct rvt_qpn_table *qpt) @@ -524,6 +525,7 @@ static inline unsigned mk_qpn(struct rvt_qpn_table *qpt, * IB_QPT_SMI/IB_QPT_GSI * @rdi: rvt device info structure * @qpt: queue pair number table pointer + * @type: the QP type * @port_num: IB port number, 1 based, comes from core * @exclude_prefix: prefix of special queue pair number being allocated * @@ -655,8 +657,8 @@ static void rvt_clear_mr_refs(struct rvt_qp *qp, int clr_sends) /** * rvt_swqe_has_lkey - return true if lkey is used by swqe - * @wqe - the send wqe - * @lkey - the lkey + * @wqe: the send wqe + * @lkey: the lkey * * Test the swqe for using lkey */ @@ -675,8 +677,8 @@ static bool rvt_swqe_has_lkey(struct rvt_swqe *wqe, u32 lkey) /** * rvt_qp_sends_has_lkey - return true is qp sends use lkey - * @qp - the rvt_qp - * @lkey - the lkey + * @qp: the rvt_qp + * @lkey: the lkey */ static bool rvt_qp_sends_has_lkey(struct rvt_qp *qp, u32 lkey) { @@ -699,8 +701,8 @@ static bool rvt_qp_sends_has_lkey(struct rvt_qp *qp, u32 lkey) /** * rvt_qp_acks_has_lkey - return true if acks have lkey - * @qp - the qp - * @lkey - the lkey + * @qp: the qp + * @lkey: the lkey */ static bool rvt_qp_acks_has_lkey(struct rvt_qp *qp, u32 lkey) { @@ -716,10 +718,10 @@ static bool rvt_qp_acks_has_lkey(struct rvt_qp *qp, u32 lkey) return false; } -/* +/** * rvt_qp_mr_clean - clean up remote ops for lkey - * @qp - the qp - * @lkey - the lkey that is being de-registered + * @qp: the qp + * @lkey: the lkey that is being de-registered * * This routine checks if the lkey is being used by * the qp. @@ -853,6 +855,7 @@ int rvt_alloc_rq(struct rvt_rq *rq, u32 size, int node, /** * rvt_init_qp - initialize the QP state to the reset state + * @rdi: rvt dev struct * @qp: the QP to init or reinit * @type: the QP type * @@ -907,6 +910,7 @@ static void rvt_init_qp(struct rvt_dev_info *rdi, struct rvt_qp *qp, /** * _rvt_reset_qp - initialize the QP state to the reset state + * @rdi: rvt dev struct * @qp: the QP to reset * @type: the QP type * @@ -1726,6 +1730,7 @@ int rvt_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr, /** * rvt_destroy_qp - destroy a queue pair * @ibqp: the queue pair to destroy + * @udata: unused by the driver * * Note that this can be called while the QP is actively sending or * receiving! @@ -1901,9 +1906,9 @@ int rvt_post_recv(struct ib_qp *ibqp, const struct ib_recv_wr *wr, /** * rvt_qp_valid_operation - validate post send wr request - * @qp - the qp - * @post-parms - the post send table for the driver - * @wr - the work request + * @qp: the qp + * @post_parms_ the post send table for the driver + * @wr: the work request * * The routine validates the operation based on the * validation table an returns the length of the operation @@ -2013,6 +2018,7 @@ static inline int rvt_qp_is_avail( * rvt_post_one_wr - post one RC, UC, or UD send work request * @qp: the QP to post on * @wr: the work request to send + * @call_send: kick the send engine into gear */ static int rvt_post_one_wr(struct rvt_qp *qp, const struct ib_send_wr *wr, @@ -2612,7 +2618,7 @@ EXPORT_SYMBOL(rvt_stop_rc_timers); /** * rvt_stop_rnr_timer - stop an rnr timer - * @qp - the QP + * @qp: the QP * * stop an rnr timer and return if the timer * had been pending. From 9bc284ca0b6a1fdbb71fc5b6a0e1b65d743cf2ad Mon Sep 17 00:00:00 2001 From: Lukas Bulwahn Date: Mon, 25 Jan 2021 09:17:48 +0100 Subject: [PATCH 159/414] printk: rectify kernel-doc for prb_rec_init_wr() The command 'find ./kernel/printk/ | xargs ./scripts/kernel-doc -none' reported a mismatch with the kernel-doc of prb_rec_init_wr(). Rectify the kernel-doc, such that no issues remain for ./kernel/printk/. Signed-off-by: Lukas Bulwahn Reviewed-by: John Ogness Signed-off-by: Petr Mladek Link: https://lore.kernel.org/r/20210125081748.19903-1-lukas.bulwahn@gmail.com --- kernel/printk/printk_ringbuffer.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/kernel/printk/printk_ringbuffer.h b/kernel/printk/printk_ringbuffer.h index 5dc9d022db07..73cc80e01cef 100644 --- a/kernel/printk/printk_ringbuffer.h +++ b/kernel/printk/printk_ringbuffer.h @@ -287,7 +287,7 @@ _DEFINE_PRINTKRB(name, descbits, avgtextbits, &_##name##_text[0]) /* Writer Interface */ /** - * prb_rec_init_wd() - Initialize a buffer for writing records. + * prb_rec_init_wr() - Initialize a buffer for writing records. * * @r: The record to initialize. * @text_buf_size: The needed text buffer size. From 7af6fbddbd3379243f11367ca03e2635e42b89ba Mon Sep 17 00:00:00 2001 From: Mark Brown Date: Wed, 20 Jan 2021 16:47:13 +0000 Subject: [PATCH 160/414] Documentation: livepatch: Convert to automatically generated contents Automatically generate the tables of contents for livepatch documentation files that have tables of contents rather than open coding them so things are a little easier to maintain. Signed-off-by: Mark Brown Acked-by: Josh Poimboeuf Signed-off-by: Jiri Kosina --- Documentation/livepatch/livepatch.rst | 15 +-------------- Documentation/livepatch/module-elf-format.rst | 10 ++-------- 2 files changed, 3 insertions(+), 22 deletions(-) diff --git a/Documentation/livepatch/livepatch.rst b/Documentation/livepatch/livepatch.rst index c2c598c4ead8..68e3651e8af9 100644 --- a/Documentation/livepatch/livepatch.rst +++ b/Documentation/livepatch/livepatch.rst @@ -6,20 +6,7 @@ This document outlines basic information about kernel livepatching. .. Table of Contents: - 1. Motivation - 2. Kprobes, Ftrace, Livepatching - 3. Consistency model - 4. Livepatch module - 4.1. New functions - 4.2. Metadata - 5. Livepatch life-cycle - 5.1. Loading - 5.2. Enabling - 5.3. Replacing - 5.4. Disabling - 5.5. Removing - 6. Sysfs - 7. Limitations +.. contents:: :local: 1. Motivation diff --git a/Documentation/livepatch/module-elf-format.rst b/Documentation/livepatch/module-elf-format.rst index 8c6b894c4661..dbe9b400e39f 100644 --- a/Documentation/livepatch/module-elf-format.rst +++ b/Documentation/livepatch/module-elf-format.rst @@ -7,14 +7,8 @@ This document outlines the Elf format requirements that livepatch modules must f .. Table of Contents - 1. Background and motivation - 2. Livepatch modinfo field - 3. Livepatch relocation sections - 3.1 Livepatch relocation section format - 4. Livepatch symbols - 4.1 A livepatch module's symbol table - 4.2 Livepatch symbol format - 5. Symbol table and Elf section access +.. contents:: :local: + 1. Background and motivation ============================ From f89f20acff2d0f7a4801dc6ecde3de1ef0abe1d2 Mon Sep 17 00:00:00 2001 From: Mark Rutland Date: Wed, 20 Jan 2021 16:47:14 +0000 Subject: [PATCH 161/414] Documentation: livepatch: document reliable stacktrace Add documentation for reliable stacktrace. This is intended to describe the semantics and to be an aid for implementing architecture support for HAVE_RELIABLE_STACKTRACE. Unwinding is a subtle area, and architectures vary greatly in both implementation and the set of concerns that affect them, so I've tried to avoid making this too specific to any given architecture. I've used examples from both x86_64 and arm64 to explain corner cases in more detail, but I've tried to keep the descriptions sufficient for those who are unfamiliar with the particular architecture. This document aims to give rationale for all the recommendations and requirements, since that makes it easier to spot nearby issues, or when a check happens to catch a few things at once. Signed-off-by: Mark Rutland [Updates following review -- broonie] Acked-by: Josh Poimboeuf Reviewed-by: Randy Dunlap Signed-off-by: Mark Brown Signed-off-by: Jiri Kosina --- Documentation/livepatch/index.rst | 1 + .../livepatch/reliable-stacktrace.rst | 309 ++++++++++++++++++ 2 files changed, 310 insertions(+) create mode 100644 Documentation/livepatch/reliable-stacktrace.rst diff --git a/Documentation/livepatch/index.rst b/Documentation/livepatch/index.rst index 525944063be7..43cce5fad705 100644 --- a/Documentation/livepatch/index.rst +++ b/Documentation/livepatch/index.rst @@ -13,6 +13,7 @@ Kernel Livepatching module-elf-format shadow-vars system-state + reliable-stacktrace .. only:: subproject and html diff --git a/Documentation/livepatch/reliable-stacktrace.rst b/Documentation/livepatch/reliable-stacktrace.rst new file mode 100644 index 000000000000..67459d2ca2af --- /dev/null +++ b/Documentation/livepatch/reliable-stacktrace.rst @@ -0,0 +1,309 @@ +=================== +Reliable Stacktrace +=================== + +This document outlines basic information about reliable stacktracing. + +.. Table of Contents: + +.. contents:: :local: + +1. Introduction +=============== + +The kernel livepatch consistency model relies on accurately identifying which +functions may have live state and therefore may not be safe to patch. One way +to identify which functions are live is to use a stacktrace. + +Existing stacktrace code may not always give an accurate picture of all +functions with live state, and best-effort approaches which can be helpful for +debugging are unsound for livepatching. Livepatching depends on architectures +to provide a *reliable* stacktrace which ensures it never omits any live +functions from a trace. + + +2. Requirements +=============== + +Architectures must implement one of the reliable stacktrace functions. +Architectures using CONFIG_ARCH_STACKWALK must implement +'arch_stack_walk_reliable', and other architectures must implement +'save_stack_trace_tsk_reliable'. + +Principally, the reliable stacktrace function must ensure that either: + +* The trace includes all functions that the task may be returned to, and the + return code is zero to indicate that the trace is reliable. + +* The return code is non-zero to indicate that the trace is not reliable. + +.. note:: + In some cases it is legitimate to omit specific functions from the trace, + but all other functions must be reported. These cases are described in + futher detail below. + +Secondly, the reliable stacktrace function must be robust to cases where +the stack or other unwind state is corrupt or otherwise unreliable. The +function should attempt to detect such cases and return a non-zero error +code, and should not get stuck in an infinite loop or access memory in +an unsafe way. Specific cases are described in further detail below. + + +3. Compile-time analysis +======================== + +To ensure that kernel code can be correctly unwound in all cases, +architectures may need to verify that code has been compiled in a manner +expected by the unwinder. For example, an unwinder may expect that +functions manipulate the stack pointer in a limited way, or that all +functions use specific prologue and epilogue sequences. Architectures +with such requirements should verify the kernel compilation using +objtool. + +In some cases, an unwinder may require metadata to correctly unwind. +Where necessary, this metadata should be generated at build time using +objtool. + + +4. Considerations +================= + +The unwinding process varies across architectures, their respective procedure +call standards, and kernel configurations. This section describes common +details that architectures should consider. + +4.1 Identifying successful termination +-------------------------------------- + +Unwinding may terminate early for a number of reasons, including: + +* Stack or frame pointer corruption. + +* Missing unwind support for an uncommon scenario, or a bug in the unwinder. + +* Dynamically generated code (e.g. eBPF) or foreign code (e.g. EFI runtime + services) not following the conventions expected by the unwinder. + +To ensure that this does not result in functions being omitted from the trace, +even if not caught by other checks, it is strongly recommended that +architectures verify that a stacktrace ends at an expected location, e.g. + +* Within a specific function that is an entry point to the kernel. + +* At a specific location on a stack expected for a kernel entry point. + +* On a specific stack expected for a kernel entry point (e.g. if the + architecture has separate task and IRQ stacks). + +4.2 Identifying unwindable code +------------------------------- + +Unwinding typically relies on code following specific conventions (e.g. +manipulating a frame pointer), but there can be code which may not follow these +conventions and may require special handling in the unwinder, e.g. + +* Exception vectors and entry assembly. + +* Procedure Linkage Table (PLT) entries and veneer functions. + +* Trampoline assembly (e.g. ftrace, kprobes). + +* Dynamically generated code (e.g. eBPF, optprobe trampolines). + +* Foreign code (e.g. EFI runtime services). + +To ensure that such cases do not result in functions being omitted from a +trace, it is strongly recommended that architectures positively identify code +which is known to be reliable to unwind from, and reject unwinding from all +other code. + +Kernel code including modules and eBPF can be distinguished from foreign code +using '__kernel_text_address()'. Checking for this also helps to detect stack +corruption. + +There are several ways an architecture may identify kernel code which is deemed +unreliable to unwind from, e.g. + +* Placing such code into special linker sections, and rejecting unwinding from + any code in these sections. + +* Identifying specific portions of code using bounds information. + +4.3 Unwinding across interrupts and exceptions +---------------------------------------------- + +At function call boundaries the stack and other unwind state is expected to be +in a consistent state suitable for reliable unwinding, but this may not be the +case part-way through a function. For example, during a function prologue or +epilogue a frame pointer may be transiently invalid, or during the function +body the return address may be held in an arbitrary general purpose register. +For some architectures this may change at runtime as a result of dynamic +instrumentation. + +If an interrupt or other exception is taken while the stack or other unwind +state is in an inconsistent state, it may not be possible to reliably unwind, +and it may not be possible to identify whether such unwinding will be reliable. +See below for examples. + +Architectures which cannot identify when it is reliable to unwind such cases +(or where it is never reliable) must reject unwinding across exception +boundaries. Note that it may be reliable to unwind across certain +exceptions (e.g. IRQ) but unreliable to unwind across other exceptions +(e.g. NMI). + +Architectures which can identify when it is reliable to unwind such cases (or +have no such cases) should attempt to unwind across exception boundaries, as +doing so can prevent unnecessarily stalling livepatch consistency checks and +permits livepatch transitions to complete more quickly. + +4.4 Rewriting of return addresses +--------------------------------- + +Some trampolines temporarily modify the return address of a function in order +to intercept when that function returns with a return trampoline, e.g. + +* An ftrace trampoline may modify the return address so that function graph + tracing can intercept returns. + +* A kprobes (or optprobes) trampoline may modify the return address so that + kretprobes can intercept returns. + +When this happens, the original return address will not be in its usual +location. For trampolines which are not subject to live patching, where an +unwinder can reliably determine the original return address and no unwind state +is altered by the trampoline, the unwinder may report the original return +address in place of the trampoline and report this as reliable. Otherwise, an +unwinder must report these cases as unreliable. + +Special care is required when identifying the original return address, as this +information is not in a consistent location for the duration of the entry +trampoline or return trampoline. For example, considering the x86_64 +'return_to_handler' return trampoline: + +.. code-block:: none + + SYM_CODE_START(return_to_handler) + UNWIND_HINT_EMPTY + subq $24, %rsp + + /* Save the return values */ + movq %rax, (%rsp) + movq %rdx, 8(%rsp) + movq %rbp, %rdi + + call ftrace_return_to_handler + + movq %rax, %rdi + movq 8(%rsp), %rdx + movq (%rsp), %rax + addq $24, %rsp + JMP_NOSPEC rdi + SYM_CODE_END(return_to_handler) + +While the traced function runs its return address on the stack points to +the start of return_to_handler, and the original return address is stored in +the task's cur_ret_stack. During this time the unwinder can find the return +address using ftrace_graph_ret_addr(). + +When the traced function returns to return_to_handler, there is no longer a +return address on the stack, though the original return address is still stored +in the task's cur_ret_stack. Within ftrace_return_to_handler(), the original +return address is removed from cur_ret_stack and is transiently moved +arbitrarily by the compiler before being returned in rax. The return_to_handler +trampoline moves this into rdi before jumping to it. + +Architectures might not always be able to unwind such sequences, such as when +ftrace_return_to_handler() has removed the address from cur_ret_stack, and the +location of the return address cannot be reliably determined. + +It is recommended that architectures unwind cases where return_to_handler has +not yet been returned to, but architectures are not required to unwind from the +middle of return_to_handler and can report this as unreliable. Architectures +are not required to unwind from other trampolines which modify the return +address. + +4.5 Obscuring of return addresses +--------------------------------- + +Some trampolines do not rewrite the return address in order to intercept +returns, but do transiently clobber the return address or other unwind state. + +For example, the x86_64 implementation of optprobes patches the probed function +with a JMP instruction which targets the associated optprobe trampoline. When +the probe is hit, the CPU will branch to the optprobe trampoline, and the +address of the probed function is not held in any register or on the stack. + +Similarly, the arm64 implementation of DYNAMIC_FTRACE_WITH_REGS patches traced +functions with the following: + +.. code-block:: none + + MOV X9, X30 + BL + +The MOV saves the link register (X30) into X9 to preserve the return address +before the BL clobbers the link register and branches to the trampoline. At the +start of the trampoline, the address of the traced function is in X9 rather +than the link register as would usually be the case. + +Architectures must either ensure that unwinders either reliably unwind +such cases, or report the unwinding as unreliable. + +4.6 Link register unreliability +------------------------------- + +On some other architectures, 'call' instructions place the return address into a +link register, and 'return' instructions consume the return address from the +link register without modifying the register. On these architectures software +must save the return address to the stack prior to making a function call. Over +the duration of a function call, the return address may be held in the link +register alone, on the stack alone, or in both locations. + +Unwinders typically assume the link register is always live, but this +assumption can lead to unreliable stack traces. For example, consider the +following arm64 assembly for a simple function: + +.. code-block:: none + + function: + STP X29, X30, [SP, -16]! + MOV X29, SP + BL + LDP X29, X30, [SP], #16 + RET + +At entry to the function, the link register (x30) points to the caller, and the +frame pointer (X29) points to the caller's frame including the caller's return +address. The first two instructions create a new stackframe and update the +frame pointer, and at this point the link register and the frame pointer both +describe this function's return address. A trace at this point may describe +this function twice, and if the function return is being traced, the unwinder +may consume two entries from the fgraph return stack rather than one entry. + +The BL invokes 'other_function' with the link register pointing to this +function's LDR and the frame pointer pointing to this function's stackframe. +When 'other_function' returns, the link register is left pointing at the BL, +and so a trace at this point could result in 'function' appearing twice in the +backtrace. + +Similarly, a function may deliberately clobber the LR, e.g. + +.. code-block:: none + + caller: + STP X29, X30, [SP, -16]! + MOV X29, SP + ADR LR, + BLR LR + LDP X29, X30, [SP], #16 + RET + +The ADR places the address of 'callee' into the LR, before the BLR branches to +this address. If a trace is made immediately after the ADR, 'callee' will +appear to be the parent of 'caller', rather than the child. + +Due to cases such as the above, it may only be possible to reliably consume a +link register value at a function call boundary. Architectures where this is +the case must reject unwinding across exception boundaries unless they can +reliably identify when the LR or stack value should be used (e.g. using +metadata generated by objtool). From dead723e6f049e9fb6b05e5b93456982798ea961 Mon Sep 17 00:00:00 2001 From: "Isaac J. Manjarres" Date: Mon, 25 Jan 2021 13:52:25 -0800 Subject: [PATCH 162/414] iommu/arm-smmu-qcom: Fix mask extraction for bootloader programmed SMRs When extracting the mask for a SMR that was programmed by the bootloader, the SMR's valid bit is also extracted and is treated as part of the mask, which is not correct. Consider the scenario where an SMMU master whose context is determined by a bootloader programmed SMR is removed (omitting parts of device/driver core): ->iommu_release_device() -> arm_smmu_release_device() -> arm_smmu_master_free_smes() -> arm_smmu_free_sme() /* Assume that the SME is now free */ -> arm_smmu_write_sme() -> arm_smmu_write_smr() /* Construct SMR value using mask and SID */ Since the valid bit was considered as part of the mask, the SMR will be programmed as valid. Fix the SMR mask extraction step for bootloader programmed SMRs by masking out the valid bit when we know that we're already working with a valid SMR. Fixes: 07a7f2caaa5a ("iommu/arm-smmu-qcom: Read back stream mappings") Signed-off-by: Isaac J. Manjarres Cc: stable@vger.kernel.org Reviewed-by: Robin Murphy Link: https://lore.kernel.org/r/1611611545-19055-1-git-send-email-isaacm@codeaurora.org Signed-off-by: Will Deacon --- drivers/iommu/arm/arm-smmu/arm-smmu-qcom.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/iommu/arm/arm-smmu/arm-smmu-qcom.c b/drivers/iommu/arm/arm-smmu/arm-smmu-qcom.c index 3b6fdbb6c916..98b3a1c2a181 100644 --- a/drivers/iommu/arm/arm-smmu/arm-smmu-qcom.c +++ b/drivers/iommu/arm/arm-smmu/arm-smmu-qcom.c @@ -207,6 +207,8 @@ static int qcom_smmu_cfg_probe(struct arm_smmu_device *smmu) smr = arm_smmu_gr0_read(smmu, ARM_SMMU_GR0_SMR(i)); if (FIELD_GET(ARM_SMMU_SMR_VALID, smr)) { + /* Ignore valid bit for SMR mask extraction. */ + smr &= ~ARM_SMMU_SMR_VALID; smmu->smrs[i].id = FIELD_GET(ARM_SMMU_SMR_ID, smr); smmu->smrs[i].mask = FIELD_GET(ARM_SMMU_SMR_MASK, smr); smmu->smrs[i].valid = true; From 9cc0aaeb96e7f894d4735f069174948c1516fea7 Mon Sep 17 00:00:00 2001 From: John Garry Date: Wed, 6 Jan 2021 21:35:06 +0800 Subject: [PATCH 163/414] iova: Make has_iova_flush_queue() private Function has_iova_flush_queue() has no users outside iova.c, so make it private. Signed-off-by: John Garry Acked-by: Will Deacon Link: https://lore.kernel.org/r/1609940111-28563-2-git-send-email-john.garry@huawei.com Signed-off-by: Joerg Roedel --- drivers/iommu/iova.c | 2 +- include/linux/iova.h | 6 ------ 2 files changed, 1 insertion(+), 7 deletions(-) diff --git a/drivers/iommu/iova.c b/drivers/iommu/iova.c index d20b8b333d30..5f2caa95c1f0 100644 --- a/drivers/iommu/iova.c +++ b/drivers/iommu/iova.c @@ -55,7 +55,7 @@ init_iova_domain(struct iova_domain *iovad, unsigned long granule, } EXPORT_SYMBOL_GPL(init_iova_domain); -bool has_iova_flush_queue(struct iova_domain *iovad) +static bool has_iova_flush_queue(struct iova_domain *iovad) { return !!iovad->fq; } diff --git a/include/linux/iova.h b/include/linux/iova.h index 76e16ae20729..2b76e0be1c5b 100644 --- a/include/linux/iova.h +++ b/include/linux/iova.h @@ -153,7 +153,6 @@ struct iova *reserve_iova(struct iova_domain *iovad, unsigned long pfn_lo, void copy_reserved_iova(struct iova_domain *from, struct iova_domain *to); void init_iova_domain(struct iova_domain *iovad, unsigned long granule, unsigned long start_pfn); -bool has_iova_flush_queue(struct iova_domain *iovad); int init_iova_flush_queue(struct iova_domain *iovad, iova_flush_cb flush_cb, iova_entry_dtor entry_dtor); struct iova *find_iova(struct iova_domain *iovad, unsigned long pfn); @@ -223,11 +222,6 @@ static inline void init_iova_domain(struct iova_domain *iovad, { } -static inline bool has_iova_flush_queue(struct iova_domain *iovad) -{ - return false; -} - static inline int init_iova_flush_queue(struct iova_domain *iovad, iova_flush_cb flush_cb, iova_entry_dtor entry_dtor) From 622106190175dbac2b0b0ee7d4275c474e5fe051 Mon Sep 17 00:00:00 2001 From: John Garry Date: Wed, 6 Jan 2021 21:35:07 +0800 Subject: [PATCH 164/414] iova: Delete copy_reserved_iova() Since commit c588072bba6b ("iommu/vt-d: Convert intel iommu driver to the iommu ops"), function copy_reserved_iova() is not referenced, so delete it. Signed-off-by: John Garry Acked-by: Will Deacon Link: https://lore.kernel.org/r/1609940111-28563-3-git-send-email-john.garry@huawei.com Signed-off-by: Joerg Roedel --- drivers/iommu/iova.c | 30 ------------------------------ include/linux/iova.h | 6 ------ 2 files changed, 36 deletions(-) diff --git a/drivers/iommu/iova.c b/drivers/iommu/iova.c index 5f2caa95c1f0..9b114cd886d5 100644 --- a/drivers/iommu/iova.c +++ b/drivers/iommu/iova.c @@ -710,36 +710,6 @@ reserve_iova(struct iova_domain *iovad, } EXPORT_SYMBOL_GPL(reserve_iova); -/** - * copy_reserved_iova - copies the reserved between domains - * @from: - source domain from where to copy - * @to: - destination domin where to copy - * This function copies reserved iova's from one domain to - * other. - */ -void -copy_reserved_iova(struct iova_domain *from, struct iova_domain *to) -{ - unsigned long flags; - struct rb_node *node; - - spin_lock_irqsave(&from->iova_rbtree_lock, flags); - for (node = rb_first(&from->rbroot); node; node = rb_next(node)) { - struct iova *iova = rb_entry(node, struct iova, node); - struct iova *new_iova; - - if (iova->pfn_lo == IOVA_ANCHOR) - continue; - - new_iova = reserve_iova(to, iova->pfn_lo, iova->pfn_hi); - if (!new_iova) - pr_err("Reserve iova range %lx@%lx failed\n", - iova->pfn_lo, iova->pfn_lo); - } - spin_unlock_irqrestore(&from->iova_rbtree_lock, flags); -} -EXPORT_SYMBOL_GPL(copy_reserved_iova); - /* * Magazine caches for IOVA ranges. For an introduction to magazines, * see the USENIX 2001 paper "Magazines and Vmem: Extending the Slab diff --git a/include/linux/iova.h b/include/linux/iova.h index 2b76e0be1c5b..c834c01c0a5b 100644 --- a/include/linux/iova.h +++ b/include/linux/iova.h @@ -150,7 +150,6 @@ unsigned long alloc_iova_fast(struct iova_domain *iovad, unsigned long size, unsigned long limit_pfn, bool flush_rcache); struct iova *reserve_iova(struct iova_domain *iovad, unsigned long pfn_lo, unsigned long pfn_hi); -void copy_reserved_iova(struct iova_domain *from, struct iova_domain *to); void init_iova_domain(struct iova_domain *iovad, unsigned long granule, unsigned long start_pfn); int init_iova_flush_queue(struct iova_domain *iovad, @@ -211,11 +210,6 @@ static inline struct iova *reserve_iova(struct iova_domain *iovad, return NULL; } -static inline void copy_reserved_iova(struct iova_domain *from, - struct iova_domain *to) -{ -} - static inline void init_iova_domain(struct iova_domain *iovad, unsigned long granule, unsigned long start_pfn) From 2cf7dbff0a955f546a1d2c132b94f9d5b837b714 Mon Sep 17 00:00:00 2001 From: John Garry Date: Wed, 6 Jan 2021 21:35:08 +0800 Subject: [PATCH 165/414] iova: Stop exporting some more functions The following functions are not referenced outside dma-iommu.c (and iova.c), which can only be built-in: - init_iova_flush_queue() - free_iova_fast() - queue_iova() - alloc_iova_fast() So stop exporting them. Signed-off-by: John Garry Acked-by: Will Deacon Link: https://lore.kernel.org/r/1609940111-28563-4-git-send-email-john.garry@huawei.com Signed-off-by: Joerg Roedel --- drivers/iommu/iova.c | 3 --- 1 file changed, 3 deletions(-) diff --git a/drivers/iommu/iova.c b/drivers/iommu/iova.c index 9b114cd886d5..e6e2fa85271c 100644 --- a/drivers/iommu/iova.c +++ b/drivers/iommu/iova.c @@ -112,7 +112,6 @@ int init_iova_flush_queue(struct iova_domain *iovad, return 0; } -EXPORT_SYMBOL_GPL(init_iova_flush_queue); static struct rb_node * __get_cached_rbnode(struct iova_domain *iovad, unsigned long limit_pfn) @@ -451,7 +450,6 @@ alloc_iova_fast(struct iova_domain *iovad, unsigned long size, return new_iova->pfn_lo; } -EXPORT_SYMBOL_GPL(alloc_iova_fast); /** * free_iova_fast - free iova pfn range into rcache @@ -598,7 +596,6 @@ void queue_iova(struct iova_domain *iovad, mod_timer(&iovad->fq_timer, jiffies + msecs_to_jiffies(IOVA_FQ_TIMEOUT)); } -EXPORT_SYMBOL_GPL(queue_iova); /** * put_iova_domain - destroys the iova domain From b91910a83d041d87115068c773438575d8279534 Mon Sep 17 00:00:00 2001 From: John Garry Date: Wed, 6 Jan 2021 21:35:09 +0800 Subject: [PATCH 166/414] iommu: Stop exporting iommu_map_sg_atomic() Function iommu_map_sg_atomic() is only referenced in dma-iommu.c, which can only be built-in, so stop exporting. Signed-off-by: John Garry Acked-by: Will Deacon Link: https://lore.kernel.org/r/1609940111-28563-5-git-send-email-john.garry@huawei.com Signed-off-by: Joerg Roedel --- drivers/iommu/iommu.c | 1 - 1 file changed, 1 deletion(-) diff --git a/drivers/iommu/iommu.c b/drivers/iommu/iommu.c index ffeebda8d6de..4f4edb087ce6 100644 --- a/drivers/iommu/iommu.c +++ b/drivers/iommu/iommu.c @@ -2586,7 +2586,6 @@ size_t iommu_map_sg_atomic(struct iommu_domain *domain, unsigned long iova, { return __iommu_map_sg(domain, iova, sg, nents, prot, GFP_ATOMIC); } -EXPORT_SYMBOL_GPL(iommu_map_sg_atomic); int iommu_domain_window_enable(struct iommu_domain *domain, u32 wnd_nr, phys_addr_t paddr, u64 size, int prot) From ab0a7119ba67be9e377b195d2b9baa9fb8b3b53e Mon Sep 17 00:00:00 2001 From: John Garry Date: Wed, 6 Jan 2021 21:35:10 +0800 Subject: [PATCH 167/414] iommu: Delete iommu_domain_window_disable() Function iommu_domain_window_disable() is not referenced in the tree, so delete it. Signed-off-by: John Garry Acked-by: Will Deacon Link: https://lore.kernel.org/r/1609940111-28563-6-git-send-email-john.garry@huawei.com Signed-off-by: Joerg Roedel --- drivers/iommu/iommu.c | 9 --------- include/linux/iommu.h | 6 ------ 2 files changed, 15 deletions(-) diff --git a/drivers/iommu/iommu.c b/drivers/iommu/iommu.c index 4f4edb087ce6..20201ef97b8f 100644 --- a/drivers/iommu/iommu.c +++ b/drivers/iommu/iommu.c @@ -2598,15 +2598,6 @@ int iommu_domain_window_enable(struct iommu_domain *domain, u32 wnd_nr, } EXPORT_SYMBOL_GPL(iommu_domain_window_enable); -void iommu_domain_window_disable(struct iommu_domain *domain, u32 wnd_nr) -{ - if (unlikely(domain->ops->domain_window_disable == NULL)) - return; - - return domain->ops->domain_window_disable(domain, wnd_nr); -} -EXPORT_SYMBOL_GPL(iommu_domain_window_disable); - /** * report_iommu_fault() - report about an IOMMU fault to the IOMMU framework * @domain: the iommu domain where the fault has happened diff --git a/include/linux/iommu.h b/include/linux/iommu.h index b3f0e2018c62..72059fcfa108 100644 --- a/include/linux/iommu.h +++ b/include/linux/iommu.h @@ -514,7 +514,6 @@ extern int iommu_domain_set_attr(struct iommu_domain *domain, enum iommu_attr, extern int iommu_domain_window_enable(struct iommu_domain *domain, u32 wnd_nr, phys_addr_t offset, u64 size, int prot); -extern void iommu_domain_window_disable(struct iommu_domain *domain, u32 wnd_nr); extern int report_iommu_fault(struct iommu_domain *domain, struct device *dev, unsigned long iova, int flags); @@ -746,11 +745,6 @@ static inline int iommu_domain_window_enable(struct iommu_domain *domain, return -ENODEV; } -static inline void iommu_domain_window_disable(struct iommu_domain *domain, - u32 wnd_nr) -{ -} - static inline phys_addr_t iommu_iova_to_phys(struct iommu_domain *domain, dma_addr_t iova) { return 0; From 262948f8ba573dc9c61650df8f23eaea7d43bc61 Mon Sep 17 00:00:00 2001 From: John Garry Date: Wed, 6 Jan 2021 21:35:11 +0800 Subject: [PATCH 168/414] iommu: Delete iommu_dev_has_feature() Function iommu_dev_has_feature() has never been referenced in the tree, and there does not appear to be anything coming soon to use it, so delete it. Signed-off-by: John Garry Acked-by: Will Deacon Link: https://lore.kernel.org/r/1609940111-28563-7-git-send-email-john.garry@huawei.com Signed-off-by: Joerg Roedel --- drivers/iommu/iommu.c | 11 ----------- include/linux/iommu.h | 7 ------- 2 files changed, 18 deletions(-) diff --git a/drivers/iommu/iommu.c b/drivers/iommu/iommu.c index 20201ef97b8f..91f7871f5a37 100644 --- a/drivers/iommu/iommu.c +++ b/drivers/iommu/iommu.c @@ -2853,17 +2853,6 @@ EXPORT_SYMBOL_GPL(iommu_fwspec_add_ids); /* * Per device IOMMU features. */ -bool iommu_dev_has_feature(struct device *dev, enum iommu_dev_features feat) -{ - const struct iommu_ops *ops = dev->bus->iommu_ops; - - if (ops && ops->dev_has_feat) - return ops->dev_has_feat(dev, feat); - - return false; -} -EXPORT_SYMBOL_GPL(iommu_dev_has_feature); - int iommu_dev_enable_feature(struct device *dev, enum iommu_dev_features feat) { const struct iommu_ops *ops = dev->bus->iommu_ops; diff --git a/include/linux/iommu.h b/include/linux/iommu.h index 72059fcfa108..91d94c014f47 100644 --- a/include/linux/iommu.h +++ b/include/linux/iommu.h @@ -626,7 +626,6 @@ static inline void dev_iommu_priv_set(struct device *dev, void *priv) int iommu_probe_device(struct device *dev); void iommu_release_device(struct device *dev); -bool iommu_dev_has_feature(struct device *dev, enum iommu_dev_features f); int iommu_dev_enable_feature(struct device *dev, enum iommu_dev_features f); int iommu_dev_disable_feature(struct device *dev, enum iommu_dev_features f); bool iommu_dev_feature_enabled(struct device *dev, enum iommu_dev_features f); @@ -975,12 +974,6 @@ const struct iommu_ops *iommu_ops_from_fwnode(struct fwnode_handle *fwnode) return NULL; } -static inline bool -iommu_dev_has_feature(struct device *dev, enum iommu_dev_features feat) -{ - return false; -} - static inline bool iommu_dev_feature_enabled(struct device *dev, enum iommu_dev_features feat) { From f8993dc666f44e802c705d1053c4491981413f9e Mon Sep 17 00:00:00 2001 From: Adrian Huang Date: Thu, 10 Dec 2020 10:13:30 +0800 Subject: [PATCH 169/414] iommu/amd: Remove unnecessary assignment From: Adrian Huang The values of local variables are assigned after local variables are declared, so no need to assign the initial value during the variable declaration. And, no need to assign NULL for the local variable 'ivrs_base' after invoking acpi_put_table(). Signed-off-by: Adrian Huang Acked-by: Will Deacon Link: https://lore.kernel.org/r/20201210021330.2022-1-adrianhuang0701@gmail.com Signed-off-by: Joerg Roedel --- drivers/iommu/amd/init.c | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/drivers/iommu/amd/init.c b/drivers/iommu/amd/init.c index 6a1f7048dacc..f999d3424876 100644 --- a/drivers/iommu/amd/init.c +++ b/drivers/iommu/amd/init.c @@ -1911,7 +1911,7 @@ static void print_iommu_info(void) static int __init amd_iommu_init_pci(void) { struct amd_iommu *iommu; - int ret = 0; + int ret; for_each_iommu(iommu) { ret = iommu_init_pci(iommu); @@ -2637,8 +2637,8 @@ static void __init free_dma_resources(void) static int __init early_amd_iommu_init(void) { struct acpi_table_header *ivrs_base; + int i, remap_cache_sz, ret; acpi_status status; - int i, remap_cache_sz, ret = 0; u32 pci_id; if (!amd_iommu_detected) @@ -2780,7 +2780,6 @@ static int __init early_amd_iommu_init(void) out: /* Don't leak any ACPI memory */ acpi_put_table(ivrs_base); - ivrs_base = NULL; return ret; } From 106650f1e94527f15c0e99285b88ced84b47ab52 Mon Sep 17 00:00:00 2001 From: Zheng Yongjun Date: Mon, 28 Dec 2020 21:51:12 +0800 Subject: [PATCH 170/414] iommu/amd: Use DEFINE_SPINLOCK() for spinlock Spinlock can be initialized automatically with DEFINE_SPINLOCK() rather than explicitly calling spin_lock_init(). Signed-off-by: Zheng Yongjun Link: https://lore.kernel.org/r/20201228135112.28621-1-zhengyongjun3@huawei.com Signed-off-by: Joerg Roedel --- drivers/iommu/amd/iommu_v2.c | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/drivers/iommu/amd/iommu_v2.c b/drivers/iommu/amd/iommu_v2.c index 5ecc0bc608ec..f8d4ad421e07 100644 --- a/drivers/iommu/amd/iommu_v2.c +++ b/drivers/iommu/amd/iommu_v2.c @@ -77,7 +77,7 @@ struct fault { }; static LIST_HEAD(state_list); -static spinlock_t state_lock; +static DEFINE_SPINLOCK(state_lock); static struct workqueue_struct *iommu_wq; @@ -938,8 +938,6 @@ static int __init amd_iommu_v2_init(void) return 0; } - spin_lock_init(&state_lock); - ret = -ENOMEM; iommu_wq = alloc_workqueue("amd_iommu_v2", WQ_MEM_RECLAIM, 0); if (iommu_wq == NULL) From 3703c839e1cf23a3ccecebed73cbf3621238ac00 Mon Sep 17 00:00:00 2001 From: Tom Rix Date: Tue, 15 Dec 2020 13:30:21 -0800 Subject: [PATCH 171/414] iommu/amd: remove h from printk format specifier See Documentation/core-api/printk-formats.rst. commit cbacb5ab0aa0 ("docs: printk-formats: Stop encouraging use of unnecessary %h[xudi] and %hh[xudi]") Standard integer promotion is already done and %hx and %hhx is useless so do not encourage the use of %hh[xudi] or %h[xudi]. Signed-off-by: Tom Rix Link: https://lore.kernel.org/r/20201215213021.2090698-1-trix@redhat.com Signed-off-by: Joerg Roedel --- drivers/iommu/amd/init.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/iommu/amd/init.c b/drivers/iommu/amd/init.c index f999d3424876..1804c3e3b6ac 100644 --- a/drivers/iommu/amd/init.c +++ b/drivers/iommu/amd/init.c @@ -1883,7 +1883,7 @@ static void print_iommu_info(void) struct pci_dev *pdev = iommu->dev; int i; - pci_info(pdev, "Found IOMMU cap 0x%hx\n", iommu->cap_ptr); + pci_info(pdev, "Found IOMMU cap 0x%x\n", iommu->cap_ptr); if (iommu->cap & (1 << IOMMU_CAP_EFR)) { pci_info(pdev, "Extended features (%#llx):", From 8c112a6b3dc6e292625117255ab7bd049639f286 Mon Sep 17 00:00:00 2001 From: Suravee Suthikulpanit Date: Tue, 15 Dec 2020 01:36:53 -0600 Subject: [PATCH 172/414] iommu/amd: Re-define amd_iommu_domain_encode_pgtable as inline Move the function to header file to allow inclusion in other files. Signed-off-by: Suravee Suthikulpanit Link: https://lore.kernel.org/r/20201215073705.123786-2-suravee.suthikulpanit@amd.com Signed-off-by: Joerg Roedel --- drivers/iommu/amd/amd_iommu.h | 13 +++++++++++++ drivers/iommu/amd/iommu.c | 10 ---------- 2 files changed, 13 insertions(+), 10 deletions(-) diff --git a/drivers/iommu/amd/amd_iommu.h b/drivers/iommu/amd/amd_iommu.h index 6b8cbdf71714..0817bc732d1a 100644 --- a/drivers/iommu/amd/amd_iommu.h +++ b/drivers/iommu/amd/amd_iommu.h @@ -102,6 +102,19 @@ static inline void *iommu_phys_to_virt(unsigned long paddr) return phys_to_virt(__sme_clr(paddr)); } +static inline +void amd_iommu_domain_set_pt_root(struct protection_domain *domain, u64 root) +{ + atomic64_set(&domain->pt_root, root); +} + +static inline +void amd_iommu_domain_clr_pt_root(struct protection_domain *domain) +{ + amd_iommu_domain_set_pt_root(domain, 0); +} + + extern bool translation_pre_enabled(struct amd_iommu *iommu); extern bool amd_iommu_is_attach_deferred(struct iommu_domain *domain, struct device *dev); diff --git a/drivers/iommu/amd/iommu.c b/drivers/iommu/amd/iommu.c index f0adbc48fd17..3485f8b60d7a 100644 --- a/drivers/iommu/amd/iommu.c +++ b/drivers/iommu/amd/iommu.c @@ -160,16 +160,6 @@ static void amd_iommu_domain_get_pgtable(struct protection_domain *domain, pgtable->mode = pt_root & 7; /* lowest 3 bits encode pgtable mode */ } -static void amd_iommu_domain_set_pt_root(struct protection_domain *domain, u64 root) -{ - atomic64_set(&domain->pt_root, root); -} - -static void amd_iommu_domain_clr_pt_root(struct protection_domain *domain) -{ - amd_iommu_domain_set_pt_root(domain, 0); -} - static void amd_iommu_domain_set_pgtable(struct protection_domain *domain, u64 *root, int mode) { From d8c1df02ac7f2c802a9b2afc0f5c888c4217f1d5 Mon Sep 17 00:00:00 2001 From: Yong Wu Date: Thu, 7 Jan 2021 20:29:03 +0800 Subject: [PATCH 173/414] iommu: Move iotlb_sync_map out from __iommu_map In the end of __iommu_map, It alway call iotlb_sync_map. This patch moves iotlb_sync_map out from __iommu_map since it is unnecessary to call this for each sg segment especially iotlb_sync_map is flush tlb all currently. Add a little helper _iommu_map for this. Signed-off-by: Yong Wu Reviewed-by: Robin Murphy Acked-by: Will Deacon Link: https://lore.kernel.org/r/20210107122909.16317-2-yong.wu@mediatek.com Signed-off-by: Will Deacon --- drivers/iommu/iommu.c | 23 ++++++++++++++++++----- 1 file changed, 18 insertions(+), 5 deletions(-) diff --git a/drivers/iommu/iommu.c b/drivers/iommu/iommu.c index ffeebda8d6de..c304a6a30d42 100644 --- a/drivers/iommu/iommu.c +++ b/drivers/iommu/iommu.c @@ -2426,9 +2426,6 @@ static int __iommu_map(struct iommu_domain *domain, unsigned long iova, size -= pgsize; } - if (ops->iotlb_sync_map) - ops->iotlb_sync_map(domain); - /* unroll mapping in case something went wrong */ if (ret) iommu_unmap(domain, orig_iova, orig_size - size); @@ -2438,18 +2435,31 @@ static int __iommu_map(struct iommu_domain *domain, unsigned long iova, return ret; } +static int _iommu_map(struct iommu_domain *domain, unsigned long iova, + phys_addr_t paddr, size_t size, int prot, gfp_t gfp) +{ + const struct iommu_ops *ops = domain->ops; + int ret; + + ret = __iommu_map(domain, iova, paddr, size, prot, GFP_KERNEL); + if (ret == 0 && ops->iotlb_sync_map) + ops->iotlb_sync_map(domain); + + return ret; +} + int iommu_map(struct iommu_domain *domain, unsigned long iova, phys_addr_t paddr, size_t size, int prot) { might_sleep(); - return __iommu_map(domain, iova, paddr, size, prot, GFP_KERNEL); + return _iommu_map(domain, iova, paddr, size, prot, GFP_KERNEL); } EXPORT_SYMBOL_GPL(iommu_map); int iommu_map_atomic(struct iommu_domain *domain, unsigned long iova, phys_addr_t paddr, size_t size, int prot) { - return __iommu_map(domain, iova, paddr, size, prot, GFP_ATOMIC); + return _iommu_map(domain, iova, paddr, size, prot, GFP_ATOMIC); } EXPORT_SYMBOL_GPL(iommu_map_atomic); @@ -2533,6 +2543,7 @@ static size_t __iommu_map_sg(struct iommu_domain *domain, unsigned long iova, struct scatterlist *sg, unsigned int nents, int prot, gfp_t gfp) { + const struct iommu_ops *ops = domain->ops; size_t len = 0, mapped = 0; phys_addr_t start; unsigned int i = 0; @@ -2563,6 +2574,8 @@ static size_t __iommu_map_sg(struct iommu_domain *domain, unsigned long iova, sg = sg_next(sg); } + if (ops->iotlb_sync_map) + ops->iotlb_sync_map(domain); return mapped; out_err: From 2ebbd25873cef06f739489fd8ff9f707a3dfa2fa Mon Sep 17 00:00:00 2001 From: Yong Wu Date: Thu, 7 Jan 2021 20:29:04 +0800 Subject: [PATCH 174/414] iommu: Add iova and size as parameters in iotlb_sync_map iotlb_sync_map allow IOMMU drivers tlb sync after completing the whole mapping. This patch adds iova and size as the parameters in it. then the IOMMU driver could flush tlb with the whole range once after iova mapping to improve performance. Signed-off-by: Yong Wu Reviewed-by: Robin Murphy Acked-by: Will Deacon Link: https://lore.kernel.org/r/20210107122909.16317-3-yong.wu@mediatek.com Signed-off-by: Will Deacon --- drivers/iommu/iommu.c | 4 ++-- drivers/iommu/tegra-gart.c | 7 +++++-- include/linux/iommu.h | 3 ++- 3 files changed, 9 insertions(+), 5 deletions(-) diff --git a/drivers/iommu/iommu.c b/drivers/iommu/iommu.c index c304a6a30d42..3d099a31ddca 100644 --- a/drivers/iommu/iommu.c +++ b/drivers/iommu/iommu.c @@ -2443,7 +2443,7 @@ static int _iommu_map(struct iommu_domain *domain, unsigned long iova, ret = __iommu_map(domain, iova, paddr, size, prot, GFP_KERNEL); if (ret == 0 && ops->iotlb_sync_map) - ops->iotlb_sync_map(domain); + ops->iotlb_sync_map(domain, iova, size); return ret; } @@ -2575,7 +2575,7 @@ static size_t __iommu_map_sg(struct iommu_domain *domain, unsigned long iova, } if (ops->iotlb_sync_map) - ops->iotlb_sync_map(domain); + ops->iotlb_sync_map(domain, iova, mapped); return mapped; out_err: diff --git a/drivers/iommu/tegra-gart.c b/drivers/iommu/tegra-gart.c index fac720273889..05e8e19b8269 100644 --- a/drivers/iommu/tegra-gart.c +++ b/drivers/iommu/tegra-gart.c @@ -261,7 +261,8 @@ static int gart_iommu_of_xlate(struct device *dev, return 0; } -static void gart_iommu_sync_map(struct iommu_domain *domain) +static void gart_iommu_sync_map(struct iommu_domain *domain, unsigned long iova, + size_t size) { FLUSH_GART_REGS(gart_handle); } @@ -269,7 +270,9 @@ static void gart_iommu_sync_map(struct iommu_domain *domain) static void gart_iommu_sync(struct iommu_domain *domain, struct iommu_iotlb_gather *gather) { - gart_iommu_sync_map(domain); + size_t length = gather->end - gather->start; + + gart_iommu_sync_map(domain, gather->start, length); } static const struct iommu_ops gart_iommu_ops = { diff --git a/include/linux/iommu.h b/include/linux/iommu.h index b3f0e2018c62..9ce0aa9e236b 100644 --- a/include/linux/iommu.h +++ b/include/linux/iommu.h @@ -246,7 +246,8 @@ struct iommu_ops { size_t (*unmap)(struct iommu_domain *domain, unsigned long iova, size_t size, struct iommu_iotlb_gather *iotlb_gather); void (*flush_iotlb_all)(struct iommu_domain *domain); - void (*iotlb_sync_map)(struct iommu_domain *domain); + void (*iotlb_sync_map)(struct iommu_domain *domain, unsigned long iova, + size_t size); void (*iotlb_sync)(struct iommu_domain *domain, struct iommu_iotlb_gather *iotlb_gather); phys_addr_t (*iova_to_phys)(struct iommu_domain *domain, dma_addr_t iova); From 20143451eff044310520932fb372704c99658b33 Mon Sep 17 00:00:00 2001 From: Yong Wu Date: Thu, 7 Jan 2021 20:29:05 +0800 Subject: [PATCH 175/414] iommu/mediatek: Add iotlb_sync_map to sync whole the iova range Remove IO_PGTABLE_QUIRK_TLBI_ON_MAP to avoid tlb sync for each a small chunk memory, Use the new iotlb_sync_map to tlb_sync once for whole the iova range of iommu_map. Signed-off-by: Yong Wu Reviewed-by: Robin Murphy Acked-by: Will Deacon Link: https://lore.kernel.org/r/20210107122909.16317-4-yong.wu@mediatek.com Signed-off-by: Will Deacon --- drivers/iommu/mtk_iommu.c | 10 +++++++++- 1 file changed, 9 insertions(+), 1 deletion(-) diff --git a/drivers/iommu/mtk_iommu.c b/drivers/iommu/mtk_iommu.c index 8e56cec532e7..f579fc21971e 100644 --- a/drivers/iommu/mtk_iommu.c +++ b/drivers/iommu/mtk_iommu.c @@ -322,7 +322,6 @@ static int mtk_iommu_domain_finalise(struct mtk_iommu_domain *dom) dom->cfg = (struct io_pgtable_cfg) { .quirks = IO_PGTABLE_QUIRK_ARM_NS | IO_PGTABLE_QUIRK_NO_PERMS | - IO_PGTABLE_QUIRK_TLBI_ON_MAP | IO_PGTABLE_QUIRK_ARM_MTK_EXT, .pgsize_bitmap = mtk_iommu_ops.pgsize_bitmap, .ias = 32, @@ -453,6 +452,14 @@ static void mtk_iommu_iotlb_sync(struct iommu_domain *domain, data); } +static void mtk_iommu_sync_map(struct iommu_domain *domain, unsigned long iova, + size_t size) +{ + struct mtk_iommu_data *data = mtk_iommu_get_m4u_data(); + + mtk_iommu_tlb_flush_range_sync(iova, size, size, data); +} + static phys_addr_t mtk_iommu_iova_to_phys(struct iommu_domain *domain, dma_addr_t iova) { @@ -539,6 +546,7 @@ static const struct iommu_ops mtk_iommu_ops = { .unmap = mtk_iommu_unmap, .flush_iotlb_all = mtk_iommu_flush_iotlb_all, .iotlb_sync = mtk_iommu_iotlb_sync, + .iotlb_sync_map = mtk_iommu_sync_map, .iova_to_phys = mtk_iommu_iova_to_phys, .probe_device = mtk_iommu_probe_device, .release_device = mtk_iommu_release_device, From 862c3715de8f3e5350489240c951d697f04bd8c9 Mon Sep 17 00:00:00 2001 From: Yong Wu Date: Thu, 7 Jan 2021 20:29:06 +0800 Subject: [PATCH 176/414] iommu: Switch gather->end to the inclusive end Currently gather->end is "unsigned long" which may be overflow in arch32 in the corner case: 0xfff00000 + 0x100000(iova + size). Although it doesn't affect the size(end - start), it affects the checking "gather->end < end" This patch changes this "end" to the real end address (end = start + size - 1). Correspondingly, update the length to "end - start + 1". Fixes: a7d20dc19d9e ("iommu: Introduce struct iommu_iotlb_gather for batching TLB flushes") Signed-off-by: Yong Wu Reviewed-by: Robin Murphy Acked-by: Will Deacon Link: https://lore.kernel.org/r/20210107122909.16317-5-yong.wu@mediatek.com Signed-off-by: Will Deacon --- drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c | 2 +- drivers/iommu/mtk_iommu.c | 2 +- drivers/iommu/tegra-gart.c | 2 +- include/linux/iommu.h | 4 ++-- 4 files changed, 5 insertions(+), 5 deletions(-) diff --git a/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c index 8ca7415d785d..c70d6e79f534 100644 --- a/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c +++ b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c @@ -2280,7 +2280,7 @@ static void arm_smmu_iotlb_sync(struct iommu_domain *domain, { struct arm_smmu_domain *smmu_domain = to_smmu_domain(domain); - arm_smmu_tlb_inv_range(gather->start, gather->end - gather->start, + arm_smmu_tlb_inv_range(gather->start, gather->end - gather->start + 1, gather->pgsize, true, smmu_domain); } diff --git a/drivers/iommu/mtk_iommu.c b/drivers/iommu/mtk_iommu.c index f579fc21971e..66a00a2cb445 100644 --- a/drivers/iommu/mtk_iommu.c +++ b/drivers/iommu/mtk_iommu.c @@ -443,7 +443,7 @@ static void mtk_iommu_iotlb_sync(struct iommu_domain *domain, struct iommu_iotlb_gather *gather) { struct mtk_iommu_data *data = mtk_iommu_get_m4u_data(); - size_t length = gather->end - gather->start; + size_t length = gather->end - gather->start + 1; if (gather->start == ULONG_MAX) return; diff --git a/drivers/iommu/tegra-gart.c b/drivers/iommu/tegra-gart.c index 05e8e19b8269..6f130e51f072 100644 --- a/drivers/iommu/tegra-gart.c +++ b/drivers/iommu/tegra-gart.c @@ -270,7 +270,7 @@ static void gart_iommu_sync_map(struct iommu_domain *domain, unsigned long iova, static void gart_iommu_sync(struct iommu_domain *domain, struct iommu_iotlb_gather *gather) { - size_t length = gather->end - gather->start; + size_t length = gather->end - gather->start + 1; gart_iommu_sync_map(domain, gather->start, length); } diff --git a/include/linux/iommu.h b/include/linux/iommu.h index 9ce0aa9e236b..ae8eddd4621b 100644 --- a/include/linux/iommu.h +++ b/include/linux/iommu.h @@ -170,7 +170,7 @@ enum iommu_dev_features { * struct iommu_iotlb_gather - Range information for a pending IOTLB flush * * @start: IOVA representing the start of the range to be flushed - * @end: IOVA representing the end of the range to be flushed (exclusive) + * @end: IOVA representing the end of the range to be flushed (inclusive) * @pgsize: The interval at which to perform the flush * * This structure is intended to be updated by multiple calls to the @@ -539,7 +539,7 @@ static inline void iommu_iotlb_gather_add_page(struct iommu_domain *domain, struct iommu_iotlb_gather *gather, unsigned long iova, size_t size) { - unsigned long start = iova, end = start + size; + unsigned long start = iova, end = start + size - 1; /* * If the new page is disjoint from the current range or is mapped at From 77e0992aee4e980e8c553e512a5dfa3e704cf030 Mon Sep 17 00:00:00 2001 From: Yong Wu Date: Thu, 7 Jan 2021 20:29:07 +0800 Subject: [PATCH 177/414] iommu/io-pgtable: Allow io_pgtable_tlb ops optional This patch allows io_pgtable_tlb ops could be null since the IOMMU drivers may use the tlb ops from iommu framework. Signed-off-by: Yong Wu Reviewed-by: Robin Murphy Acked-by: Will Deacon Link: https://lore.kernel.org/r/20210107122909.16317-6-yong.wu@mediatek.com Signed-off-by: Will Deacon --- include/linux/io-pgtable.h | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/include/linux/io-pgtable.h b/include/linux/io-pgtable.h index ea727eb1a1a9..2a5686ca2ba3 100644 --- a/include/linux/io-pgtable.h +++ b/include/linux/io-pgtable.h @@ -214,14 +214,16 @@ struct io_pgtable_domain_attr { static inline void io_pgtable_tlb_flush_all(struct io_pgtable *iop) { - iop->cfg.tlb->tlb_flush_all(iop->cookie); + if (iop->cfg.tlb && iop->cfg.tlb->tlb_flush_all) + iop->cfg.tlb->tlb_flush_all(iop->cookie); } static inline void io_pgtable_tlb_flush_walk(struct io_pgtable *iop, unsigned long iova, size_t size, size_t granule) { - iop->cfg.tlb->tlb_flush_walk(iova, size, granule, iop->cookie); + if (iop->cfg.tlb && iop->cfg.tlb->tlb_flush_walk) + iop->cfg.tlb->tlb_flush_walk(iova, size, granule, iop->cookie); } static inline void @@ -229,7 +231,7 @@ io_pgtable_tlb_add_page(struct io_pgtable *iop, struct iommu_iotlb_gather * gather, unsigned long iova, size_t granule) { - if (iop->cfg.tlb->tlb_add_page) + if (iop->cfg.tlb && iop->cfg.tlb->tlb_add_page) iop->cfg.tlb->tlb_add_page(gather, iova, granule, iop->cookie); } From f21ae3b100840c1439fb326841f24641cf3de4a1 Mon Sep 17 00:00:00 2001 From: Yong Wu Date: Thu, 7 Jan 2021 20:29:08 +0800 Subject: [PATCH 178/414] iommu/mediatek: Gather iova in iommu_unmap to achieve tlb sync once In current iommu_unmap, this code is: iommu_iotlb_gather_init(&iotlb_gather); ret = __iommu_unmap(domain, iova, size, &iotlb_gather); iommu_iotlb_sync(domain, &iotlb_gather); We could gather the whole iova range in __iommu_unmap, and then do tlb synchronization in the iommu_iotlb_sync. This patch implement this, Gather the range in mtk_iommu_unmap. then iommu_iotlb_sync call tlb synchronization for the gathered iova range. we don't call iommu_iotlb_gather_add_page since our tlb synchronization could be regardless of granule size. In this way, gather->start is impossible ULONG_MAX, remove the checking. This patch aims to do tlb synchronization *once* in the iommu_unmap. Signed-off-by: Yong Wu Reviewed-by: Robin Murphy Acked-by: Will Deacon Link: https://lore.kernel.org/r/20210107122909.16317-7-yong.wu@mediatek.com Signed-off-by: Will Deacon --- drivers/iommu/mtk_iommu.c | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/drivers/iommu/mtk_iommu.c b/drivers/iommu/mtk_iommu.c index 66a00a2cb445..d3b8a1649093 100644 --- a/drivers/iommu/mtk_iommu.c +++ b/drivers/iommu/mtk_iommu.c @@ -430,7 +430,12 @@ static size_t mtk_iommu_unmap(struct iommu_domain *domain, struct iommu_iotlb_gather *gather) { struct mtk_iommu_domain *dom = to_mtk_domain(domain); + unsigned long end = iova + size - 1; + if (gather->start > iova) + gather->start = iova; + if (gather->end < end) + gather->end = end; return dom->iop->unmap(dom->iop, iova, size, gather); } @@ -445,9 +450,6 @@ static void mtk_iommu_iotlb_sync(struct iommu_domain *domain, struct mtk_iommu_data *data = mtk_iommu_get_m4u_data(); size_t length = gather->end - gather->start + 1; - if (gather->start == ULONG_MAX) - return; - mtk_iommu_tlb_flush_range_sync(gather->start, length, gather->pgsize, data); } From 0954d61a59e3c014e52b8d938bc12dc5a2e4949c Mon Sep 17 00:00:00 2001 From: Yong Wu Date: Thu, 7 Jan 2021 20:29:09 +0800 Subject: [PATCH 179/414] iommu/mediatek: Remove the tlb-ops for v7s Until now, we have already used the tlb operations from iommu framework, then the tlb operations for v7s can be removed. Correspondingly, Switch the paramenter "cookie" to the internal structure. Signed-off-by: Yong Wu Reviewed-by: Robin Murphy Acked-by: Will Deacon Link: https://lore.kernel.org/r/20210107122909.16317-8-yong.wu@mediatek.com Signed-off-by: Will Deacon --- drivers/iommu/mtk_iommu.c | 27 ++++----------------------- 1 file changed, 4 insertions(+), 23 deletions(-) diff --git a/drivers/iommu/mtk_iommu.c b/drivers/iommu/mtk_iommu.c index d3b8a1649093..86ab577c9520 100644 --- a/drivers/iommu/mtk_iommu.c +++ b/drivers/iommu/mtk_iommu.c @@ -182,10 +182,8 @@ static struct mtk_iommu_domain *to_mtk_domain(struct iommu_domain *dom) return container_of(dom, struct mtk_iommu_domain, domain); } -static void mtk_iommu_tlb_flush_all(void *cookie) +static void mtk_iommu_tlb_flush_all(struct mtk_iommu_data *data) { - struct mtk_iommu_data *data = cookie; - for_each_m4u(data) { writel_relaxed(F_INVLD_EN1 | F_INVLD_EN0, data->base + data->plat_data->inv_sel_reg); @@ -195,9 +193,9 @@ static void mtk_iommu_tlb_flush_all(void *cookie) } static void mtk_iommu_tlb_flush_range_sync(unsigned long iova, size_t size, - size_t granule, void *cookie) + size_t granule, + struct mtk_iommu_data *data) { - struct mtk_iommu_data *data = cookie; unsigned long flags; int ret; u32 tmp; @@ -219,7 +217,7 @@ static void mtk_iommu_tlb_flush_range_sync(unsigned long iova, size_t size, if (ret) { dev_warn(data->dev, "Partial TLB flush timed out, falling back to full flush\n"); - mtk_iommu_tlb_flush_all(cookie); + mtk_iommu_tlb_flush_all(data); } /* Clear the CPE status */ writel_relaxed(0, data->base + REG_MMU_CPE_DONE); @@ -227,22 +225,6 @@ static void mtk_iommu_tlb_flush_range_sync(unsigned long iova, size_t size, } } -static void mtk_iommu_tlb_flush_page_nosync(struct iommu_iotlb_gather *gather, - unsigned long iova, size_t granule, - void *cookie) -{ - struct mtk_iommu_data *data = cookie; - struct iommu_domain *domain = &data->m4u_dom->domain; - - iommu_iotlb_gather_add_page(domain, gather, iova, granule); -} - -static const struct iommu_flush_ops mtk_iommu_flush_ops = { - .tlb_flush_all = mtk_iommu_tlb_flush_all, - .tlb_flush_walk = mtk_iommu_tlb_flush_range_sync, - .tlb_add_page = mtk_iommu_tlb_flush_page_nosync, -}; - static irqreturn_t mtk_iommu_isr(int irq, void *dev_id) { struct mtk_iommu_data *data = dev_id; @@ -326,7 +308,6 @@ static int mtk_iommu_domain_finalise(struct mtk_iommu_domain *dom) .pgsize_bitmap = mtk_iommu_ops.pgsize_bitmap, .ias = 32, .oas = 34, - .tlb = &mtk_iommu_flush_ops, .iommu_dev = data->dev, }; From 9872f9bd9dbd68f75e8db782717d71e8594f6a02 Mon Sep 17 00:00:00 2001 From: Lu Baolu Date: Thu, 14 Jan 2021 16:50:21 +0800 Subject: [PATCH 180/414] iommu/vt-d: Consolidate duplicate cache invaliation code The pasid based IOTLB and devTLB invalidation code is duplicate in several places. Consolidate them by using the common helpers. Signed-off-by: Lu Baolu Link: https://lore.kernel.org/r/20210114085021.717041-1-baolu.lu@linux.intel.com Signed-off-by: Joerg Roedel --- drivers/iommu/intel/pasid.c | 18 ++----------- drivers/iommu/intel/svm.c | 53 ++++++------------------------------- 2 files changed, 10 insertions(+), 61 deletions(-) diff --git a/drivers/iommu/intel/pasid.c b/drivers/iommu/intel/pasid.c index b92af83b79bd..f26cb6195b2c 100644 --- a/drivers/iommu/intel/pasid.c +++ b/drivers/iommu/intel/pasid.c @@ -456,20 +456,6 @@ pasid_cache_invalidation_with_pasid(struct intel_iommu *iommu, qi_submit_sync(iommu, &desc, 1, 0); } -static void -iotlb_invalidation_with_pasid(struct intel_iommu *iommu, u16 did, u32 pasid) -{ - struct qi_desc desc; - - desc.qw0 = QI_EIOTLB_PASID(pasid) | QI_EIOTLB_DID(did) | - QI_EIOTLB_GRAN(QI_GRAN_NONG_PASID) | QI_EIOTLB_TYPE; - desc.qw1 = 0; - desc.qw2 = 0; - desc.qw3 = 0; - - qi_submit_sync(iommu, &desc, 1, 0); -} - static void devtlb_invalidation_with_pasid(struct intel_iommu *iommu, struct device *dev, u32 pasid) @@ -514,7 +500,7 @@ void intel_pasid_tear_down_entry(struct intel_iommu *iommu, struct device *dev, clflush_cache_range(pte, sizeof(*pte)); pasid_cache_invalidation_with_pasid(iommu, did, pasid); - iotlb_invalidation_with_pasid(iommu, did, pasid); + qi_flush_piotlb(iommu, did, pasid, 0, -1, 0); /* Device IOTLB doesn't need to be flushed in caching mode. */ if (!cap_caching_mode(iommu->cap)) @@ -530,7 +516,7 @@ static void pasid_flush_caches(struct intel_iommu *iommu, if (cap_caching_mode(iommu->cap)) { pasid_cache_invalidation_with_pasid(iommu, did, pasid); - iotlb_invalidation_with_pasid(iommu, did, pasid); + qi_flush_piotlb(iommu, did, pasid, 0, -1, 0); } else { iommu_flush_write_buffer(iommu); } diff --git a/drivers/iommu/intel/svm.c b/drivers/iommu/intel/svm.c index 18a9f05df407..033b25886e57 100644 --- a/drivers/iommu/intel/svm.c +++ b/drivers/iommu/intel/svm.c @@ -123,53 +123,16 @@ static void __flush_svm_range_dev(struct intel_svm *svm, unsigned long address, unsigned long pages, int ih) { - struct qi_desc desc; + struct device_domain_info *info = get_domain_info(sdev->dev); - if (pages == -1) { - desc.qw0 = QI_EIOTLB_PASID(svm->pasid) | - QI_EIOTLB_DID(sdev->did) | - QI_EIOTLB_GRAN(QI_GRAN_NONG_PASID) | - QI_EIOTLB_TYPE; - desc.qw1 = 0; - } else { - int mask = ilog2(__roundup_pow_of_two(pages)); + if (WARN_ON(!pages)) + return; - desc.qw0 = QI_EIOTLB_PASID(svm->pasid) | - QI_EIOTLB_DID(sdev->did) | - QI_EIOTLB_GRAN(QI_GRAN_PSI_PASID) | - QI_EIOTLB_TYPE; - desc.qw1 = QI_EIOTLB_ADDR(address) | - QI_EIOTLB_IH(ih) | - QI_EIOTLB_AM(mask); - } - desc.qw2 = 0; - desc.qw3 = 0; - qi_submit_sync(sdev->iommu, &desc, 1, 0); - - if (sdev->dev_iotlb) { - desc.qw0 = QI_DEV_EIOTLB_PASID(svm->pasid) | - QI_DEV_EIOTLB_SID(sdev->sid) | - QI_DEV_EIOTLB_QDEP(sdev->qdep) | - QI_DEIOTLB_TYPE; - if (pages == -1) { - desc.qw1 = QI_DEV_EIOTLB_ADDR(-1ULL >> 1) | - QI_DEV_EIOTLB_SIZE; - } else if (pages > 1) { - /* The least significant zero bit indicates the size. So, - * for example, an "address" value of 0x12345f000 will - * flush from 0x123440000 to 0x12347ffff (256KiB). */ - unsigned long last = address + ((unsigned long)(pages - 1) << VTD_PAGE_SHIFT); - unsigned long mask = __rounddown_pow_of_two(address ^ last); - - desc.qw1 = QI_DEV_EIOTLB_ADDR((address & ~mask) | - (mask - 1)) | QI_DEV_EIOTLB_SIZE; - } else { - desc.qw1 = QI_DEV_EIOTLB_ADDR(address); - } - desc.qw2 = 0; - desc.qw3 = 0; - qi_submit_sync(sdev->iommu, &desc, 1, 0); - } + qi_flush_piotlb(sdev->iommu, sdev->did, svm->pasid, address, pages, ih); + if (info->ats_enabled) + qi_flush_dev_iotlb_pasid(sdev->iommu, sdev->sid, info->pfsid, + svm->pasid, sdev->qdep, address, + order_base_2(pages)); } static void intel_flush_svm_range_dev(struct intel_svm *svm, From f2dd871799ba5d80f95f9bdbc0e60d390e1bcd22 Mon Sep 17 00:00:00 2001 From: Lu Baolu Date: Thu, 14 Jan 2021 17:04:00 +0800 Subject: [PATCH 181/414] iommu/vt-d: Add qi_submit trace event This adds a new trace event to track the submissions of requests to the invalidation queue. This event will provide the information like: - IOMMU name - Invalidation type - Descriptor raw data A sample output like: | qi_submit: iotlb_inv dmar1: 0x100e2 0x0 0x0 0x0 | qi_submit: dev_tlb_inv dmar1: 0x1000000003 0x7ffffffffffff001 0x0 0x0 | qi_submit: iotlb_inv dmar2: 0x800f2 0xf9a00005 0x0 0x0 This will be helpful for queued invalidation related debugging. Signed-off-by: Lu Baolu Link: https://lore.kernel.org/r/20210114090400.736104-1-baolu.lu@linux.intel.com Signed-off-by: Joerg Roedel --- drivers/iommu/intel/dmar.c | 3 +++ include/trace/events/intel_iommu.h | 37 ++++++++++++++++++++++++++++++ 2 files changed, 40 insertions(+) diff --git a/drivers/iommu/intel/dmar.c b/drivers/iommu/intel/dmar.c index 004feaed3c72..bd51f33642e0 100644 --- a/drivers/iommu/intel/dmar.c +++ b/drivers/iommu/intel/dmar.c @@ -31,6 +31,7 @@ #include #include #include +#include #include "../irq_remapping.h" @@ -1307,6 +1308,8 @@ int qi_submit_sync(struct intel_iommu *iommu, struct qi_desc *desc, offset = ((index + i) % QI_LENGTH) << shift; memcpy(qi->desc + offset, &desc[i], 1 << shift); qi->desc_status[(index + i) % QI_LENGTH] = QI_IN_USE; + trace_qi_submit(iommu, desc[i].qw0, desc[i].qw1, + desc[i].qw2, desc[i].qw3); } qi->desc_status[wait_index] = QI_IN_USE; diff --git a/include/trace/events/intel_iommu.h b/include/trace/events/intel_iommu.h index 112bd06487bf..aad2ff0c1e2e 100644 --- a/include/trace/events/intel_iommu.h +++ b/include/trace/events/intel_iommu.h @@ -135,6 +135,43 @@ DEFINE_EVENT(dma_map_sg, bounce_map_sg, struct scatterlist *sg), TP_ARGS(dev, index, total, sg) ); + +TRACE_EVENT(qi_submit, + TP_PROTO(struct intel_iommu *iommu, u64 qw0, u64 qw1, u64 qw2, u64 qw3), + + TP_ARGS(iommu, qw0, qw1, qw2, qw3), + + TP_STRUCT__entry( + __field(u64, qw0) + __field(u64, qw1) + __field(u64, qw2) + __field(u64, qw3) + __string(iommu, iommu->name) + ), + + TP_fast_assign( + __assign_str(iommu, iommu->name); + __entry->qw0 = qw0; + __entry->qw1 = qw1; + __entry->qw2 = qw2; + __entry->qw3 = qw3; + ), + + TP_printk("%s %s: 0x%llx 0x%llx 0x%llx 0x%llx", + __print_symbolic(__entry->qw0 & 0xf, + { QI_CC_TYPE, "cc_inv" }, + { QI_IOTLB_TYPE, "iotlb_inv" }, + { QI_DIOTLB_TYPE, "dev_tlb_inv" }, + { QI_IEC_TYPE, "iec_inv" }, + { QI_IWD_TYPE, "inv_wait" }, + { QI_EIOTLB_TYPE, "p_iotlb_inv" }, + { QI_PC_TYPE, "pc_inv" }, + { QI_DEIOTLB_TYPE, "p_dev_tlb_inv" }, + { QI_PGRP_RESP_TYPE, "page_grp_resp" }), + __get_str(iommu), + __entry->qw0, __entry->qw1, __entry->qw2, __entry->qw3 + ) +); #endif /* _TRACE_INTEL_IOMMU_H */ /* This part must be outside protection */ From a8ce9ebbecdfda3322bbcece6b3b25888217f8e3 Mon Sep 17 00:00:00 2001 From: Lu Baolu Date: Fri, 15 Jan 2021 08:42:02 +0800 Subject: [PATCH 182/414] iommu/vt-d: Preset Access/Dirty bits for IOVA over FL The Access/Dirty bits in the first level page table entry will be set whenever a page table entry was used for address translation or write permission was successfully translated. This is always true when using the first-level page table for kernel IOVA. Instead of wasting hardware cycles to update the certain bits, it's better to set them up at the beginning. Suggested-by: Ashok Raj Signed-off-by: Lu Baolu Link: https://lore.kernel.org/r/20210115004202.953965-1-baolu.lu@linux.intel.com Signed-off-by: Joerg Roedel --- drivers/iommu/intel/iommu.c | 14 ++++++++++++-- include/linux/intel-iommu.h | 2 ++ 2 files changed, 14 insertions(+), 2 deletions(-) diff --git a/drivers/iommu/intel/iommu.c b/drivers/iommu/intel/iommu.c index f665322a0991..be85af612bc1 100644 --- a/drivers/iommu/intel/iommu.c +++ b/drivers/iommu/intel/iommu.c @@ -1017,8 +1017,11 @@ static struct dma_pte *pfn_to_dma_pte(struct dmar_domain *domain, domain_flush_cache(domain, tmp_page, VTD_PAGE_SIZE); pteval = ((uint64_t)virt_to_dma_pfn(tmp_page) << VTD_PAGE_SHIFT) | DMA_PTE_READ | DMA_PTE_WRITE; - if (domain_use_first_level(domain)) + if (domain_use_first_level(domain)) { pteval |= DMA_FL_PTE_XD | DMA_FL_PTE_US; + if (domain->domain.type == IOMMU_DOMAIN_DMA) + pteval |= DMA_FL_PTE_ACCESS; + } if (cmpxchg64(&pte->val, 0ULL, pteval)) /* Someone else set it while we were thinking; use theirs. */ free_pgtable_page(tmp_page); @@ -2310,9 +2313,16 @@ __domain_mapping(struct dmar_domain *domain, unsigned long iov_pfn, return -EINVAL; attr = prot & (DMA_PTE_READ | DMA_PTE_WRITE | DMA_PTE_SNP); - if (domain_use_first_level(domain)) + if (domain_use_first_level(domain)) { attr |= DMA_FL_PTE_PRESENT | DMA_FL_PTE_XD | DMA_FL_PTE_US; + if (domain->domain.type == IOMMU_DOMAIN_DMA) { + attr |= DMA_FL_PTE_ACCESS; + if (prot & DMA_PTE_WRITE) + attr |= DMA_FL_PTE_DIRTY; + } + } + pteval = ((phys_addr_t)phys_pfn << VTD_PAGE_SHIFT) | attr; while (nr_pages > 0) { diff --git a/include/linux/intel-iommu.h b/include/linux/intel-iommu.h index 09c6a0bf3892..ecb35fdff03e 100644 --- a/include/linux/intel-iommu.h +++ b/include/linux/intel-iommu.h @@ -42,6 +42,8 @@ #define DMA_FL_PTE_PRESENT BIT_ULL(0) #define DMA_FL_PTE_US BIT_ULL(2) +#define DMA_FL_PTE_ACCESS BIT_ULL(5) +#define DMA_FL_PTE_DIRTY BIT_ULL(6) #define DMA_FL_PTE_XD BIT_ULL(63) #define ADDR_WIDTH_5LEVEL (57) From a8e8af35c9f4f75f981a95488c7066d31bac4bef Mon Sep 17 00:00:00 2001 From: Lianbo Jiang Date: Tue, 26 Jan 2021 19:53:36 +0800 Subject: [PATCH 183/414] dma-iommu: use static-key to minimize the impact in the fast-path Let's move out the is_kdump_kernel() check from iommu_dma_deferred_attach() to iommu_dma_init(), and use the static-key in the fast-path to minimize the impact in the normal case. Co-developed-by: Robin Murphy Signed-off-by: Lianbo Jiang Signed-off-by: Robin Murphy Link: https://lore.kernel.org/r/20210126115337.20068-2-lijiang@redhat.com Signed-off-by: Joerg Roedel --- drivers/iommu/dma-iommu.c | 17 +++++++++++------ 1 file changed, 11 insertions(+), 6 deletions(-) diff --git a/drivers/iommu/dma-iommu.c b/drivers/iommu/dma-iommu.c index 4078358ed66e..c80056f6c9f9 100644 --- a/drivers/iommu/dma-iommu.c +++ b/drivers/iommu/dma-iommu.c @@ -51,6 +51,8 @@ struct iommu_dma_cookie { struct iommu_domain *fq_domain; }; +static DEFINE_STATIC_KEY_FALSE(iommu_deferred_attach_enabled); + void iommu_dma_free_cpu_cached_iovas(unsigned int cpu, struct iommu_domain *domain) { @@ -383,9 +385,6 @@ static int iommu_dma_deferred_attach(struct device *dev, { const struct iommu_ops *ops = domain->ops; - if (!is_kdump_kernel()) - return 0; - if (unlikely(ops->is_attach_deferred && ops->is_attach_deferred(domain, dev))) return iommu_attach_device(domain, dev); @@ -535,7 +534,8 @@ static dma_addr_t __iommu_dma_map(struct device *dev, phys_addr_t phys, size_t iova_off = iova_offset(iovad, phys); dma_addr_t iova; - if (unlikely(iommu_dma_deferred_attach(dev, domain))) + if (static_branch_unlikely(&iommu_deferred_attach_enabled) && + iommu_dma_deferred_attach(dev, domain)) return DMA_MAPPING_ERROR; size = iova_align(iovad, size + iova_off); @@ -693,7 +693,8 @@ static void *iommu_dma_alloc_remap(struct device *dev, size_t size, *dma_handle = DMA_MAPPING_ERROR; - if (unlikely(iommu_dma_deferred_attach(dev, domain))) + if (static_branch_unlikely(&iommu_deferred_attach_enabled) && + iommu_dma_deferred_attach(dev, domain)) return NULL; min_size = alloc_sizes & -alloc_sizes; @@ -976,7 +977,8 @@ static int iommu_dma_map_sg(struct device *dev, struct scatterlist *sg, unsigned long mask = dma_get_seg_boundary(dev); int i; - if (unlikely(iommu_dma_deferred_attach(dev, domain))) + if (static_branch_unlikely(&iommu_deferred_attach_enabled) && + iommu_dma_deferred_attach(dev, domain)) return 0; if (!(attrs & DMA_ATTR_SKIP_CPU_SYNC)) @@ -1424,6 +1426,9 @@ void iommu_dma_compose_msi_msg(struct msi_desc *desc, static int iommu_dma_init(void) { + if (is_kdump_kernel()) + static_branch_enable(&iommu_deferred_attach_enabled); + return iova_cache_get(); } arch_initcall(iommu_dma_init); From 3ab657291638ea267654c3e4798161b2cee6ae01 Mon Sep 17 00:00:00 2001 From: Lianbo Jiang Date: Tue, 26 Jan 2021 19:53:37 +0800 Subject: [PATCH 184/414] iommu: use the __iommu_attach_device() directly for deferred attach Currently, because domain attach allows to be deferred from iommu driver to device driver, and when iommu initializes, the devices on the bus will be scanned and the default groups will be allocated. Due to the above changes, some devices could be added to the same group as below: [ 3.859417] pci 0000:01:00.0: Adding to iommu group 16 [ 3.864572] pci 0000:01:00.1: Adding to iommu group 16 [ 3.869738] pci 0000:02:00.0: Adding to iommu group 17 [ 3.874892] pci 0000:02:00.1: Adding to iommu group 17 But when attaching these devices, it doesn't allow that a group has more than one device, otherwise it will return an error. This conflicts with the deferred attaching. Unfortunately, it has two devices in the same group for my side, for example: [ 9.627014] iommu_group_device_count(): device name[0]:0000:01:00.0 [ 9.633545] iommu_group_device_count(): device name[1]:0000:01:00.1 ... [ 10.255609] iommu_group_device_count(): device name[0]:0000:02:00.0 [ 10.262144] iommu_group_device_count(): device name[1]:0000:02:00.1 Finally, which caused the failure of tg3 driver when tg3 driver calls the dma_alloc_coherent() to allocate coherent memory in the tg3_test_dma(). [ 9.660310] tg3 0000:01:00.0: DMA engine test failed, aborting [ 9.754085] tg3: probe of 0000:01:00.0 failed with error -12 [ 9.997512] tg3 0000:01:00.1: DMA engine test failed, aborting [ 10.043053] tg3: probe of 0000:01:00.1 failed with error -12 [ 10.288905] tg3 0000:02:00.0: DMA engine test failed, aborting [ 10.334070] tg3: probe of 0000:02:00.0 failed with error -12 [ 10.578303] tg3 0000:02:00.1: DMA engine test failed, aborting [ 10.622629] tg3: probe of 0000:02:00.1 failed with error -12 In addition, the similar situations also occur in other drivers such as the bnxt_en driver. That can be reproduced easily in kdump kernel when SME is active. Let's move the handling currently in iommu_dma_deferred_attach() into the iommu core code so that it can call the __iommu_attach_device() directly instead of the iommu_attach_device(). The external interface iommu_attach_device() is not suitable for handling this situation. Signed-off-by: Lianbo Jiang Reviewed-by: Robin Murphy Link: https://lore.kernel.org/r/20210126115337.20068-3-lijiang@redhat.com Signed-off-by: Joerg Roedel --- drivers/iommu/dma-iommu.c | 18 +++--------------- drivers/iommu/iommu.c | 10 ++++++++++ include/linux/iommu.h | 1 + 3 files changed, 14 insertions(+), 15 deletions(-) diff --git a/drivers/iommu/dma-iommu.c b/drivers/iommu/dma-iommu.c index c80056f6c9f9..f659395e7959 100644 --- a/drivers/iommu/dma-iommu.c +++ b/drivers/iommu/dma-iommu.c @@ -380,18 +380,6 @@ static int iommu_dma_init_domain(struct iommu_domain *domain, dma_addr_t base, return iova_reserve_iommu_regions(dev, domain); } -static int iommu_dma_deferred_attach(struct device *dev, - struct iommu_domain *domain) -{ - const struct iommu_ops *ops = domain->ops; - - if (unlikely(ops->is_attach_deferred && - ops->is_attach_deferred(domain, dev))) - return iommu_attach_device(domain, dev); - - return 0; -} - /** * dma_info_to_prot - Translate DMA API directions and attributes to IOMMU API * page flags. @@ -535,7 +523,7 @@ static dma_addr_t __iommu_dma_map(struct device *dev, phys_addr_t phys, dma_addr_t iova; if (static_branch_unlikely(&iommu_deferred_attach_enabled) && - iommu_dma_deferred_attach(dev, domain)) + iommu_deferred_attach(dev, domain)) return DMA_MAPPING_ERROR; size = iova_align(iovad, size + iova_off); @@ -694,7 +682,7 @@ static void *iommu_dma_alloc_remap(struct device *dev, size_t size, *dma_handle = DMA_MAPPING_ERROR; if (static_branch_unlikely(&iommu_deferred_attach_enabled) && - iommu_dma_deferred_attach(dev, domain)) + iommu_deferred_attach(dev, domain)) return NULL; min_size = alloc_sizes & -alloc_sizes; @@ -978,7 +966,7 @@ static int iommu_dma_map_sg(struct device *dev, struct scatterlist *sg, int i; if (static_branch_unlikely(&iommu_deferred_attach_enabled) && - iommu_dma_deferred_attach(dev, domain)) + iommu_deferred_attach(dev, domain)) return 0; if (!(attrs & DMA_ATTR_SKIP_CPU_SYNC)) diff --git a/drivers/iommu/iommu.c b/drivers/iommu/iommu.c index 91f7871f5a37..fd76e2f579fe 100644 --- a/drivers/iommu/iommu.c +++ b/drivers/iommu/iommu.c @@ -1980,6 +1980,16 @@ int iommu_attach_device(struct iommu_domain *domain, struct device *dev) } EXPORT_SYMBOL_GPL(iommu_attach_device); +int iommu_deferred_attach(struct device *dev, struct iommu_domain *domain) +{ + const struct iommu_ops *ops = domain->ops; + + if (ops->is_attach_deferred && ops->is_attach_deferred(domain, dev)) + return __iommu_attach_device(domain, dev); + + return 0; +} + /* * Check flags and other user provided data for valid combinations. We also * make sure no reserved fields or unused flags are set. This is to ensure diff --git a/include/linux/iommu.h b/include/linux/iommu.h index 91d94c014f47..524ffc2ff64f 100644 --- a/include/linux/iommu.h +++ b/include/linux/iommu.h @@ -376,6 +376,7 @@ int iommu_device_sysfs_add(struct iommu_device *iommu, void iommu_device_sysfs_remove(struct iommu_device *iommu); int iommu_device_link(struct iommu_device *iommu, struct device *link); void iommu_device_unlink(struct iommu_device *iommu, struct device *link); +int iommu_deferred_attach(struct device *dev, struct iommu_domain *domain); static inline void __iommu_device_set_ops(struct iommu_device *iommu, const struct iommu_ops *ops) From 0e0ab04b5bbe84b58097d6dabb3b01b93ce2bf77 Mon Sep 17 00:00:00 2001 From: Xi Wang Date: Sat, 23 Jan 2021 17:48:00 +0800 Subject: [PATCH 185/414] RDMA/hns: Refactor the MTR creation flow Split the hns_roce_mtr_create() into serval small functions, remove unused member in 'struct hns_roce_buf_attr' and delete unnecessary MTR page count check flow to make the MTR creation related codes clearer. Link: https://lore.kernel.org/r/1611395282-991-2-git-send-email-liweihang@huawei.com Signed-off-by: Xi Wang Signed-off-by: Weihang Li Signed-off-by: Jason Gunthorpe --- drivers/infiniband/hw/hns/hns_roce_cq.c | 1 - drivers/infiniband/hw/hns/hns_roce_device.h | 1 - drivers/infiniband/hw/hns/hns_roce_hw_v2.c | 1 - drivers/infiniband/hw/hns/hns_roce_mr.c | 291 +++++++++----------- drivers/infiniband/hw/hns/hns_roce_qp.c | 1 - drivers/infiniband/hw/hns/hns_roce_srq.c | 2 - 6 files changed, 125 insertions(+), 172 deletions(-) diff --git a/drivers/infiniband/hw/hns/hns_roce_cq.c b/drivers/infiniband/hw/hns/hns_roce_cq.c index ffb7f7e5c641..74fc4940b03a 100644 --- a/drivers/infiniband/hw/hns/hns_roce_cq.c +++ b/drivers/infiniband/hw/hns/hns_roce_cq.c @@ -206,7 +206,6 @@ static int alloc_cq_buf(struct hns_roce_dev *hr_dev, struct hns_roce_cq *hr_cq, buf_attr.region[0].size = hr_cq->cq_depth * hr_cq->cqe_size; buf_attr.region[0].hopnum = hr_dev->caps.cqe_hop_num; buf_attr.region_count = 1; - buf_attr.fixed_page = true; ret = hns_roce_mtr_create(hr_dev, &hr_cq->mtr, &buf_attr, hr_dev->caps.cqe_ba_pg_sz + HNS_HW_PAGE_SHIFT, diff --git a/drivers/infiniband/hw/hns/hns_roce_device.h b/drivers/infiniband/hw/hns/hns_roce_device.h index c46b330a8c0a..ffed82d472c4 100644 --- a/drivers/infiniband/hw/hns/hns_roce_device.h +++ b/drivers/infiniband/hw/hns/hns_roce_device.h @@ -335,7 +335,6 @@ struct hns_roce_buf_attr { } region[HNS_ROCE_MAX_BT_REGION]; unsigned int region_count; /* valid region count */ unsigned int page_shift; /* buffer page shift */ - bool fixed_page; /* decide page shift is fixed-size or maximum size */ unsigned int user_access; /* umem access flag */ bool mtt_only; /* only alloc buffer-required MTT memory */ }; diff --git a/drivers/infiniband/hw/hns/hns_roce_hw_v2.c b/drivers/infiniband/hw/hns/hns_roce_hw_v2.c index 4c068899c52b..110354baa465 100644 --- a/drivers/infiniband/hw/hns/hns_roce_hw_v2.c +++ b/drivers/infiniband/hw/hns/hns_roce_hw_v2.c @@ -5948,7 +5948,6 @@ static int alloc_eq_buf(struct hns_roce_dev *hr_dev, struct hns_roce_eq *eq) buf_attr.region[0].size = eq->entries * eq->eqe_size; buf_attr.region[0].hopnum = eq->hop_num; buf_attr.region_count = 1; - buf_attr.fixed_page = true; err = hns_roce_mtr_create(hr_dev, &eq->mtr, &buf_attr, hr_dev->caps.eqe_ba_pg_sz + diff --git a/drivers/infiniband/hw/hns/hns_roce_mr.c b/drivers/infiniband/hw/hns/hns_roce_mr.c index 1fbfa3a37545..45ceeab16a1a 100644 --- a/drivers/infiniband/hw/hns/hns_roce_mr.c +++ b/drivers/infiniband/hw/hns/hns_roce_mr.c @@ -124,7 +124,6 @@ static int alloc_mr_pbl(struct hns_roce_dev *hr_dev, struct hns_roce_mr *mr, buf_attr.region[0].size = length; buf_attr.region[0].hopnum = mr->pbl_hop_num; buf_attr.region_count = 1; - buf_attr.fixed_page = true; buf_attr.user_access = access; /* fast MR's buffer is alloced before mapping, not at creation */ buf_attr.mtt_only = is_fast; @@ -729,25 +728,15 @@ static void mtr_free_bufs(struct hns_roce_dev *hr_dev, struct hns_roce_mtr *mtr) } static int mtr_alloc_bufs(struct hns_roce_dev *hr_dev, struct hns_roce_mtr *mtr, - struct hns_roce_buf_attr *buf_attr, bool is_direct, + struct hns_roce_buf_attr *buf_attr, struct ib_udata *udata, unsigned long user_addr) { struct ib_device *ibdev = &hr_dev->ib_dev; - unsigned int best_pg_shift; - int all_pg_count = 0; size_t total_size; - int ret; total_size = mtr_bufs_size(buf_attr); - if (total_size < 1) { - ibdev_err(ibdev, "failed to check mtr size\n."); - return -EINVAL; - } if (udata) { - unsigned long pgsz_bitmap; - unsigned long page_size; - mtr->kmem = NULL; mtr->umem = ib_umem_get(ibdev, user_addr, total_size, buf_attr->user_access); @@ -756,76 +745,67 @@ static int mtr_alloc_bufs(struct hns_roce_dev *hr_dev, struct hns_roce_mtr *mtr, PTR_ERR(mtr->umem)); return -ENOMEM; } - if (buf_attr->fixed_page) - pgsz_bitmap = 1 << buf_attr->page_shift; - else - pgsz_bitmap = GENMASK(buf_attr->page_shift, PAGE_SHIFT); - - page_size = ib_umem_find_best_pgsz(mtr->umem, pgsz_bitmap, - user_addr); - if (!page_size) - return -EINVAL; - best_pg_shift = order_base_2(page_size); - all_pg_count = ib_umem_num_dma_blocks(mtr->umem, page_size); - ret = 0; } else { mtr->umem = NULL; - mtr->kmem = - hns_roce_buf_alloc(hr_dev, total_size, - buf_attr->page_shift, - is_direct ? HNS_ROCE_BUF_DIRECT : 0); + mtr->kmem = hns_roce_buf_alloc(hr_dev, total_size, + buf_attr->page_shift, + mtr->hem_cfg.is_direct ? + HNS_ROCE_BUF_DIRECT : 0); if (IS_ERR(mtr->kmem)) { ibdev_err(ibdev, "failed to alloc kmem, ret = %ld.\n", PTR_ERR(mtr->kmem)); return PTR_ERR(mtr->kmem); } - - best_pg_shift = buf_attr->page_shift; - all_pg_count = mtr->kmem->npages; } - /* must bigger than minimum hardware page shift */ - if (best_pg_shift < HNS_HW_PAGE_SHIFT || all_pg_count < 1) { - ret = -EINVAL; - ibdev_err(ibdev, - "failed to check mtr, page shift = %u count = %d.\n", - best_pg_shift, all_pg_count); - goto err_alloc_mem; - } - - mtr->hem_cfg.buf_pg_shift = best_pg_shift; - mtr->hem_cfg.buf_pg_count = all_pg_count; - return 0; -err_alloc_mem: - mtr_free_bufs(hr_dev, mtr); - return ret; } -static int mtr_get_pages(struct hns_roce_dev *hr_dev, struct hns_roce_mtr *mtr, - dma_addr_t *pages, int count, unsigned int page_shift) +static int mtr_map_bufs(struct hns_roce_dev *hr_dev, struct hns_roce_mtr *mtr, + int page_count, unsigned int page_shift) { struct ib_device *ibdev = &hr_dev->ib_dev; + dma_addr_t *pages; int npage; - int err; + int ret; + + /* alloc a tmp array to store buffer's dma address */ + pages = kvcalloc(page_count, sizeof(dma_addr_t), GFP_KERNEL); + if (!pages) + return -ENOMEM; if (mtr->umem) - npage = hns_roce_get_umem_bufs(hr_dev, pages, count, 0, + npage = hns_roce_get_umem_bufs(hr_dev, pages, page_count, 0, mtr->umem, page_shift); else - npage = hns_roce_get_kmem_bufs(hr_dev, pages, count, 0, + npage = hns_roce_get_kmem_bufs(hr_dev, pages, page_count, 0, mtr->kmem); + if (npage != page_count) { + ibdev_err(ibdev, "failed to get mtr page %d != %d.\n", npage, + page_count); + ret = -ENOBUFS; + goto err_alloc_list; + } + if (mtr->hem_cfg.is_direct && npage > 1) { - err = mtr_check_direct_pages(pages, npage, page_shift); - if (err) { - ibdev_err(ibdev, "Failed to check %s direct page-%d\n", - mtr->umem ? "user" : "kernel", err); - npage = err; + ret = mtr_check_direct_pages(pages, npage, page_shift); + if (ret) { + ibdev_err(ibdev, "failed to check %s mtr, idx = %d.\n", + mtr->umem ? "user" : "kernel", ret); + ret = -ENOBUFS; + goto err_alloc_list; } } - return npage; + ret = hns_roce_mtr_map(hr_dev, mtr, pages, page_count); + if (ret) + ibdev_err(ibdev, "failed to map mtr pages, ret = %d.\n", ret); + +err_alloc_list: + kvfree(pages); + + return ret; } int hns_roce_mtr_map(struct hns_roce_dev *hr_dev, struct hns_roce_mtr *mtr, @@ -928,65 +908,88 @@ int hns_roce_mtr_find(struct hns_roce_dev *hr_dev, struct hns_roce_mtr *mtr, static int mtr_init_buf_cfg(struct hns_roce_dev *hr_dev, struct hns_roce_buf_attr *attr, struct hns_roce_hem_cfg *cfg, - unsigned int *buf_page_shift) + unsigned int *buf_page_shift, int unalinged_size) { struct hns_roce_buf_region *r; + int first_region_padding; + int page_cnt, region_cnt; unsigned int page_shift; - int page_cnt = 0; size_t buf_size; - int region_cnt; + /* If mtt is disabled, all pages must be within a continuous range */ + cfg->is_direct = !mtr_has_mtt(attr); + buf_size = mtr_bufs_size(attr); if (cfg->is_direct) { - buf_size = cfg->buf_pg_count << cfg->buf_pg_shift; - page_cnt = DIV_ROUND_UP(buf_size, HNS_HW_PAGE_SIZE); - /* - * When HEM buffer use level-0 addressing, the page size equals - * the buffer size, and the the page size = 4K * 2^N. + /* When HEM buffer uses 0-level addressing, the page size is + * equal to the whole buffer size, and we split the buffer into + * small pages which is used to check whether the adjacent + * units are in the continuous space and its size is fixed to + * 4K based on hns ROCEE's requirement. */ - cfg->buf_pg_shift = HNS_HW_PAGE_SHIFT + order_base_2(page_cnt); - if (attr->region_count > 1) { - cfg->buf_pg_count = page_cnt; - page_shift = HNS_HW_PAGE_SHIFT; - } else { - cfg->buf_pg_count = 1; - page_shift = cfg->buf_pg_shift; - if (buf_size != 1 << page_shift) { - ibdev_err(&hr_dev->ib_dev, - "failed to check direct size %zu shift %d.\n", - buf_size, page_shift); - return -EINVAL; - } - } + page_shift = HNS_HW_PAGE_SHIFT; + + /* The ROCEE requires the page size to be 4K * 2 ^ N. */ + cfg->buf_pg_count = 1; + cfg->buf_pg_shift = HNS_HW_PAGE_SHIFT + + order_base_2(DIV_ROUND_UP(buf_size, HNS_HW_PAGE_SIZE)); + first_region_padding = 0; } else { - page_shift = cfg->buf_pg_shift; + page_shift = attr->page_shift; + cfg->buf_pg_count = DIV_ROUND_UP(buf_size + unalinged_size, + 1 << page_shift); + cfg->buf_pg_shift = page_shift; + first_region_padding = unalinged_size; } - /* convert buffer size to page index and page count */ - for (page_cnt = 0, region_cnt = 0; page_cnt < cfg->buf_pg_count && - region_cnt < attr->region_count && + /* Convert buffer size to page index and page count for each region and + * the buffer's offset needs to be appended to the first region. + */ + for (page_cnt = 0, region_cnt = 0; region_cnt < attr->region_count && region_cnt < ARRAY_SIZE(cfg->region); region_cnt++) { r = &cfg->region[region_cnt]; r->offset = page_cnt; - buf_size = hr_hw_page_align(attr->region[region_cnt].size); + buf_size = hr_hw_page_align(attr->region[region_cnt].size + + first_region_padding); r->count = DIV_ROUND_UP(buf_size, 1 << page_shift); + first_region_padding = 0; page_cnt += r->count; r->hopnum = to_hr_hem_hopnum(attr->region[region_cnt].hopnum, r->count); } - if (region_cnt < 1) { - ibdev_err(&hr_dev->ib_dev, - "failed to check mtr region count, pages = %d.\n", - cfg->buf_pg_count); - return -ENOBUFS; - } - cfg->region_count = region_cnt; *buf_page_shift = page_shift; return page_cnt; } +static int mtr_alloc_mtt(struct hns_roce_dev *hr_dev, struct hns_roce_mtr *mtr, + unsigned int ba_page_shift) +{ + struct hns_roce_hem_cfg *cfg = &mtr->hem_cfg; + int ret; + + hns_roce_hem_list_init(&mtr->hem_list); + if (!cfg->is_direct) { + ret = hns_roce_hem_list_request(hr_dev, &mtr->hem_list, + cfg->region, cfg->region_count, + ba_page_shift); + if (ret) + return ret; + cfg->root_ba = mtr->hem_list.root_ba; + cfg->ba_pg_shift = ba_page_shift; + } else { + cfg->ba_pg_shift = cfg->buf_pg_shift; + } + + return 0; +} + +static void mtr_free_mtt(struct hns_roce_dev *hr_dev, struct hns_roce_mtr *mtr) +{ + hns_roce_hem_list_release(hr_dev, &mtr->hem_list); +} + /** * hns_roce_mtr_create - Create hns memory translate region. * @@ -1002,95 +1005,51 @@ int hns_roce_mtr_create(struct hns_roce_dev *hr_dev, struct hns_roce_mtr *mtr, unsigned int ba_page_shift, struct ib_udata *udata, unsigned long user_addr) { - struct hns_roce_hem_cfg *cfg = &mtr->hem_cfg; struct ib_device *ibdev = &hr_dev->ib_dev; unsigned int buf_page_shift = 0; - dma_addr_t *pages = NULL; - int all_pg_cnt; - int get_pg_cnt; - int ret = 0; + int buf_page_cnt; + int ret; - /* if disable mtt, all pages must in a continuous address range */ - cfg->is_direct = !mtr_has_mtt(buf_attr); + buf_page_cnt = mtr_init_buf_cfg(hr_dev, buf_attr, &mtr->hem_cfg, + &buf_page_shift, + udata ? user_addr & ~PAGE_MASK : 0); + if (buf_page_cnt < 1 || buf_page_shift < HNS_HW_PAGE_SHIFT) { + ibdev_err(ibdev, "failed to init mtr cfg, count %d shift %d.\n", + buf_page_cnt, buf_page_shift); + return -EINVAL; + } - /* if buffer only need mtt, just init the hem cfg */ + ret = mtr_alloc_mtt(hr_dev, mtr, ba_page_shift); + if (ret) { + ibdev_err(ibdev, "failed to alloc mtr mtt, ret = %d.\n", ret); + return ret; + } + + /* The caller has its own buffer list and invokes the hns_roce_mtr_map() + * to finish the MTT configuration. + */ if (buf_attr->mtt_only) { - cfg->buf_pg_shift = buf_attr->page_shift; - cfg->buf_pg_count = mtr_bufs_size(buf_attr) >> - buf_attr->page_shift; mtr->umem = NULL; mtr->kmem = NULL; - } else { - ret = mtr_alloc_bufs(hr_dev, mtr, buf_attr, cfg->is_direct, - udata, user_addr); - if (ret) { - ibdev_err(ibdev, - "failed to alloc mtr bufs, ret = %d.\n", ret); - return ret; - } + return 0; } - all_pg_cnt = mtr_init_buf_cfg(hr_dev, buf_attr, cfg, &buf_page_shift); - if (all_pg_cnt < 1) { - ret = -ENOBUFS; - ibdev_err(ibdev, "failed to init mtr buf cfg.\n"); - goto err_alloc_bufs; + ret = mtr_alloc_bufs(hr_dev, mtr, buf_attr, udata, user_addr); + if (ret) { + ibdev_err(ibdev, "failed to alloc mtr bufs, ret = %d.\n", ret); + goto err_alloc_mtt; } - hns_roce_hem_list_init(&mtr->hem_list); - if (!cfg->is_direct) { - ret = hns_roce_hem_list_request(hr_dev, &mtr->hem_list, - cfg->region, cfg->region_count, - ba_page_shift); - if (ret) { - ibdev_err(ibdev, "failed to request mtr hem, ret = %d.\n", - ret); - goto err_alloc_bufs; - } - cfg->root_ba = mtr->hem_list.root_ba; - cfg->ba_pg_shift = ba_page_shift; - } else { - cfg->ba_pg_shift = cfg->buf_pg_shift; - } - - /* no buffer to map */ - if (buf_attr->mtt_only) + /* Write buffer's dma address to MTT */ + ret = mtr_map_bufs(hr_dev, mtr, buf_page_cnt, buf_page_shift); + if (ret) + ibdev_err(ibdev, "failed to map mtr bufs, ret = %d.\n", ret); + else return 0; - /* alloc a tmp array to store buffer's dma address */ - pages = kvcalloc(all_pg_cnt, sizeof(dma_addr_t), GFP_KERNEL); - if (!pages) { - ret = -ENOMEM; - ibdev_err(ibdev, "failed to alloc mtr page list %d.\n", - all_pg_cnt); - goto err_alloc_hem_list; - } - - get_pg_cnt = mtr_get_pages(hr_dev, mtr, pages, all_pg_cnt, - buf_page_shift); - if (get_pg_cnt != all_pg_cnt) { - ibdev_err(ibdev, "failed to get mtr page %d != %d.\n", - get_pg_cnt, all_pg_cnt); - ret = -ENOBUFS; - goto err_alloc_page_list; - } - - /* write buffer's dma address to BA table */ - ret = hns_roce_mtr_map(hr_dev, mtr, pages, all_pg_cnt); - if (ret) { - ibdev_err(ibdev, "failed to map mtr pages, ret = %d.\n", ret); - goto err_alloc_page_list; - } - - /* drop tmp array */ - kvfree(pages); - return 0; -err_alloc_page_list: - kvfree(pages); -err_alloc_hem_list: - hns_roce_hem_list_release(hr_dev, &mtr->hem_list); -err_alloc_bufs: mtr_free_bufs(hr_dev, mtr); +err_alloc_mtt: + mtr_free_mtt(hr_dev, mtr); return ret; } diff --git a/drivers/infiniband/hw/hns/hns_roce_qp.c b/drivers/infiniband/hw/hns/hns_roce_qp.c index d8e2fe5558d2..9988ca9bd405 100644 --- a/drivers/infiniband/hw/hns/hns_roce_qp.c +++ b/drivers/infiniband/hw/hns/hns_roce_qp.c @@ -599,7 +599,6 @@ static int set_wqe_buf_attr(struct hns_roce_dev *hr_dev, return -EINVAL; buf_attr->page_shift = HNS_HW_PAGE_SHIFT + hr_dev->caps.mtt_buf_pg_sz; - buf_attr->fixed_page = true; buf_attr->region_count = idx; return 0; diff --git a/drivers/infiniband/hw/hns/hns_roce_srq.c b/drivers/infiniband/hw/hns/hns_roce_srq.c index c4ae57e4173a..94038280a3ec 100644 --- a/drivers/infiniband/hw/hns/hns_roce_srq.c +++ b/drivers/infiniband/hw/hns/hns_roce_srq.c @@ -194,7 +194,6 @@ static int alloc_srq_buf(struct hns_roce_dev *hr_dev, struct hns_roce_srq *srq, srq->wqe_shift); buf_attr.region[0].hopnum = hr_dev->caps.srqwqe_hop_num; buf_attr.region_count = 1; - buf_attr.fixed_page = true; err = hns_roce_mtr_create(hr_dev, &srq->buf_mtr, &buf_attr, hr_dev->caps.srqwqe_ba_pg_sz + @@ -226,7 +225,6 @@ static int alloc_srq_idx(struct hns_roce_dev *hr_dev, struct hns_roce_srq *srq, srq->idx_que.entry_shift); buf_attr.region[0].hopnum = hr_dev->caps.idx_hop_num; buf_attr.region_count = 1; - buf_attr.fixed_page = true; err = hns_roce_mtr_create(hr_dev, &idx_que->mtr, &buf_attr, hr_dev->caps.idx_ba_pg_sz + HNS_HW_PAGE_SHIFT, From 4e9fc1dae2a96e7df9b923c6de76527da9c18dda Mon Sep 17 00:00:00 2001 From: Lang Cheng Date: Sat, 23 Jan 2021 17:48:01 +0800 Subject: [PATCH 186/414] RDMA/hns: Optimize the MR registration process When creating or re-registering an MR, storing the PDN, access flag and IOVA information ASAP can simplify the number of parameters passed into the subsequent process. Link: https://lore.kernel.org/r/1611395282-991-3-git-send-email-liweihang@huawei.com Reported-by: kernel test robot Signed-off-by: Lang Cheng Signed-off-by: Weihang Li Signed-off-by: Jason Gunthorpe --- drivers/infiniband/hw/hns/hns_roce_device.h | 4 +- drivers/infiniband/hw/hns/hns_roce_hw_v2.c | 22 ++-- drivers/infiniband/hw/hns/hns_roce_mr.c | 110 +++++++------------- 3 files changed, 47 insertions(+), 89 deletions(-) diff --git a/drivers/infiniband/hw/hns/hns_roce_device.h b/drivers/infiniband/hw/hns/hns_roce_device.h index ffed82d472c4..f62851faf9cc 100644 --- a/drivers/infiniband/hw/hns/hns_roce_device.h +++ b/drivers/infiniband/hw/hns/hns_roce_device.h @@ -914,8 +914,7 @@ struct hns_roce_hw { int (*write_mtpt)(struct hns_roce_dev *hr_dev, void *mb_buf, struct hns_roce_mr *mr, unsigned long mtpt_idx); int (*rereg_write_mtpt)(struct hns_roce_dev *hr_dev, - struct hns_roce_mr *mr, int flags, u32 pdn, - int mr_access_flags, u64 iova, u64 size, + struct hns_roce_mr *mr, int flags, void *mb_buf); int (*frmr_write_mtpt)(struct hns_roce_dev *hr_dev, void *mb_buf, struct hns_roce_mr *mr); @@ -1284,7 +1283,6 @@ u8 hns_get_gid_index(struct hns_roce_dev *hr_dev, u8 port, int gid_index); void hns_roce_handle_device_err(struct hns_roce_dev *hr_dev); int hns_roce_init(struct hns_roce_dev *hr_dev); void hns_roce_exit(struct hns_roce_dev *hr_dev); - int hns_roce_fill_res_cq_entry(struct sk_buff *msg, struct ib_cq *ib_cq); #endif /* _HNS_ROCE_DEVICE_H */ diff --git a/drivers/infiniband/hw/hns/hns_roce_hw_v2.c b/drivers/infiniband/hw/hns/hns_roce_hw_v2.c index 110354baa465..16ef6316ccb2 100644 --- a/drivers/infiniband/hw/hns/hns_roce_hw_v2.c +++ b/drivers/infiniband/hw/hns/hns_roce_hw_v2.c @@ -2927,20 +2927,17 @@ static int hns_roce_v2_write_mtpt(struct hns_roce_dev *hr_dev, static int hns_roce_v2_rereg_write_mtpt(struct hns_roce_dev *hr_dev, struct hns_roce_mr *mr, int flags, - u32 pdn, int mr_access_flags, u64 iova, - u64 size, void *mb_buf) + void *mb_buf) { struct hns_roce_v2_mpt_entry *mpt_entry = mb_buf; + u32 mr_access_flags = mr->access; int ret = 0; roce_set_field(mpt_entry->byte_4_pd_hop_st, V2_MPT_BYTE_4_MPT_ST_M, V2_MPT_BYTE_4_MPT_ST_S, V2_MPT_ST_VALID); - if (flags & IB_MR_REREG_PD) { - roce_set_field(mpt_entry->byte_4_pd_hop_st, V2_MPT_BYTE_4_PD_M, - V2_MPT_BYTE_4_PD_S, pdn); - mr->pd = pdn; - } + roce_set_field(mpt_entry->byte_4_pd_hop_st, V2_MPT_BYTE_4_PD_M, + V2_MPT_BYTE_4_PD_S, mr->pd); if (flags & IB_MR_REREG_ACCESS) { roce_set_bit(mpt_entry->byte_8_mw_cnt_en, @@ -2958,13 +2955,10 @@ static int hns_roce_v2_rereg_write_mtpt(struct hns_roce_dev *hr_dev, } if (flags & IB_MR_REREG_TRANS) { - mpt_entry->va_l = cpu_to_le32(lower_32_bits(iova)); - mpt_entry->va_h = cpu_to_le32(upper_32_bits(iova)); - mpt_entry->len_l = cpu_to_le32(lower_32_bits(size)); - mpt_entry->len_h = cpu_to_le32(upper_32_bits(size)); - - mr->iova = iova; - mr->size = size; + mpt_entry->va_l = cpu_to_le32(lower_32_bits(mr->iova)); + mpt_entry->va_h = cpu_to_le32(upper_32_bits(mr->iova)); + mpt_entry->len_l = cpu_to_le32(lower_32_bits(mr->size)); + mpt_entry->len_h = cpu_to_le32(upper_32_bits(mr->size)); ret = set_mtpt_pbl(hr_dev, mpt_entry, mr); } diff --git a/drivers/infiniband/hw/hns/hns_roce_mr.c b/drivers/infiniband/hw/hns/hns_roce_mr.c index 45ceeab16a1a..5a2a557c37b8 100644 --- a/drivers/infiniband/hw/hns/hns_roce_mr.c +++ b/drivers/infiniband/hw/hns/hns_roce_mr.c @@ -66,8 +66,7 @@ int hns_roce_hw_destroy_mpt(struct hns_roce_dev *hr_dev, HNS_ROCE_CMD_TIMEOUT_MSECS); } -static int alloc_mr_key(struct hns_roce_dev *hr_dev, struct hns_roce_mr *mr, - u32 pd, u64 iova, u64 size, u32 access) +static int alloc_mr_key(struct hns_roce_dev *hr_dev, struct hns_roce_mr *mr) { struct ib_device *ibdev = &hr_dev->ib_dev; unsigned long obj = 0; @@ -82,11 +81,6 @@ static int alloc_mr_key(struct hns_roce_dev *hr_dev, struct hns_roce_mr *mr, return -ENOMEM; } - mr->iova = iova; /* MR va starting addr */ - mr->size = size; /* MR addr range */ - mr->pd = pd; /* MR num */ - mr->access = access; /* MR access permit */ - mr->enabled = 0; /* MR active status */ mr->key = hw_index_to_key(obj); /* MR key */ err = hns_roce_table_get(hr_dev, &hr_dev->mr_table.mtpt_table, obj); @@ -110,8 +104,7 @@ static void free_mr_key(struct hns_roce_dev *hr_dev, struct hns_roce_mr *mr) } static int alloc_mr_pbl(struct hns_roce_dev *hr_dev, struct hns_roce_mr *mr, - size_t length, struct ib_udata *udata, u64 start, - int access) + struct ib_udata *udata, u64 start) { struct ib_device *ibdev = &hr_dev->ib_dev; bool is_fast = mr->type == MR_TYPE_FRMR; @@ -121,10 +114,10 @@ static int alloc_mr_pbl(struct hns_roce_dev *hr_dev, struct hns_roce_mr *mr, mr->pbl_hop_num = is_fast ? 1 : hr_dev->caps.pbl_hop_num; buf_attr.page_shift = is_fast ? PAGE_SHIFT : hr_dev->caps.pbl_buf_pg_sz + PAGE_SHIFT; - buf_attr.region[0].size = length; + buf_attr.region[0].size = mr->size; buf_attr.region[0].hopnum = mr->pbl_hop_num; buf_attr.region_count = 1; - buf_attr.user_access = access; + buf_attr.user_access = mr->access; /* fast MR's buffer is alloced before mapping, not at creation */ buf_attr.mtt_only = is_fast; @@ -196,9 +189,6 @@ static int hns_roce_mr_enable(struct hns_roce_dev *hr_dev, } mr->enabled = 1; - hns_roce_free_cmd_mailbox(hr_dev, mailbox); - - return 0; err_page: hns_roce_free_cmd_mailbox(hr_dev, mailbox); @@ -236,14 +226,16 @@ struct ib_mr *hns_roce_get_dma_mr(struct ib_pd *pd, int acc) return ERR_PTR(-ENOMEM); mr->type = MR_TYPE_DMA; + mr->pd = to_hr_pd(pd)->pdn; + mr->access = acc; /* Allocate memory region key */ hns_roce_hem_list_init(&mr->pbl_mtr.hem_list); - ret = alloc_mr_key(hr_dev, mr, to_hr_pd(pd)->pdn, 0, 0, acc); + ret = alloc_mr_key(hr_dev, mr); if (ret) goto err_free; - ret = hns_roce_mr_enable(to_hr_dev(pd->device), mr); + ret = hns_roce_mr_enable(hr_dev, mr); if (ret) goto err_mr; @@ -270,13 +262,17 @@ struct ib_mr *hns_roce_reg_user_mr(struct ib_pd *pd, u64 start, u64 length, if (!mr) return ERR_PTR(-ENOMEM); + mr->iova = virt_addr; + mr->size = length; + mr->pd = to_hr_pd(pd)->pdn; + mr->access = access_flags; mr->type = MR_TYPE_MR; - ret = alloc_mr_key(hr_dev, mr, to_hr_pd(pd)->pdn, virt_addr, length, - access_flags); + + ret = alloc_mr_key(hr_dev, mr); if (ret) goto err_alloc_mr; - ret = alloc_mr_pbl(hr_dev, mr, length, udata, start, access_flags); + ret = alloc_mr_pbl(hr_dev, mr, udata, start); if (ret) goto err_alloc_key; @@ -298,35 +294,6 @@ struct ib_mr *hns_roce_reg_user_mr(struct ib_pd *pd, u64 start, u64 length, return ERR_PTR(ret); } -static int rereg_mr_trans(struct ib_mr *ibmr, int flags, - u64 start, u64 length, - u64 virt_addr, int mr_access_flags, - struct hns_roce_cmd_mailbox *mailbox, - u32 pdn, struct ib_udata *udata) -{ - struct hns_roce_dev *hr_dev = to_hr_dev(ibmr->device); - struct ib_device *ibdev = &hr_dev->ib_dev; - struct hns_roce_mr *mr = to_hr_mr(ibmr); - int ret; - - free_mr_pbl(hr_dev, mr); - ret = alloc_mr_pbl(hr_dev, mr, length, udata, start, mr_access_flags); - if (ret) { - ibdev_err(ibdev, "failed to create mr PBL, ret = %d.\n", ret); - return ret; - } - - ret = hr_dev->hw->rereg_write_mtpt(hr_dev, mr, flags, pdn, - mr_access_flags, virt_addr, - length, mailbox->buf); - if (ret) { - ibdev_err(ibdev, "failed to write mtpt, ret = %d.\n", ret); - free_mr_pbl(hr_dev, mr); - } - - return ret; -} - struct ib_mr *hns_roce_rereg_user_mr(struct ib_mr *ibmr, int flags, u64 start, u64 length, u64 virt_addr, int mr_access_flags, struct ib_pd *pd, @@ -337,7 +304,6 @@ struct ib_mr *hns_roce_rereg_user_mr(struct ib_mr *ibmr, int flags, u64 start, struct hns_roce_mr *mr = to_hr_mr(ibmr); struct hns_roce_cmd_mailbox *mailbox; unsigned long mtpt_idx; - u32 pdn = 0; int ret; if (!mr->enabled) @@ -359,23 +325,29 @@ struct ib_mr *hns_roce_rereg_user_mr(struct ib_mr *ibmr, int flags, u64 start, ibdev_warn(ib_dev, "failed to destroy MPT, ret = %d.\n", ret); mr->enabled = 0; + mr->iova = virt_addr; + mr->size = length; if (flags & IB_MR_REREG_PD) - pdn = to_hr_pd(pd)->pdn; + mr->pd = to_hr_pd(pd)->pdn; + + if (flags & IB_MR_REREG_ACCESS) + mr->access = mr_access_flags; if (flags & IB_MR_REREG_TRANS) { - ret = rereg_mr_trans(ibmr, flags, - start, length, - virt_addr, mr_access_flags, - mailbox, pdn, udata); - if (ret) - goto free_cmd_mbox; - } else { - ret = hr_dev->hw->rereg_write_mtpt(hr_dev, mr, flags, pdn, - mr_access_flags, virt_addr, - length, mailbox->buf); - if (ret) + free_mr_pbl(hr_dev, mr); + ret = alloc_mr_pbl(hr_dev, mr, udata, start); + if (ret) { + ibdev_err(ib_dev, "failed to alloc mr PBL, ret = %d.\n", + ret); goto free_cmd_mbox; + } + } + + ret = hr_dev->hw->rereg_write_mtpt(hr_dev, mr, flags, mailbox->buf); + if (ret) { + ibdev_err(ib_dev, "failed to write mtpt, ret = %d.\n", ret); + goto free_cmd_mbox; } ret = hns_roce_hw_create_mpt(hr_dev, mailbox, mtpt_idx); @@ -385,12 +357,6 @@ struct ib_mr *hns_roce_rereg_user_mr(struct ib_mr *ibmr, int flags, u64 start, } mr->enabled = 1; - if (flags & IB_MR_REREG_ACCESS) - mr->access = mr_access_flags; - - hns_roce_free_cmd_mailbox(hr_dev, mailbox); - - return NULL; free_cmd_mbox: hns_roce_free_cmd_mailbox(hr_dev, mailbox); @@ -420,7 +386,6 @@ struct ib_mr *hns_roce_alloc_mr(struct ib_pd *pd, enum ib_mr_type mr_type, struct hns_roce_dev *hr_dev = to_hr_dev(pd->device); struct device *dev = hr_dev->dev; struct hns_roce_mr *mr; - u64 length; int ret; if (mr_type != IB_MR_TYPE_MEM_REG) @@ -437,14 +402,15 @@ struct ib_mr *hns_roce_alloc_mr(struct ib_pd *pd, enum ib_mr_type mr_type, return ERR_PTR(-ENOMEM); mr->type = MR_TYPE_FRMR; + mr->pd = to_hr_pd(pd)->pdn; + mr->size = max_num_sg * (1 << PAGE_SHIFT); /* Allocate memory region key */ - length = max_num_sg * (1 << PAGE_SHIFT); - ret = alloc_mr_key(hr_dev, mr, to_hr_pd(pd)->pdn, 0, length, 0); + ret = alloc_mr_key(hr_dev, mr); if (ret) goto err_free; - ret = alloc_mr_pbl(hr_dev, mr, length, NULL, 0, 0); + ret = alloc_mr_pbl(hr_dev, mr, NULL, 0); if (ret) goto err_key; @@ -453,7 +419,7 @@ struct ib_mr *hns_roce_alloc_mr(struct ib_pd *pd, enum ib_mr_type mr_type, goto err_pbl; mr->ibmr.rkey = mr->ibmr.lkey = mr->key; - mr->ibmr.length = length; + mr->ibmr.length = mr->size; return &mr->ibmr; From dc504774408b8ee6ce4967fbacb8b0d56588dc71 Mon Sep 17 00:00:00 2001 From: Lang Cheng Date: Sat, 23 Jan 2021 17:48:02 +0800 Subject: [PATCH 187/414] RDMA/hns: Use new interface to set MPT related fields Achieve hr_reg_write() to simply the codes to fill fields. Link: https://lore.kernel.org/r/1611395282-991-4-git-send-email-liweihang@huawei.com Signed-off-by: Lang Cheng Signed-off-by: Weihang Li Signed-off-by: Jason Gunthorpe --- drivers/infiniband/hw/hns/hns_roce_common.h | 22 +++++++++ drivers/infiniband/hw/hns/hns_roce_hw_v2.c | 52 +++++++++------------ drivers/infiniband/hw/hns/hns_roce_hw_v2.h | 39 ++++++++++++++++ 3 files changed, 84 insertions(+), 29 deletions(-) diff --git a/drivers/infiniband/hw/hns/hns_roce_common.h b/drivers/infiniband/hw/hns/hns_roce_common.h index 5afee04fb02c..3ca6e88f77bf 100644 --- a/drivers/infiniband/hw/hns/hns_roce_common.h +++ b/drivers/infiniband/hw/hns/hns_roce_common.h @@ -32,6 +32,7 @@ #ifndef _HNS_ROCE_COMMON_H #define _HNS_ROCE_COMMON_H +#include #define roce_write(dev, reg, val) writel((val), (dev)->reg_base + (reg)) #define roce_read(dev, reg) readl((dev)->reg_base + (reg)) @@ -65,6 +66,27 @@ #define hr_reg_enable(ptr, field) _hr_reg_enable(ptr, field) +#define _hr_reg_clear(ptr, field_type, field_h, field_l) \ + ({ \ + const field_type *_ptr = ptr; \ + *((__le32 *)_ptr + (field_h) / 32) &= \ + cpu_to_le32( \ + ~GENMASK((field_h) % 32, (field_l) % 32)) + \ + BUILD_BUG_ON_ZERO(((field_h) / 32) != \ + ((field_l) / 32)); \ + }) + +#define hr_reg_clear(ptr, field) _hr_reg_clear(ptr, field) + +#define _hr_reg_write(ptr, field_type, field_h, field_l, val) \ + ({ \ + _hr_reg_clear(ptr, field_type, field_h, field_l); \ + *((__le32 *)ptr + (field_h) / 32) |= cpu_to_le32(FIELD_PREP( \ + GENMASK((field_h) % 32, (field_l) % 32), val)); \ + }) + +#define hr_reg_write(ptr, field, val) _hr_reg_write(ptr, field, val) + #define ROCEE_GLB_CFG_ROCEE_DB_SQ_MODE_S 3 #define ROCEE_GLB_CFG_ROCEE_DB_OTH_MODE_S 4 diff --git a/drivers/infiniband/hw/hns/hns_roce_hw_v2.c b/drivers/infiniband/hw/hns/hns_roce_hw_v2.c index 16ef6316ccb2..a5bbfb1c009b 100644 --- a/drivers/infiniband/hw/hns/hns_roce_hw_v2.c +++ b/drivers/infiniband/hw/hns/hns_roce_hw_v2.c @@ -2880,36 +2880,20 @@ static int hns_roce_v2_write_mtpt(struct hns_roce_dev *hr_dev, mpt_entry = mb_buf; memset(mpt_entry, 0, sizeof(*mpt_entry)); - roce_set_field(mpt_entry->byte_4_pd_hop_st, V2_MPT_BYTE_4_MPT_ST_M, - V2_MPT_BYTE_4_MPT_ST_S, V2_MPT_ST_VALID); - roce_set_field(mpt_entry->byte_4_pd_hop_st, V2_MPT_BYTE_4_PBL_HOP_NUM_M, - V2_MPT_BYTE_4_PBL_HOP_NUM_S, mr->pbl_hop_num == - HNS_ROCE_HOP_NUM_0 ? 0 : mr->pbl_hop_num); - roce_set_field(mpt_entry->byte_4_pd_hop_st, - V2_MPT_BYTE_4_PBL_BA_PG_SZ_M, - V2_MPT_BYTE_4_PBL_BA_PG_SZ_S, - to_hr_hw_page_shift(mr->pbl_mtr.hem_cfg.ba_pg_shift)); - roce_set_field(mpt_entry->byte_4_pd_hop_st, V2_MPT_BYTE_4_PD_M, - V2_MPT_BYTE_4_PD_S, mr->pd); + hr_reg_write(mpt_entry, MPT_ST, V2_MPT_ST_VALID); + hr_reg_write(mpt_entry, MPT_PD, mr->pd); + hr_reg_enable(mpt_entry, MPT_L_INV_EN); - roce_set_bit(mpt_entry->byte_8_mw_cnt_en, V2_MPT_BYTE_8_RA_EN_S, 0); - roce_set_bit(mpt_entry->byte_8_mw_cnt_en, V2_MPT_BYTE_8_R_INV_EN_S, 0); - roce_set_bit(mpt_entry->byte_8_mw_cnt_en, V2_MPT_BYTE_8_L_INV_EN_S, 1); - roce_set_bit(mpt_entry->byte_8_mw_cnt_en, V2_MPT_BYTE_8_BIND_EN_S, - (mr->access & IB_ACCESS_MW_BIND ? 1 : 0)); - roce_set_bit(mpt_entry->byte_8_mw_cnt_en, V2_MPT_BYTE_8_ATOMIC_EN_S, - mr->access & IB_ACCESS_REMOTE_ATOMIC ? 1 : 0); - roce_set_bit(mpt_entry->byte_8_mw_cnt_en, V2_MPT_BYTE_8_RR_EN_S, - (mr->access & IB_ACCESS_REMOTE_READ ? 1 : 0)); - roce_set_bit(mpt_entry->byte_8_mw_cnt_en, V2_MPT_BYTE_8_RW_EN_S, - (mr->access & IB_ACCESS_REMOTE_WRITE ? 1 : 0)); - roce_set_bit(mpt_entry->byte_8_mw_cnt_en, V2_MPT_BYTE_8_LW_EN_S, - (mr->access & IB_ACCESS_LOCAL_WRITE ? 1 : 0)); - - roce_set_bit(mpt_entry->byte_12_mw_pa, V2_MPT_BYTE_12_PA_S, - mr->type == MR_TYPE_MR ? 0 : 1); - roce_set_bit(mpt_entry->byte_12_mw_pa, V2_MPT_BYTE_12_INNER_PA_VLD_S, - 1); + hr_reg_write(mpt_entry, MPT_BIND_EN, + !!(mr->access & IB_ACCESS_MW_BIND)); + hr_reg_write(mpt_entry, MPT_ATOMIC_EN, + !!(mr->access & IB_ACCESS_REMOTE_ATOMIC)); + hr_reg_write(mpt_entry, MPT_RR_EN, + !!(mr->access & IB_ACCESS_REMOTE_READ)); + hr_reg_write(mpt_entry, MPT_RW_EN, + !!(mr->access & IB_ACCESS_REMOTE_WRITE)); + hr_reg_write(mpt_entry, MPT_LW_EN, + !!((mr->access & IB_ACCESS_LOCAL_WRITE))); mpt_entry->len_l = cpu_to_le32(lower_32_bits(mr->size)); mpt_entry->len_h = cpu_to_le32(upper_32_bits(mr->size)); @@ -2917,9 +2901,19 @@ static int hns_roce_v2_write_mtpt(struct hns_roce_dev *hr_dev, mpt_entry->va_l = cpu_to_le32(lower_32_bits(mr->iova)); mpt_entry->va_h = cpu_to_le32(upper_32_bits(mr->iova)); + if (mr->type != MR_TYPE_MR) + hr_reg_enable(mpt_entry, MPT_PA); + if (mr->type == MR_TYPE_DMA) return 0; + if (mr->pbl_hop_num != HNS_ROCE_HOP_NUM_0) + hr_reg_write(mpt_entry, MPT_PBL_HOP_NUM, mr->pbl_hop_num); + + hr_reg_write(mpt_entry, MPT_PBL_BA_PG_SZ, + to_hr_hw_page_shift(mr->pbl_mtr.hem_cfg.ba_pg_shift)); + hr_reg_enable(mpt_entry, MPT_INNER_PA_VLD); + ret = set_mtpt_pbl(hr_dev, mpt_entry, mr); return ret; diff --git a/drivers/infiniband/hw/hns/hns_roce_hw_v2.h b/drivers/infiniband/hw/hns/hns_roce_hw_v2.h index bdaccf86460d..69bc072a941d 100644 --- a/drivers/infiniband/hw/hns/hns_roce_hw_v2.h +++ b/drivers/infiniband/hw/hns/hns_roce_hw_v2.h @@ -993,6 +993,45 @@ struct hns_roce_v2_mpt_entry { __le32 byte_64_buf_pa1; }; +#define MPT_FIELD_LOC(h, l) FIELD_LOC(struct hns_roce_v2_mpt_entry, h, l) + +#define MPT_ST MPT_FIELD_LOC(1, 0) +#define MPT_PBL_HOP_NUM MPT_FIELD_LOC(3, 2) +#define MPT_PBL_BA_PG_SZ MPT_FIELD_LOC(7, 4) +#define MPT_PD MPT_FIELD_LOC(31, 8) +#define MPT_RA_EN MPT_FIELD_LOC(32, 32) +#define MPT_R_INV_EN MPT_FIELD_LOC(33, 33) +#define MPT_L_INV_EN MPT_FIELD_LOC(34, 34) +#define MPT_BIND_EN MPT_FIELD_LOC(35, 35) +#define MPT_ATOMIC_EN MPT_FIELD_LOC(36, 36) +#define MPT_RR_EN MPT_FIELD_LOC(37, 37) +#define MPT_RW_EN MPT_FIELD_LOC(38, 38) +#define MPT_LW_EN MPT_FIELD_LOC(39, 39) +#define MPT_MW_CNT MPT_FIELD_LOC(63, 40) +#define MPT_FRE MPT_FIELD_LOC(64, 64) +#define MPT_PA MPT_FIELD_LOC(65, 65) +#define MPT_ZBVA MPT_FIELD_LOC(66, 66) +#define MPT_SHARE MPT_FIELD_LOC(67, 67) +#define MPT_MR_MW MPT_FIELD_LOC(68, 68) +#define MPT_BPD MPT_FIELD_LOC(69, 69) +#define MPT_BQP MPT_FIELD_LOC(70, 70) +#define MPT_INNER_PA_VLD MPT_FIELD_LOC(71, 71) +#define MPT_MW_BIND_QPN MPT_FIELD_LOC(95, 72) +#define MPT_BOUND_LKEY MPT_FIELD_LOC(127, 96) +#define MPT_LEN MPT_FIELD_LOC(191, 128) +#define MPT_LKEY MPT_FIELD_LOC(223, 192) +#define MPT_VA MPT_FIELD_LOC(287, 224) +#define MPT_PBL_SIZE MPT_FIELD_LOC(319, 288) +#define MPT_PBL_BA MPT_FIELD_LOC(380, 320) +#define MPT_BLK_MODE MPT_FIELD_LOC(381, 381) +#define MPT_RSV0 MPT_FIELD_LOC(383, 382) +#define MPT_PA0 MPT_FIELD_LOC(441, 384) +#define MPT_BOUND_VA MPT_FIELD_LOC(447, 442) +#define MPT_PA1 MPT_FIELD_LOC(505, 448) +#define MPT_PERSIST_EN MPT_FIELD_LOC(506, 506) +#define MPT_RSV2 MPT_FIELD_LOC(507, 507) +#define MPT_PBL_BUF_PG_SZ MPT_FIELD_LOC(511, 508) + #define V2_MPT_BYTE_4_MPT_ST_S 0 #define V2_MPT_BYTE_4_MPT_ST_M GENMASK(1, 0) From c9b258c6be09283663c6851725b322568d867c0b Mon Sep 17 00:00:00 2001 From: Suravee Suthikulpanit Date: Tue, 15 Dec 2020 01:36:54 -0600 Subject: [PATCH 188/414] iommu/amd: Prepare for generic IO page table framework Add initial hook up code to implement generic IO page table framework. Signed-off-by: Suravee Suthikulpanit Link: https://lore.kernel.org/r/20201215073705.123786-3-suravee.suthikulpanit@amd.com Signed-off-by: Joerg Roedel --- drivers/iommu/amd/Kconfig | 1 + drivers/iommu/amd/Makefile | 2 +- drivers/iommu/amd/amd_iommu_types.h | 35 +++++++++++++++ drivers/iommu/amd/io_pgtable.c | 69 +++++++++++++++++++++++++++++ drivers/iommu/amd/iommu.c | 10 ----- drivers/iommu/io-pgtable.c | 3 ++ include/linux/io-pgtable.h | 2 + 7 files changed, 111 insertions(+), 11 deletions(-) create mode 100644 drivers/iommu/amd/io_pgtable.c diff --git a/drivers/iommu/amd/Kconfig b/drivers/iommu/amd/Kconfig index 626b97d0dd21..a3cbafb603f5 100644 --- a/drivers/iommu/amd/Kconfig +++ b/drivers/iommu/amd/Kconfig @@ -10,6 +10,7 @@ config AMD_IOMMU select IOMMU_API select IOMMU_IOVA select IOMMU_DMA + select IOMMU_IO_PGTABLE depends on X86_64 && PCI && ACPI && HAVE_CMPXCHG_DOUBLE help With this option you can enable support for AMD IOMMU hardware in diff --git a/drivers/iommu/amd/Makefile b/drivers/iommu/amd/Makefile index dc5a2fa4fd37..a935f8f4b974 100644 --- a/drivers/iommu/amd/Makefile +++ b/drivers/iommu/amd/Makefile @@ -1,4 +1,4 @@ # SPDX-License-Identifier: GPL-2.0-only -obj-$(CONFIG_AMD_IOMMU) += iommu.o init.o quirks.o +obj-$(CONFIG_AMD_IOMMU) += iommu.o init.o quirks.o io_pgtable.o obj-$(CONFIG_AMD_IOMMU_DEBUGFS) += debugfs.o obj-$(CONFIG_AMD_IOMMU_V2) += iommu_v2.o diff --git a/drivers/iommu/amd/amd_iommu_types.h b/drivers/iommu/amd/amd_iommu_types.h index 553587827771..b2365924d898 100644 --- a/drivers/iommu/amd/amd_iommu_types.h +++ b/drivers/iommu/amd/amd_iommu_types.h @@ -15,6 +15,7 @@ #include #include #include +#include /* * Maximum number of IOMMUs supported @@ -252,6 +253,19 @@ #define GA_GUEST_NR 0x1 +#define IOMMU_IN_ADDR_BIT_SIZE 52 +#define IOMMU_OUT_ADDR_BIT_SIZE 52 + +/* + * This bitmap is used to advertise the page sizes our hardware support + * to the IOMMU core, which will then use this information to split + * physically contiguous memory regions it is mapping into page sizes + * that we support. + * + * 512GB Pages are not supported due to a hardware bug + */ +#define AMD_IOMMU_PGSIZES ((~0xFFFUL) & ~(2ULL << 38)) + /* Bit value definition for dte irq remapping fields*/ #define DTE_IRQ_PHYS_ADDR_MASK (((1ULL << 45)-1) << 6) #define DTE_IRQ_REMAP_INTCTL_MASK (0x3ULL << 60) @@ -466,6 +480,26 @@ struct amd_irte_ops; #define AMD_IOMMU_FLAG_TRANS_PRE_ENABLED (1 << 0) +#define io_pgtable_to_data(x) \ + container_of((x), struct amd_io_pgtable, iop) + +#define io_pgtable_ops_to_data(x) \ + io_pgtable_to_data(io_pgtable_ops_to_pgtable(x)) + +#define io_pgtable_ops_to_domain(x) \ + container_of(io_pgtable_ops_to_data(x), \ + struct protection_domain, iop) + +#define io_pgtable_cfg_to_data(x) \ + container_of((x), struct amd_io_pgtable, pgtbl_cfg) + +struct amd_io_pgtable { + struct io_pgtable_cfg pgtbl_cfg; + struct io_pgtable iop; + int mode; + u64 *root; +}; + /* * This structure contains generic data for IOMMU protection domains * independent of their use. @@ -474,6 +508,7 @@ struct protection_domain { struct list_head dev_list; /* List of all devices in this domain */ struct iommu_domain domain; /* generic domain handle used by iommu core code */ + struct amd_io_pgtable iop; spinlock_t lock; /* mostly used to lock the page table*/ u16 id; /* the domain id written to the device table */ atomic64_t pt_root; /* pgtable root and pgtable mode */ diff --git a/drivers/iommu/amd/io_pgtable.c b/drivers/iommu/amd/io_pgtable.c new file mode 100644 index 000000000000..cf2cd2d2e43e --- /dev/null +++ b/drivers/iommu/amd/io_pgtable.c @@ -0,0 +1,69 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* + * CPU-agnostic AMD IO page table allocator. + * + * Copyright (C) 2020 Advanced Micro Devices, Inc. + * Author: Suravee Suthikulpanit + */ + +#define pr_fmt(fmt) "AMD-Vi: " fmt +#define dev_fmt(fmt) pr_fmt(fmt) + +#include +#include +#include +#include +#include +#include +#include +#include + +#include + +#include "amd_iommu_types.h" +#include "amd_iommu.h" + +static void v1_tlb_flush_all(void *cookie) +{ +} + +static void v1_tlb_flush_walk(unsigned long iova, size_t size, + size_t granule, void *cookie) +{ +} + +static void v1_tlb_add_page(struct iommu_iotlb_gather *gather, + unsigned long iova, size_t granule, + void *cookie) +{ +} + +static const struct iommu_flush_ops v1_flush_ops = { + .tlb_flush_all = v1_tlb_flush_all, + .tlb_flush_walk = v1_tlb_flush_walk, + .tlb_add_page = v1_tlb_add_page, +}; + +/* + * ---------------------------------------------------- + */ +static void v1_free_pgtable(struct io_pgtable *iop) +{ +} + +static struct io_pgtable *v1_alloc_pgtable(struct io_pgtable_cfg *cfg, void *cookie) +{ + struct amd_io_pgtable *pgtable = io_pgtable_cfg_to_data(cfg); + + cfg->pgsize_bitmap = AMD_IOMMU_PGSIZES, + cfg->ias = IOMMU_IN_ADDR_BIT_SIZE, + cfg->oas = IOMMU_OUT_ADDR_BIT_SIZE, + cfg->tlb = &v1_flush_ops; + + return &pgtable->iop; +} + +struct io_pgtable_init_fns io_pgtable_amd_iommu_v1_init_fns = { + .alloc = v1_alloc_pgtable, + .free = v1_free_pgtable, +}; diff --git a/drivers/iommu/amd/iommu.c b/drivers/iommu/amd/iommu.c index 3485f8b60d7a..18d8c97f681e 100644 --- a/drivers/iommu/amd/iommu.c +++ b/drivers/iommu/amd/iommu.c @@ -57,16 +57,6 @@ #define HT_RANGE_START (0xfd00000000ULL) #define HT_RANGE_END (0xffffffffffULL) -/* - * This bitmap is used to advertise the page sizes our hardware support - * to the IOMMU core, which will then use this information to split - * physically contiguous memory regions it is mapping into page sizes - * that we support. - * - * 512GB Pages are not supported due to a hardware bug - */ -#define AMD_IOMMU_PGSIZES ((~0xFFFUL) & ~(2ULL << 38)) - #define DEFAULT_PGTABLE_LEVEL PAGE_MODE_3_LEVEL static DEFINE_SPINLOCK(pd_bitmap_lock); diff --git a/drivers/iommu/io-pgtable.c b/drivers/iommu/io-pgtable.c index 94394c81468f..6e9917ce980f 100644 --- a/drivers/iommu/io-pgtable.c +++ b/drivers/iommu/io-pgtable.c @@ -24,6 +24,9 @@ io_pgtable_init_table[IO_PGTABLE_NUM_FMTS] = { #ifdef CONFIG_IOMMU_IO_PGTABLE_ARMV7S [ARM_V7S] = &io_pgtable_arm_v7s_init_fns, #endif +#ifdef CONFIG_AMD_IOMMU + [AMD_IOMMU_V1] = &io_pgtable_amd_iommu_v1_init_fns, +#endif }; struct io_pgtable_ops *alloc_io_pgtable_ops(enum io_pgtable_fmt fmt, diff --git a/include/linux/io-pgtable.h b/include/linux/io-pgtable.h index ea727eb1a1a9..f58c14218ee1 100644 --- a/include/linux/io-pgtable.h +++ b/include/linux/io-pgtable.h @@ -15,6 +15,7 @@ enum io_pgtable_fmt { ARM_64_LPAE_S2, ARM_V7S, ARM_MALI_LPAE, + AMD_IOMMU_V1, IO_PGTABLE_NUM_FMTS, }; @@ -251,5 +252,6 @@ extern struct io_pgtable_init_fns io_pgtable_arm_64_lpae_s1_init_fns; extern struct io_pgtable_init_fns io_pgtable_arm_64_lpae_s2_init_fns; extern struct io_pgtable_init_fns io_pgtable_arm_v7s_init_fns; extern struct io_pgtable_init_fns io_pgtable_arm_mali_lpae_init_fns; +extern struct io_pgtable_init_fns io_pgtable_amd_iommu_v1_init_fns; #endif /* __IO_PGTABLE_H */ From d2272ec7f946470e861b77572a2f31325faf59c6 Mon Sep 17 00:00:00 2001 From: Suravee Suthikulpanit Date: Tue, 15 Dec 2020 01:36:55 -0600 Subject: [PATCH 189/414] iommu/amd: Move pt_root to struct amd_io_pgtable To better organize the data structure since it contains IO page table related information. Signed-off-by: Suravee Suthikulpanit Link: https://lore.kernel.org/r/20201215073705.123786-4-suravee.suthikulpanit@amd.com Signed-off-by: Joerg Roedel --- drivers/iommu/amd/amd_iommu.h | 2 +- drivers/iommu/amd/amd_iommu_types.h | 2 +- drivers/iommu/amd/iommu.c | 2 +- 3 files changed, 3 insertions(+), 3 deletions(-) diff --git a/drivers/iommu/amd/amd_iommu.h b/drivers/iommu/amd/amd_iommu.h index 0817bc732d1a..b8dae3941f0f 100644 --- a/drivers/iommu/amd/amd_iommu.h +++ b/drivers/iommu/amd/amd_iommu.h @@ -105,7 +105,7 @@ static inline void *iommu_phys_to_virt(unsigned long paddr) static inline void amd_iommu_domain_set_pt_root(struct protection_domain *domain, u64 root) { - atomic64_set(&domain->pt_root, root); + atomic64_set(&domain->iop.pt_root, root); } static inline diff --git a/drivers/iommu/amd/amd_iommu_types.h b/drivers/iommu/amd/amd_iommu_types.h index b2365924d898..a50f117879f2 100644 --- a/drivers/iommu/amd/amd_iommu_types.h +++ b/drivers/iommu/amd/amd_iommu_types.h @@ -498,6 +498,7 @@ struct amd_io_pgtable { struct io_pgtable iop; int mode; u64 *root; + atomic64_t pt_root; /* pgtable root and pgtable mode */ }; /* @@ -511,7 +512,6 @@ struct protection_domain { struct amd_io_pgtable iop; spinlock_t lock; /* mostly used to lock the page table*/ u16 id; /* the domain id written to the device table */ - atomic64_t pt_root; /* pgtable root and pgtable mode */ int glx; /* Number of levels for GCR3 table */ u64 *gcr3_tbl; /* Guest CR3 table */ unsigned long flags; /* flags to find out type of domain */ diff --git a/drivers/iommu/amd/iommu.c b/drivers/iommu/amd/iommu.c index 18d8c97f681e..2a4730380462 100644 --- a/drivers/iommu/amd/iommu.c +++ b/drivers/iommu/amd/iommu.c @@ -144,7 +144,7 @@ static struct protection_domain *to_pdomain(struct iommu_domain *dom) static void amd_iommu_domain_get_pgtable(struct protection_domain *domain, struct domain_pgtable *pgtable) { - u64 pt_root = atomic64_read(&domain->pt_root); + u64 pt_root = atomic64_read(&domain->iop.pt_root); pgtable->root = (u64 *)(pt_root & PAGE_MASK); pgtable->mode = pt_root & 7; /* lowest 3 bits encode pgtable mode */ From 1f5855306676cf1d10785043d8fdc7a82bebd50b Mon Sep 17 00:00:00 2001 From: Suravee Suthikulpanit Date: Tue, 15 Dec 2020 01:36:56 -0600 Subject: [PATCH 190/414] iommu/amd: Convert to using amd_io_pgtable Make use of the new struct amd_io_pgtable in preparation to remove the struct domain_pgtable. Signed-off-by: Suravee Suthikulpanit Link: https://lore.kernel.org/r/20201215073705.123786-5-suravee.suthikulpanit@amd.com Signed-off-by: Joerg Roedel --- drivers/iommu/amd/amd_iommu.h | 1 + drivers/iommu/amd/iommu.c | 25 ++++++++++--------------- 2 files changed, 11 insertions(+), 15 deletions(-) diff --git a/drivers/iommu/amd/amd_iommu.h b/drivers/iommu/amd/amd_iommu.h index b8dae3941f0f..bf9723b35e77 100644 --- a/drivers/iommu/amd/amd_iommu.h +++ b/drivers/iommu/amd/amd_iommu.h @@ -56,6 +56,7 @@ extern void amd_iommu_domain_direct_map(struct iommu_domain *dom); extern int amd_iommu_domain_enable_v2(struct iommu_domain *dom, int pasids); extern int amd_iommu_flush_page(struct iommu_domain *dom, u32 pasid, u64 address); +extern void amd_iommu_update_and_flush_device_table(struct protection_domain *domain); extern int amd_iommu_flush_tlb(struct iommu_domain *dom, u32 pasid); extern int amd_iommu_domain_set_gcr3(struct iommu_domain *dom, u32 pasid, unsigned long cr3); diff --git a/drivers/iommu/amd/iommu.c b/drivers/iommu/amd/iommu.c index 2a4730380462..763efee81524 100644 --- a/drivers/iommu/amd/iommu.c +++ b/drivers/iommu/amd/iommu.c @@ -88,8 +88,6 @@ struct kmem_cache *amd_iommu_irq_cache; static void update_domain(struct protection_domain *domain); static void detach_device(struct device *dev); -static void update_and_flush_device_table(struct protection_domain *domain, - struct domain_pgtable *pgtable); /**************************************************************************** * @@ -1501,7 +1499,7 @@ static bool increase_address_space(struct protection_domain *domain, pgtable.root = pte; pgtable.mode += 1; - update_and_flush_device_table(domain, &pgtable); + amd_iommu_update_and_flush_device_table(domain); domain_flush_complete(domain); /* @@ -1876,17 +1874,16 @@ static void free_gcr3_table(struct protection_domain *domain) } static void set_dte_entry(u16 devid, struct protection_domain *domain, - struct domain_pgtable *pgtable, bool ats, bool ppr) { u64 pte_root = 0; u64 flags = 0; u32 old_domid; - if (pgtable->mode != PAGE_MODE_NONE) - pte_root = iommu_virt_to_phys(pgtable->root); + if (domain->iop.mode != PAGE_MODE_NONE) + pte_root = iommu_virt_to_phys(domain->iop.root); - pte_root |= (pgtable->mode & DEV_ENTRY_MODE_MASK) + pte_root |= (domain->iop.mode & DEV_ENTRY_MODE_MASK) << DEV_ENTRY_MODE_SHIFT; pte_root |= DTE_FLAG_IR | DTE_FLAG_IW | DTE_FLAG_V | DTE_FLAG_TV; @@ -1976,7 +1973,7 @@ static void do_attach(struct iommu_dev_data *dev_data, /* Update device table */ amd_iommu_domain_get_pgtable(domain, &pgtable); - set_dte_entry(dev_data->devid, domain, &pgtable, + set_dte_entry(dev_data->devid, domain, ats, dev_data->iommu_v2); clone_aliases(dev_data->pdev); @@ -2283,22 +2280,20 @@ static int amd_iommu_domain_get_attr(struct iommu_domain *domain, * *****************************************************************************/ -static void update_device_table(struct protection_domain *domain, - struct domain_pgtable *pgtable) +static void update_device_table(struct protection_domain *domain) { struct iommu_dev_data *dev_data; list_for_each_entry(dev_data, &domain->dev_list, list) { - set_dte_entry(dev_data->devid, domain, pgtable, + set_dte_entry(dev_data->devid, domain, dev_data->ats.enabled, dev_data->iommu_v2); clone_aliases(dev_data->pdev); } } -static void update_and_flush_device_table(struct protection_domain *domain, - struct domain_pgtable *pgtable) +void amd_iommu_update_and_flush_device_table(struct protection_domain *domain) { - update_device_table(domain, pgtable); + update_device_table(domain); domain_flush_devices(domain); } @@ -2308,7 +2303,7 @@ static void update_domain(struct protection_domain *domain) /* Update device table */ amd_iommu_domain_get_pgtable(domain, &pgtable); - update_and_flush_device_table(domain, &pgtable); + amd_iommu_update_and_flush_device_table(domain); /* Flush domain TLB(s) and wait for completion */ domain_flush_tlb_pde(domain); From f9b4df790aa4372bfa11b7d212e537b763295429 Mon Sep 17 00:00:00 2001 From: Suravee Suthikulpanit Date: Tue, 15 Dec 2020 01:36:57 -0600 Subject: [PATCH 191/414] iommu/amd: Declare functions as extern And move declaration to header file so that they can be included across multiple files. There is no functional change. Signed-off-by: Suravee Suthikulpanit Link: https://lore.kernel.org/r/20201215073705.123786-6-suravee.suthikulpanit@amd.com Signed-off-by: Joerg Roedel --- drivers/iommu/amd/amd_iommu.h | 3 +++ drivers/iommu/amd/iommu.c | 39 +++++++++++++++++------------------ 2 files changed, 22 insertions(+), 20 deletions(-) diff --git a/drivers/iommu/amd/amd_iommu.h b/drivers/iommu/amd/amd_iommu.h index bf9723b35e77..bf29ab8c99f0 100644 --- a/drivers/iommu/amd/amd_iommu.h +++ b/drivers/iommu/amd/amd_iommu.h @@ -57,6 +57,9 @@ extern int amd_iommu_domain_enable_v2(struct iommu_domain *dom, int pasids); extern int amd_iommu_flush_page(struct iommu_domain *dom, u32 pasid, u64 address); extern void amd_iommu_update_and_flush_device_table(struct protection_domain *domain); +extern void amd_iommu_domain_update(struct protection_domain *domain); +extern void amd_iommu_domain_flush_complete(struct protection_domain *domain); +extern void amd_iommu_domain_flush_tlb_pde(struct protection_domain *domain); extern int amd_iommu_flush_tlb(struct iommu_domain *dom, u32 pasid); extern int amd_iommu_domain_set_gcr3(struct iommu_domain *dom, u32 pasid, unsigned long cr3); diff --git a/drivers/iommu/amd/iommu.c b/drivers/iommu/amd/iommu.c index 763efee81524..76e69c510db8 100644 --- a/drivers/iommu/amd/iommu.c +++ b/drivers/iommu/amd/iommu.c @@ -86,7 +86,6 @@ struct iommu_cmd { struct kmem_cache *amd_iommu_irq_cache; -static void update_domain(struct protection_domain *domain); static void detach_device(struct device *dev); /**************************************************************************** @@ -1313,12 +1312,12 @@ static void domain_flush_pages(struct protection_domain *domain, } /* Flush the whole IO/TLB for a given protection domain - including PDE */ -static void domain_flush_tlb_pde(struct protection_domain *domain) +void amd_iommu_domain_flush_tlb_pde(struct protection_domain *domain) { __domain_flush_pages(domain, 0, CMD_INV_IOMMU_ALL_PAGES_ADDRESS, 1); } -static void domain_flush_complete(struct protection_domain *domain) +void amd_iommu_domain_flush_complete(struct protection_domain *domain) { int i; @@ -1343,7 +1342,7 @@ static void domain_flush_np_cache(struct protection_domain *domain, spin_lock_irqsave(&domain->lock, flags); domain_flush_pages(domain, iova, size); - domain_flush_complete(domain); + amd_iommu_domain_flush_complete(domain); spin_unlock_irqrestore(&domain->lock, flags); } } @@ -1500,7 +1499,7 @@ static bool increase_address_space(struct protection_domain *domain, pgtable.root = pte; pgtable.mode += 1; amd_iommu_update_and_flush_device_table(domain); - domain_flush_complete(domain); + amd_iommu_domain_flush_complete(domain); /* * Device Table needs to be updated and flushed before the new root can @@ -1753,8 +1752,8 @@ static int iommu_map_page(struct protection_domain *dom, * Updates and flushing already happened in * increase_address_space(). */ - domain_flush_tlb_pde(dom); - domain_flush_complete(dom); + amd_iommu_domain_flush_tlb_pde(dom); + amd_iommu_domain_flush_complete(dom); spin_unlock_irqrestore(&dom->lock, flags); } @@ -1997,10 +1996,10 @@ static void do_detach(struct iommu_dev_data *dev_data) device_flush_dte(dev_data); /* Flush IOTLB */ - domain_flush_tlb_pde(domain); + amd_iommu_domain_flush_tlb_pde(domain); /* Wait for the flushes to finish */ - domain_flush_complete(domain); + amd_iommu_domain_flush_complete(domain); /* decrease reference counters - needs to happen after the flushes */ domain->dev_iommu[iommu->index] -= 1; @@ -2133,9 +2132,9 @@ static int attach_device(struct device *dev, * left the caches in the IOMMU dirty. So we have to flush * here to evict all dirty stuff. */ - domain_flush_tlb_pde(domain); + amd_iommu_domain_flush_tlb_pde(domain); - domain_flush_complete(domain); + amd_iommu_domain_flush_complete(domain); out: spin_unlock(&dev_data->lock); @@ -2297,7 +2296,7 @@ void amd_iommu_update_and_flush_device_table(struct protection_domain *domain) domain_flush_devices(domain); } -static void update_domain(struct protection_domain *domain) +void amd_iommu_domain_update(struct protection_domain *domain) { struct domain_pgtable pgtable; @@ -2306,8 +2305,8 @@ static void update_domain(struct protection_domain *domain) amd_iommu_update_and_flush_device_table(domain); /* Flush domain TLB(s) and wait for completion */ - domain_flush_tlb_pde(domain); - domain_flush_complete(domain); + amd_iommu_domain_flush_tlb_pde(domain); + amd_iommu_domain_flush_complete(domain); } int __init amd_iommu_init_api(void) @@ -2695,8 +2694,8 @@ static void amd_iommu_flush_iotlb_all(struct iommu_domain *domain) unsigned long flags; spin_lock_irqsave(&dom->lock, flags); - domain_flush_tlb_pde(dom); - domain_flush_complete(dom); + amd_iommu_domain_flush_tlb_pde(dom); + amd_iommu_domain_flush_complete(dom); spin_unlock_irqrestore(&dom->lock, flags); } @@ -2786,7 +2785,7 @@ void amd_iommu_domain_direct_map(struct iommu_domain *dom) amd_iommu_domain_clr_pt_root(domain); /* Make changes visible to IOMMUs */ - update_domain(domain); + amd_iommu_domain_update(domain); /* Page-table is not visible to IOMMU anymore, so free it */ free_pagetable(&pgtable); @@ -2830,7 +2829,7 @@ int amd_iommu_domain_enable_v2(struct iommu_domain *dom, int pasids) domain->glx = levels; domain->flags |= PD_IOMMUV2_MASK; - update_domain(domain); + amd_iommu_domain_update(domain); ret = 0; @@ -2867,7 +2866,7 @@ static int __flush_pasid(struct protection_domain *domain, u32 pasid, } /* Wait until IOMMU TLB flushes are complete */ - domain_flush_complete(domain); + amd_iommu_domain_flush_complete(domain); /* Now flush device TLBs */ list_for_each_entry(dev_data, &domain->dev_list, list) { @@ -2893,7 +2892,7 @@ static int __flush_pasid(struct protection_domain *domain, u32 pasid, } /* Wait until all device TLBs are flushed */ - domain_flush_complete(domain); + amd_iommu_domain_flush_complete(domain); ret = 0; From 18954252a1d0b12e1b77087b55c37fb43b09e12a Mon Sep 17 00:00:00 2001 From: Suravee Suthikulpanit Date: Tue, 15 Dec 2020 01:36:58 -0600 Subject: [PATCH 192/414] iommu/amd: Move IO page table related functions Preparing to migrate to use IO page table framework. There is no functional change. Signed-off-by: Suravee Suthikulpanit Link: https://lore.kernel.org/r/20201215073705.123786-7-suravee.suthikulpanit@amd.com Signed-off-by: Joerg Roedel --- drivers/iommu/amd/amd_iommu.h | 18 ++ drivers/iommu/amd/io_pgtable.c | 473 ++++++++++++++++++++++++++++++++ drivers/iommu/amd/iommu.c | 476 +-------------------------------- 3 files changed, 493 insertions(+), 474 deletions(-) diff --git a/drivers/iommu/amd/amd_iommu.h b/drivers/iommu/amd/amd_iommu.h index bf29ab8c99f0..1bad42a3c73c 100644 --- a/drivers/iommu/amd/amd_iommu.h +++ b/drivers/iommu/amd/amd_iommu.h @@ -131,4 +131,22 @@ void amd_iommu_apply_ivrs_quirks(void); static inline void amd_iommu_apply_ivrs_quirks(void) { } #endif +/* TODO: These are temporary and will be removed once fully transition */ +extern void free_pagetable(struct domain_pgtable *pgtable); +extern int iommu_map_page(struct protection_domain *dom, + unsigned long bus_addr, + unsigned long phys_addr, + unsigned long page_size, + int prot, + gfp_t gfp); +extern unsigned long iommu_unmap_page(struct protection_domain *dom, + unsigned long bus_addr, + unsigned long page_size); +extern u64 *fetch_pte(struct protection_domain *domain, + unsigned long address, + unsigned long *page_size); +extern void amd_iommu_domain_get_pgtable(struct protection_domain *domain, + struct domain_pgtable *pgtable); +extern void amd_iommu_domain_set_pgtable(struct protection_domain *domain, + u64 *root, int mode); #endif diff --git a/drivers/iommu/amd/io_pgtable.c b/drivers/iommu/amd/io_pgtable.c index cf2cd2d2e43e..dbc0ad775d5e 100644 --- a/drivers/iommu/amd/io_pgtable.c +++ b/drivers/iommu/amd/io_pgtable.c @@ -44,6 +44,479 @@ static const struct iommu_flush_ops v1_flush_ops = { .tlb_add_page = v1_tlb_add_page, }; +/* + * Helper function to get the first pte of a large mapping + */ +static u64 *first_pte_l7(u64 *pte, unsigned long *page_size, + unsigned long *count) +{ + unsigned long pte_mask, pg_size, cnt; + u64 *fpte; + + pg_size = PTE_PAGE_SIZE(*pte); + cnt = PAGE_SIZE_PTE_COUNT(pg_size); + pte_mask = ~((cnt << 3) - 1); + fpte = (u64 *)(((unsigned long)pte) & pte_mask); + + if (page_size) + *page_size = pg_size; + + if (count) + *count = cnt; + + return fpte; +} + +/**************************************************************************** + * + * The functions below are used the create the page table mappings for + * unity mapped regions. + * + ****************************************************************************/ + +static void free_page_list(struct page *freelist) +{ + while (freelist != NULL) { + unsigned long p = (unsigned long)page_address(freelist); + + freelist = freelist->freelist; + free_page(p); + } +} + +static struct page *free_pt_page(unsigned long pt, struct page *freelist) +{ + struct page *p = virt_to_page((void *)pt); + + p->freelist = freelist; + + return p; +} + +#define DEFINE_FREE_PT_FN(LVL, FN) \ +static struct page *free_pt_##LVL (unsigned long __pt, struct page *freelist) \ +{ \ + unsigned long p; \ + u64 *pt; \ + int i; \ + \ + pt = (u64 *)__pt; \ + \ + for (i = 0; i < 512; ++i) { \ + /* PTE present? */ \ + if (!IOMMU_PTE_PRESENT(pt[i])) \ + continue; \ + \ + /* Large PTE? */ \ + if (PM_PTE_LEVEL(pt[i]) == 0 || \ + PM_PTE_LEVEL(pt[i]) == 7) \ + continue; \ + \ + p = (unsigned long)IOMMU_PTE_PAGE(pt[i]); \ + freelist = FN(p, freelist); \ + } \ + \ + return free_pt_page((unsigned long)pt, freelist); \ +} + +DEFINE_FREE_PT_FN(l2, free_pt_page) +DEFINE_FREE_PT_FN(l3, free_pt_l2) +DEFINE_FREE_PT_FN(l4, free_pt_l3) +DEFINE_FREE_PT_FN(l5, free_pt_l4) +DEFINE_FREE_PT_FN(l6, free_pt_l5) + +static struct page *free_sub_pt(unsigned long root, int mode, + struct page *freelist) +{ + switch (mode) { + case PAGE_MODE_NONE: + case PAGE_MODE_7_LEVEL: + break; + case PAGE_MODE_1_LEVEL: + freelist = free_pt_page(root, freelist); + break; + case PAGE_MODE_2_LEVEL: + freelist = free_pt_l2(root, freelist); + break; + case PAGE_MODE_3_LEVEL: + freelist = free_pt_l3(root, freelist); + break; + case PAGE_MODE_4_LEVEL: + freelist = free_pt_l4(root, freelist); + break; + case PAGE_MODE_5_LEVEL: + freelist = free_pt_l5(root, freelist); + break; + case PAGE_MODE_6_LEVEL: + freelist = free_pt_l6(root, freelist); + break; + default: + BUG(); + } + + return freelist; +} + +void free_pagetable(struct domain_pgtable *pgtable) +{ + struct page *freelist = NULL; + unsigned long root; + + if (pgtable->mode == PAGE_MODE_NONE) + return; + + BUG_ON(pgtable->mode < PAGE_MODE_NONE || + pgtable->mode > PAGE_MODE_6_LEVEL); + + root = (unsigned long)pgtable->root; + freelist = free_sub_pt(root, pgtable->mode, freelist); + + free_page_list(freelist); +} + +void amd_iommu_domain_set_pgtable(struct protection_domain *domain, + u64 *root, int mode) +{ + u64 pt_root; + + /* lowest 3 bits encode pgtable mode */ + pt_root = mode & 7; + pt_root |= (u64)root; + + amd_iommu_domain_set_pt_root(domain, pt_root); +} + +/* + * This function is used to add another level to an IO page table. Adding + * another level increases the size of the address space by 9 bits to a size up + * to 64 bits. + */ +static bool increase_address_space(struct protection_domain *domain, + unsigned long address, + gfp_t gfp) +{ + struct domain_pgtable pgtable; + unsigned long flags; + bool ret = true; + u64 *pte; + + spin_lock_irqsave(&domain->lock, flags); + + amd_iommu_domain_get_pgtable(domain, &pgtable); + + if (address <= PM_LEVEL_SIZE(pgtable.mode)) + goto out; + + ret = false; + if (WARN_ON_ONCE(pgtable.mode == PAGE_MODE_6_LEVEL)) + goto out; + + pte = (void *)get_zeroed_page(gfp); + if (!pte) + goto out; + + *pte = PM_LEVEL_PDE(pgtable.mode, iommu_virt_to_phys(pgtable.root)); + + pgtable.root = pte; + pgtable.mode += 1; + amd_iommu_update_and_flush_device_table(domain); + amd_iommu_domain_flush_complete(domain); + + /* + * Device Table needs to be updated and flushed before the new root can + * be published. + */ + amd_iommu_domain_set_pgtable(domain, pte, pgtable.mode); + + ret = true; + +out: + spin_unlock_irqrestore(&domain->lock, flags); + + return ret; +} + +static u64 *alloc_pte(struct protection_domain *domain, + unsigned long address, + unsigned long page_size, + u64 **pte_page, + gfp_t gfp, + bool *updated) +{ + struct domain_pgtable pgtable; + int level, end_lvl; + u64 *pte, *page; + + BUG_ON(!is_power_of_2(page_size)); + + amd_iommu_domain_get_pgtable(domain, &pgtable); + + while (address > PM_LEVEL_SIZE(pgtable.mode)) { + /* + * Return an error if there is no memory to update the + * page-table. + */ + if (!increase_address_space(domain, address, gfp)) + return NULL; + + /* Read new values to check if update was successful */ + amd_iommu_domain_get_pgtable(domain, &pgtable); + } + + + level = pgtable.mode - 1; + pte = &pgtable.root[PM_LEVEL_INDEX(level, address)]; + address = PAGE_SIZE_ALIGN(address, page_size); + end_lvl = PAGE_SIZE_LEVEL(page_size); + + while (level > end_lvl) { + u64 __pte, __npte; + int pte_level; + + __pte = *pte; + pte_level = PM_PTE_LEVEL(__pte); + + /* + * If we replace a series of large PTEs, we need + * to tear down all of them. + */ + if (IOMMU_PTE_PRESENT(__pte) && + pte_level == PAGE_MODE_7_LEVEL) { + unsigned long count, i; + u64 *lpte; + + lpte = first_pte_l7(pte, NULL, &count); + + /* + * Unmap the replicated PTEs that still match the + * original large mapping + */ + for (i = 0; i < count; ++i) + cmpxchg64(&lpte[i], __pte, 0ULL); + + *updated = true; + continue; + } + + if (!IOMMU_PTE_PRESENT(__pte) || + pte_level == PAGE_MODE_NONE) { + page = (u64 *)get_zeroed_page(gfp); + + if (!page) + return NULL; + + __npte = PM_LEVEL_PDE(level, iommu_virt_to_phys(page)); + + /* pte could have been changed somewhere. */ + if (cmpxchg64(pte, __pte, __npte) != __pte) + free_page((unsigned long)page); + else if (IOMMU_PTE_PRESENT(__pte)) + *updated = true; + + continue; + } + + /* No level skipping support yet */ + if (pte_level != level) + return NULL; + + level -= 1; + + pte = IOMMU_PTE_PAGE(__pte); + + if (pte_page && level == end_lvl) + *pte_page = pte; + + pte = &pte[PM_LEVEL_INDEX(level, address)]; + } + + return pte; +} + +/* + * This function checks if there is a PTE for a given dma address. If + * there is one, it returns the pointer to it. + */ +u64 *fetch_pte(struct protection_domain *domain, + unsigned long address, + unsigned long *page_size) +{ + struct domain_pgtable pgtable; + int level; + u64 *pte; + + *page_size = 0; + + amd_iommu_domain_get_pgtable(domain, &pgtable); + + if (address > PM_LEVEL_SIZE(pgtable.mode)) + return NULL; + + level = pgtable.mode - 1; + pte = &pgtable.root[PM_LEVEL_INDEX(level, address)]; + *page_size = PTE_LEVEL_PAGE_SIZE(level); + + while (level > 0) { + + /* Not Present */ + if (!IOMMU_PTE_PRESENT(*pte)) + return NULL; + + /* Large PTE */ + if (PM_PTE_LEVEL(*pte) == 7 || + PM_PTE_LEVEL(*pte) == 0) + break; + + /* No level skipping support yet */ + if (PM_PTE_LEVEL(*pte) != level) + return NULL; + + level -= 1; + + /* Walk to the next level */ + pte = IOMMU_PTE_PAGE(*pte); + pte = &pte[PM_LEVEL_INDEX(level, address)]; + *page_size = PTE_LEVEL_PAGE_SIZE(level); + } + + /* + * If we have a series of large PTEs, make + * sure to return a pointer to the first one. + */ + if (PM_PTE_LEVEL(*pte) == PAGE_MODE_7_LEVEL) + pte = first_pte_l7(pte, page_size, NULL); + + return pte; +} + +static struct page *free_clear_pte(u64 *pte, u64 pteval, struct page *freelist) +{ + unsigned long pt; + int mode; + + while (cmpxchg64(pte, pteval, 0) != pteval) { + pr_warn("AMD-Vi: IOMMU pte changed since we read it\n"); + pteval = *pte; + } + + if (!IOMMU_PTE_PRESENT(pteval)) + return freelist; + + pt = (unsigned long)IOMMU_PTE_PAGE(pteval); + mode = IOMMU_PTE_MODE(pteval); + + return free_sub_pt(pt, mode, freelist); +} + +/* + * Generic mapping functions. It maps a physical address into a DMA + * address space. It allocates the page table pages if necessary. + * In the future it can be extended to a generic mapping function + * supporting all features of AMD IOMMU page tables like level skipping + * and full 64 bit address spaces. + */ +int iommu_map_page(struct protection_domain *dom, + unsigned long bus_addr, + unsigned long phys_addr, + unsigned long page_size, + int prot, + gfp_t gfp) +{ + struct page *freelist = NULL; + bool updated = false; + u64 __pte, *pte; + int ret, i, count; + + BUG_ON(!IS_ALIGNED(bus_addr, page_size)); + BUG_ON(!IS_ALIGNED(phys_addr, page_size)); + + ret = -EINVAL; + if (!(prot & IOMMU_PROT_MASK)) + goto out; + + count = PAGE_SIZE_PTE_COUNT(page_size); + pte = alloc_pte(dom, bus_addr, page_size, NULL, gfp, &updated); + + ret = -ENOMEM; + if (!pte) + goto out; + + for (i = 0; i < count; ++i) + freelist = free_clear_pte(&pte[i], pte[i], freelist); + + if (freelist != NULL) + updated = true; + + if (count > 1) { + __pte = PAGE_SIZE_PTE(__sme_set(phys_addr), page_size); + __pte |= PM_LEVEL_ENC(7) | IOMMU_PTE_PR | IOMMU_PTE_FC; + } else + __pte = __sme_set(phys_addr) | IOMMU_PTE_PR | IOMMU_PTE_FC; + + if (prot & IOMMU_PROT_IR) + __pte |= IOMMU_PTE_IR; + if (prot & IOMMU_PROT_IW) + __pte |= IOMMU_PTE_IW; + + for (i = 0; i < count; ++i) + pte[i] = __pte; + + ret = 0; + +out: + if (updated) { + unsigned long flags; + + spin_lock_irqsave(&dom->lock, flags); + /* + * Flush domain TLB(s) and wait for completion. Any Device-Table + * Updates and flushing already happened in + * increase_address_space(). + */ + amd_iommu_domain_flush_tlb_pde(dom); + amd_iommu_domain_flush_complete(dom); + spin_unlock_irqrestore(&dom->lock, flags); + } + + /* Everything flushed out, free pages now */ + free_page_list(freelist); + + return ret; +} + +unsigned long iommu_unmap_page(struct protection_domain *dom, + unsigned long bus_addr, + unsigned long page_size) +{ + unsigned long long unmapped; + unsigned long unmap_size; + u64 *pte; + + BUG_ON(!is_power_of_2(page_size)); + + unmapped = 0; + + while (unmapped < page_size) { + + pte = fetch_pte(dom, bus_addr, &unmap_size); + + if (pte) { + int i, count; + + count = PAGE_SIZE_PTE_COUNT(unmap_size); + for (i = 0; i < count; i++) + pte[i] = 0ULL; + } + + bus_addr = (bus_addr & ~(unmap_size - 1)) + unmap_size; + unmapped += unmap_size; + } + + BUG_ON(unmapped && !is_power_of_2(unmapped)); + + return unmapped; +} + /* * ---------------------------------------------------- */ diff --git a/drivers/iommu/amd/iommu.c b/drivers/iommu/amd/iommu.c index 76e69c510db8..eda8e196fb14 100644 --- a/drivers/iommu/amd/iommu.c +++ b/drivers/iommu/amd/iommu.c @@ -138,8 +138,8 @@ static struct protection_domain *to_pdomain(struct iommu_domain *dom) return container_of(dom, struct protection_domain, domain); } -static void amd_iommu_domain_get_pgtable(struct protection_domain *domain, - struct domain_pgtable *pgtable) +void amd_iommu_domain_get_pgtable(struct protection_domain *domain, + struct domain_pgtable *pgtable) { u64 pt_root = atomic64_read(&domain->iop.pt_root); @@ -147,18 +147,6 @@ static void amd_iommu_domain_get_pgtable(struct protection_domain *domain, pgtable->mode = pt_root & 7; /* lowest 3 bits encode pgtable mode */ } -static void amd_iommu_domain_set_pgtable(struct protection_domain *domain, - u64 *root, int mode) -{ - u64 pt_root; - - /* lowest 3 bits encode pgtable mode */ - pt_root = mode & 7; - pt_root |= (u64)root; - - amd_iommu_domain_set_pt_root(domain, pt_root); -} - static struct iommu_dev_data *alloc_dev_data(u16 devid) { struct iommu_dev_data *dev_data; @@ -414,29 +402,6 @@ static void amd_iommu_uninit_device(struct device *dev) */ } -/* - * Helper function to get the first pte of a large mapping - */ -static u64 *first_pte_l7(u64 *pte, unsigned long *page_size, - unsigned long *count) -{ - unsigned long pte_mask, pg_size, cnt; - u64 *fpte; - - pg_size = PTE_PAGE_SIZE(*pte); - cnt = PAGE_SIZE_PTE_COUNT(pg_size); - pte_mask = ~((cnt << 3) - 1); - fpte = (u64 *)(((unsigned long)pte) & pte_mask); - - if (page_size) - *page_size = pg_size; - - if (count) - *count = cnt; - - return fpte; -} - /**************************************************************************** * * Interrupt handling functions @@ -1359,443 +1324,6 @@ static void domain_flush_devices(struct protection_domain *domain) device_flush_dte(dev_data); } -/**************************************************************************** - * - * The functions below are used the create the page table mappings for - * unity mapped regions. - * - ****************************************************************************/ - -static void free_page_list(struct page *freelist) -{ - while (freelist != NULL) { - unsigned long p = (unsigned long)page_address(freelist); - freelist = freelist->freelist; - free_page(p); - } -} - -static struct page *free_pt_page(unsigned long pt, struct page *freelist) -{ - struct page *p = virt_to_page((void *)pt); - - p->freelist = freelist; - - return p; -} - -#define DEFINE_FREE_PT_FN(LVL, FN) \ -static struct page *free_pt_##LVL (unsigned long __pt, struct page *freelist) \ -{ \ - unsigned long p; \ - u64 *pt; \ - int i; \ - \ - pt = (u64 *)__pt; \ - \ - for (i = 0; i < 512; ++i) { \ - /* PTE present? */ \ - if (!IOMMU_PTE_PRESENT(pt[i])) \ - continue; \ - \ - /* Large PTE? */ \ - if (PM_PTE_LEVEL(pt[i]) == 0 || \ - PM_PTE_LEVEL(pt[i]) == 7) \ - continue; \ - \ - p = (unsigned long)IOMMU_PTE_PAGE(pt[i]); \ - freelist = FN(p, freelist); \ - } \ - \ - return free_pt_page((unsigned long)pt, freelist); \ -} - -DEFINE_FREE_PT_FN(l2, free_pt_page) -DEFINE_FREE_PT_FN(l3, free_pt_l2) -DEFINE_FREE_PT_FN(l4, free_pt_l3) -DEFINE_FREE_PT_FN(l5, free_pt_l4) -DEFINE_FREE_PT_FN(l6, free_pt_l5) - -static struct page *free_sub_pt(unsigned long root, int mode, - struct page *freelist) -{ - switch (mode) { - case PAGE_MODE_NONE: - case PAGE_MODE_7_LEVEL: - break; - case PAGE_MODE_1_LEVEL: - freelist = free_pt_page(root, freelist); - break; - case PAGE_MODE_2_LEVEL: - freelist = free_pt_l2(root, freelist); - break; - case PAGE_MODE_3_LEVEL: - freelist = free_pt_l3(root, freelist); - break; - case PAGE_MODE_4_LEVEL: - freelist = free_pt_l4(root, freelist); - break; - case PAGE_MODE_5_LEVEL: - freelist = free_pt_l5(root, freelist); - break; - case PAGE_MODE_6_LEVEL: - freelist = free_pt_l6(root, freelist); - break; - default: - BUG(); - } - - return freelist; -} - -static void free_pagetable(struct domain_pgtable *pgtable) -{ - struct page *freelist = NULL; - unsigned long root; - - if (pgtable->mode == PAGE_MODE_NONE) - return; - - BUG_ON(pgtable->mode < PAGE_MODE_NONE || - pgtable->mode > PAGE_MODE_6_LEVEL); - - root = (unsigned long)pgtable->root; - freelist = free_sub_pt(root, pgtable->mode, freelist); - - free_page_list(freelist); -} - -/* - * This function is used to add another level to an IO page table. Adding - * another level increases the size of the address space by 9 bits to a size up - * to 64 bits. - */ -static bool increase_address_space(struct protection_domain *domain, - unsigned long address, - gfp_t gfp) -{ - struct domain_pgtable pgtable; - unsigned long flags; - bool ret = true; - u64 *pte; - - spin_lock_irqsave(&domain->lock, flags); - - amd_iommu_domain_get_pgtable(domain, &pgtable); - - if (address <= PM_LEVEL_SIZE(pgtable.mode)) - goto out; - - ret = false; - if (WARN_ON_ONCE(pgtable.mode == PAGE_MODE_6_LEVEL)) - goto out; - - pte = (void *)get_zeroed_page(gfp); - if (!pte) - goto out; - - *pte = PM_LEVEL_PDE(pgtable.mode, iommu_virt_to_phys(pgtable.root)); - - pgtable.root = pte; - pgtable.mode += 1; - amd_iommu_update_and_flush_device_table(domain); - amd_iommu_domain_flush_complete(domain); - - /* - * Device Table needs to be updated and flushed before the new root can - * be published. - */ - amd_iommu_domain_set_pgtable(domain, pte, pgtable.mode); - - ret = true; - -out: - spin_unlock_irqrestore(&domain->lock, flags); - - return ret; -} - -static u64 *alloc_pte(struct protection_domain *domain, - unsigned long address, - unsigned long page_size, - u64 **pte_page, - gfp_t gfp, - bool *updated) -{ - struct domain_pgtable pgtable; - int level, end_lvl; - u64 *pte, *page; - - BUG_ON(!is_power_of_2(page_size)); - - amd_iommu_domain_get_pgtable(domain, &pgtable); - - while (address > PM_LEVEL_SIZE(pgtable.mode)) { - /* - * Return an error if there is no memory to update the - * page-table. - */ - if (!increase_address_space(domain, address, gfp)) - return NULL; - - /* Read new values to check if update was successful */ - amd_iommu_domain_get_pgtable(domain, &pgtable); - } - - - level = pgtable.mode - 1; - pte = &pgtable.root[PM_LEVEL_INDEX(level, address)]; - address = PAGE_SIZE_ALIGN(address, page_size); - end_lvl = PAGE_SIZE_LEVEL(page_size); - - while (level > end_lvl) { - u64 __pte, __npte; - int pte_level; - - __pte = *pte; - pte_level = PM_PTE_LEVEL(__pte); - - /* - * If we replace a series of large PTEs, we need - * to tear down all of them. - */ - if (IOMMU_PTE_PRESENT(__pte) && - pte_level == PAGE_MODE_7_LEVEL) { - unsigned long count, i; - u64 *lpte; - - lpte = first_pte_l7(pte, NULL, &count); - - /* - * Unmap the replicated PTEs that still match the - * original large mapping - */ - for (i = 0; i < count; ++i) - cmpxchg64(&lpte[i], __pte, 0ULL); - - *updated = true; - continue; - } - - if (!IOMMU_PTE_PRESENT(__pte) || - pte_level == PAGE_MODE_NONE) { - page = (u64 *)get_zeroed_page(gfp); - - if (!page) - return NULL; - - __npte = PM_LEVEL_PDE(level, iommu_virt_to_phys(page)); - - /* pte could have been changed somewhere. */ - if (cmpxchg64(pte, __pte, __npte) != __pte) - free_page((unsigned long)page); - else if (IOMMU_PTE_PRESENT(__pte)) - *updated = true; - - continue; - } - - /* No level skipping support yet */ - if (pte_level != level) - return NULL; - - level -= 1; - - pte = IOMMU_PTE_PAGE(__pte); - - if (pte_page && level == end_lvl) - *pte_page = pte; - - pte = &pte[PM_LEVEL_INDEX(level, address)]; - } - - return pte; -} - -/* - * This function checks if there is a PTE for a given dma address. If - * there is one, it returns the pointer to it. - */ -static u64 *fetch_pte(struct protection_domain *domain, - unsigned long address, - unsigned long *page_size) -{ - struct domain_pgtable pgtable; - int level; - u64 *pte; - - *page_size = 0; - - amd_iommu_domain_get_pgtable(domain, &pgtable); - - if (address > PM_LEVEL_SIZE(pgtable.mode)) - return NULL; - - level = pgtable.mode - 1; - pte = &pgtable.root[PM_LEVEL_INDEX(level, address)]; - *page_size = PTE_LEVEL_PAGE_SIZE(level); - - while (level > 0) { - - /* Not Present */ - if (!IOMMU_PTE_PRESENT(*pte)) - return NULL; - - /* Large PTE */ - if (PM_PTE_LEVEL(*pte) == 7 || - PM_PTE_LEVEL(*pte) == 0) - break; - - /* No level skipping support yet */ - if (PM_PTE_LEVEL(*pte) != level) - return NULL; - - level -= 1; - - /* Walk to the next level */ - pte = IOMMU_PTE_PAGE(*pte); - pte = &pte[PM_LEVEL_INDEX(level, address)]; - *page_size = PTE_LEVEL_PAGE_SIZE(level); - } - - /* - * If we have a series of large PTEs, make - * sure to return a pointer to the first one. - */ - if (PM_PTE_LEVEL(*pte) == PAGE_MODE_7_LEVEL) - pte = first_pte_l7(pte, page_size, NULL); - - return pte; -} - -static struct page *free_clear_pte(u64 *pte, u64 pteval, struct page *freelist) -{ - unsigned long pt; - int mode; - - while (cmpxchg64(pte, pteval, 0) != pteval) { - pr_warn("AMD-Vi: IOMMU pte changed since we read it\n"); - pteval = *pte; - } - - if (!IOMMU_PTE_PRESENT(pteval)) - return freelist; - - pt = (unsigned long)IOMMU_PTE_PAGE(pteval); - mode = IOMMU_PTE_MODE(pteval); - - return free_sub_pt(pt, mode, freelist); -} - -/* - * Generic mapping functions. It maps a physical address into a DMA - * address space. It allocates the page table pages if necessary. - * In the future it can be extended to a generic mapping function - * supporting all features of AMD IOMMU page tables like level skipping - * and full 64 bit address spaces. - */ -static int iommu_map_page(struct protection_domain *dom, - unsigned long bus_addr, - unsigned long phys_addr, - unsigned long page_size, - int prot, - gfp_t gfp) -{ - struct page *freelist = NULL; - bool updated = false; - u64 __pte, *pte; - int ret, i, count; - - BUG_ON(!IS_ALIGNED(bus_addr, page_size)); - BUG_ON(!IS_ALIGNED(phys_addr, page_size)); - - ret = -EINVAL; - if (!(prot & IOMMU_PROT_MASK)) - goto out; - - count = PAGE_SIZE_PTE_COUNT(page_size); - pte = alloc_pte(dom, bus_addr, page_size, NULL, gfp, &updated); - - ret = -ENOMEM; - if (!pte) - goto out; - - for (i = 0; i < count; ++i) - freelist = free_clear_pte(&pte[i], pte[i], freelist); - - if (freelist != NULL) - updated = true; - - if (count > 1) { - __pte = PAGE_SIZE_PTE(__sme_set(phys_addr), page_size); - __pte |= PM_LEVEL_ENC(7) | IOMMU_PTE_PR | IOMMU_PTE_FC; - } else - __pte = __sme_set(phys_addr) | IOMMU_PTE_PR | IOMMU_PTE_FC; - - if (prot & IOMMU_PROT_IR) - __pte |= IOMMU_PTE_IR; - if (prot & IOMMU_PROT_IW) - __pte |= IOMMU_PTE_IW; - - for (i = 0; i < count; ++i) - pte[i] = __pte; - - ret = 0; - -out: - if (updated) { - unsigned long flags; - - spin_lock_irqsave(&dom->lock, flags); - /* - * Flush domain TLB(s) and wait for completion. Any Device-Table - * Updates and flushing already happened in - * increase_address_space(). - */ - amd_iommu_domain_flush_tlb_pde(dom); - amd_iommu_domain_flush_complete(dom); - spin_unlock_irqrestore(&dom->lock, flags); - } - - /* Everything flushed out, free pages now */ - free_page_list(freelist); - - return ret; -} - -static unsigned long iommu_unmap_page(struct protection_domain *dom, - unsigned long bus_addr, - unsigned long page_size) -{ - unsigned long long unmapped; - unsigned long unmap_size; - u64 *pte; - - BUG_ON(!is_power_of_2(page_size)); - - unmapped = 0; - - while (unmapped < page_size) { - - pte = fetch_pte(dom, bus_addr, &unmap_size); - - if (pte) { - int i, count; - - count = PAGE_SIZE_PTE_COUNT(unmap_size); - for (i = 0; i < count; i++) - pte[i] = 0ULL; - } - - bus_addr = (bus_addr & ~(unmap_size - 1)) + unmap_size; - unmapped += unmap_size; - } - - BUG_ON(unmapped && !is_power_of_2(unmapped)); - - return unmapped; -} - /**************************************************************************** * * The next functions belong to the domain allocation. A domain is From e42ba0633064ef23eb1c8c21edf96bac1541bd4b Mon Sep 17 00:00:00 2001 From: Suravee Suthikulpanit Date: Tue, 15 Dec 2020 01:36:59 -0600 Subject: [PATCH 193/414] iommu/amd: Restructure code for freeing page table By consolidate logic into v1_free_pgtable helper function, which is called from IO page table framework. Signed-off-by: Suravee Suthikulpanit Link: https://lore.kernel.org/r/20201215073705.123786-8-suravee.suthikulpanit@amd.com Signed-off-by: Joerg Roedel --- drivers/iommu/amd/amd_iommu.h | 1 - drivers/iommu/amd/io_pgtable.c | 41 ++++++++++++++++++++-------------- drivers/iommu/amd/iommu.c | 21 ++++------------- 3 files changed, 28 insertions(+), 35 deletions(-) diff --git a/drivers/iommu/amd/amd_iommu.h b/drivers/iommu/amd/amd_iommu.h index 1bad42a3c73c..91d098003f12 100644 --- a/drivers/iommu/amd/amd_iommu.h +++ b/drivers/iommu/amd/amd_iommu.h @@ -132,7 +132,6 @@ static inline void amd_iommu_apply_ivrs_quirks(void) { } #endif /* TODO: These are temporary and will be removed once fully transition */ -extern void free_pagetable(struct domain_pgtable *pgtable); extern int iommu_map_page(struct protection_domain *dom, unsigned long bus_addr, unsigned long phys_addr, diff --git a/drivers/iommu/amd/io_pgtable.c b/drivers/iommu/amd/io_pgtable.c index dbc0ad775d5e..a8fea06efb4f 100644 --- a/drivers/iommu/amd/io_pgtable.c +++ b/drivers/iommu/amd/io_pgtable.c @@ -157,23 +157,6 @@ static struct page *free_sub_pt(unsigned long root, int mode, return freelist; } -void free_pagetable(struct domain_pgtable *pgtable) -{ - struct page *freelist = NULL; - unsigned long root; - - if (pgtable->mode == PAGE_MODE_NONE) - return; - - BUG_ON(pgtable->mode < PAGE_MODE_NONE || - pgtable->mode > PAGE_MODE_6_LEVEL); - - root = (unsigned long)pgtable->root; - freelist = free_sub_pt(root, pgtable->mode, freelist); - - free_page_list(freelist); -} - void amd_iommu_domain_set_pgtable(struct protection_domain *domain, u64 *root, int mode) { @@ -522,6 +505,30 @@ unsigned long iommu_unmap_page(struct protection_domain *dom, */ static void v1_free_pgtable(struct io_pgtable *iop) { + struct amd_io_pgtable *pgtable = container_of(iop, struct amd_io_pgtable, iop); + struct protection_domain *dom; + struct page *freelist = NULL; + unsigned long root; + + if (pgtable->mode == PAGE_MODE_NONE) + return; + + dom = container_of(pgtable, struct protection_domain, iop); + + /* Update data structure */ + amd_iommu_domain_clr_pt_root(dom); + + /* Make changes visible to IOMMUs */ + amd_iommu_domain_update(dom); + + /* Page-table is not visible to IOMMU anymore, so free it */ + BUG_ON(pgtable->mode < PAGE_MODE_NONE || + pgtable->mode > PAGE_MODE_6_LEVEL); + + root = (unsigned long)pgtable->root; + freelist = free_sub_pt(root, pgtable->mode, freelist); + + free_page_list(freelist); } static struct io_pgtable *v1_alloc_pgtable(struct io_pgtable_cfg *cfg, void *cookie) diff --git a/drivers/iommu/amd/iommu.c b/drivers/iommu/amd/iommu.c index eda8e196fb14..d02aeb17fd81 100644 --- a/drivers/iommu/amd/iommu.c +++ b/drivers/iommu/amd/iommu.c @@ -1902,17 +1902,14 @@ static void cleanup_domain(struct protection_domain *domain) static void protection_domain_free(struct protection_domain *domain) { - struct domain_pgtable pgtable; - if (!domain) return; if (domain->id) domain_id_free(domain->id); - amd_iommu_domain_get_pgtable(domain, &pgtable); - amd_iommu_domain_clr_pt_root(domain); - free_pagetable(&pgtable); + if (domain->iop.pgtbl_cfg.tlb) + free_io_pgtable_ops(&domain->iop.iop.ops); kfree(domain); } @@ -2301,22 +2298,12 @@ EXPORT_SYMBOL(amd_iommu_unregister_ppr_notifier); void amd_iommu_domain_direct_map(struct iommu_domain *dom) { struct protection_domain *domain = to_pdomain(dom); - struct domain_pgtable pgtable; unsigned long flags; spin_lock_irqsave(&domain->lock, flags); - /* First save pgtable configuration*/ - amd_iommu_domain_get_pgtable(domain, &pgtable); - - /* Remove page-table from domain */ - amd_iommu_domain_clr_pt_root(domain); - - /* Make changes visible to IOMMUs */ - amd_iommu_domain_update(domain); - - /* Page-table is not visible to IOMMU anymore, so free it */ - free_pagetable(&pgtable); + if (domain->iop.pgtbl_cfg.tlb) + free_io_pgtable_ops(&domain->iop.iop.ops); spin_unlock_irqrestore(&domain->lock, flags); } From 6eedb59c18a3be2b670e0adc68e46d22ebd42823 Mon Sep 17 00:00:00 2001 From: Suravee Suthikulpanit Date: Tue, 15 Dec 2020 01:37:00 -0600 Subject: [PATCH 194/414] iommu/amd: Remove amd_iommu_domain_get_pgtable Since the IO page table root and mode parameters have been moved into the struct amd_io_pg, the function is no longer needed. Therefore, remove it along with the struct domain_pgtable. Signed-off-by: Suravee Suthikulpanit Link: https://lore.kernel.org/r/20201215073705.123786-9-suravee.suthikulpanit@amd.com Signed-off-by: Joerg Roedel --- drivers/iommu/amd/amd_iommu.h | 4 ++-- drivers/iommu/amd/amd_iommu_types.h | 6 ----- drivers/iommu/amd/io_pgtable.c | 36 ++++++++++------------------- drivers/iommu/amd/iommu.c | 34 ++++----------------------- 4 files changed, 19 insertions(+), 61 deletions(-) diff --git a/drivers/iommu/amd/amd_iommu.h b/drivers/iommu/amd/amd_iommu.h index 91d098003f12..76276d9e463c 100644 --- a/drivers/iommu/amd/amd_iommu.h +++ b/drivers/iommu/amd/amd_iommu.h @@ -110,6 +110,8 @@ static inline void amd_iommu_domain_set_pt_root(struct protection_domain *domain, u64 root) { atomic64_set(&domain->iop.pt_root, root); + domain->iop.root = (u64 *)(root & PAGE_MASK); + domain->iop.mode = root & 7; /* lowest 3 bits encode pgtable mode */ } static inline @@ -144,8 +146,6 @@ extern unsigned long iommu_unmap_page(struct protection_domain *dom, extern u64 *fetch_pte(struct protection_domain *domain, unsigned long address, unsigned long *page_size); -extern void amd_iommu_domain_get_pgtable(struct protection_domain *domain, - struct domain_pgtable *pgtable); extern void amd_iommu_domain_set_pgtable(struct protection_domain *domain, u64 *root, int mode); #endif diff --git a/drivers/iommu/amd/amd_iommu_types.h b/drivers/iommu/amd/amd_iommu_types.h index a50f117879f2..8915c832448f 100644 --- a/drivers/iommu/amd/amd_iommu_types.h +++ b/drivers/iommu/amd/amd_iommu_types.h @@ -519,12 +519,6 @@ struct protection_domain { unsigned dev_iommu[MAX_IOMMUS]; /* per-IOMMU reference count */ }; -/* For decocded pt_root */ -struct domain_pgtable { - int mode; - u64 *root; -}; - /* * Structure where we save information about one hardware AMD IOMMU in the * system. diff --git a/drivers/iommu/amd/io_pgtable.c b/drivers/iommu/amd/io_pgtable.c index a8fea06efb4f..bd8c65a39b1c 100644 --- a/drivers/iommu/amd/io_pgtable.c +++ b/drivers/iommu/amd/io_pgtable.c @@ -178,30 +178,27 @@ static bool increase_address_space(struct protection_domain *domain, unsigned long address, gfp_t gfp) { - struct domain_pgtable pgtable; unsigned long flags; bool ret = true; u64 *pte; spin_lock_irqsave(&domain->lock, flags); - amd_iommu_domain_get_pgtable(domain, &pgtable); - - if (address <= PM_LEVEL_SIZE(pgtable.mode)) + if (address <= PM_LEVEL_SIZE(domain->iop.mode)) goto out; ret = false; - if (WARN_ON_ONCE(pgtable.mode == PAGE_MODE_6_LEVEL)) + if (WARN_ON_ONCE(domain->iop.mode == PAGE_MODE_6_LEVEL)) goto out; pte = (void *)get_zeroed_page(gfp); if (!pte) goto out; - *pte = PM_LEVEL_PDE(pgtable.mode, iommu_virt_to_phys(pgtable.root)); + *pte = PM_LEVEL_PDE(domain->iop.mode, iommu_virt_to_phys(domain->iop.root)); - pgtable.root = pte; - pgtable.mode += 1; + domain->iop.root = pte; + domain->iop.mode += 1; amd_iommu_update_and_flush_device_table(domain); amd_iommu_domain_flush_complete(domain); @@ -209,7 +206,7 @@ static bool increase_address_space(struct protection_domain *domain, * Device Table needs to be updated and flushed before the new root can * be published. */ - amd_iommu_domain_set_pgtable(domain, pte, pgtable.mode); + amd_iommu_domain_set_pgtable(domain, pte, domain->iop.mode); ret = true; @@ -226,29 +223,23 @@ static u64 *alloc_pte(struct protection_domain *domain, gfp_t gfp, bool *updated) { - struct domain_pgtable pgtable; int level, end_lvl; u64 *pte, *page; BUG_ON(!is_power_of_2(page_size)); - amd_iommu_domain_get_pgtable(domain, &pgtable); - - while (address > PM_LEVEL_SIZE(pgtable.mode)) { + while (address > PM_LEVEL_SIZE(domain->iop.mode)) { /* * Return an error if there is no memory to update the * page-table. */ if (!increase_address_space(domain, address, gfp)) return NULL; - - /* Read new values to check if update was successful */ - amd_iommu_domain_get_pgtable(domain, &pgtable); } - level = pgtable.mode - 1; - pte = &pgtable.root[PM_LEVEL_INDEX(level, address)]; + level = domain->iop.mode - 1; + pte = &domain->iop.root[PM_LEVEL_INDEX(level, address)]; address = PAGE_SIZE_ALIGN(address, page_size); end_lvl = PAGE_SIZE_LEVEL(page_size); @@ -324,19 +315,16 @@ u64 *fetch_pte(struct protection_domain *domain, unsigned long address, unsigned long *page_size) { - struct domain_pgtable pgtable; int level; u64 *pte; *page_size = 0; - amd_iommu_domain_get_pgtable(domain, &pgtable); - - if (address > PM_LEVEL_SIZE(pgtable.mode)) + if (address > PM_LEVEL_SIZE(domain->iop.mode)) return NULL; - level = pgtable.mode - 1; - pte = &pgtable.root[PM_LEVEL_INDEX(level, address)]; + level = domain->iop.mode - 1; + pte = &domain->iop.root[PM_LEVEL_INDEX(level, address)]; *page_size = PTE_LEVEL_PAGE_SIZE(level); while (level > 0) { diff --git a/drivers/iommu/amd/iommu.c b/drivers/iommu/amd/iommu.c index d02aeb17fd81..bba3d1802b50 100644 --- a/drivers/iommu/amd/iommu.c +++ b/drivers/iommu/amd/iommu.c @@ -138,15 +138,6 @@ static struct protection_domain *to_pdomain(struct iommu_domain *dom) return container_of(dom, struct protection_domain, domain); } -void amd_iommu_domain_get_pgtable(struct protection_domain *domain, - struct domain_pgtable *pgtable) -{ - u64 pt_root = atomic64_read(&domain->iop.pt_root); - - pgtable->root = (u64 *)(pt_root & PAGE_MASK); - pgtable->mode = pt_root & 7; /* lowest 3 bits encode pgtable mode */ -} - static struct iommu_dev_data *alloc_dev_data(u16 devid) { struct iommu_dev_data *dev_data; @@ -1483,7 +1474,6 @@ static void clear_dte_entry(u16 devid) static void do_attach(struct iommu_dev_data *dev_data, struct protection_domain *domain) { - struct domain_pgtable pgtable; struct amd_iommu *iommu; bool ats; @@ -1499,7 +1489,6 @@ static void do_attach(struct iommu_dev_data *dev_data, domain->dev_cnt += 1; /* Update device table */ - amd_iommu_domain_get_pgtable(domain, &pgtable); set_dte_entry(dev_data->devid, domain, ats, dev_data->iommu_v2); clone_aliases(dev_data->pdev); @@ -1826,10 +1815,7 @@ void amd_iommu_update_and_flush_device_table(struct protection_domain *domain) void amd_iommu_domain_update(struct protection_domain *domain) { - struct domain_pgtable pgtable; - /* Update device table */ - amd_iommu_domain_get_pgtable(domain, &pgtable); amd_iommu_update_and_flush_device_table(domain); /* Flush domain TLB(s) and wait for completion */ @@ -2079,12 +2065,10 @@ static int amd_iommu_map(struct iommu_domain *dom, unsigned long iova, gfp_t gfp) { struct protection_domain *domain = to_pdomain(dom); - struct domain_pgtable pgtable; int prot = 0; int ret; - amd_iommu_domain_get_pgtable(domain, &pgtable); - if (pgtable.mode == PAGE_MODE_NONE) + if (domain->iop.mode == PAGE_MODE_NONE) return -EINVAL; if (iommu_prot & IOMMU_READ) @@ -2104,10 +2088,8 @@ static size_t amd_iommu_unmap(struct iommu_domain *dom, unsigned long iova, struct iommu_iotlb_gather *gather) { struct protection_domain *domain = to_pdomain(dom); - struct domain_pgtable pgtable; - amd_iommu_domain_get_pgtable(domain, &pgtable); - if (pgtable.mode == PAGE_MODE_NONE) + if (domain->iop.mode == PAGE_MODE_NONE) return 0; return iommu_unmap_page(domain, iova, page_size); @@ -2118,11 +2100,9 @@ static phys_addr_t amd_iommu_iova_to_phys(struct iommu_domain *dom, { struct protection_domain *domain = to_pdomain(dom); unsigned long offset_mask, pte_pgsize; - struct domain_pgtable pgtable; u64 *pte, __pte; - amd_iommu_domain_get_pgtable(domain, &pgtable); - if (pgtable.mode == PAGE_MODE_NONE) + if (domain->iop.mode == PAGE_MODE_NONE) return iova; pte = fetch_pte(domain, iova, &pte_pgsize); @@ -2492,11 +2472,9 @@ static u64 *__get_gcr3_pte(u64 *root, int level, u32 pasid, bool alloc) static int __set_gcr3(struct protection_domain *domain, u32 pasid, unsigned long cr3) { - struct domain_pgtable pgtable; u64 *pte; - amd_iommu_domain_get_pgtable(domain, &pgtable); - if (pgtable.mode != PAGE_MODE_NONE) + if (domain->iop.mode != PAGE_MODE_NONE) return -EINVAL; pte = __get_gcr3_pte(domain->gcr3_tbl, domain->glx, pasid, true); @@ -2510,11 +2488,9 @@ static int __set_gcr3(struct protection_domain *domain, u32 pasid, static int __clear_gcr3(struct protection_domain *domain, u32 pasid) { - struct domain_pgtable pgtable; u64 *pte; - amd_iommu_domain_get_pgtable(domain, &pgtable); - if (pgtable.mode != PAGE_MODE_NONE) + if (domain->iop.mode != PAGE_MODE_NONE) return -EINVAL; pte = __get_gcr3_pte(domain->gcr3_tbl, domain->glx, pasid, false); From 33aef9786046d9a5744cd1e8d5d0ce800d611fdc Mon Sep 17 00:00:00 2001 From: Suravee Suthikulpanit Date: Tue, 15 Dec 2020 01:37:01 -0600 Subject: [PATCH 195/414] iommu/amd: Rename variables to be consistent with struct io_pgtable_ops There is no functional change. Signed-off-by: Suravee Suthikulpanit Link: https://lore.kernel.org/r/20201215073705.123786-10-suravee.suthikulpanit@amd.com Signed-off-by: Joerg Roedel --- drivers/iommu/amd/io_pgtable.c | 31 +++++++++++++++---------------- 1 file changed, 15 insertions(+), 16 deletions(-) diff --git a/drivers/iommu/amd/io_pgtable.c b/drivers/iommu/amd/io_pgtable.c index bd8c65a39b1c..af6b7f11ebc3 100644 --- a/drivers/iommu/amd/io_pgtable.c +++ b/drivers/iommu/amd/io_pgtable.c @@ -387,9 +387,9 @@ static struct page *free_clear_pte(u64 *pte, u64 pteval, struct page *freelist) * and full 64 bit address spaces. */ int iommu_map_page(struct protection_domain *dom, - unsigned long bus_addr, - unsigned long phys_addr, - unsigned long page_size, + unsigned long iova, + unsigned long paddr, + unsigned long size, int prot, gfp_t gfp) { @@ -398,15 +398,15 @@ int iommu_map_page(struct protection_domain *dom, u64 __pte, *pte; int ret, i, count; - BUG_ON(!IS_ALIGNED(bus_addr, page_size)); - BUG_ON(!IS_ALIGNED(phys_addr, page_size)); + BUG_ON(!IS_ALIGNED(iova, size)); + BUG_ON(!IS_ALIGNED(paddr, size)); ret = -EINVAL; if (!(prot & IOMMU_PROT_MASK)) goto out; - count = PAGE_SIZE_PTE_COUNT(page_size); - pte = alloc_pte(dom, bus_addr, page_size, NULL, gfp, &updated); + count = PAGE_SIZE_PTE_COUNT(size); + pte = alloc_pte(dom, iova, size, NULL, gfp, &updated); ret = -ENOMEM; if (!pte) @@ -419,10 +419,10 @@ int iommu_map_page(struct protection_domain *dom, updated = true; if (count > 1) { - __pte = PAGE_SIZE_PTE(__sme_set(phys_addr), page_size); + __pte = PAGE_SIZE_PTE(__sme_set(paddr), size); __pte |= PM_LEVEL_ENC(7) | IOMMU_PTE_PR | IOMMU_PTE_FC; } else - __pte = __sme_set(phys_addr) | IOMMU_PTE_PR | IOMMU_PTE_FC; + __pte = __sme_set(paddr) | IOMMU_PTE_PR | IOMMU_PTE_FC; if (prot & IOMMU_PROT_IR) __pte |= IOMMU_PTE_IR; @@ -456,20 +456,19 @@ int iommu_map_page(struct protection_domain *dom, } unsigned long iommu_unmap_page(struct protection_domain *dom, - unsigned long bus_addr, - unsigned long page_size) + unsigned long iova, + unsigned long size) { unsigned long long unmapped; unsigned long unmap_size; u64 *pte; - BUG_ON(!is_power_of_2(page_size)); + BUG_ON(!is_power_of_2(size)); unmapped = 0; - while (unmapped < page_size) { - - pte = fetch_pte(dom, bus_addr, &unmap_size); + while (unmapped < size) { + pte = fetch_pte(dom, iova, &unmap_size); if (pte) { int i, count; @@ -479,7 +478,7 @@ unsigned long iommu_unmap_page(struct protection_domain *dom, pte[i] = 0ULL; } - bus_addr = (bus_addr & ~(unmap_size - 1)) + unmap_size; + iova = (iova & ~(unmap_size - 1)) + unmap_size; unmapped += unmap_size; } From 0633bbcc1eedb7015554254d54e14602b1d8b989 Mon Sep 17 00:00:00 2001 From: Suravee Suthikulpanit Date: Tue, 15 Dec 2020 01:37:02 -0600 Subject: [PATCH 196/414] iommu/amd: Refactor fetch_pte to use struct amd_io_pgtable To simplify the fetch_pte function. There is no functional change. Signed-off-by: Suravee Suthikulpanit Link: https://lore.kernel.org/r/20201215073705.123786-11-suravee.suthikulpanit@amd.com Signed-off-by: Joerg Roedel --- drivers/iommu/amd/amd_iommu.h | 2 +- drivers/iommu/amd/io_pgtable.c | 13 +++++++------ drivers/iommu/amd/iommu.c | 4 +++- 3 files changed, 11 insertions(+), 8 deletions(-) diff --git a/drivers/iommu/amd/amd_iommu.h b/drivers/iommu/amd/amd_iommu.h index 76276d9e463c..83ca822c5349 100644 --- a/drivers/iommu/amd/amd_iommu.h +++ b/drivers/iommu/amd/amd_iommu.h @@ -143,7 +143,7 @@ extern int iommu_map_page(struct protection_domain *dom, extern unsigned long iommu_unmap_page(struct protection_domain *dom, unsigned long bus_addr, unsigned long page_size); -extern u64 *fetch_pte(struct protection_domain *domain, +extern u64 *fetch_pte(struct amd_io_pgtable *pgtable, unsigned long address, unsigned long *page_size); extern void amd_iommu_domain_set_pgtable(struct protection_domain *domain, diff --git a/drivers/iommu/amd/io_pgtable.c b/drivers/iommu/amd/io_pgtable.c index af6b7f11ebc3..d7924eb20178 100644 --- a/drivers/iommu/amd/io_pgtable.c +++ b/drivers/iommu/amd/io_pgtable.c @@ -311,7 +311,7 @@ static u64 *alloc_pte(struct protection_domain *domain, * This function checks if there is a PTE for a given dma address. If * there is one, it returns the pointer to it. */ -u64 *fetch_pte(struct protection_domain *domain, +u64 *fetch_pte(struct amd_io_pgtable *pgtable, unsigned long address, unsigned long *page_size) { @@ -320,11 +320,11 @@ u64 *fetch_pte(struct protection_domain *domain, *page_size = 0; - if (address > PM_LEVEL_SIZE(domain->iop.mode)) + if (address > PM_LEVEL_SIZE(pgtable->mode)) return NULL; - level = domain->iop.mode - 1; - pte = &domain->iop.root[PM_LEVEL_INDEX(level, address)]; + level = pgtable->mode - 1; + pte = &pgtable->root[PM_LEVEL_INDEX(level, address)]; *page_size = PTE_LEVEL_PAGE_SIZE(level); while (level > 0) { @@ -459,6 +459,8 @@ unsigned long iommu_unmap_page(struct protection_domain *dom, unsigned long iova, unsigned long size) { + struct io_pgtable_ops *ops = &dom->iop.iop.ops; + struct amd_io_pgtable *pgtable = io_pgtable_ops_to_data(ops); unsigned long long unmapped; unsigned long unmap_size; u64 *pte; @@ -468,8 +470,7 @@ unsigned long iommu_unmap_page(struct protection_domain *dom, unmapped = 0; while (unmapped < size) { - pte = fetch_pte(dom, iova, &unmap_size); - + pte = fetch_pte(pgtable, iova, &unmap_size); if (pte) { int i, count; diff --git a/drivers/iommu/amd/iommu.c b/drivers/iommu/amd/iommu.c index bba3d1802b50..f1a4f535eac8 100644 --- a/drivers/iommu/amd/iommu.c +++ b/drivers/iommu/amd/iommu.c @@ -2099,13 +2099,15 @@ static phys_addr_t amd_iommu_iova_to_phys(struct iommu_domain *dom, dma_addr_t iova) { struct protection_domain *domain = to_pdomain(dom); + struct io_pgtable_ops *ops = &domain->iop.iop.ops; + struct amd_io_pgtable *pgtable = io_pgtable_ops_to_data(ops); unsigned long offset_mask, pte_pgsize; u64 *pte, __pte; if (domain->iop.mode == PAGE_MODE_NONE) return iova; - pte = fetch_pte(domain, iova, &pte_pgsize); + pte = fetch_pte(pgtable, iova, &pte_pgsize); if (!pte || !IOMMU_PTE_PRESENT(*pte)) return 0; From 441555c63aca3300a0f6cd5948dbf5bd6e7760b3 Mon Sep 17 00:00:00 2001 From: Suravee Suthikulpanit Date: Tue, 15 Dec 2020 01:37:03 -0600 Subject: [PATCH 197/414] iommu/amd: Introduce iommu_v1_iova_to_phys This implements iova_to_phys for AMD IOMMU v1 pagetable, which will be used by the IO page table framework. Signed-off-by: Suravee Suthikulpanit Link: https://lore.kernel.org/r/20201215073705.123786-12-suravee.suthikulpanit@amd.com Signed-off-by: Joerg Roedel --- drivers/iommu/amd/io_pgtable.c | 22 ++++++++++++++++++++++ drivers/iommu/amd/iommu.c | 16 +--------------- 2 files changed, 23 insertions(+), 15 deletions(-) diff --git a/drivers/iommu/amd/io_pgtable.c b/drivers/iommu/amd/io_pgtable.c index d7924eb20178..b70eb79d05fb 100644 --- a/drivers/iommu/amd/io_pgtable.c +++ b/drivers/iommu/amd/io_pgtable.c @@ -488,6 +488,26 @@ unsigned long iommu_unmap_page(struct protection_domain *dom, return unmapped; } +static phys_addr_t iommu_v1_iova_to_phys(struct io_pgtable_ops *ops, unsigned long iova) +{ + struct amd_io_pgtable *pgtable = io_pgtable_ops_to_data(ops); + unsigned long offset_mask, pte_pgsize; + u64 *pte, __pte; + + if (pgtable->mode == PAGE_MODE_NONE) + return iova; + + pte = fetch_pte(pgtable, iova, &pte_pgsize); + + if (!pte || !IOMMU_PTE_PRESENT(*pte)) + return 0; + + offset_mask = pte_pgsize - 1; + __pte = __sme_clr(*pte & PM_ADDR_MASK); + + return (__pte & ~offset_mask) | (iova & offset_mask); +} + /* * ---------------------------------------------------- */ @@ -528,6 +548,8 @@ static struct io_pgtable *v1_alloc_pgtable(struct io_pgtable_cfg *cfg, void *coo cfg->oas = IOMMU_OUT_ADDR_BIT_SIZE, cfg->tlb = &v1_flush_ops; + pgtable->iop.ops.iova_to_phys = iommu_v1_iova_to_phys; + return &pgtable->iop; } diff --git a/drivers/iommu/amd/iommu.c b/drivers/iommu/amd/iommu.c index f1a4f535eac8..144ea91a9ec9 100644 --- a/drivers/iommu/amd/iommu.c +++ b/drivers/iommu/amd/iommu.c @@ -2100,22 +2100,8 @@ static phys_addr_t amd_iommu_iova_to_phys(struct iommu_domain *dom, { struct protection_domain *domain = to_pdomain(dom); struct io_pgtable_ops *ops = &domain->iop.iop.ops; - struct amd_io_pgtable *pgtable = io_pgtable_ops_to_data(ops); - unsigned long offset_mask, pte_pgsize; - u64 *pte, __pte; - if (domain->iop.mode == PAGE_MODE_NONE) - return iova; - - pte = fetch_pte(pgtable, iova, &pte_pgsize); - - if (!pte || !IOMMU_PTE_PRESENT(*pte)) - return 0; - - offset_mask = pte_pgsize - 1; - __pte = __sme_clr(*pte & PM_ADDR_MASK); - - return (__pte & ~offset_mask) | (iova & offset_mask); + return ops->iova_to_phys(ops, iova); } static bool amd_iommu_capable(enum iommu_cap cap) From fd86c9501a9b3e5eb7e46b03f03ffa4bf892b523 Mon Sep 17 00:00:00 2001 From: Suravee Suthikulpanit Date: Tue, 15 Dec 2020 01:37:04 -0600 Subject: [PATCH 198/414] iommu/amd: Introduce iommu_v1_map_page and iommu_v1_unmap_page These implement map and unmap for AMD IOMMU v1 pagetable, which will be used by the IO pagetable framework. Also clean up unused extern function declarations. Signed-off-by: Suravee Suthikulpanit Link: https://lore.kernel.org/r/20201215073705.123786-13-suravee.suthikulpanit@amd.com Signed-off-by: Joerg Roedel --- drivers/iommu/amd/amd_iommu.h | 13 ------------- drivers/iommu/amd/io_pgtable.c | 25 ++++++++++++------------- drivers/iommu/amd/iommu.c | 13 ++++++++----- 3 files changed, 20 insertions(+), 31 deletions(-) diff --git a/drivers/iommu/amd/amd_iommu.h b/drivers/iommu/amd/amd_iommu.h index 83ca822c5349..3770b1a4d51c 100644 --- a/drivers/iommu/amd/amd_iommu.h +++ b/drivers/iommu/amd/amd_iommu.h @@ -133,19 +133,6 @@ void amd_iommu_apply_ivrs_quirks(void); static inline void amd_iommu_apply_ivrs_quirks(void) { } #endif -/* TODO: These are temporary and will be removed once fully transition */ -extern int iommu_map_page(struct protection_domain *dom, - unsigned long bus_addr, - unsigned long phys_addr, - unsigned long page_size, - int prot, - gfp_t gfp); -extern unsigned long iommu_unmap_page(struct protection_domain *dom, - unsigned long bus_addr, - unsigned long page_size); -extern u64 *fetch_pte(struct amd_io_pgtable *pgtable, - unsigned long address, - unsigned long *page_size); extern void amd_iommu_domain_set_pgtable(struct protection_domain *domain, u64 *root, int mode); #endif diff --git a/drivers/iommu/amd/io_pgtable.c b/drivers/iommu/amd/io_pgtable.c index b70eb79d05fb..1c4961e05c12 100644 --- a/drivers/iommu/amd/io_pgtable.c +++ b/drivers/iommu/amd/io_pgtable.c @@ -311,9 +311,9 @@ static u64 *alloc_pte(struct protection_domain *domain, * This function checks if there is a PTE for a given dma address. If * there is one, it returns the pointer to it. */ -u64 *fetch_pte(struct amd_io_pgtable *pgtable, - unsigned long address, - unsigned long *page_size) +static u64 *fetch_pte(struct amd_io_pgtable *pgtable, + unsigned long address, + unsigned long *page_size) { int level; u64 *pte; @@ -386,13 +386,10 @@ static struct page *free_clear_pte(u64 *pte, u64 pteval, struct page *freelist) * supporting all features of AMD IOMMU page tables like level skipping * and full 64 bit address spaces. */ -int iommu_map_page(struct protection_domain *dom, - unsigned long iova, - unsigned long paddr, - unsigned long size, - int prot, - gfp_t gfp) +static int iommu_v1_map_page(struct io_pgtable_ops *ops, unsigned long iova, + phys_addr_t paddr, size_t size, int prot, gfp_t gfp) { + struct protection_domain *dom = io_pgtable_ops_to_domain(ops); struct page *freelist = NULL; bool updated = false; u64 __pte, *pte; @@ -455,11 +452,11 @@ int iommu_map_page(struct protection_domain *dom, return ret; } -unsigned long iommu_unmap_page(struct protection_domain *dom, - unsigned long iova, - unsigned long size) +static unsigned long iommu_v1_unmap_page(struct io_pgtable_ops *ops, + unsigned long iova, + size_t size, + struct iommu_iotlb_gather *gather) { - struct io_pgtable_ops *ops = &dom->iop.iop.ops; struct amd_io_pgtable *pgtable = io_pgtable_ops_to_data(ops); unsigned long long unmapped; unsigned long unmap_size; @@ -548,6 +545,8 @@ static struct io_pgtable *v1_alloc_pgtable(struct io_pgtable_cfg *cfg, void *coo cfg->oas = IOMMU_OUT_ADDR_BIT_SIZE, cfg->tlb = &v1_flush_ops; + pgtable->iop.ops.map = iommu_v1_map_page; + pgtable->iop.ops.unmap = iommu_v1_unmap_page; pgtable->iop.ops.iova_to_phys = iommu_v1_iova_to_phys; return &pgtable->iop; diff --git a/drivers/iommu/amd/iommu.c b/drivers/iommu/amd/iommu.c index 144ea91a9ec9..256d38ad6afb 100644 --- a/drivers/iommu/amd/iommu.c +++ b/drivers/iommu/amd/iommu.c @@ -2065,8 +2065,9 @@ static int amd_iommu_map(struct iommu_domain *dom, unsigned long iova, gfp_t gfp) { struct protection_domain *domain = to_pdomain(dom); + struct io_pgtable_ops *ops = &domain->iop.iop.ops; int prot = 0; - int ret; + int ret = -EINVAL; if (domain->iop.mode == PAGE_MODE_NONE) return -EINVAL; @@ -2076,9 +2077,10 @@ static int amd_iommu_map(struct iommu_domain *dom, unsigned long iova, if (iommu_prot & IOMMU_WRITE) prot |= IOMMU_PROT_IW; - ret = iommu_map_page(domain, iova, paddr, page_size, prot, gfp); - - domain_flush_np_cache(domain, iova, page_size); + if (ops->map) { + ret = ops->map(ops, iova, paddr, page_size, prot, gfp); + domain_flush_np_cache(domain, iova, page_size); + } return ret; } @@ -2088,11 +2090,12 @@ static size_t amd_iommu_unmap(struct iommu_domain *dom, unsigned long iova, struct iommu_iotlb_gather *gather) { struct protection_domain *domain = to_pdomain(dom); + struct io_pgtable_ops *ops = &domain->iop.iop.ops; if (domain->iop.mode == PAGE_MODE_NONE) return 0; - return iommu_unmap_page(domain, iova, page_size); + return (ops->unmap) ? ops->unmap(ops, iova, page_size, gather) : 0; } static phys_addr_t amd_iommu_iova_to_phys(struct iommu_domain *dom, From 89c9a09cb9f6e0a7df77f9c9bafd3c96148bf0d5 Mon Sep 17 00:00:00 2001 From: Suravee Suthikulpanit Date: Tue, 15 Dec 2020 01:37:05 -0600 Subject: [PATCH 199/414] iommu/amd: Adopt IO page table framework for AMD IOMMU v1 page table Switch to using IO page table framework for AMD IOMMU v1 page table. Signed-off-by: Suravee Suthikulpanit Link: https://lore.kernel.org/r/20201215073705.123786-14-suravee.suthikulpanit@amd.com Signed-off-by: Joerg Roedel --- drivers/iommu/amd/amd_iommu.h | 1 + drivers/iommu/amd/init.c | 2 ++ drivers/iommu/amd/iommu.c | 48 ++++++++++++++++++++++++++--------- 3 files changed, 39 insertions(+), 12 deletions(-) diff --git a/drivers/iommu/amd/amd_iommu.h b/drivers/iommu/amd/amd_iommu.h index 3770b1a4d51c..91452e0ff072 100644 --- a/drivers/iommu/amd/amd_iommu.h +++ b/drivers/iommu/amd/amd_iommu.h @@ -36,6 +36,7 @@ extern void amd_iommu_disable(void); extern int amd_iommu_reenable(int); extern int amd_iommu_enable_faulting(void); extern int amd_iommu_guest_ir; +extern enum io_pgtable_fmt amd_iommu_pgtable; /* IOMMUv2 specific functions */ struct iommu_domain; diff --git a/drivers/iommu/amd/init.c b/drivers/iommu/amd/init.c index 1804c3e3b6ac..5a6c511e91d1 100644 --- a/drivers/iommu/amd/init.c +++ b/drivers/iommu/amd/init.c @@ -147,6 +147,8 @@ struct ivmd_header { bool amd_iommu_dump; bool amd_iommu_irq_remap __read_mostly; +enum io_pgtable_fmt amd_iommu_pgtable = AMD_IOMMU_V1; + int amd_iommu_guest_ir = AMD_IOMMU_GUEST_IR_VAPIC; static int amd_iommu_xt_mode = IRQ_REMAP_XAPIC_MODE; diff --git a/drivers/iommu/amd/iommu.c b/drivers/iommu/amd/iommu.c index 256d38ad6afb..a69a8b573e40 100644 --- a/drivers/iommu/amd/iommu.c +++ b/drivers/iommu/amd/iommu.c @@ -31,6 +31,7 @@ #include #include #include +#include #include #include #include @@ -1900,7 +1901,7 @@ static void protection_domain_free(struct protection_domain *domain) kfree(domain); } -static int protection_domain_init(struct protection_domain *domain, int mode) +static int protection_domain_init_v1(struct protection_domain *domain, int mode) { u64 *pt_root = NULL; @@ -1923,34 +1924,55 @@ static int protection_domain_init(struct protection_domain *domain, int mode) return 0; } -static struct protection_domain *protection_domain_alloc(int mode) +static struct protection_domain *protection_domain_alloc(unsigned int type) { + struct io_pgtable_ops *pgtbl_ops; struct protection_domain *domain; + int pgtable = amd_iommu_pgtable; + int mode = DEFAULT_PGTABLE_LEVEL; + int ret; domain = kzalloc(sizeof(*domain), GFP_KERNEL); if (!domain) return NULL; - if (protection_domain_init(domain, mode)) + /* + * Force IOMMU v1 page table when iommu=pt and + * when allocating domain for pass-through devices. + */ + if (type == IOMMU_DOMAIN_IDENTITY) { + pgtable = AMD_IOMMU_V1; + mode = PAGE_MODE_NONE; + } else if (type == IOMMU_DOMAIN_UNMANAGED) { + pgtable = AMD_IOMMU_V1; + } + + switch (pgtable) { + case AMD_IOMMU_V1: + ret = protection_domain_init_v1(domain, mode); + break; + default: + ret = -EINVAL; + } + + if (ret) + goto out_err; + + pgtbl_ops = alloc_io_pgtable_ops(pgtable, &domain->iop.pgtbl_cfg, domain); + if (!pgtbl_ops) goto out_err; return domain; - out_err: kfree(domain); - return NULL; } static struct iommu_domain *amd_iommu_domain_alloc(unsigned type) { struct protection_domain *domain; - int mode = DEFAULT_PGTABLE_LEVEL; - if (type == IOMMU_DOMAIN_IDENTITY) - mode = PAGE_MODE_NONE; - - domain = protection_domain_alloc(mode); + domain = protection_domain_alloc(type); if (!domain) return NULL; @@ -2069,7 +2091,8 @@ static int amd_iommu_map(struct iommu_domain *dom, unsigned long iova, int prot = 0; int ret = -EINVAL; - if (domain->iop.mode == PAGE_MODE_NONE) + if ((amd_iommu_pgtable == AMD_IOMMU_V1) && + (domain->iop.mode == PAGE_MODE_NONE)) return -EINVAL; if (iommu_prot & IOMMU_READ) @@ -2092,7 +2115,8 @@ static size_t amd_iommu_unmap(struct iommu_domain *dom, unsigned long iova, struct protection_domain *domain = to_pdomain(dom); struct io_pgtable_ops *ops = &domain->iop.iop.ops; - if (domain->iop.mode == PAGE_MODE_NONE) + if ((amd_iommu_pgtable == AMD_IOMMU_V1) && + (domain->iop.mode == PAGE_MODE_NONE)) return 0; return (ops->unmap) ? ops->unmap(ops, iova, page_size, gather) : 0; From 96667052149da3855c4361925324b690c687152f Mon Sep 17 00:00:00 2001 From: Maor Gottlieb Date: Mon, 25 Jan 2021 14:05:27 +0200 Subject: [PATCH 200/414] tools/testing/scatterlist: Fix overflow of max segment size Because SCATTERLIST_MAX_SEGMENT was removed and replaced with UINT_MAX, the test overflows the max_sgement variable. Remove this case. Fixes: 7a60c2dd0f57 ("drm: Remove SCATTERLIST_MAX_SEGMENT") Link: https://lore.kernel.org/r/20210125120527.836363-1-leon@kernel.org Signed-off-by: Maor Gottlieb Signed-off-by: Leon Romanovsky Signed-off-by: Jason Gunthorpe --- tools/testing/scatterlist/main.c | 1 - 1 file changed, 1 deletion(-) diff --git a/tools/testing/scatterlist/main.c b/tools/testing/scatterlist/main.c index 71c960dcd8a4..652254754b4c 100644 --- a/tools/testing/scatterlist/main.c +++ b/tools/testing/scatterlist/main.c @@ -55,7 +55,6 @@ int main(void) struct test *test, tests[] = { { -EINVAL, 1, pfn(0), NULL, PAGE_SIZE, 0, 1 }, { 0, 1, pfn(0), NULL, PAGE_SIZE, PAGE_SIZE + 1, 1 }, - { 0, 1, pfn(0), NULL, PAGE_SIZE, sgmax + 1, 1 }, { 0, 1, pfn(0), NULL, PAGE_SIZE, sgmax, 1 }, { 0, 1, pfn(0), NULL, 1, sgmax, 1 }, { 0, 2, pfn(0, 1), NULL, 2 * PAGE_SIZE, sgmax, 1 }, From 2614488d1f3cd5989375042286b11424208e20c8 Mon Sep 17 00:00:00 2001 From: Mark Bloch Date: Mon, 25 Jan 2021 14:07:09 +0200 Subject: [PATCH 201/414] RDMA/mlx5: Allow creating all QPs even when non RDMA profile is used The cited commit disallowed creating any QP which isn't raw ethernet, reg umr or the special UD qp for testing WC, this proved too strict. While modify can't be done (no GIDS/GID table for example) just creating a QP is okay. This patch partially reverts the bellow mentioned commit and places the restriction at the modify QP stage and not at the creation. DEVX commands should be used to manipulate such QPs. Fixes: 42caf9cb5937 ("RDMA/mlx5: Allow only raw Ethernet QPs when RoCE isn't enabled") Link: https://lore.kernel.org/r/20210125120709.836718-1-leon@kernel.org Signed-off-by: Mark Bloch Reviewed-by: Maor Gottlieb Signed-off-by: Leon Romanovsky Signed-off-by: Jason Gunthorpe --- drivers/infiniband/hw/mlx5/qp.c | 27 ++++++++++++++++++++------- 1 file changed, 20 insertions(+), 7 deletions(-) diff --git a/drivers/infiniband/hw/mlx5/qp.c b/drivers/infiniband/hw/mlx5/qp.c index c916e48b2e52..88be94215708 100644 --- a/drivers/infiniband/hw/mlx5/qp.c +++ b/drivers/infiniband/hw/mlx5/qp.c @@ -2432,9 +2432,6 @@ static int check_qp_type(struct mlx5_ib_dev *dev, struct ib_qp_init_attr *attr, case MLX5_IB_QPT_HW_GSI: case IB_QPT_DRIVER: case IB_QPT_GSI: - if (dev->profile == &raw_eth_profile) - goto out; - fallthrough; case IB_QPT_RAW_PACKET: case IB_QPT_UD: case MLX5_IB_QPT_REG_UMR: @@ -2629,10 +2626,6 @@ static int process_create_flags(struct mlx5_ib_dev *dev, struct mlx5_ib_qp *qp, int create_flags = attr->create_flags; bool cond; - if (qp->type == IB_QPT_UD && dev->profile == &raw_eth_profile) - if (create_flags & ~MLX5_IB_QP_CREATE_WC_TEST) - return -EINVAL; - if (qp_type == MLX5_IB_QPT_DCT) return (create_flags) ? -EINVAL : 0; @@ -4211,6 +4204,23 @@ static int mlx5_ib_modify_dct(struct ib_qp *ibqp, struct ib_qp_attr *attr, return 0; } +static bool mlx5_ib_modify_qp_allowed(struct mlx5_ib_dev *dev, + struct mlx5_ib_qp *qp, + enum ib_qp_type qp_type) +{ + if (dev->profile != &raw_eth_profile) + return true; + + if (qp_type == IB_QPT_RAW_PACKET || qp_type == MLX5_IB_QPT_REG_UMR) + return true; + + /* Internal QP used for wc testing, with NOPs in wq */ + if (qp->flags & MLX5_IB_QP_CREATE_WC_TEST) + return true; + + return false; +} + int mlx5_ib_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr, int attr_mask, struct ib_udata *udata) { @@ -4223,6 +4233,9 @@ int mlx5_ib_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr, int err = -EINVAL; int port; + if (!mlx5_ib_modify_qp_allowed(dev, qp, ibqp->qp_type)) + return -EOPNOTSUPP; + if (attr_mask & ~(IB_QP_ATTR_STANDARD_BITS | IB_QP_RATE_LIMIT)) return -EOPNOTSUPP; From 4fc5461823c9cad547a9bdfbf17d13f0da0d6bb5 Mon Sep 17 00:00:00 2001 From: Shay Drory Date: Mon, 25 Jan 2021 14:13:38 +0200 Subject: [PATCH 202/414] IB/umad: Return EIO in case of when device disassociated MAD message received by the user has EINVAL error in all flows including when the device is disassociated. That makes it impossible for the applications to treat such flow differently. Change it to return EIO, so the applications will be able to perform disassociation recovery. Fixes: 1da177e4c3f4 ("Linux-2.6.12-rc2") Link: https://lore.kernel.org/r/20210125121339.837518-2-leon@kernel.org Signed-off-by: Shay Drory Signed-off-by: Leon Romanovsky Signed-off-by: Jason Gunthorpe --- drivers/infiniband/core/user_mad.c | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/drivers/infiniband/core/user_mad.c b/drivers/infiniband/core/user_mad.c index 19104a675691..7ec1918431f7 100644 --- a/drivers/infiniband/core/user_mad.c +++ b/drivers/infiniband/core/user_mad.c @@ -379,6 +379,11 @@ static ssize_t ib_umad_read(struct file *filp, char __user *buf, mutex_lock(&file->mutex); + if (file->agents_dead) { + mutex_unlock(&file->mutex); + return -EIO; + } + while (list_empty(&file->recv_list)) { mutex_unlock(&file->mutex); @@ -524,7 +529,7 @@ static ssize_t ib_umad_write(struct file *filp, const char __user *buf, agent = __get_agent(file, packet->mad.hdr.id); if (!agent) { - ret = -EINVAL; + ret = -EIO; goto err_up; } From def4cd43f522253645b72c97181399c241b54536 Mon Sep 17 00:00:00 2001 From: Shay Drory Date: Mon, 25 Jan 2021 14:13:39 +0200 Subject: [PATCH 203/414] IB/umad: Return EPOLLERR in case of when device disassociated Currently, polling a umad device will always works, even if the device was disassociated. A disassociated device should immediately return EPOLLERR from poll(). Otherwise userspace is endlessly hung on poll() with no idea that the device has been removed from the system. Fixes: 1da177e4c3f4 ("Linux-2.6.12-rc2") Link: https://lore.kernel.org/r/20210125121339.837518-3-leon@kernel.org Signed-off-by: Shay Drory Signed-off-by: Leon Romanovsky Signed-off-by: Jason Gunthorpe --- drivers/infiniband/core/user_mad.c | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/drivers/infiniband/core/user_mad.c b/drivers/infiniband/core/user_mad.c index 7ec1918431f7..dd7f3b437c6b 100644 --- a/drivers/infiniband/core/user_mad.c +++ b/drivers/infiniband/core/user_mad.c @@ -397,6 +397,11 @@ static ssize_t ib_umad_read(struct file *filp, char __user *buf, mutex_lock(&file->mutex); } + if (file->agents_dead) { + mutex_unlock(&file->mutex); + return -EIO; + } + packet = list_entry(file->recv_list.next, struct ib_umad_packet, list); list_del(&packet->list); @@ -658,10 +663,14 @@ static __poll_t ib_umad_poll(struct file *filp, struct poll_table_struct *wait) /* we will always be able to post a MAD send */ __poll_t mask = EPOLLOUT | EPOLLWRNORM; + mutex_lock(&file->mutex); poll_wait(filp, &file->recv_wait, wait); if (!list_empty(&file->recv_list)) mask |= EPOLLIN | EPOLLRDNORM; + if (file->agents_dead) + mask = EPOLLERR; + mutex_unlock(&file->mutex); return mask; } @@ -1341,6 +1350,7 @@ static void ib_umad_kill_port(struct ib_umad_port *port) list_for_each_entry(file, &port->file_list, port_list) { mutex_lock(&file->mutex); file->agents_dead = 1; + wake_up_interruptible(&file->recv_wait); mutex_unlock(&file->mutex); for (id = 0; id < IB_UMAD_MAX_AGENTS; ++id) From c4369575b2bc2993edf8223a8f5c9f510ee629d0 Mon Sep 17 00:00:00 2001 From: Bob Pearson Date: Mon, 25 Jan 2021 15:16:36 -0600 Subject: [PATCH 204/414] RDMA/rxe: Fix bug in rxe_alloc() A recent patch which added an 'unlocked' version of rxe_alloc introduced a bug causing kzalloc(..., GFP_KERNEL) to be called while holding a spin lock. This patch corrects that error. rxe_alloc_nl() should always be called while holding the pool->pool_lock so the 2nd argument to kzalloc there should be GFP_ATOMIC. rxe_alloc() prior to the change only locked the code around checking that pool->state is RXE_POOL_STATE_VALID to avoid races between working threads and a thread shutting down the rxe driver. This patch reverts rxe_alloc() to this behavior so the lock is not held when kzalloc() is called. Link: https://lore.kernel.org/r/20210125211641.2694-2-rpearson@hpe.com Reported-by: syzbot+ec2fd72374785d0e558e@syzkaller.appspotmail.com Fixes: 3853c35e243d ("RDMA/rxe: Add unlocked versions of pool APIs") Signed-off-by: Bob Pearson Signed-off-by: Jason Gunthorpe --- drivers/infiniband/sw/rxe/rxe_pool.c | 57 +++++++++++++++++++++------- 1 file changed, 43 insertions(+), 14 deletions(-) diff --git a/drivers/infiniband/sw/rxe/rxe_pool.c b/drivers/infiniband/sw/rxe/rxe_pool.c index d26730eec720..cfcd55175572 100644 --- a/drivers/infiniband/sw/rxe/rxe_pool.c +++ b/drivers/infiniband/sw/rxe/rxe_pool.c @@ -343,13 +343,54 @@ void *rxe_alloc_nl(struct rxe_pool *pool) struct rxe_pool_entry *elem; u8 *obj; - might_sleep_if(!(pool->flags & RXE_POOL_ATOMIC)); - if (pool->state != RXE_POOL_STATE_VALID) return NULL; kref_get(&pool->ref_cnt); + if (!ib_device_try_get(&pool->rxe->ib_dev)) + goto out_put_pool; + + if (atomic_inc_return(&pool->num_elem) > pool->max_elem) + goto out_cnt; + + obj = kzalloc(info->size, GFP_ATOMIC); + if (!obj) + goto out_cnt; + + elem = (struct rxe_pool_entry *)(obj + info->elem_offset); + + elem->pool = pool; + kref_init(&elem->ref_cnt); + + return obj; + +out_cnt: + atomic_dec(&pool->num_elem); + ib_device_put(&pool->rxe->ib_dev); +out_put_pool: + rxe_pool_put(pool); + return NULL; +} + +void *rxe_alloc(struct rxe_pool *pool) +{ + unsigned long flags; + struct rxe_type_info *info = &rxe_type_info[pool->type]; + struct rxe_pool_entry *elem; + u8 *obj; + + might_sleep_if(!(pool->flags & RXE_POOL_ATOMIC)); + + read_lock_irqsave(&pool->pool_lock, flags); + if (pool->state != RXE_POOL_STATE_VALID) { + read_unlock_irqrestore(&pool->pool_lock, flags); + return NULL; + } + + kref_get(&pool->ref_cnt); + read_unlock_irqrestore(&pool->pool_lock, flags); + if (!ib_device_try_get(&pool->rxe->ib_dev)) goto out_put_pool; @@ -376,18 +417,6 @@ void *rxe_alloc_nl(struct rxe_pool *pool) return NULL; } -void *rxe_alloc(struct rxe_pool *pool) -{ - u8 *obj; - unsigned long flags; - - read_lock_irqsave(&pool->pool_lock, flags); - obj = rxe_alloc_nl(pool); - read_unlock_irqrestore(&pool->pool_lock, flags); - - return obj; -} - int __rxe_add_to_pool(struct rxe_pool *pool, struct rxe_pool_entry *elem) { unsigned long flags; From 88cc77eb8bd05fd185a7d142dedc6e406c5c35b6 Mon Sep 17 00:00:00 2001 From: Bob Pearson Date: Mon, 25 Jan 2021 15:16:37 -0600 Subject: [PATCH 205/414] RDMA/rxe: Fix misleading comments and names The names and comments of the 'unlocked' pool APIs are very misleading and not what was intended. This patch replaces 'rxe_xxx_nl' with 'rxe_xxx_locked' with comments indicating that the caller is expected to hold the rxe pool lock. Link: https://lore.kernel.org/r/20210125211641.2694-3-rpearson@hpe.com Reported-by: Hillf Danton Suggested-by: Jason Gunthorpe Signed-off-by: Bob Pearson Signed-off-by: Jason Gunthorpe --- drivers/infiniband/sw/rxe/rxe_mcast.c | 8 ++-- drivers/infiniband/sw/rxe/rxe_pool.c | 22 +++++------ drivers/infiniband/sw/rxe/rxe_pool.h | 55 +++++++++++++-------------- 3 files changed, 42 insertions(+), 43 deletions(-) diff --git a/drivers/infiniband/sw/rxe/rxe_mcast.c b/drivers/infiniband/sw/rxe/rxe_mcast.c index 5be47ce7d319..0ea9a5aa4ec0 100644 --- a/drivers/infiniband/sw/rxe/rxe_mcast.c +++ b/drivers/infiniband/sw/rxe/rxe_mcast.c @@ -15,18 +15,18 @@ static struct rxe_mc_grp *create_grp(struct rxe_dev *rxe, int err; struct rxe_mc_grp *grp; - grp = rxe_alloc_nl(&rxe->mc_grp_pool); + grp = rxe_alloc_locked(&rxe->mc_grp_pool); if (!grp) return ERR_PTR(-ENOMEM); INIT_LIST_HEAD(&grp->qp_list); spin_lock_init(&grp->mcg_lock); grp->rxe = rxe; - rxe_add_key_nl(grp, mgid); + rxe_add_key_locked(grp, mgid); err = rxe_mcast_add(rxe, mgid); if (unlikely(err)) { - rxe_drop_key_nl(grp); + rxe_drop_key_locked(grp); rxe_drop_ref(grp); return ERR_PTR(err); } @@ -47,7 +47,7 @@ int rxe_mcast_get_grp(struct rxe_dev *rxe, union ib_gid *mgid, write_lock_irqsave(&pool->pool_lock, flags); - grp = rxe_pool_get_key_nl(pool, mgid); + grp = rxe_pool_get_key_locked(pool, mgid); if (grp) goto done; diff --git a/drivers/infiniband/sw/rxe/rxe_pool.c b/drivers/infiniband/sw/rxe/rxe_pool.c index cfcd55175572..5ca54e09cd0e 100644 --- a/drivers/infiniband/sw/rxe/rxe_pool.c +++ b/drivers/infiniband/sw/rxe/rxe_pool.c @@ -266,7 +266,7 @@ static void insert_key(struct rxe_pool *pool, struct rxe_pool_entry *new) return; } -void __rxe_add_key_nl(struct rxe_pool_entry *elem, void *key) +void __rxe_add_key_locked(struct rxe_pool_entry *elem, void *key) { struct rxe_pool *pool = elem->pool; @@ -280,11 +280,11 @@ void __rxe_add_key(struct rxe_pool_entry *elem, void *key) unsigned long flags; write_lock_irqsave(&pool->pool_lock, flags); - __rxe_add_key_nl(elem, key); + __rxe_add_key_locked(elem, key); write_unlock_irqrestore(&pool->pool_lock, flags); } -void __rxe_drop_key_nl(struct rxe_pool_entry *elem) +void __rxe_drop_key_locked(struct rxe_pool_entry *elem) { struct rxe_pool *pool = elem->pool; @@ -297,11 +297,11 @@ void __rxe_drop_key(struct rxe_pool_entry *elem) unsigned long flags; write_lock_irqsave(&pool->pool_lock, flags); - __rxe_drop_key_nl(elem); + __rxe_drop_key_locked(elem); write_unlock_irqrestore(&pool->pool_lock, flags); } -void __rxe_add_index_nl(struct rxe_pool_entry *elem) +void __rxe_add_index_locked(struct rxe_pool_entry *elem) { struct rxe_pool *pool = elem->pool; @@ -315,11 +315,11 @@ void __rxe_add_index(struct rxe_pool_entry *elem) unsigned long flags; write_lock_irqsave(&pool->pool_lock, flags); - __rxe_add_index_nl(elem); + __rxe_add_index_locked(elem); write_unlock_irqrestore(&pool->pool_lock, flags); } -void __rxe_drop_index_nl(struct rxe_pool_entry *elem) +void __rxe_drop_index_locked(struct rxe_pool_entry *elem) { struct rxe_pool *pool = elem->pool; @@ -333,11 +333,11 @@ void __rxe_drop_index(struct rxe_pool_entry *elem) unsigned long flags; write_lock_irqsave(&pool->pool_lock, flags); - __rxe_drop_index_nl(elem); + __rxe_drop_index_locked(elem); write_unlock_irqrestore(&pool->pool_lock, flags); } -void *rxe_alloc_nl(struct rxe_pool *pool) +void *rxe_alloc_locked(struct rxe_pool *pool) { struct rxe_type_info *info = &rxe_type_info[pool->type]; struct rxe_pool_entry *elem; @@ -507,7 +507,7 @@ void *rxe_pool_get_index(struct rxe_pool *pool, u32 index) return obj; } -void *rxe_pool_get_key_nl(struct rxe_pool *pool, void *key) +void *rxe_pool_get_key_locked(struct rxe_pool *pool, void *key) { struct rxe_type_info *info = &rxe_type_info[pool->type]; struct rb_node *node; @@ -551,7 +551,7 @@ void *rxe_pool_get_key(struct rxe_pool *pool, void *key) unsigned long flags; read_lock_irqsave(&pool->pool_lock, flags); - obj = rxe_pool_get_key_nl(pool, key); + obj = rxe_pool_get_key_locked(pool, key); read_unlock_irqrestore(&pool->pool_lock, flags); return obj; diff --git a/drivers/infiniband/sw/rxe/rxe_pool.h b/drivers/infiniband/sw/rxe/rxe_pool.h index 373e08554c1c..a75ac2d2847a 100644 --- a/drivers/infiniband/sw/rxe/rxe_pool.h +++ b/drivers/infiniband/sw/rxe/rxe_pool.h @@ -106,11 +106,10 @@ int rxe_pool_init(struct rxe_dev *rxe, struct rxe_pool *pool, /* free resources from object pool */ void rxe_pool_cleanup(struct rxe_pool *pool); -/* allocate an object from pool */ -void *rxe_alloc(struct rxe_pool *pool); +/* allocate an object from pool holding and not holding the pool lock */ +void *rxe_alloc_locked(struct rxe_pool *pool); -/* allocate an object from pool - no lock */ -void *rxe_alloc_nl(struct rxe_pool *pool); +void *rxe_alloc(struct rxe_pool *pool); /* connect already allocated object to pool */ int __rxe_add_to_pool(struct rxe_pool *pool, struct rxe_pool_entry *elem); @@ -118,60 +117,60 @@ int __rxe_add_to_pool(struct rxe_pool *pool, struct rxe_pool_entry *elem); #define rxe_add_to_pool(pool, obj) __rxe_add_to_pool(pool, &(obj)->pelem) /* assign an index to an indexed object and insert object into - * pool's rb tree with and without holding the pool_lock + * pool's rb tree holding and not holding the pool_lock */ +void __rxe_add_index_locked(struct rxe_pool_entry *elem); + +#define rxe_add_index_locked(obj) __rxe_add_index_locked(&(obj)->pelem) + void __rxe_add_index(struct rxe_pool_entry *elem); #define rxe_add_index(obj) __rxe_add_index(&(obj)->pelem) -void __rxe_add_index_nl(struct rxe_pool_entry *elem); - -#define rxe_add_index_nl(obj) __rxe_add_index_nl(&(obj)->pelem) - /* drop an index and remove object from rb tree - * with and without holding the pool_lock + * holding and not holding the pool_lock */ +void __rxe_drop_index_locked(struct rxe_pool_entry *elem); + +#define rxe_drop_index_locked(obj) __rxe_drop_index_locked(&(obj)->pelem) + void __rxe_drop_index(struct rxe_pool_entry *elem); #define rxe_drop_index(obj) __rxe_drop_index(&(obj)->pelem) -void __rxe_drop_index_nl(struct rxe_pool_entry *elem); - -#define rxe_drop_index_nl(obj) __rxe_drop_index_nl(&(obj)->pelem) - /* assign a key to a keyed object and insert object into - * pool's rb tree with and without holding pool_lock + * pool's rb tree holding and not holding pool_lock */ +void __rxe_add_key_locked(struct rxe_pool_entry *elem, void *key); + +#define rxe_add_key_locked(obj, key) __rxe_add_key_locked(&(obj)->pelem, key) + void __rxe_add_key(struct rxe_pool_entry *elem, void *key); #define rxe_add_key(obj, key) __rxe_add_key(&(obj)->pelem, key) -void __rxe_add_key_nl(struct rxe_pool_entry *elem, void *key); +/* remove elem from rb tree holding and not holding the pool_lock */ +void __rxe_drop_key_locked(struct rxe_pool_entry *elem); -#define rxe_add_key_nl(obj, key) __rxe_add_key_nl(&(obj)->pelem, key) +#define rxe_drop_key_locked(obj) __rxe_drop_key_locked(&(obj)->pelem) -/* remove elem from rb tree with and without holding pool_lock */ void __rxe_drop_key(struct rxe_pool_entry *elem); #define rxe_drop_key(obj) __rxe_drop_key(&(obj)->pelem) -void __rxe_drop_key_nl(struct rxe_pool_entry *elem); - -#define rxe_drop_key_nl(obj) __rxe_drop_key_nl(&(obj)->pelem) - -/* lookup an indexed object from index with and without holding pool_lock. +/* lookup an indexed object from index holding and not holding the pool_lock. * takes a reference on object */ +void *rxe_pool_get_index_locked(struct rxe_pool *pool, u32 index); + void *rxe_pool_get_index(struct rxe_pool *pool, u32 index); -void *rxe_pool_get_index_nl(struct rxe_pool *pool, u32 index); - -/* lookup keyed object from key with and without holding pool_lock. +/* lookup keyed object from key holding and not holding the pool_lock. * takes a reference on the objecti */ -void *rxe_pool_get_key(struct rxe_pool *pool, void *key); +void *rxe_pool_get_key_locked(struct rxe_pool *pool, void *key); -void *rxe_pool_get_key_nl(struct rxe_pool *pool, void *key); +void *rxe_pool_get_key(struct rxe_pool *pool, void *key); /* cleanup an object when all references are dropped */ void rxe_elem_release(struct kref *kref); From 4276fd0dddc98ee7d6c1a469b3f35b8ab51ddc2f Mon Sep 17 00:00:00 2001 From: Bob Pearson Date: Mon, 25 Jan 2021 15:16:38 -0600 Subject: [PATCH 206/414] RDMA/rxe: Remove RXE_POOL_ATOMIC rxe_alloc() used the RXE_POOL_ATOMIC flag in rxe_type_info to select GFP_ATOMIC in calls to kzalloc(). This was intended to handle cases where an object could be created in interrupt context. This no longer occurs since allocating those objects has moved into the core so this flag is not necessary. An incorrect use of this flag was still present for rxe_mc_elem objects and is removed. Link: https://lore.kernel.org/r/20210125211641.2694-4-rpearson@hpe.com Signed-off-by: Bob Pearson Signed-off-by: Jason Gunthorpe --- drivers/infiniband/sw/rxe/rxe_pool.c | 6 +----- drivers/infiniband/sw/rxe/rxe_pool.h | 1 - 2 files changed, 1 insertion(+), 6 deletions(-) diff --git a/drivers/infiniband/sw/rxe/rxe_pool.c b/drivers/infiniband/sw/rxe/rxe_pool.c index 5ca54e09cd0e..0ca46bd8be51 100644 --- a/drivers/infiniband/sw/rxe/rxe_pool.c +++ b/drivers/infiniband/sw/rxe/rxe_pool.c @@ -84,7 +84,6 @@ struct rxe_type_info rxe_type_info[RXE_NUM_TYPES] = { .name = "rxe-mc_elem", .size = sizeof(struct rxe_mc_elem), .elem_offset = offsetof(struct rxe_mc_elem, pelem), - .flags = RXE_POOL_ATOMIC, }, }; @@ -380,8 +379,6 @@ void *rxe_alloc(struct rxe_pool *pool) struct rxe_pool_entry *elem; u8 *obj; - might_sleep_if(!(pool->flags & RXE_POOL_ATOMIC)); - read_lock_irqsave(&pool->pool_lock, flags); if (pool->state != RXE_POOL_STATE_VALID) { read_unlock_irqrestore(&pool->pool_lock, flags); @@ -397,8 +394,7 @@ void *rxe_alloc(struct rxe_pool *pool) if (atomic_inc_return(&pool->num_elem) > pool->max_elem) goto out_cnt; - obj = kzalloc(info->size, (pool->flags & RXE_POOL_ATOMIC) ? - GFP_ATOMIC : GFP_KERNEL); + obj = kzalloc(info->size, GFP_KERNEL); if (!obj) goto out_cnt; diff --git a/drivers/infiniband/sw/rxe/rxe_pool.h b/drivers/infiniband/sw/rxe/rxe_pool.h index a75ac2d2847a..22714dafe00d 100644 --- a/drivers/infiniband/sw/rxe/rxe_pool.h +++ b/drivers/infiniband/sw/rxe/rxe_pool.h @@ -11,7 +11,6 @@ #define RXE_POOL_CACHE_FLAGS (0) enum rxe_pool_flags { - RXE_POOL_ATOMIC = BIT(0), RXE_POOL_INDEX = BIT(1), RXE_POOL_KEY = BIT(2), RXE_POOL_NO_ALLOC = BIT(4), From 6cde3e8ec16f8318bab119ad1e16dd90677bc897 Mon Sep 17 00:00:00 2001 From: Bob Pearson Date: Mon, 25 Jan 2021 15:16:39 -0600 Subject: [PATCH 207/414] RDMA/rxe: Remove references to ib_device and pool rxe_pool.c takes references to the pool and ib_device structs for each object allocated and also keeps an atomic num_elem count in each pool. This is more work than is needed. Pool allocation is only called from verbs APIs which already have references to ib_device and pools are only diasbled when the driver is removed so no protection of the pool addresses are needed. The elem count is used to warn if elements are still present in a pool when it is cleaned up which is useful. This patch eliminates the references to the ib_device and pool structs. Link: https://lore.kernel.org/r/20210125211641.2694-5-rpearson@hpe.com Signed-off-by: Bob Pearson Signed-off-by: Jason Gunthorpe --- drivers/infiniband/sw/rxe/rxe_pool.c | 42 ++-------------------------- drivers/infiniband/sw/rxe/rxe_pool.h | 1 - 2 files changed, 2 insertions(+), 41 deletions(-) diff --git a/drivers/infiniband/sw/rxe/rxe_pool.c b/drivers/infiniband/sw/rxe/rxe_pool.c index 0ca46bd8be51..5f85a90e5a5a 100644 --- a/drivers/infiniband/sw/rxe/rxe_pool.c +++ b/drivers/infiniband/sw/rxe/rxe_pool.c @@ -142,8 +142,6 @@ int rxe_pool_init( atomic_set(&pool->num_elem, 0); - kref_init(&pool->ref_cnt); - rwlock_init(&pool->pool_lock); if (rxe_type_info[type].flags & RXE_POOL_INDEX) { @@ -165,19 +163,6 @@ int rxe_pool_init( return err; } -static void rxe_pool_release(struct kref *kref) -{ - struct rxe_pool *pool = container_of(kref, struct rxe_pool, ref_cnt); - - pool->state = RXE_POOL_STATE_INVALID; - kfree(pool->index.table); -} - -static void rxe_pool_put(struct rxe_pool *pool) -{ - kref_put(&pool->ref_cnt, rxe_pool_release); -} - void rxe_pool_cleanup(struct rxe_pool *pool) { unsigned long flags; @@ -189,7 +174,8 @@ void rxe_pool_cleanup(struct rxe_pool *pool) pool_name(pool)); write_unlock_irqrestore(&pool->pool_lock, flags); - rxe_pool_put(pool); + pool->state = RXE_POOL_STATE_INVALID; + kfree(pool->index.table); } static u32 alloc_index(struct rxe_pool *pool) @@ -345,11 +331,6 @@ void *rxe_alloc_locked(struct rxe_pool *pool) if (pool->state != RXE_POOL_STATE_VALID) return NULL; - kref_get(&pool->ref_cnt); - - if (!ib_device_try_get(&pool->rxe->ib_dev)) - goto out_put_pool; - if (atomic_inc_return(&pool->num_elem) > pool->max_elem) goto out_cnt; @@ -366,9 +347,6 @@ void *rxe_alloc_locked(struct rxe_pool *pool) out_cnt: atomic_dec(&pool->num_elem); - ib_device_put(&pool->rxe->ib_dev); -out_put_pool: - rxe_pool_put(pool); return NULL; } @@ -385,12 +363,8 @@ void *rxe_alloc(struct rxe_pool *pool) return NULL; } - kref_get(&pool->ref_cnt); read_unlock_irqrestore(&pool->pool_lock, flags); - if (!ib_device_try_get(&pool->rxe->ib_dev)) - goto out_put_pool; - if (atomic_inc_return(&pool->num_elem) > pool->max_elem) goto out_cnt; @@ -407,9 +381,6 @@ void *rxe_alloc(struct rxe_pool *pool) out_cnt: atomic_dec(&pool->num_elem); - ib_device_put(&pool->rxe->ib_dev); -out_put_pool: - rxe_pool_put(pool); return NULL; } @@ -422,12 +393,8 @@ int __rxe_add_to_pool(struct rxe_pool *pool, struct rxe_pool_entry *elem) read_unlock_irqrestore(&pool->pool_lock, flags); return -EINVAL; } - kref_get(&pool->ref_cnt); read_unlock_irqrestore(&pool->pool_lock, flags); - if (!ib_device_try_get(&pool->rxe->ib_dev)) - goto out_put_pool; - if (atomic_inc_return(&pool->num_elem) > pool->max_elem) goto out_cnt; @@ -438,9 +405,6 @@ int __rxe_add_to_pool(struct rxe_pool *pool, struct rxe_pool_entry *elem) out_cnt: atomic_dec(&pool->num_elem); - ib_device_put(&pool->rxe->ib_dev); -out_put_pool: - rxe_pool_put(pool); return -EINVAL; } @@ -461,8 +425,6 @@ void rxe_elem_release(struct kref *kref) } atomic_dec(&pool->num_elem); - ib_device_put(&pool->rxe->ib_dev); - rxe_pool_put(pool); } void *rxe_pool_get_index(struct rxe_pool *pool, u32 index) diff --git a/drivers/infiniband/sw/rxe/rxe_pool.h b/drivers/infiniband/sw/rxe/rxe_pool.h index 22714dafe00d..8f8de746ca17 100644 --- a/drivers/infiniband/sw/rxe/rxe_pool.h +++ b/drivers/infiniband/sw/rxe/rxe_pool.h @@ -68,7 +68,6 @@ struct rxe_pool { struct rxe_dev *rxe; rwlock_t pool_lock; /* protects pool add/del/search */ size_t elem_size; - struct kref ref_cnt; void (*cleanup)(struct rxe_pool_entry *obj); enum rxe_pool_state state; enum rxe_pool_flags flags; From eae5f0642e2f6a07a73f3fd60ecfeae9b4b32d5c Mon Sep 17 00:00:00 2001 From: Bob Pearson Date: Mon, 25 Jan 2021 15:16:40 -0600 Subject: [PATCH 208/414] RDMA/rxe: Remove unneeded pool->state rxe_pool.c uses the field pool->state to mark a pool as invalid when it is shut down and checks it in several pool APIs to verify that the pool has not been shut down. This is unneeded because the pools are not marked invalid unless the entire driver is being removed at which point no functional APIs should or could be executing. This patch removes this field and associated code. Link: https://lore.kernel.org/r/20210125211641.2694-6-rpearson@hpe.com Suggested-by: zyjzyj2000@gmail.c Signed-off-by: Bob Pearson Signed-off-by: Jason Gunthorpe --- drivers/infiniband/sw/rxe/rxe_pool.c | 38 +--------------------------- drivers/infiniband/sw/rxe/rxe_pool.h | 6 ----- 2 files changed, 1 insertion(+), 43 deletions(-) diff --git a/drivers/infiniband/sw/rxe/rxe_pool.c b/drivers/infiniband/sw/rxe/rxe_pool.c index 5f85a90e5a5a..5aa835028460 100644 --- a/drivers/infiniband/sw/rxe/rxe_pool.c +++ b/drivers/infiniband/sw/rxe/rxe_pool.c @@ -157,24 +157,16 @@ int rxe_pool_init( pool->key.key_size = rxe_type_info[type].key_size; } - pool->state = RXE_POOL_STATE_VALID; - out: return err; } void rxe_pool_cleanup(struct rxe_pool *pool) { - unsigned long flags; - - write_lock_irqsave(&pool->pool_lock, flags); - pool->state = RXE_POOL_STATE_INVALID; if (atomic_read(&pool->num_elem) > 0) pr_warn("%s pool destroyed with unfree'd elem\n", pool_name(pool)); - write_unlock_irqrestore(&pool->pool_lock, flags); - pool->state = RXE_POOL_STATE_INVALID; kfree(pool->index.table); } @@ -328,9 +320,6 @@ void *rxe_alloc_locked(struct rxe_pool *pool) struct rxe_pool_entry *elem; u8 *obj; - if (pool->state != RXE_POOL_STATE_VALID) - return NULL; - if (atomic_inc_return(&pool->num_elem) > pool->max_elem) goto out_cnt; @@ -352,19 +341,10 @@ void *rxe_alloc_locked(struct rxe_pool *pool) void *rxe_alloc(struct rxe_pool *pool) { - unsigned long flags; struct rxe_type_info *info = &rxe_type_info[pool->type]; struct rxe_pool_entry *elem; u8 *obj; - read_lock_irqsave(&pool->pool_lock, flags); - if (pool->state != RXE_POOL_STATE_VALID) { - read_unlock_irqrestore(&pool->pool_lock, flags); - return NULL; - } - - read_unlock_irqrestore(&pool->pool_lock, flags); - if (atomic_inc_return(&pool->num_elem) > pool->max_elem) goto out_cnt; @@ -386,15 +366,6 @@ void *rxe_alloc(struct rxe_pool *pool) int __rxe_add_to_pool(struct rxe_pool *pool, struct rxe_pool_entry *elem) { - unsigned long flags; - - read_lock_irqsave(&pool->pool_lock, flags); - if (pool->state != RXE_POOL_STATE_VALID) { - read_unlock_irqrestore(&pool->pool_lock, flags); - return -EINVAL; - } - read_unlock_irqrestore(&pool->pool_lock, flags); - if (atomic_inc_return(&pool->num_elem) > pool->max_elem) goto out_cnt; @@ -437,9 +408,6 @@ void *rxe_pool_get_index(struct rxe_pool *pool, u32 index) read_lock_irqsave(&pool->pool_lock, flags); - if (pool->state != RXE_POOL_STATE_VALID) - goto out; - node = pool->index.tree.rb_node; while (node) { @@ -460,8 +428,8 @@ void *rxe_pool_get_index(struct rxe_pool *pool, u32 index) obj = NULL; } -out: read_unlock_irqrestore(&pool->pool_lock, flags); + return obj; } @@ -473,9 +441,6 @@ void *rxe_pool_get_key_locked(struct rxe_pool *pool, void *key) u8 *obj = NULL; int cmp; - if (pool->state != RXE_POOL_STATE_VALID) - goto out; - node = pool->key.tree.rb_node; while (node) { @@ -499,7 +464,6 @@ void *rxe_pool_get_key_locked(struct rxe_pool *pool, void *key) obj = NULL; } -out: return obj; } diff --git a/drivers/infiniband/sw/rxe/rxe_pool.h b/drivers/infiniband/sw/rxe/rxe_pool.h index 8f8de746ca17..61210b300a78 100644 --- a/drivers/infiniband/sw/rxe/rxe_pool.h +++ b/drivers/infiniband/sw/rxe/rxe_pool.h @@ -46,11 +46,6 @@ struct rxe_type_info { extern struct rxe_type_info rxe_type_info[]; -enum rxe_pool_state { - RXE_POOL_STATE_INVALID, - RXE_POOL_STATE_VALID, -}; - struct rxe_pool_entry { struct rxe_pool *pool; struct kref ref_cnt; @@ -69,7 +64,6 @@ struct rxe_pool { rwlock_t pool_lock; /* protects pool add/del/search */ size_t elem_size; void (*cleanup)(struct rxe_pool_entry *obj); - enum rxe_pool_state state; enum rxe_pool_flags flags; enum rxe_elem_type type; From ce2063e3872b849421c6135881b1a6fb16d9eab7 Mon Sep 17 00:00:00 2001 From: Bob Pearson Date: Mon, 25 Jan 2021 15:16:41 -0600 Subject: [PATCH 209/414] RDMA/rxe: Replace missing rxe_pool_get_index_locked One of the pool APIs for when caller is holding lock was not defined but is declared in rxe_pool.h. This patch adds the definition. Link: https://lore.kernel.org/r/20210125211641.2694-7-rpearson@hpe.com Signed-off-by: Bob Pearson Signed-off-by: Jason Gunthorpe --- drivers/infiniband/sw/rxe/rxe_pool.c | 21 ++++++++++++++------- 1 file changed, 14 insertions(+), 7 deletions(-) diff --git a/drivers/infiniband/sw/rxe/rxe_pool.c b/drivers/infiniband/sw/rxe/rxe_pool.c index 5aa835028460..307d8986e7c9 100644 --- a/drivers/infiniband/sw/rxe/rxe_pool.c +++ b/drivers/infiniband/sw/rxe/rxe_pool.c @@ -398,15 +398,12 @@ void rxe_elem_release(struct kref *kref) atomic_dec(&pool->num_elem); } -void *rxe_pool_get_index(struct rxe_pool *pool, u32 index) +void *rxe_pool_get_index_locked(struct rxe_pool *pool, u32 index) { struct rxe_type_info *info = &rxe_type_info[pool->type]; struct rb_node *node; struct rxe_pool_entry *elem; - u8 *obj = NULL; - unsigned long flags; - - read_lock_irqsave(&pool->pool_lock, flags); + u8 *obj; node = pool->index.tree.rb_node; @@ -428,6 +425,16 @@ void *rxe_pool_get_index(struct rxe_pool *pool, u32 index) obj = NULL; } + return obj; +} + +void *rxe_pool_get_index(struct rxe_pool *pool, u32 index) +{ + u8 *obj; + unsigned long flags; + + read_lock_irqsave(&pool->pool_lock, flags); + obj = rxe_pool_get_index_locked(pool, index); read_unlock_irqrestore(&pool->pool_lock, flags); return obj; @@ -438,7 +445,7 @@ void *rxe_pool_get_key_locked(struct rxe_pool *pool, void *key) struct rxe_type_info *info = &rxe_type_info[pool->type]; struct rb_node *node; struct rxe_pool_entry *elem; - u8 *obj = NULL; + u8 *obj; int cmp; node = pool->key.tree.rb_node; @@ -469,7 +476,7 @@ void *rxe_pool_get_key_locked(struct rxe_pool *pool, void *key) void *rxe_pool_get_key(struct rxe_pool *pool, void *key) { - u8 *obj = NULL; + u8 *obj; unsigned long flags; read_lock_irqsave(&pool->pool_lock, flags); From 510a89495552fd0213cb4d14cbb81dc4ea697820 Mon Sep 17 00:00:00 2001 From: Lee Jones Date: Tue, 26 Jan 2021 12:47:13 +0000 Subject: [PATCH 210/414] RDMA/hw/hfi1/intr: Fix some kernel-doc formatting issues Fixes the following W=1 kernel build warning(s): drivers/infiniband/hw/hfi1/intr.c:99: warning: Function parameter or member 'msg' not described in 'format_hwmsg' drivers/infiniband/hw/hfi1/intr.c:99: warning: Function parameter or member 'msgl' not described in 'format_hwmsg' drivers/infiniband/hw/hfi1/intr.c:99: warning: Function parameter or member 'hwmsg' not described in 'format_hwmsg' drivers/infiniband/hw/hfi1/intr.c:115: warning: Function parameter or member 'hwerrs' not described in 'hfi1_format_hwerrors' drivers/infiniband/hw/hfi1/intr.c:115: warning: Function parameter or member 'hwerrmsgs' not described in 'hfi1_format_hwerrors' drivers/infiniband/hw/hfi1/intr.c:115: warning: Function parameter or member 'nhwerrmsgs' not described in 'hfi1_format_hwerrors' drivers/infiniband/hw/hfi1/intr.c:115: warning: Function parameter or member 'msg' not described in 'hfi1_format_hwerrors' drivers/infiniband/hw/hfi1/intr.c:115: warning: Function parameter or member 'msgl' not described in 'hfi1_format_hwerrors' Link: https://lore.kernel.org/r/20210126124732.3320971-2-lee.jones@linaro.org Cc: Mike Marciniszyn Cc: Dennis Dalessandro Cc: Doug Ledford Cc: Jason Gunthorpe Cc: linux-rdma@vger.kernel.org Signed-off-by: Lee Jones Signed-off-by: Jason Gunthorpe --- drivers/infiniband/hw/hfi1/intr.c | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/drivers/infiniband/hw/hfi1/intr.c b/drivers/infiniband/hw/hfi1/intr.c index 387305b768e9..5ba5c11459e7 100644 --- a/drivers/infiniband/hw/hfi1/intr.c +++ b/drivers/infiniband/hw/hfi1/intr.c @@ -91,9 +91,9 @@ static void add_full_mgmt_pkey(struct hfi1_pportdata *ppd) /** * format_hwmsg - format a single hwerror message - * @msg message buffer - * @msgl length of message buffer - * @hwmsg message to add to message buffer + * @msg: message buffer + * @msgl: length of message buffer + * @hwmsg: message to add to message buffer */ static void format_hwmsg(char *msg, size_t msgl, const char *hwmsg) { @@ -104,11 +104,11 @@ static void format_hwmsg(char *msg, size_t msgl, const char *hwmsg) /** * hfi1_format_hwerrors - format hardware error messages for display - * @hwerrs hardware errors bit vector - * @hwerrmsgs hardware error descriptions - * @nhwerrmsgs number of hwerrmsgs - * @msg message buffer - * @msgl message buffer length + * @hwerrs: hardware errors bit vector + * @hwerrmsgs: hardware error descriptions + * @nhwerrmsgs: number of hwerrmsgs + * @msg: message buffer + * @msgl: message buffer length */ void hfi1_format_hwerrors(u64 hwerrs, const struct hfi1_hwerror_msgs *hwerrmsgs, size_t nhwerrmsgs, char *msg, size_t msgl) From ce4cc52f2c79756a88926091f38bbc09861ed9ef Mon Sep 17 00:00:00 2001 From: Lee Jones Date: Tue, 26 Jan 2021 12:47:14 +0000 Subject: [PATCH 211/414] RDMA/sw/rdmavt/srq: Fix a couple of kernel-doc issues Fixes the following W=1 kernel build warning(s): drivers/infiniband/sw/rdmavt/srq.c:78: warning: Function parameter or member 'ibsrq' not described in 'rvt_create_srq' drivers/infiniband/sw/rdmavt/srq.c:78: warning: Excess function parameter 'ibpd' description in 'rvt_create_srq' drivers/infiniband/sw/rdmavt/srq.c:336: warning: Function parameter or member 'udata' not described in 'rvt_destroy_srq' Link: https://lore.kernel.org/r/20210126124732.3320971-3-lee.jones@linaro.org Cc: Dennis Dalessandro Cc: Mike Marciniszyn Cc: Doug Ledford Cc: Jason Gunthorpe Cc: linux-rdma@vger.kernel.org Signed-off-by: Lee Jones Signed-off-by: Jason Gunthorpe --- drivers/infiniband/sw/rdmavt/srq.c | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/drivers/infiniband/sw/rdmavt/srq.c b/drivers/infiniband/sw/rdmavt/srq.c index 64d98bf238ab..2a7c2f12d372 100644 --- a/drivers/infiniband/sw/rdmavt/srq.c +++ b/drivers/infiniband/sw/rdmavt/srq.c @@ -67,7 +67,7 @@ void rvt_driver_srq_init(struct rvt_dev_info *rdi) /** * rvt_create_srq - create a shared receive queue - * @ibpd: the protection domain of the SRQ to create + * @ibsrq: the protection domain of the SRQ to create * @srq_init_attr: the attributes of the SRQ * @udata: data from libibverbs when creating a user SRQ * @@ -311,7 +311,8 @@ int rvt_modify_srq(struct ib_srq *ibsrq, struct ib_srq_attr *attr, return ret; } -/** rvt_query_srq - query srq data +/** + * rvt_query_srq - query srq data * @ibsrq: srq to query * @attr: return info in attr * @@ -330,7 +331,7 @@ int rvt_query_srq(struct ib_srq *ibsrq, struct ib_srq_attr *attr) /** * rvt_destroy_srq - destory an srq * @ibsrq: srq object to destroy - * + * @udata: user data for libibverbs.so */ int rvt_destroy_srq(struct ib_srq *ibsrq, struct ib_udata *udata) { From 36da5370dd82e391946be34c30d40b9126621bde Mon Sep 17 00:00:00 2001 From: Lee Jones Date: Tue, 26 Jan 2021 12:47:15 +0000 Subject: [PATCH 212/414] RDMA/hw/hfi1/iowait: Demote half-completed kernel-doc and fix formatting issue in another Fixes the following W=1 kernel build warning(s): drivers/infiniband/hw/hfi1/iowait.c:52: warning: Function parameter or member 'tidfunc' not described in 'iowait_init' drivers/infiniband/hw/hfi1/iowait.c:52: warning: Function parameter or member 'wakeup' not described in 'iowait_init' drivers/infiniband/hw/hfi1/iowait.c:52: warning: Function parameter or member 'sdma_drained' not described in 'iowait_init' drivers/infiniband/hw/hfi1/iowait.c:52: warning: Function parameter or member 'init_priority' not described in 'iowait_init' drivers/infiniband/hw/hfi1/iowait.c:52: warning: Excess function parameter 'resume' description in 'iowait_init' drivers/infiniband/hw/hfi1/iowait.c:94: warning: Function parameter or member 'w' not described in 'iowait_set_work_flag' Link: https://lore.kernel.org/r/20210126124732.3320971-4-lee.jones@linaro.org Cc: Mike Marciniszyn Cc: Dennis Dalessandro Cc: Doug Ledford Cc: Jason Gunthorpe Cc: linux-rdma@vger.kernel.org Signed-off-by: Lee Jones Signed-off-by: Jason Gunthorpe --- drivers/infiniband/hw/hfi1/iowait.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/infiniband/hw/hfi1/iowait.c b/drivers/infiniband/hw/hfi1/iowait.c index 5836fe7b2817..111489802614 100644 --- a/drivers/infiniband/hw/hfi1/iowait.c +++ b/drivers/infiniband/hw/hfi1/iowait.c @@ -26,7 +26,7 @@ inline void iowait_clear_flag(struct iowait *wait, u32 flag) clear_bit(flag, &wait->flags); } -/** +/* * iowait_init() - initialize wait structure * @wait: wait struct to initialize * @tx_limit: limit for overflow queuing @@ -88,7 +88,7 @@ void iowait_cancel_work(struct iowait *w) /** * iowait_set_work_flag - set work flag based on leg - * @w - the iowait work struct + * @w: the iowait work struct */ int iowait_set_work_flag(struct iowait_work *w) { From 29f7e5a3b88c2cf5215b1500c0fe0e01bb7ee89f Mon Sep 17 00:00:00 2001 From: Lee Jones Date: Tue, 26 Jan 2021 12:47:16 +0000 Subject: [PATCH 213/414] RDMA/hw/hfi1/mad: Demote half-completed kernel-doc header fix another Fixes the following W=1 kernel build warning(s): drivers/infiniband/hw/hfi1/mad.c:1354: warning: Function parameter or member 'am' not described in '__subn_set_opa_portinfo' drivers/infiniband/hw/hfi1/mad.c:1354: warning: Function parameter or member 'data' not described in '__subn_set_opa_portinfo' drivers/infiniband/hw/hfi1/mad.c:1354: warning: Function parameter or member 'resp_len' not described in '__subn_set_opa_portinfo' drivers/infiniband/hw/hfi1/mad.c:1354: warning: Function parameter or member 'max_len' not described in '__subn_set_opa_portinfo' drivers/infiniband/hw/hfi1/mad.c:1354: warning: Function parameter or member 'local_mad' not described in '__subn_set_opa_portinfo' drivers/infiniband/hw/hfi1/mad.c:4919: warning: Function parameter or member 'out_mad_size' not described in 'hfi1_process_mad' drivers/infiniband/hw/hfi1/mad.c:4919: warning: Function parameter or member 'out_mad_pkey_index' not described in 'hfi1_process_mad' Link: https://lore.kernel.org/r/20210126124732.3320971-5-lee.jones@linaro.org Cc: Mike Marciniszyn Cc: Dennis Dalessandro Cc: Doug Ledford Cc: Jason Gunthorpe Cc: linux-rdma@vger.kernel.org Signed-off-by: Lee Jones Signed-off-by: Jason Gunthorpe --- drivers/infiniband/hw/hfi1/mad.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/drivers/infiniband/hw/hfi1/mad.c b/drivers/infiniband/hw/hfi1/mad.c index 3222e3acb79c..e2f2f7847aed 100644 --- a/drivers/infiniband/hw/hfi1/mad.c +++ b/drivers/infiniband/hw/hfi1/mad.c @@ -1341,7 +1341,7 @@ static int set_port_states(struct hfi1_pportdata *ppd, struct opa_smp *smp, return 0; } -/** +/* * subn_set_opa_portinfo - set port information * @smp: the incoming SM packet * @ibdev: the infiniband device @@ -4902,6 +4902,8 @@ static int hfi1_process_ib_mad(struct ib_device *ibdev, int mad_flags, u8 port, * @in_grh: the global route header for this packet * @in_mad: the incoming MAD * @out_mad: any outgoing MAD reply + * @out_mad_size: size of the outgoing MAD reply + * @out_mad_pkey_index: used to apss back the packet key index * * Returns IB_MAD_RESULT_SUCCESS if this is a MAD that we are not * interested in processing. From e57a8db9cadbc6b8d20262666f5df7f0afb132d3 Mon Sep 17 00:00:00 2001 From: Lee Jones Date: Tue, 26 Jan 2021 12:47:17 +0000 Subject: [PATCH 214/414] RDMA/hw/hfi1/msix: Add description for 'name' and remove superfluous param 'idx' Fixes the following W=1 kernel build warning(s): drivers/infiniband/hw/hfi1/msix.c:120: warning: Function parameter or member 'name' not described in 'msix_request_irq' drivers/infiniband/hw/hfi1/msix.c:120: warning: Excess function parameter 'idx' description in 'msix_request_irq' Link: https://lore.kernel.org/r/20210126124732.3320971-6-lee.jones@linaro.org Cc: Mike Marciniszyn Cc: Dennis Dalessandro Cc: Doug Ledford Cc: Jason Gunthorpe Cc: linux-rdma@vger.kernel.org Signed-off-by: Lee Jones Signed-off-by: Jason Gunthorpe --- drivers/infiniband/hw/hfi1/msix.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/infiniband/hw/hfi1/msix.c b/drivers/infiniband/hw/hfi1/msix.c index d61ee853d215..cf3040bb177f 100644 --- a/drivers/infiniband/hw/hfi1/msix.c +++ b/drivers/infiniband/hw/hfi1/msix.c @@ -103,8 +103,8 @@ int msix_initialize(struct hfi1_devdata *dd) * @arg: context information for the IRQ * @handler: IRQ handler * @thread: IRQ thread handler (could be NULL) - * @idx: zero base idx if multiple devices are needed * @type: affinty IRQ type + * @name: IRQ name * * Allocated an MSIx vector if available, and then create the appropriate * meta data needed to keep track of the pci IRQ request. From c1e73d03ba5f72013b53b8c539e35d73e10fa994 Mon Sep 17 00:00:00 2001 From: Lee Jones Date: Tue, 26 Jan 2021 12:47:18 +0000 Subject: [PATCH 215/414] RDMA/sw/rdmavt/mad: Fix misspelling of 'rvt_process_mad()'s 'in_mad_size' param Fixes the following W=1 kernel build warning(s): drivers/infiniband/sw/rdmavt/mad.c:78: warning: Function parameter or member 'in_mad_size' not described in 'rvt_process_mad' Link: https://lore.kernel.org/r/20210126124732.3320971-7-lee.jones@linaro.org Cc: Dennis Dalessandro Cc: Mike Marciniszyn Cc: Doug Ledford Cc: Jason Gunthorpe Cc: linux-rdma@vger.kernel.org Signed-off-by: Lee Jones Signed-off-by: Jason Gunthorpe --- drivers/infiniband/sw/rdmavt/mad.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/infiniband/sw/rdmavt/mad.c b/drivers/infiniband/sw/rdmavt/mad.c index 8cc4de9aa664..fa5be13a4394 100644 --- a/drivers/infiniband/sw/rdmavt/mad.c +++ b/drivers/infiniband/sw/rdmavt/mad.c @@ -57,7 +57,7 @@ * @in_wc: the work completion entry for this packet * @in_grh: the global route header for this packet * @in: the incoming MAD - * @out_mad_size: size of the incoming MAD reply + * @in_mad_size: size of the incoming MAD reply * @out: any outgoing MAD reply * @out_mad_size: size of the outgoing MAD reply * @out_mad_pkey_index: unused From f57cfca846edc47186084cfe64c8161ce8fa7eeb Mon Sep 17 00:00:00 2001 From: Lee Jones Date: Tue, 26 Jan 2021 12:47:19 +0000 Subject: [PATCH 216/414] RDMA/sw/rdmavt/qp: Fix kernel-doc formatting problem Fixes the following W=1 kernel build warning(s): drivers/infiniband/sw/rdmavt/qp.c:1929: warning: Function parameter or member 'post_parms' not described in 'rvt_qp_valid_operation' Link: https://lore.kernel.org/r/20210126124732.3320971-8-lee.jones@linaro.org Cc: Dennis Dalessandro Cc: Mike Marciniszyn Cc: Doug Ledford Cc: Jason Gunthorpe Cc: linux-rdma@vger.kernel.org Signed-off-by: Lee Jones Signed-off-by: Jason Gunthorpe --- drivers/infiniband/sw/rdmavt/qp.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/infiniband/sw/rdmavt/qp.c b/drivers/infiniband/sw/rdmavt/qp.c index 76d6bbfbec50..9d13db68283c 100644 --- a/drivers/infiniband/sw/rdmavt/qp.c +++ b/drivers/infiniband/sw/rdmavt/qp.c @@ -1907,7 +1907,7 @@ int rvt_post_recv(struct ib_qp *ibqp, const struct ib_recv_wr *wr, /** * rvt_qp_valid_operation - validate post send wr request * @qp: the qp - * @post_parms_ the post send table for the driver + * @post_parms: the post send table for the driver * @wr: the work request * * The routine validates the operation based on the From 17401db5ffe1f202eabe8889bb86cf179bc66a29 Mon Sep 17 00:00:00 2001 From: Lee Jones Date: Tue, 26 Jan 2021 12:47:20 +0000 Subject: [PATCH 217/414] RDMA/hw/hfi1/netdev_rx: Fix misdocumentation of the 'start_id' param Fixes the following W=1 kernel build warning(s): drivers/infiniband/hw/hfi1/netdev_rx.c:473: warning: Function parameter or member 'start_id' not described in 'hfi1_netdev_get_first_data' drivers/infiniband/hw/hfi1/netdev_rx.c:473: warning: Excess function parameter 'id' description in 'hfi1_netdev_get_first_data' Link: https://lore.kernel.org/r/20210126124732.3320971-9-lee.jones@linaro.org Cc: Mike Marciniszyn Cc: Dennis Dalessandro Cc: Doug Ledford Cc: Jason Gunthorpe Cc: linux-rdma@vger.kernel.org Signed-off-by: Lee Jones Signed-off-by: Jason Gunthorpe --- drivers/infiniband/hw/hfi1/netdev_rx.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/infiniband/hw/hfi1/netdev_rx.c b/drivers/infiniband/hw/hfi1/netdev_rx.c index 6d263c9749b3..1fb6e1a0e4e1 100644 --- a/drivers/infiniband/hw/hfi1/netdev_rx.c +++ b/drivers/infiniband/hw/hfi1/netdev_rx.c @@ -467,7 +467,7 @@ void *hfi1_netdev_get_data(struct hfi1_devdata *dd, int id) * hfi1_netdev_get_first_dat - Gets first entry with greater or equal id. * * @dd: hfi1 dev data - * @id: requested integer id up to INT_MAX + * @start_id: requested integer id up to INT_MAX */ void *hfi1_netdev_get_first_data(struct hfi1_devdata *dd, int *start_id) { From 2a2660277aa40b915e63bc7593279659487e536a Mon Sep 17 00:00:00 2001 From: Lee Jones Date: Tue, 26 Jan 2021 12:47:21 +0000 Subject: [PATCH 218/414] RDMA/hw/hfi1/pcie: Demote kernel-doc abuses Fixes the following W=1 kernel build warning(s): drivers/infiniband/hw/hfi1/pcie.c:343: warning: Function parameter or member 'dd' not described in 'restore_pci_variables' drivers/infiniband/hw/hfi1/pcie.c:402: warning: Function parameter or member 'dd' not described in 'save_pci_variables' Link: https://lore.kernel.org/r/20210126124732.3320971-10-lee.jones@linaro.org Cc: Mike Marciniszyn Cc: Dennis Dalessandro Cc: Doug Ledford Cc: Jason Gunthorpe Cc: linux-rdma@vger.kernel.org Signed-off-by: Lee Jones Signed-off-by: Jason Gunthorpe --- drivers/infiniband/hw/hfi1/pcie.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/infiniband/hw/hfi1/pcie.c b/drivers/infiniband/hw/hfi1/pcie.c index 18d32f053d26..6f06e9920503 100644 --- a/drivers/infiniband/hw/hfi1/pcie.c +++ b/drivers/infiniband/hw/hfi1/pcie.c @@ -334,7 +334,7 @@ int pcie_speeds(struct hfi1_devdata *dd) return 0; } -/** +/* * Restore command and BARs after a reset has wiped them out * * Returns 0 on success, otherwise a negative error value @@ -393,7 +393,7 @@ int restore_pci_variables(struct hfi1_devdata *dd) return pcibios_err_to_errno(ret); } -/** +/* * Save BARs and command to rewrite after device reset * * Returns 0 on success, otherwise a negative error value From f4f86690c4a3fc66227ca6abf2da687c6c24d4e7 Mon Sep 17 00:00:00 2001 From: Lee Jones Date: Tue, 26 Jan 2021 12:47:22 +0000 Subject: [PATCH 219/414] RDMA/hw/hfi1/pio_copy: Provide entry for 'pio_copy()'s 'dd' param Fixes the following W=1 kernel build warning(s): drivers/infiniband/hw/hfi1/pio_copy.c:73: warning: Function parameter or member 'dd' not described in 'pio_copy' Link: https://lore.kernel.org/r/20210126124732.3320971-11-lee.jones@linaro.org Cc: Mike Marciniszyn Cc: Dennis Dalessandro Cc: Doug Ledford Cc: Jason Gunthorpe Cc: linux-rdma@vger.kernel.org Signed-off-by: Lee Jones Signed-off-by: Jason Gunthorpe --- drivers/infiniband/hw/hfi1/pio_copy.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/infiniband/hw/hfi1/pio_copy.c b/drivers/infiniband/hw/hfi1/pio_copy.c index 4a4ec2397857..14bfd8287f4a 100644 --- a/drivers/infiniband/hw/hfi1/pio_copy.c +++ b/drivers/infiniband/hw/hfi1/pio_copy.c @@ -55,6 +55,7 @@ /** * pio_copy - copy data block to MMIO space + * @dd: hfi1 dev data * @pbuf: a number of blocks allocated within a PIO send context * @pbc: PBC to send * @from: source, must be 8 byte aligned From 6993fce426267286e7b6c9f12d7c5dee51bca8da Mon Sep 17 00:00:00 2001 From: Lee Jones Date: Tue, 26 Jan 2021 12:47:23 +0000 Subject: [PATCH 220/414] RDMA/hw/hfi1/rc: Fix a few function documentation issues Fixes the following W=1 kernel build warning(s): drivers/infiniband/hw/hfi1/rc.c:430: warning: Function parameter or member 'ps' not described in 'hfi1_make_rc_req' drivers/infiniband/hw/hfi1/rc.c:1387: warning: Function parameter or member 'packet' not described in 'hfi1_send_rc_ack' drivers/infiniband/hw/hfi1/rc.c:1387: warning: Function parameter or member 'is_fecn' not described in 'hfi1_send_rc_ack' drivers/infiniband/hw/hfi1/rc.c:1387: warning: Excess function parameter 'qp' description in 'hfi1_send_rc_ack' drivers/infiniband/hw/hfi1/rc.c:2008: warning: Function parameter or member 'aeth' not described in 'do_rc_ack' drivers/infiniband/hw/hfi1/rc.c:2008: warning: Function parameter or member 'val' not described in 'do_rc_ack' drivers/infiniband/hw/hfi1/rc.c:2008: warning: Function parameter or member 'rcd' not described in 'do_rc_ack' drivers/infiniband/hw/hfi1/rc.c:2554: warning: Function parameter or member 'rcd' not described in 'rc_rcv_error' Link: https://lore.kernel.org/r/20210126124732.3320971-12-lee.jones@linaro.org Cc: Mike Marciniszyn Cc: Dennis Dalessandro Cc: Doug Ledford Cc: Jason Gunthorpe Cc: linux-rdma@vger.kernel.org Signed-off-by: Lee Jones Signed-off-by: Jason Gunthorpe --- drivers/infiniband/hw/hfi1/rc.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/drivers/infiniband/hw/hfi1/rc.c b/drivers/infiniband/hw/hfi1/rc.c index 1bb5f57152d3..7194236aec8f 100644 --- a/drivers/infiniband/hw/hfi1/rc.c +++ b/drivers/infiniband/hw/hfi1/rc.c @@ -421,6 +421,7 @@ static int make_rc_ack(struct hfi1_ibdev *dev, struct rvt_qp *qp, /** * hfi1_make_rc_req - construct a request packet (SEND, RDMA r/w, ATOMIC) * @qp: a pointer to the QP + * @ps: the current packet state * * Assumes s_lock is held. * @@ -1992,7 +1993,7 @@ static void update_qp_retry_state(struct rvt_qp *qp, u32 psn, u32 spsn, } } -/** +/* * do_rc_ack - process an incoming RC ACK * @qp: the QP the ACK came in on * @psn: the packet sequence number of the ACK @@ -2541,6 +2542,7 @@ static inline void rc_cancel_ack(struct rvt_qp *qp) * @opcode: the opcode for this packet * @psn: the packet sequence number for this packet * @diff: the difference between the PSN and the expected PSN + * @rcd: the receive context * * This is called from hfi1_rc_rcv() to process an unexpected * incoming RC packet for the given QP. From 11edbb1946fa583286d98a0e732bbcce6ffd0ab2 Mon Sep 17 00:00:00 2001 From: Lee Jones Date: Tue, 26 Jan 2021 12:47:24 +0000 Subject: [PATCH 221/414] RDMA/hw/hfi1/qp: Fix some formatting issues and demote kernel-doc abuse Fixes the following W=1 kernel build warning(s): drivers/infiniband/hw/hfi1/qp.c:195: warning: Function parameter or member 'dev' not described in 'verbs_mtu_enum_to_int' drivers/infiniband/hw/hfi1/qp.c:195: warning: Function parameter or member 'mtu' not described in 'verbs_mtu_enum_to_int' drivers/infiniband/hw/hfi1/qp.c:306: warning: Function parameter or member 'qp' not described in 'hfi1_setup_wqe' drivers/infiniband/hw/hfi1/qp.c:306: warning: Function parameter or member 'wqe' not described in 'hfi1_setup_wqe' drivers/infiniband/hw/hfi1/qp.c:306: warning: Function parameter or member 'call_send' not described in 'hfi1_setup_wqe' drivers/infiniband/hw/hfi1/qp.c:922: warning: Function parameter or member 'qp' not described in 'hfi1_qp_iter_cb' drivers/infiniband/hw/hfi1/qp.c:922: warning: Function parameter or member 'v' not described in 'hfi1_qp_iter_cb' Link: https://lore.kernel.org/r/20210126124732.3320971-13-lee.jones@linaro.org Cc: Mike Marciniszyn Cc: Dennis Dalessandro Cc: Doug Ledford Cc: Jason Gunthorpe Cc: linux-rdma@vger.kernel.org Signed-off-by: Lee Jones Signed-off-by: Jason Gunthorpe --- drivers/infiniband/hw/hfi1/qp.c | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/drivers/infiniband/hw/hfi1/qp.c b/drivers/infiniband/hw/hfi1/qp.c index 681bb4e918c9..e037df911512 100644 --- a/drivers/infiniband/hw/hfi1/qp.c +++ b/drivers/infiniband/hw/hfi1/qp.c @@ -186,7 +186,7 @@ static void flush_iowait(struct rvt_qp *qp) write_sequnlock_irqrestore(lock, flags); } -/** +/* * This function is what we would push to the core layer if we wanted to be a * "first class citizen". Instead we hide this here and rely on Verbs ULPs * to blindly pass the MTU enum value from the PathRecord to us. @@ -289,9 +289,9 @@ void hfi1_modify_qp(struct rvt_qp *qp, struct ib_qp_attr *attr, /** * hfi1_setup_wqe - set up the wqe - * @qp - The qp - * @wqe - The built wqe - * @call_send - Determine if the send should be posted or scheduled. + * @qp: The qp + * @wqe: The built wqe + * @call_send: Determine if the send should be posted or scheduled. * * Perform setup of the wqe. This is called * prior to inserting the wqe into the ring but after @@ -595,7 +595,7 @@ struct sdma_engine *qp_to_sdma_engine(struct rvt_qp *qp, u8 sc5) return sde; } -/* +/** * qp_to_send_context - map a qp to a send context * @qp: the QP * @sc5: the 5 bit sc @@ -912,8 +912,8 @@ void notify_error_qp(struct rvt_qp *qp) /** * hfi1_qp_iter_cb - callback for iterator - * @qp - the qp - * @v - the sl in low bits of v + * @qp: the qp + * @v: the sl in low bits of v * * This is called from the iterator callback to work * on an individual qp. From 0654a746663b7cc113adbf63a1b222e86cc3c9c4 Mon Sep 17 00:00:00 2001 From: Lee Jones Date: Tue, 26 Jan 2021 12:47:25 +0000 Subject: [PATCH 222/414] RDMA/hw/hfi1/ruc: Fix a small formatting and description issues Fixes the following W=1 kernel build warning(s): drivers/infiniband/hw/hfi1/ruc.c:277: warning: Function parameter or member 'bth1' not described in 'hfi1_make_ruc_header_16B' drivers/infiniband/hw/hfi1/ruc.c:365: warning: Function parameter or member 'bth1' not described in 'hfi1_make_ruc_header_9B' drivers/infiniband/hw/hfi1/ruc.c:472: warning: Function parameter or member 'tid' not described in 'hfi1_schedule_send_yield' drivers/infiniband/hw/hfi1/ruc.c:472: warning: Excess function parameter 'timeout' description in 'hfi1_schedule_send_yield' Link: https://lore.kernel.org/r/20210126124732.3320971-14-lee.jones@linaro.org Cc: Mike Marciniszyn Cc: Dennis Dalessandro Cc: Doug Ledford Cc: Jason Gunthorpe Cc: linux-rdma@vger.kernel.org Signed-off-by: Lee Jones Signed-off-by: Jason Gunthorpe --- drivers/infiniband/hw/hfi1/ruc.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/drivers/infiniband/hw/hfi1/ruc.c b/drivers/infiniband/hw/hfi1/ruc.c index 23ac6057b211..c3fa1814c6a8 100644 --- a/drivers/infiniband/hw/hfi1/ruc.c +++ b/drivers/infiniband/hw/hfi1/ruc.c @@ -260,6 +260,7 @@ static inline void hfi1_make_ruc_bth(struct rvt_qp *qp, * @qp: the queue pair * @ohdr: a pointer to the destination header memory * @bth0: bth0 passed in from the RC/UC builder + * @bth1: bth1 passed in from the RC/UC builder * @bth2: bth2 passed in from the RC/UC builder * @middle: non zero implies indicates ahg "could" be used * @ps: the current packet state @@ -348,6 +349,7 @@ static inline void hfi1_make_ruc_header_16B(struct rvt_qp *qp, * @qp: the queue pair * @ohdr: a pointer to the destination header memory * @bth0: bth0 passed in from the RC/UC builder + * @bth1: bth1 passed in from the RC/UC builder * @bth2: bth2 passed in from the RC/UC builder * @middle: non zero implies indicates ahg "could" be used * @ps: the current packet state @@ -455,11 +457,10 @@ void hfi1_make_ruc_header(struct rvt_qp *qp, struct ib_other_headers *ohdr, /** * hfi1_schedule_send_yield - test for a yield required for QP * send engine - * @timeout: Final time for timeout slice for jiffies * @qp: a pointer to QP * @ps: a pointer to a structure with commonly lookup values for * the the send engine progress - * @tid - true if it is the tid leg + * @tid: true if it is the tid leg * * This routine checks if the time slice for the QP has expired * for RC QPs, if so an additional work entry is queued. At this From d748031044bb08e85389ecfb9a26ff801911dcdf Mon Sep 17 00:00:00 2001 From: Lee Jones Date: Tue, 26 Jan 2021 12:47:26 +0000 Subject: [PATCH 223/414] RDMA/hw/hfi1/sdma: Fix misnaming of 'sdma_send_txlist()'s 'count_out' param Fixes the following W=1 kernel build warning(s): drivers/infiniband/hw/hfi1/sdma.c:2476: warning: Function parameter or member 'count_out' not described in 'sdma_send_txlist' drivers/infiniband/hw/hfi1/sdma.c:2476: warning: Excess function parameter 'count' description in 'sdma_send_txlist' Link: https://lore.kernel.org/r/20210126124732.3320971-15-lee.jones@linaro.org Cc: Mike Marciniszyn Cc: Dennis Dalessandro Cc: Doug Ledford Cc: Jason Gunthorpe Cc: linux-rdma@vger.kernel.org Signed-off-by: Lee Jones Signed-off-by: Jason Gunthorpe --- drivers/infiniband/hw/hfi1/sdma.c | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/drivers/infiniband/hw/hfi1/sdma.c b/drivers/infiniband/hw/hfi1/sdma.c index 27ec2851160a..46b5290b2839 100644 --- a/drivers/infiniband/hw/hfi1/sdma.c +++ b/drivers/infiniband/hw/hfi1/sdma.c @@ -2448,11 +2448,11 @@ int sdma_send_txreq(struct sdma_engine *sde, * @sde: sdma engine to use * @wait: SE wait structure to use when full (may be NULL) * @tx_list: list of sdma_txreqs to submit - * @count: pointer to a u16 which, after return will contain the total number of - * sdma_txreqs removed from the tx_list. This will include sdma_txreqs - * whose SDMA descriptors are submitted to the ring and the sdma_txreqs - * which are added to SDMA engine flush list if the SDMA engine state is - * not running. + * @count_out: pointer to a u16 which, after return will contain the total number of + * sdma_txreqs removed from the tx_list. This will include sdma_txreqs + * whose SDMA descriptors are submitted to the ring and the sdma_txreqs + * which are added to SDMA engine flush list if the SDMA engine state is + * not running. * * The call submits the list into the ring. * From cd7727fdcee3825e3d276784f6ed356949941e07 Mon Sep 17 00:00:00 2001 From: Lee Jones Date: Tue, 26 Jan 2021 12:47:27 +0000 Subject: [PATCH 224/414] RDMA/hw/hfi1/tid_rdma: Fix a plethora of kernel-doc issues Fixes the following W=1 kernel build warning(s): drivers/infiniband/hw/hfi1/tid_rdma.c:321: warning: Function parameter or member 'rdi' not described in 'qp_to_rcd' drivers/infiniband/hw/hfi1/tid_rdma.c:321: warning: Function parameter or member 'qp' not described in 'qp_to_rcd' drivers/infiniband/hw/hfi1/tid_rdma.c:505: warning: Function parameter or member 'queue' not described in 'kernel_tid_waiters' drivers/infiniband/hw/hfi1/tid_rdma.c:536: warning: Function parameter or member 'rcd' not described in 'dequeue_tid_waiter' drivers/infiniband/hw/hfi1/tid_rdma.c:536: warning: Function parameter or member 'queue' not described in 'dequeue_tid_waiter' drivers/infiniband/hw/hfi1/tid_rdma.c:536: warning: Function parameter or member 'qp' not described in 'dequeue_tid_waiter' drivers/infiniband/hw/hfi1/tid_rdma.c:562: warning: Function parameter or member 'queue' not described in 'queue_qp_for_tid_wait' drivers/infiniband/hw/hfi1/tid_rdma.c:607: warning: Function parameter or member 'qp' not described in 'tid_rdma_schedule_tid_wakeup' drivers/infiniband/hw/hfi1/tid_rdma.c:639: warning: Function parameter or member 'work' not described in 'tid_rdma_trigger_resume' drivers/infiniband/hw/hfi1/tid_rdma.c:666: warning: Function parameter or member 'qp' not described in '_tid_rdma_flush_wait' drivers/infiniband/hw/hfi1/tid_rdma.c:666: warning: Function parameter or member 'queue' not described in '_tid_rdma_flush_wait' drivers/infiniband/hw/hfi1/tid_rdma.c:713: warning: Function parameter or member 'rcd' not described in 'kern_reserve_flow' drivers/infiniband/hw/hfi1/tid_rdma.c:713: warning: Function parameter or member 'last' not described in 'kern_reserve_flow' drivers/infiniband/hw/hfi1/tid_rdma.c:879: warning: Function parameter or member 'flow' not described in 'tid_rdma_find_phys_blocks_4k' drivers/infiniband/hw/hfi1/tid_rdma.c:879: warning: Function parameter or member 'pages' not described in 'tid_rdma_find_phys_blocks_4k' drivers/infiniband/hw/hfi1/tid_rdma.c:879: warning: Function parameter or member 'npages' not described in 'tid_rdma_find_phys_blocks_4k' drivers/infiniband/hw/hfi1/tid_rdma.c:879: warning: Function parameter or member 'list' not described in 'tid_rdma_find_phys_blocks_4k' drivers/infiniband/hw/hfi1/tid_rdma.c:972: warning: Function parameter or member 'list' not described in 'tid_flush_pages' drivers/infiniband/hw/hfi1/tid_rdma.c:972: warning: Function parameter or member 'idx' not described in 'tid_flush_pages' drivers/infiniband/hw/hfi1/tid_rdma.c:972: warning: Function parameter or member 'pages' not described in 'tid_flush_pages' drivers/infiniband/hw/hfi1/tid_rdma.c:972: warning: Function parameter or member 'sets' not described in 'tid_flush_pages' drivers/infiniband/hw/hfi1/tid_rdma.c:1017: warning: Function parameter or member 'flow' not described in 'tid_rdma_find_phys_blocks_8k' drivers/infiniband/hw/hfi1/tid_rdma.c:1017: warning: Function parameter or member 'pages' not described in 'tid_rdma_find_phys_blocks_8k' drivers/infiniband/hw/hfi1/tid_rdma.c:1017: warning: Function parameter or member 'npages' not described in 'tid_rdma_find_phys_blocks_8k' drivers/infiniband/hw/hfi1/tid_rdma.c:1017: warning: Function parameter or member 'list' not described in 'tid_rdma_find_phys_blocks_8k' drivers/infiniband/hw/hfi1/tid_rdma.c:1083: warning: Function parameter or member 'flow' not described in 'kern_find_pages' drivers/infiniband/hw/hfi1/tid_rdma.c:1083: warning: Function parameter or member 'pages' not described in 'kern_find_pages' drivers/infiniband/hw/hfi1/tid_rdma.c:1083: warning: Function parameter or member 'ss' not described in 'kern_find_pages' drivers/infiniband/hw/hfi1/tid_rdma.c:1083: warning: Function parameter or member 'last' not described in 'kern_find_pages' drivers/infiniband/hw/hfi1/tid_rdma.c:1604: warning: Function parameter or member 'req' not described in 'hfi1_kern_exp_rcv_free_flows' drivers/infiniband/hw/hfi1/tid_rdma.c:3458: warning: Function parameter or member 'qp' not described in 'hfi1_tid_write_alloc_resources' drivers/infiniband/hw/hfi1/tid_rdma.c:3458: warning: Function parameter or member 'intr_ctx' not described in 'hfi1_tid_write_alloc_resources' Link: https://lore.kernel.org/r/20210126124732.3320971-16-lee.jones@linaro.org Cc: Mike Marciniszyn Cc: Dennis Dalessandro Cc: Doug Ledford Cc: Jason Gunthorpe Cc: linux-rdma@vger.kernel.org Signed-off-by: Lee Jones Signed-off-by: Jason Gunthorpe --- drivers/infiniband/hw/hfi1/tid_rdma.c | 47 +++++++++++++++------------ 1 file changed, 27 insertions(+), 20 deletions(-) diff --git a/drivers/infiniband/hw/hfi1/tid_rdma.c b/drivers/infiniband/hw/hfi1/tid_rdma.c index 92aa2a9b3b5a..0b1f9e4d038b 100644 --- a/drivers/infiniband/hw/hfi1/tid_rdma.c +++ b/drivers/infiniband/hw/hfi1/tid_rdma.c @@ -309,7 +309,8 @@ int hfi1_kern_exp_rcv_init(struct hfi1_ctxtdata *rcd, int reinit) /** * qp_to_rcd - determine the receive context used by a qp - * @qp - the qp + * @rdi: rvt dev struct + * @qp: the qp * * This routine returns the receive context associated * with a a qp's qpn. @@ -484,6 +485,7 @@ static struct rvt_qp *first_qp(struct hfi1_ctxtdata *rcd, /** * kernel_tid_waiters - determine rcd wait * @rcd: the receive context + * @queue: the queue to operate on * @qp: the head of the qp being processed * * This routine will return false IFF @@ -517,7 +519,9 @@ static bool kernel_tid_waiters(struct hfi1_ctxtdata *rcd, /** * dequeue_tid_waiter - dequeue the qp from the list - * @qp - the qp to remove the wait list + * @rcd: the receive context + * @queue: the queue to operate on + * @qp: the qp to remove the wait list * * This routine removes the indicated qp from the * wait list if it is there. @@ -549,6 +553,7 @@ static void dequeue_tid_waiter(struct hfi1_ctxtdata *rcd, /** * queue_qp_for_tid_wait - suspend QP on tid space * @rcd: the receive context + * @queue: the queue to operate on * @qp: the qp * * The qp is inserted at the tail of the rcd @@ -593,7 +598,7 @@ static void __trigger_tid_waiter(struct rvt_qp *qp) /** * tid_rdma_schedule_tid_wakeup - schedule wakeup for a qp - * @qp - the qp + * @qp: the qp * * trigger a schedule or a waiting qp in a deadlock * safe manner. The qp reference is held prior @@ -630,7 +635,7 @@ static void tid_rdma_schedule_tid_wakeup(struct rvt_qp *qp) /** * tid_rdma_trigger_resume - field a trigger work request - * @work - the work item + * @work: the work item * * Complete the off qp trigger processing by directly * calling the progress routine. @@ -654,7 +659,7 @@ static void tid_rdma_trigger_resume(struct work_struct *work) rvt_put_qp(qp); } -/** +/* * tid_rdma_flush_wait - unwind any tid space wait * * This is called when resetting a qp to @@ -693,8 +698,8 @@ void hfi1_tid_rdma_flush_wait(struct rvt_qp *qp) /* Flow functions */ /** * kern_reserve_flow - allocate a hardware flow - * @rcd - the context to use for allocation - * @last - the index of the preferred flow. Use RXE_NUM_TID_FLOWS to + * @rcd: the context to use for allocation + * @last: the index of the preferred flow. Use RXE_NUM_TID_FLOWS to * signify "don't care". * * Use a bit mask based allocation to reserve a hardware @@ -860,9 +865,10 @@ static u8 trdma_pset_order(struct tid_rdma_pageset *s) /** * tid_rdma_find_phys_blocks_4k - get groups base on mr info - * @npages - number of pages - * @pages - pointer to an array of page structs - * @list - page set array to return + * @flow: overall info for a TID RDMA segment + * @pages: pointer to an array of page structs + * @npages: number of pages + * @list: page set array to return * * This routine returns the number of groups associated with * the current sge information. This implementation is based @@ -949,10 +955,10 @@ static u32 tid_rdma_find_phys_blocks_4k(struct tid_rdma_flow *flow, /** * tid_flush_pages - dump out pages into pagesets - * @list - list of pagesets - * @idx - pointer to current page index - * @pages - number of pages to dump - * @sets - current number of pagesset + * @list: list of pagesets + * @idx: pointer to current page index + * @pages: number of pages to dump + * @sets: current number of pagesset * * This routine flushes out accumuated pages. * @@ -990,9 +996,10 @@ static u32 tid_flush_pages(struct tid_rdma_pageset *list, /** * tid_rdma_find_phys_blocks_8k - get groups base on mr info - * @pages - pointer to an array of page structs - * @npages - number of pages - * @list - page set array to return + * @flow: overall info for a TID RDMA segment + * @pages: pointer to an array of page structs + * @npages: number of pages + * @list: page set array to return * * This routine parses an array of pages to compute pagesets * in an 8k compatible way. @@ -1064,7 +1071,7 @@ static u32 tid_rdma_find_phys_blocks_8k(struct tid_rdma_flow *flow, return sets; } -/** +/* * Find pages for one segment of a sge array represented by @ss. The function * does not check the sge, the sge must have been checked for alignment with a * prior call to hfi1_kern_trdma_ok. Other sge checking is done as part of @@ -1598,7 +1605,7 @@ void hfi1_kern_exp_rcv_clear_all(struct tid_rdma_request *req) /** * hfi1_kern_exp_rcv_free_flows - free priviously allocated flow information - * @req - the tid rdma request to be cleaned + * @req: the tid rdma request to be cleaned */ static void hfi1_kern_exp_rcv_free_flows(struct tid_rdma_request *req) { @@ -3435,7 +3442,7 @@ static u32 hfi1_compute_tid_rnr_timeout(struct rvt_qp *qp, u32 to_seg) return 0; } -/** +/* * Central place for resource allocation at TID write responder, * is called from write_req and write_data interrupt handlers as * well as the send thread when a queued QP is scheduled for From d6a8bfe01ddb5f5a68f228a87838cc51ba2aec78 Mon Sep 17 00:00:00 2001 From: Lee Jones Date: Tue, 26 Jan 2021 12:47:28 +0000 Subject: [PATCH 225/414] RDMA/hw/hfi1/uc: Fix a little doc-rot Fixes the following W=1 kernel build warning(s): drivers/infiniband/hw/hfi1/uc.c:64: warning: Function parameter or member 'ps' not described in 'hfi1_make_uc_req' drivers/infiniband/hw/hfi1/uc.c:306: warning: Function parameter or member 'packet' not described in 'hfi1_uc_rcv' drivers/infiniband/hw/hfi1/uc.c:306: warning: Excess function parameter 'ibp' description in 'hfi1_uc_rcv' drivers/infiniband/hw/hfi1/uc.c:306: warning: Excess function parameter 'hdr' description in 'hfi1_uc_rcv' drivers/infiniband/hw/hfi1/uc.c:306: warning: Excess function parameter 'rcv_flags' description in 'hfi1_uc_rcv' drivers/infiniband/hw/hfi1/uc.c:306: warning: Excess function parameter 'data' description in 'hfi1_uc_rcv' drivers/infiniband/hw/hfi1/uc.c:306: warning: Excess function parameter 'tlen' description in 'hfi1_uc_rcv' drivers/infiniband/hw/hfi1/uc.c:306: warning: Excess function parameter 'qp' description in 'hfi1_uc_rcv' Link: https://lore.kernel.org/r/20210126124732.3320971-17-lee.jones@linaro.org Cc: Mike Marciniszyn Cc: Dennis Dalessandro Cc: Doug Ledford Cc: Jason Gunthorpe Cc: linux-rdma@vger.kernel.org Signed-off-by: Lee Jones Signed-off-by: Jason Gunthorpe --- drivers/infiniband/hw/hfi1/uc.c | 8 ++------ 1 file changed, 2 insertions(+), 6 deletions(-) diff --git a/drivers/infiniband/hw/hfi1/uc.c b/drivers/infiniband/hw/hfi1/uc.c index 1fb918399da0..5b0f536b34e0 100644 --- a/drivers/infiniband/hw/hfi1/uc.c +++ b/drivers/infiniband/hw/hfi1/uc.c @@ -55,6 +55,7 @@ /** * hfi1_make_uc_req - construct a request packet (SEND, RDMA write) * @qp: a pointer to the QP + * @ps: the current packet state * * Assume s_lock is held. * @@ -291,12 +292,7 @@ int hfi1_make_uc_req(struct rvt_qp *qp, struct hfi1_pkt_state *ps) /** * hfi1_uc_rcv - handle an incoming UC packet - * @ibp: the port the packet came in on - * @hdr: the header of the packet - * @rcv_flags: flags relevant to rcv processing - * @data: the packet data - * @tlen: the length of the packet - * @qp: the QP for this packet. + * @packet: the packet structure * * This is called from qp_rcv() to process an incoming UC packet * for the given QP. From 16e1b5364fc17fca86bbeb89c276cb4615b6bce1 Mon Sep 17 00:00:00 2001 From: Lee Jones Date: Tue, 26 Jan 2021 12:47:29 +0000 Subject: [PATCH 226/414] RDMA/hw/hfi1/ud: Fix a little more doc-rot Fixes the following W=1 kernel build warning(s): drivers/infiniband/hw/hfi1/ud.c:477: warning: Function parameter or member 'ps' not described in 'hfi1_make_ud_req' drivers/infiniband/hw/hfi1/ud.c:855: warning: Function parameter or member 'packet' not described in 'hfi1_ud_rcv' drivers/infiniband/hw/hfi1/ud.c:855: warning: Excess function parameter 'ibp' description in 'hfi1_ud_rcv' drivers/infiniband/hw/hfi1/ud.c:855: warning: Excess function parameter 'hdr' description in 'hfi1_ud_rcv' drivers/infiniband/hw/hfi1/ud.c:855: warning: Excess function parameter 'rcv_flags' description in 'hfi1_ud_rcv' drivers/infiniband/hw/hfi1/ud.c:855: warning: Excess function parameter 'data' description in 'hfi1_ud_rcv' drivers/infiniband/hw/hfi1/ud.c:855: warning: Excess function parameter 'tlen' description in 'hfi1_ud_rcv' drivers/infiniband/hw/hfi1/ud.c:855: warning: Excess function parameter 'qp' description in 'hfi1_ud_rcv' Link: https://lore.kernel.org/r/20210126124732.3320971-18-lee.jones@linaro.org Cc: Mike Marciniszyn Cc: Dennis Dalessandro Cc: Doug Ledford Cc: Jason Gunthorpe Cc: linux-rdma@vger.kernel.org Signed-off-by: Lee Jones Signed-off-by: Jason Gunthorpe --- drivers/infiniband/hw/hfi1/ud.c | 8 ++------ 1 file changed, 2 insertions(+), 6 deletions(-) diff --git a/drivers/infiniband/hw/hfi1/ud.c b/drivers/infiniband/hw/hfi1/ud.c index e804af71b629..6ecb984c85fa 100644 --- a/drivers/infiniband/hw/hfi1/ud.c +++ b/drivers/infiniband/hw/hfi1/ud.c @@ -468,6 +468,7 @@ void hfi1_make_ud_req_16B(struct rvt_qp *qp, struct hfi1_pkt_state *ps, /** * hfi1_make_ud_req - construct a UD request packet * @qp: the QP + * @ps: the current packet state * * Assume s_lock is held. * @@ -840,12 +841,7 @@ static int opa_smp_check(struct hfi1_ibport *ibp, u16 pkey, u8 sc5, /** * hfi1_ud_rcv - receive an incoming UD packet - * @ibp: the port the packet came in on - * @hdr: the packet header - * @rcv_flags: flags relevant to rcv processing - * @data: the packet data - * @tlen: the packet length - * @qp: the QP the packet came on + * @packet: the packet structure * * This is called from qp_rcv() to process an incoming UD packet * for the given QP. From c6f2b65e9ae3bcdd14b749074b55612c95eac96a Mon Sep 17 00:00:00 2001 From: Lee Jones Date: Tue, 26 Jan 2021 12:47:30 +0000 Subject: [PATCH 227/414] RDMA/hw/hfi1/user_exp_rcv: Demote half-documented and kernel-doc abuses Fixes the following W=1 kernel build warning(s): drivers/infiniband/hw/hfi1/user_exp_rcv.c:174: warning: Function parameter or member 'fd' not described in 'unpin_rcv_pages' drivers/infiniband/hw/hfi1/user_exp_rcv.c:174: warning: Function parameter or member 'tidbuf' not described in 'unpin_rcv_pages' drivers/infiniband/hw/hfi1/user_exp_rcv.c:174: warning: Function parameter or member 'node' not described in 'unpin_rcv_pages' drivers/infiniband/hw/hfi1/user_exp_rcv.c:174: warning: Function parameter or member 'idx' not described in 'unpin_rcv_pages' drivers/infiniband/hw/hfi1/user_exp_rcv.c:174: warning: Function parameter or member 'npages' not described in 'unpin_rcv_pages' drivers/infiniband/hw/hfi1/user_exp_rcv.c:174: warning: Function parameter or member 'mapped' not described in 'unpin_rcv_pages' drivers/infiniband/hw/hfi1/user_exp_rcv.c:196: warning: Function parameter or member 'fd' not described in 'pin_rcv_pages' drivers/infiniband/hw/hfi1/user_exp_rcv.c:196: warning: Function parameter or member 'tidbuf' not described in 'pin_rcv_pages' Link: https://lore.kernel.org/r/20210126124732.3320971-19-lee.jones@linaro.org Cc: Mike Marciniszyn Cc: Dennis Dalessandro Cc: Doug Ledford Cc: Jason Gunthorpe Cc: linux-rdma@vger.kernel.org Signed-off-by: Lee Jones Signed-off-by: Jason Gunthorpe --- drivers/infiniband/hw/hfi1/user_exp_rcv.c | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/drivers/infiniband/hw/hfi1/user_exp_rcv.c b/drivers/infiniband/hw/hfi1/user_exp_rcv.c index b94fc7fd75a9..58dcab2679d9 100644 --- a/drivers/infiniband/hw/hfi1/user_exp_rcv.c +++ b/drivers/infiniband/hw/hfi1/user_exp_rcv.c @@ -154,12 +154,12 @@ void hfi1_user_exp_rcv_free(struct hfi1_filedata *fd) fd->entry_to_rb = NULL; } -/** +/* * Release pinned receive buffer pages. * - * @mapped - true if the pages have been DMA mapped. false otherwise. - * @idx - Index of the first page to unpin. - * @npages - No of pages to unpin. + * @mapped: true if the pages have been DMA mapped. false otherwise. + * @idx: Index of the first page to unpin. + * @npages: No of pages to unpin. * * If the pages have been DMA mapped (indicated by mapped parameter), their * info will be passed via a struct tid_rb_node. If they haven't been mapped, @@ -189,7 +189,7 @@ static void unpin_rcv_pages(struct hfi1_filedata *fd, fd->tid_n_pinned -= npages; } -/** +/* * Pin receive buffer pages. */ static int pin_rcv_pages(struct hfi1_filedata *fd, struct tid_user_buf *tidbuf) From cd5962d4fa72a09a4ac386a3804713bda1ab457d Mon Sep 17 00:00:00 2001 From: Lee Jones Date: Tue, 26 Jan 2021 12:47:31 +0000 Subject: [PATCH 228/414] RDMA/hw/hfi1/verbs: Demote non-conforming doc header and fix a misspelling Fixes the following W=1 kernel build warning(s): drivers/infiniband/hw/hfi1/verbs.c:741: warning: Function parameter or member 'qp' not described in 'update_tx_opstats' drivers/infiniband/hw/hfi1/verbs.c:1160: warning: Function parameter or member 'pkey' not described in 'egress_pkey_check' drivers/infiniband/hw/hfi1/verbs.c:1160: warning: Excess function parameter 'bkey' description in 'egress_pkey_check' drivers/infiniband/hw/hfi1/verbs.c:1217: warning: Function parameter or member 'qp' not described in 'get_send_routine' drivers/infiniband/hw/hfi1/verbs.c:1217: warning: Function parameter or member 'ps' not described in 'get_send_routine' Link: https://lore.kernel.org/r/20210126124732.3320971-20-lee.jones@linaro.org Cc: Mike Marciniszyn Cc: Dennis Dalessandro Cc: Doug Ledford Cc: Jason Gunthorpe Cc: linux-rdma@vger.kernel.org Signed-off-by: Lee Jones Signed-off-by: Jason Gunthorpe --- drivers/infiniband/hw/hfi1/verbs.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/drivers/infiniband/hw/hfi1/verbs.c b/drivers/infiniband/hw/hfi1/verbs.c index 3591923abebb..0dd4bb0a5a7e 100644 --- a/drivers/infiniband/hw/hfi1/verbs.c +++ b/drivers/infiniband/hw/hfi1/verbs.c @@ -729,7 +729,7 @@ static noinline int build_verbs_ulp_payload( /** * update_tx_opstats - record stats by opcode - * @qp; the qp + * @qp: the qp * @ps: transmit packet state * @plen: the plen in dwords * @@ -1145,7 +1145,7 @@ static inline int egress_pkey_matches_entry(u16 pkey, u16 ent) * egress_pkey_check - check P_KEY of a packet * @ppd: Physical IB port data * @slid: SLID for packet - * @bkey: PKEY for header + * @pkey: PKEY for header * @sc5: SC for packet * @s_pkey_index: It will be used for look up optimization for kernel contexts * only. If it is negative value, then it means user contexts is calling this @@ -1206,7 +1206,7 @@ int egress_pkey_check(struct hfi1_pportdata *ppd, u32 slid, u16 pkey, return 1; } -/** +/* * get_send_routine - choose an egress routine * * Choose an egress routine based on QP type From c2a54b68738c0261cdc1b399e019c71cba389686 Mon Sep 17 00:00:00 2001 From: Lee Jones Date: Tue, 26 Jan 2021 12:47:32 +0000 Subject: [PATCH 229/414] RDMA/hw/hfi1/rc: Demote incorrectly populated kernel-doc header Fixes the following W=1 kernel build warning(s): drivers/infiniband/hw/hfi1/rc.c:1388: warning: Function parameter or member 'packet' not described in 'hfi1_send_rc_ack' drivers/infiniband/hw/hfi1/rc.c:1388: warning: Function parameter or member 'is_fecn' not described in 'hfi1_send_rc_ack' drivers/infiniband/hw/hfi1/rc.c:1388: warning: Excess function parameter 'qp' description in 'hfi1_send_rc_ack' Link: https://lore.kernel.org/r/20210126124732.3320971-21-lee.jones@linaro.org Cc: Mike Marciniszyn Cc: Dennis Dalessandro Cc: Doug Ledford Cc: Jason Gunthorpe Cc: linux-rdma@vger.kernel.org Signed-off-by: Lee Jones Signed-off-by: Jason Gunthorpe --- drivers/infiniband/hw/hfi1/rc.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/drivers/infiniband/hw/hfi1/rc.c b/drivers/infiniband/hw/hfi1/rc.c index 7194236aec8f..0174b8ee9f00 100644 --- a/drivers/infiniband/hw/hfi1/rc.c +++ b/drivers/infiniband/hw/hfi1/rc.c @@ -1376,9 +1376,8 @@ static const hfi1_make_rc_ack hfi1_make_rc_ack_tbl[2] = { [HFI1_PKT_TYPE_16B] = &hfi1_make_rc_ack_16B }; -/** +/* * hfi1_send_rc_ack - Construct an ACK packet and send it - * @qp: a pointer to the QP * * This is called from hfi1_rc_rcv() and handle_receive_interrupt(). * Note that RDMA reads and atomics are handled in the From 68fb9f3e312a36e49fd05ec2d6b668daf2c4931a Mon Sep 17 00:00:00 2001 From: Gal Pressman Date: Tue, 26 Jan 2021 14:06:57 +0200 Subject: [PATCH 230/414] RDMA/efa: Remove redundant NULL pointer check of CQE A pointer to store the command completion must be provided as it is always used in efa_com_put_comp_ctx() to return the completion context back to the pool. Remove the NULL pointer check and the redundant 'status' field stored on the context as it could be retrieved from the completion itself. Link: https://lore.kernel.org/r/20210126120702.9807-2-galpress@amazon.com Reviewed-by: Firas JahJah Reviewed-by: Yossi Leybovich Signed-off-by: Gal Pressman Signed-off-by: Jason Gunthorpe --- drivers/infiniband/hw/efa/efa_com.c | 16 ++++++---------- 1 file changed, 6 insertions(+), 10 deletions(-) diff --git a/drivers/infiniband/hw/efa/efa_com.c b/drivers/infiniband/hw/efa/efa_com.c index f7242188a843..747efc794cc0 100644 --- a/drivers/infiniband/hw/efa/efa_com.c +++ b/drivers/infiniband/hw/efa/efa_com.c @@ -1,6 +1,6 @@ // SPDX-License-Identifier: GPL-2.0 OR BSD-2-Clause /* - * Copyright 2018-2020 Amazon.com, Inc. or its affiliates. All rights reserved. + * Copyright 2018-2021 Amazon.com, Inc. or its affiliates. All rights reserved. */ #include "efa_com.h" @@ -33,8 +33,6 @@ struct efa_comp_ctx { struct efa_admin_acq_entry *user_cqe; u32 comp_size; enum efa_cmd_status status; - /* status from the device */ - u8 comp_status; u8 cmd_opcode; u8 occupied; }; @@ -421,9 +419,7 @@ static void efa_com_handle_single_admin_completion(struct efa_com_admin_queue *a } comp_ctx->status = EFA_CMD_COMPLETED; - comp_ctx->comp_status = cqe->acq_common_descriptor.status; - if (comp_ctx->user_cqe) - memcpy(comp_ctx->user_cqe, cqe, comp_ctx->comp_size); + memcpy(comp_ctx->user_cqe, cqe, comp_ctx->comp_size); if (!test_bit(EFA_AQ_STATE_POLLING_BIT, &aq->state)) complete(&comp_ctx->wait_event); @@ -521,7 +517,7 @@ static int efa_com_wait_and_process_admin_cq_polling(struct efa_comp_ctx *comp_c msleep(aq->poll_interval); } - err = efa_com_comp_status_to_errno(comp_ctx->comp_status); + err = efa_com_comp_status_to_errno(comp_ctx->user_cqe->acq_common_descriptor.status); out: efa_com_put_comp_ctx(aq, comp_ctx); return err; @@ -569,7 +565,7 @@ static int efa_com_wait_and_process_admin_cq_interrupts(struct efa_comp_ctx *com goto out; } - err = efa_com_comp_status_to_errno(comp_ctx->comp_status); + err = efa_com_comp_status_to_errno(comp_ctx->user_cqe->acq_common_descriptor.status); out: efa_com_put_comp_ctx(aq, comp_ctx); return err; @@ -641,8 +637,8 @@ int efa_com_cmd_exec(struct efa_com_admin_queue *aq, aq->efa_dev, "Failed to process command %s (opcode %u) comp_status %d err %d\n", efa_com_cmd_str(cmd->aq_common_descriptor.opcode), - cmd->aq_common_descriptor.opcode, comp_ctx->comp_status, - err); + cmd->aq_common_descriptor.opcode, + comp_ctx->user_cqe->acq_common_descriptor.status, err); atomic64_inc(&aq->stats.cmd_err); } From c2a5056814f5dd77e7a20ed27b99820dbf6c2a12 Mon Sep 17 00:00:00 2001 From: Gal Pressman Date: Tue, 26 Jan 2021 14:06:58 +0200 Subject: [PATCH 231/414] RDMA/efa: Remove duplication of upper/lower_32_bits The EFA_DMA_ADDR_TO_UINT32_HIGH/LOW macros are the same as the kernel's upper/lower_32_bits, remove them and use kernel macros instead. Link: https://lore.kernel.org/r/20210126120702.9807-3-galpress@amazon.com Reviewed-by: Firas JahJah Signed-off-by: Gal Pressman Signed-off-by: Jason Gunthorpe --- drivers/infiniband/hw/efa/efa_com.c | 15 ++++++--------- 1 file changed, 6 insertions(+), 9 deletions(-) diff --git a/drivers/infiniband/hw/efa/efa_com.c b/drivers/infiniband/hw/efa/efa_com.c index 747efc794cc0..0d523ad736c7 100644 --- a/drivers/infiniband/hw/efa/efa_com.c +++ b/drivers/infiniband/hw/efa/efa_com.c @@ -20,9 +20,6 @@ #define EFA_CTRL_MINOR 0 #define EFA_CTRL_SUB_MINOR 1 -#define EFA_DMA_ADDR_TO_UINT32_LOW(x) ((u32)((u64)(x))) -#define EFA_DMA_ADDR_TO_UINT32_HIGH(x) ((u32)(((u64)(x)) >> 32)) - enum efa_cmd_status { EFA_CMD_SUBMITTED, EFA_CMD_COMPLETED, @@ -138,8 +135,8 @@ static int efa_com_admin_init_sq(struct efa_com_dev *edev) sq->db_addr = (u32 __iomem *)(edev->reg_bar + EFA_REGS_AQ_PROD_DB_OFF); - addr_high = EFA_DMA_ADDR_TO_UINT32_HIGH(sq->dma_addr); - addr_low = EFA_DMA_ADDR_TO_UINT32_LOW(sq->dma_addr); + addr_high = upper_32_bits(sq->dma_addr); + addr_low = lower_32_bits(sq->dma_addr); writel(addr_low, edev->reg_bar + EFA_REGS_AQ_BASE_LO_OFF); writel(addr_high, edev->reg_bar + EFA_REGS_AQ_BASE_HI_OFF); @@ -172,8 +169,8 @@ static int efa_com_admin_init_cq(struct efa_com_dev *edev) cq->cc = 0; cq->phase = 1; - addr_high = EFA_DMA_ADDR_TO_UINT32_HIGH(cq->dma_addr); - addr_low = EFA_DMA_ADDR_TO_UINT32_LOW(cq->dma_addr); + addr_high = upper_32_bits(cq->dma_addr); + addr_low = lower_32_bits(cq->dma_addr); writel(addr_low, edev->reg_bar + EFA_REGS_ACQ_BASE_LO_OFF); writel(addr_high, edev->reg_bar + EFA_REGS_ACQ_BASE_HI_OFF); @@ -213,8 +210,8 @@ static int efa_com_admin_init_aenq(struct efa_com_dev *edev, aenq->cc = 0; aenq->phase = 1; - addr_low = EFA_DMA_ADDR_TO_UINT32_LOW(aenq->dma_addr); - addr_high = EFA_DMA_ADDR_TO_UINT32_HIGH(aenq->dma_addr); + addr_low = lower_32_bits(aenq->dma_addr); + addr_high = upper_32_bits(aenq->dma_addr); writel(addr_low, edev->reg_bar + EFA_REGS_AENQ_BASE_LO_OFF); writel(addr_high, edev->reg_bar + EFA_REGS_AENQ_BASE_HI_OFF); From 631b6189b8fef6f65db99498ad8e81fadd97d41e Mon Sep 17 00:00:00 2001 From: Gal Pressman Date: Tue, 26 Jan 2021 14:06:59 +0200 Subject: [PATCH 232/414] RDMA/efa: Remove unnecessary indentation in defs comments The indentation in the subsequent comment lines is unnecessary, remove it. Link: https://lore.kernel.org/r/20210126120702.9807-4-galpress@amazon.com Reviewed-by: Firas JahJah Reviewed-by: Yossi Leybovich Signed-off-by: Gal Pressman Signed-off-by: Jason Gunthorpe --- drivers/infiniband/hw/efa/efa_admin_cmds_defs.h | 8 ++++---- drivers/infiniband/hw/efa/efa_admin_defs.h | 4 ++-- 2 files changed, 6 insertions(+), 6 deletions(-) diff --git a/drivers/infiniband/hw/efa/efa_admin_cmds_defs.h b/drivers/infiniband/hw/efa/efa_admin_cmds_defs.h index b199e4ac6cf9..d383850ee446 100644 --- a/drivers/infiniband/hw/efa/efa_admin_cmds_defs.h +++ b/drivers/infiniband/hw/efa/efa_admin_cmds_defs.h @@ -1,6 +1,6 @@ /* SPDX-License-Identifier: GPL-2.0 OR BSD-2-Clause */ /* - * Copyright 2018-2020 Amazon.com, Inc. or its affiliates. All rights reserved. + * Copyright 2018-2021 Amazon.com, Inc. or its affiliates. All rights reserved. */ #ifndef _EFA_ADMIN_CMDS_H_ @@ -161,8 +161,8 @@ struct efa_admin_create_qp_resp { u32 qp_handle; /* - * QP number in the given EFA virtual device. Least-significant bits - * (as needed according to max_qp) carry unique QP ID + * QP number in the given EFA virtual device. Least-significant bits (as + * needed according to max_qp) carry unique QP ID */ u16 qp_num; @@ -465,7 +465,7 @@ struct efa_admin_create_cq_cmd { /* * number of sub cqs - must be equal to sub_cqs_per_cq of queue - * attributes. + * attributes. */ u16 num_sub_cqs; diff --git a/drivers/infiniband/hw/efa/efa_admin_defs.h b/drivers/infiniband/hw/efa/efa_admin_defs.h index 29d53ed63b3e..78ff9389ae25 100644 --- a/drivers/infiniband/hw/efa/efa_admin_defs.h +++ b/drivers/infiniband/hw/efa/efa_admin_defs.h @@ -1,6 +1,6 @@ /* SPDX-License-Identifier: GPL-2.0 OR BSD-2-Clause */ /* - * Copyright 2018-2020 Amazon.com, Inc. or its affiliates. All rights reserved. + * Copyright 2018-2021 Amazon.com, Inc. or its affiliates. All rights reserved. */ #ifndef _EFA_ADMIN_H_ @@ -82,7 +82,7 @@ struct efa_admin_acq_common_desc { /* * indicates to the driver which AQ entry has been consumed by the - * device and could be reused + * device and could be reused */ u16 sq_head_indx; }; From 9bf61b8ce5557b0bb97132fb81edce04c4ba99fb Mon Sep 17 00:00:00 2001 From: Gal Pressman Date: Tue, 26 Jan 2021 14:07:00 +0200 Subject: [PATCH 233/414] RDMA/efa: Remove unused 'select' field from get/set feature command descriptor The 'select' field in the get/set feature admin command is unimplemented, unused and misleading, remove it. The command always refers to the current values. Link: https://lore.kernel.org/r/20210126120702.9807-5-galpress@amazon.com Reviewed-by: Firas JahJah Reviewed-by: Yossi Leybovich Signed-off-by: Gal Pressman Signed-off-by: Jason Gunthorpe --- drivers/infiniband/hw/efa/efa_admin_cmds_defs.h | 11 ++--------- 1 file changed, 2 insertions(+), 9 deletions(-) diff --git a/drivers/infiniband/hw/efa/efa_admin_cmds_defs.h b/drivers/infiniband/hw/efa/efa_admin_cmds_defs.h index d383850ee446..9327d5e3ec62 100644 --- a/drivers/infiniband/hw/efa/efa_admin_cmds_defs.h +++ b/drivers/infiniband/hw/efa/efa_admin_cmds_defs.h @@ -563,12 +563,8 @@ struct efa_admin_acq_get_stats_resp { }; struct efa_admin_get_set_feature_common_desc { - /* - * 1:0 : select - 0x1 - current value; 0x3 - default - * value - * 7:3 : reserved3 - MBZ - */ - u8 flags; + /* MBZ */ + u8 reserved0; /* as appears in efa_admin_aq_feature_id */ u8 feature_id; @@ -909,9 +905,6 @@ struct efa_admin_host_info { #define EFA_ADMIN_CREATE_CQ_CMD_VIRT_MASK BIT(6) #define EFA_ADMIN_CREATE_CQ_CMD_CQ_ENTRY_SIZE_WORDS_MASK GENMASK(4, 0) -/* get_set_feature_common_desc */ -#define EFA_ADMIN_GET_SET_FEATURE_COMMON_DESC_SELECT_MASK GENMASK(1, 0) - /* feature_device_attr_desc */ #define EFA_ADMIN_FEATURE_DEVICE_ATTR_DESC_RDMA_READ_MASK BIT(0) #define EFA_ADMIN_FEATURE_DEVICE_ATTR_DESC_RNR_RETRY_MASK BIT(1) From 4629c5d4db91e2141bd5cad1e75325fb4a1f78a5 Mon Sep 17 00:00:00 2001 From: Gal Pressman Date: Tue, 26 Jan 2021 14:07:01 +0200 Subject: [PATCH 234/414] RDMA/efa: Remove unused syndrome enum values The notification syndrome enum values are unused, remove them. Link: https://lore.kernel.org/r/20210126120702.9807-6-galpress@amazon.com Reviewed-by: Firas JahJah Signed-off-by: Gal Pressman Signed-off-by: Jason Gunthorpe --- drivers/infiniband/hw/efa/efa_admin_cmds_defs.h | 6 ------ 1 file changed, 6 deletions(-) diff --git a/drivers/infiniband/hw/efa/efa_admin_cmds_defs.h b/drivers/infiniband/hw/efa/efa_admin_cmds_defs.h index 9327d5e3ec62..fa38b34eddb8 100644 --- a/drivers/infiniband/hw/efa/efa_admin_cmds_defs.h +++ b/drivers/infiniband/hw/efa/efa_admin_cmds_defs.h @@ -819,12 +819,6 @@ enum efa_admin_aenq_group { EFA_ADMIN_AENQ_GROUPS_NUM = 5, }; -enum efa_admin_aenq_notification_syndrom { - EFA_ADMIN_SUSPEND = 0, - EFA_ADMIN_RESUME = 1, - EFA_ADMIN_UPDATE_HINTS = 2, -}; - struct efa_admin_mmio_req_read_less_resp { u16 req_id; From c867c78acae96fc359f2a2f375ce64b5f0871802 Mon Sep 17 00:00:00 2001 From: Robin Murphy Date: Wed, 27 Jan 2021 16:29:28 +0000 Subject: [PATCH 235/414] iommu/msm: Hook up iotlb_sync_map The core API can now accommodate invalidate-on-map style behaviour in a single efficient call, so hook that up instead of having io-pgatble do it piecemeal. Signed-off-by: Robin Murphy Link: https://lore.kernel.org/r/e95223a0abf129230a0bec6743f837075f0a2fcb.1611764372.git.robin.murphy@arm.com Signed-off-by: Will Deacon --- drivers/iommu/msm_iommu.c | 10 +++++++++- 1 file changed, 9 insertions(+), 1 deletion(-) diff --git a/drivers/iommu/msm_iommu.c b/drivers/iommu/msm_iommu.c index 040e85f70861..f0ba6a09b434 100644 --- a/drivers/iommu/msm_iommu.c +++ b/drivers/iommu/msm_iommu.c @@ -343,7 +343,6 @@ static int msm_iommu_domain_config(struct msm_priv *priv) spin_lock_init(&priv->pgtlock); priv->cfg = (struct io_pgtable_cfg) { - .quirks = IO_PGTABLE_QUIRK_TLBI_ON_MAP, .pgsize_bitmap = msm_iommu_ops.pgsize_bitmap, .ias = 32, .oas = 32, @@ -490,6 +489,14 @@ static int msm_iommu_map(struct iommu_domain *domain, unsigned long iova, return ret; } +static void msm_iommu_sync_map(struct iommu_domain *domain, unsigned long iova, + size_t size) +{ + struct msm_priv *priv = to_msm_priv(domain); + + __flush_iotlb_range(iova, size, SZ_4K, false, priv); +} + static size_t msm_iommu_unmap(struct iommu_domain *domain, unsigned long iova, size_t len, struct iommu_iotlb_gather *gather) { @@ -680,6 +687,7 @@ static struct iommu_ops msm_iommu_ops = { * kick starting the other master. */ .iotlb_sync = NULL, + .iotlb_sync_map = msm_iommu_sync_map, .iova_to_phys = msm_iommu_iova_to_phys, .probe_device = msm_iommu_probe_device, .release_device = msm_iommu_release_device, From 3d5eab41451f8e28f3e45eef8f6b372bf56612fb Mon Sep 17 00:00:00 2001 From: Robin Murphy Date: Wed, 27 Jan 2021 16:29:29 +0000 Subject: [PATCH 236/414] iommu/io-pgtable: Remove TLBI_ON_MAP quirk IO_PGTABLE_QUIRK_TLBI_ON_MAP is now fully superseded by the core API's iotlb_sync_map callback. Signed-off-by: Robin Murphy Link: https://lore.kernel.org/r/5abb80bba3a7c371d5ffb7e59c05586deddb9a91.1611764372.git.robin.murphy@arm.com [will: Remove unused 'iop' local variable from arm_v7s_map()] Signed-off-by: Will Deacon --- drivers/iommu/io-pgtable-arm-v7s.c | 9 +-------- include/linux/io-pgtable.h | 5 ----- 2 files changed, 1 insertion(+), 13 deletions(-) diff --git a/drivers/iommu/io-pgtable-arm-v7s.c b/drivers/iommu/io-pgtable-arm-v7s.c index 1d92ac948db7..929cb1518db1 100644 --- a/drivers/iommu/io-pgtable-arm-v7s.c +++ b/drivers/iommu/io-pgtable-arm-v7s.c @@ -519,7 +519,6 @@ static int arm_v7s_map(struct io_pgtable_ops *ops, unsigned long iova, phys_addr_t paddr, size_t size, int prot, gfp_t gfp) { struct arm_v7s_io_pgtable *data = io_pgtable_ops_to_data(ops); - struct io_pgtable *iop = &data->iop; int ret; if (WARN_ON(iova >= (1ULL << data->iop.cfg.ias) || @@ -535,12 +534,7 @@ static int arm_v7s_map(struct io_pgtable_ops *ops, unsigned long iova, * Synchronise all PTE updates for the new mapping before there's * a chance for anything to kick off a table walk for the new iova. */ - if (iop->cfg.quirks & IO_PGTABLE_QUIRK_TLBI_ON_MAP) { - io_pgtable_tlb_flush_walk(iop, iova, size, - ARM_V7S_BLOCK_SIZE(2)); - } else { - wmb(); - } + wmb(); return ret; } @@ -759,7 +753,6 @@ static struct io_pgtable *arm_v7s_alloc_pgtable(struct io_pgtable_cfg *cfg, if (cfg->quirks & ~(IO_PGTABLE_QUIRK_ARM_NS | IO_PGTABLE_QUIRK_NO_PERMS | - IO_PGTABLE_QUIRK_TLBI_ON_MAP | IO_PGTABLE_QUIRK_ARM_MTK_EXT | IO_PGTABLE_QUIRK_NON_STRICT)) return NULL; diff --git a/include/linux/io-pgtable.h b/include/linux/io-pgtable.h index 2a5686ca2ba3..06753323a15e 100644 --- a/include/linux/io-pgtable.h +++ b/include/linux/io-pgtable.h @@ -68,10 +68,6 @@ struct io_pgtable_cfg { * hardware which does not implement the permissions of a given * format, and/or requires some format-specific default value. * - * IO_PGTABLE_QUIRK_TLBI_ON_MAP: If the format forbids caching invalid - * (unmapped) entries but the hardware might do so anyway, perform - * TLB maintenance when mapping as well as when unmapping. - * * IO_PGTABLE_QUIRK_ARM_MTK_EXT: (ARM v7s format) MediaTek IOMMUs extend * to support up to 34 bits PA where the bit32 and bit33 are * encoded in the bit9 and bit4 of the PTE respectively. @@ -88,7 +84,6 @@ struct io_pgtable_cfg { */ #define IO_PGTABLE_QUIRK_ARM_NS BIT(0) #define IO_PGTABLE_QUIRK_NO_PERMS BIT(1) - #define IO_PGTABLE_QUIRK_TLBI_ON_MAP BIT(2) #define IO_PGTABLE_QUIRK_ARM_MTK_EXT BIT(3) #define IO_PGTABLE_QUIRK_NON_STRICT BIT(4) #define IO_PGTABLE_QUIRK_ARM_TTBR1 BIT(5) From 187623b1d8b21b6fdab9b963465f71ad47b8c279 Mon Sep 17 00:00:00 2001 From: Wolfram Sang Date: Wed, 27 Jan 2021 11:43:43 +0100 Subject: [PATCH 237/414] Documentation: kernel-parameters: add missing '<' Acked-by: Randy Dunlap Signed-off-by: Wolfram Sang Link: https://lore.kernel.org/r/20210127104343.5647-1-wsa+renesas@sang-engineering.com Signed-off-by: Jonathan Corbet --- Documentation/admin-guide/kernel-parameters.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Documentation/admin-guide/kernel-parameters.rst b/Documentation/admin-guide/kernel-parameters.rst index 682ab28b5c94..1132796a8d96 100644 --- a/Documentation/admin-guide/kernel-parameters.rst +++ b/Documentation/admin-guide/kernel-parameters.rst @@ -60,7 +60,7 @@ Note that for the special case of a range one can split the range into equal sized groups and for each group use some amount from the beginning of that group: - -cpu number>:/ + -:/ For example one can add to the command line following parameter: From bcadb65fd64889cc3cf1ca4b8025d91b59ec1b02 Mon Sep 17 00:00:00 2001 From: Cao jin Date: Wed, 27 Jan 2021 16:49:11 +0800 Subject: [PATCH 238/414] Documentation/x86/boot.rst: Correct the example of SETUP_INDIRECT struct setup_data.len is the length of data field. In case of SETUP_INDIRECT, it should be sizeof(setup_indirect). Signed-off-by: Cao jin Reviewed-by: Daniel Kiper Link: https://lore.kernel.org/r/20210127084911.63438-1-jojing64@gmail.com Signed-off-by: Jonathan Corbet --- Documentation/x86/boot.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Documentation/x86/boot.rst b/Documentation/x86/boot.rst index abb9fc164657..fc844913dece 100644 --- a/Documentation/x86/boot.rst +++ b/Documentation/x86/boot.rst @@ -851,7 +851,7 @@ Protocol: 2.09+ struct setup_data { __u64 next = 0 or ; __u32 type = SETUP_INDIRECT; - __u32 len = sizeof(setup_data); + __u32 len = sizeof(setup_indirect); __u8 data[sizeof(setup_indirect)] = struct setup_indirect { __u32 type = SETUP_INDIRECT | SETUP_E820_EXT; __u32 reserved = 0; From dde0dc3a8e6770e79fa05fdf3d0d9f679d245cc7 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Pali=20Roh=C3=A1r?= Date: Thu, 21 Jan 2021 20:34:17 +0100 Subject: [PATCH 239/414] Documentation: arm: Fix marvell file name MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Fixes: dc7a12bdfccd ("docs: arm: convert docs to ReST and rename to *.rst") Signed-off-by: Pali Rohár Reviewed-by: Andrew Lunn Link: https://lore.kernel.org/r/20210121193418.22678-1-pali@kernel.org Signed-off-by: Jonathan Corbet --- Documentation/arm/index.rst | 2 +- Documentation/arm/{marvel.rst => marvell.rst} | 0 2 files changed, 1 insertion(+), 1 deletion(-) rename Documentation/arm/{marvel.rst => marvell.rst} (100%) diff --git a/Documentation/arm/index.rst b/Documentation/arm/index.rst index a2e9e1bba7b9..b4bea32472b6 100644 --- a/Documentation/arm/index.rst +++ b/Documentation/arm/index.rst @@ -33,7 +33,7 @@ SoC-specific documents ixp4xx - marvel + marvell microchip netwinder diff --git a/Documentation/arm/marvel.rst b/Documentation/arm/marvell.rst similarity index 100% rename from Documentation/arm/marvel.rst rename to Documentation/arm/marvell.rst From feb47df1faaa7d7be0791bc17183c9bbdb8c9352 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Pali=20Roh=C3=A1r?= Date: Thu, 21 Jan 2021 20:34:18 +0100 Subject: [PATCH 240/414] Documentation: arm: marvell: Add link to public Armada 37xx Hardware Spec MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Signed-off-by: Pali Rohár Reviewed-by: Andrew Lunn Link: https://lore.kernel.org/r/20210121193418.22678-2-pali@kernel.org Signed-off-by: Jonathan Corbet --- Documentation/arm/marvell.rst | 3 +++ 1 file changed, 3 insertions(+) diff --git a/Documentation/arm/marvell.rst b/Documentation/arm/marvell.rst index 16ab2eb085b8..b16e6f7e8dbe 100644 --- a/Documentation/arm/marvell.rst +++ b/Documentation/arm/marvell.rst @@ -185,6 +185,9 @@ EBU Armada family ARMv8 Product Brief: http://www.marvell.com/embedded-processors/assets/PB-88F3700-FNL.pdf + Hardware Spec: + http://www.marvell.com/content/dam/marvell/en/public-collateral/embedded-processors/marvell-embedded-processors-armada-37xx-hardware-specifications-2019-09.pdf + Device tree files: arch/arm64/boot/dts/marvell/armada-37* From c4822bd66fb147b4441343e9a235717229828258 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Pali=20Roh=C3=A1r?= Date: Mon, 25 Jan 2021 15:13:41 +0100 Subject: [PATCH 241/414] Documentation: arm: marvell: Fix dead link to Armada 37xx Product Brief MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Signed-off-by: Pali Rohár Link: https://lore.kernel.org/r/20210125141341.32200-1-pali@kernel.org Signed-off-by: Jonathan Corbet --- Documentation/arm/marvell.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Documentation/arm/marvell.rst b/Documentation/arm/marvell.rst index b16e6f7e8dbe..fa22a72d4391 100644 --- a/Documentation/arm/marvell.rst +++ b/Documentation/arm/marvell.rst @@ -183,7 +183,7 @@ EBU Armada family ARMv8 http://www.marvell.com/embedded-processors/armada-3700/ Product Brief: - http://www.marvell.com/embedded-processors/assets/PB-88F3700-FNL.pdf + http://www.marvell.com/content/dam/marvell/en/public-collateral/embedded-processors/marvell-embedded-processors-armada-37xx-product-brief-2016-01.pdf Hardware Spec: http://www.marvell.com/content/dam/marvell/en/public-collateral/embedded-processors/marvell-embedded-processors-armada-37xx-hardware-specifications-2019-09.pdf From 5d2699d28c4dc5f99f84ec0bf197bdc6ea23f80f Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Pali=20Roh=C3=A1r?= Date: Mon, 25 Jan 2021 15:15:29 +0100 Subject: [PATCH 242/414] Documentation: arm: marvell: Update link to unrestricted Armada 38x Functional Spec MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit On Marvell website is documentation accessible without need to register or fill any other forms. Signed-off-by: Pali Rohár Link: https://lore.kernel.org/r/20210125141529.32357-1-pali@kernel.org Signed-off-by: Jonathan Corbet --- Documentation/arm/marvell.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Documentation/arm/marvell.rst b/Documentation/arm/marvell.rst index fa22a72d4391..94cd73383594 100644 --- a/Documentation/arm/marvell.rst +++ b/Documentation/arm/marvell.rst @@ -127,7 +127,7 @@ EBU Armada family - 88F6828 Armada 388 - Product infos: http://www.marvell.com/embedded-processors/armada-38x/ - - Functional Spec: https://marvellcorp.wufoo.com/forms/marvell-armada-38x-functional-specifications/ + - Functional Spec: http://www.marvell.com/content/dam/marvell/en/public-collateral/embedded-processors/marvell-embedded-processors-armada-38x-functional-specifications-2015-11.pdf Core: ARM Cortex-A9 From bc47190d4f14f0ef0cb40c828a65316bde9259b2 Mon Sep 17 00:00:00 2001 From: Randy Dunlap Date: Sun, 24 Jan 2021 20:32:02 -0800 Subject: [PATCH 243/414] Documentation/admin-guide: kernel-parameters: update CMA entries Add qualifying build option legend [CMA] to kernel boot options that requirce CMA support to be enabled for them to be usable. Also capitalize 'CMA' when it is used as an acronym. Signed-off-by: Randy Dunlap Acked-by: Mike Kravetz Link: https://lore.kernel.org/r/20210125043202.22399-1-rdunlap@infradead.org Signed-off-by: Jonathan Corbet --- Documentation/admin-guide/kernel-parameters.txt | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/Documentation/admin-guide/kernel-parameters.txt b/Documentation/admin-guide/kernel-parameters.txt index 9e3cdb271d06..50d9b9bfcd6e 100644 --- a/Documentation/admin-guide/kernel-parameters.txt +++ b/Documentation/admin-guide/kernel-parameters.txt @@ -600,7 +600,7 @@ kernel/dma/contiguous.c cma_pernuma=nn[MG] - [ARM64,KNL] + [ARM64,KNL,CMA] Sets the size of kernel per-numa memory area for contiguous memory allocations. A value of 0 disables per-numa CMA altogether. And If this option is not @@ -1524,12 +1524,12 @@ hpet_mmap= [X86, HPET_MMAP] Allow userspace to mmap HPET registers. Default set by CONFIG_HPET_MMAP_DEFAULT. - hugetlb_cma= [HW] The size of a cma area used for allocation + hugetlb_cma= [HW,CMA] The size of a CMA area used for allocation of gigantic hugepages. Format: nn[KMGTPE] - Reserve a cma area of given size and allocate gigantic - hugepages using the cma allocator. If enabled, the + Reserve a CMA area of given size and allocate gigantic + hugepages using the CMA allocator. If enabled, the boot-time allocation of gigantic hugepages is skipped. hugepages= [HW] Number of HugeTLB pages to allocate at boot. From 56c6092be2a145eadab19700688d9716e9de01d6 Mon Sep 17 00:00:00 2001 From: Baruch Siach Date: Mon, 25 Jan 2021 09:10:30 +0200 Subject: [PATCH 244/414] Documentation: ARM: fix reference to DT format documentation The booting-without-of.rst file is no longer there. Link to devicetree.org instead. Fixes: 441848282c590 ("dt: Remove booting-without-of.rst") Signed-off-by: Baruch Siach Cc: Rob Herring Link: https://lore.kernel.org/r/7f07e544d9fc584242d496c2f54f9303d8de0724.1611558630.git.baruch@tkos.co.il Signed-off-by: Jonathan Corbet --- Documentation/arm/booting.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Documentation/arm/booting.rst b/Documentation/arm/booting.rst index a2263451dc2c..5974e37b3d20 100644 --- a/Documentation/arm/booting.rst +++ b/Documentation/arm/booting.rst @@ -128,7 +128,7 @@ it. The recommended placement is in the first 16KiB of RAM. The boot loader must load a device tree image (dtb) into system ram at a 64bit aligned address and initialize it with the boot data. The -dtb format is documented in Documentation/devicetree/booting-without-of.rst. +dtb format is documented at https://www.devicetree.org/specifications/. The kernel will look for the dtb magic value of 0xd00dfeed at the dtb physical address to determine if a dtb has been passed instead of a tagged list. From 452f81ed05e2c0762b27ab252dca59aa2457baca Mon Sep 17 00:00:00 2001 From: Yanteng Si Date: Thu, 21 Jan 2021 11:33:00 +0800 Subject: [PATCH 245/414] docs/zh_CN: add iio iio_configfs.rst translation This patch translates Documentation/iio/iio_configfs.rst into Chinese. Signed-off-by: Yanteng Si Reviewed-by: Alex Shi Reviewed-by: Huacai Chen Link: https://lore.kernel.org/r/20210121033302.558935-1-siyanteng@loongson.cn Signed-off-by: Jonathan Corbet --- .../translations/zh_CN/iio/iio_configfs.rst | 102 ++++++++++++++++++ 1 file changed, 102 insertions(+) create mode 100644 Documentation/translations/zh_CN/iio/iio_configfs.rst diff --git a/Documentation/translations/zh_CN/iio/iio_configfs.rst b/Documentation/translations/zh_CN/iio/iio_configfs.rst new file mode 100644 index 000000000000..274488e8dce4 --- /dev/null +++ b/Documentation/translations/zh_CN/iio/iio_configfs.rst @@ -0,0 +1,102 @@ +.. include:: ../disclaimer-zh_CN.rst + +:Original: :doc:`../../../iio/iio_configfs` +:Translator: Yanteng Si + +.. _cn_iio_configfs: + + +===================== +工业 IIO configfs支持 +===================== + +1. 概述 +======= + +Configfs是一种内核对象的基于文件系统的管理系统,IIO使用一些可以通过 +configfs轻松配置的对象(例如:设备,触发器)。 + +关于configfs是如何运行的,请查阅Documentation/filesystems/configfs.rst +了解更多信息。 + +2. 用法 +======= +为了使configfs支持IIO,我们需要在编译时选中config的CONFIG_IIO_CONFIGFS +选项。 + +然后,挂载configfs文件系统(通常在 /config directory目录下):: + + $ mkdir/config + $ mount -t configfs none/config + +此时,将创建所有默认IIO组,并可以在/ config / iio下对其进行访问。 下一章 +将介绍可用的IIO配置对象。 + +3. 软件触发器 +============= + +IIO默认configfs组之一是“触发器”组。 挂载configfs后可以自动访问它,并且可 +以在/config/iio/triggers下找到。 + +IIO软件触发器为创建多种触发器类型提供了支持。 通常在include/linux/iio +/sw_trigger.h:中的接口下将新的触发器类型实现为单独的内核模块: +:: + + /* + * drivers/iio/trigger/iio-trig-sample.c + * 一种新触发器类型的内核模块实例 + */ + #include + + + static struct iio_sw_trigger *iio_trig_sample_probe(const char *name) + { + /* + * 这将分配并注册一个IIO触发器以及其他触发器类型特性的初始化。 + */ + } + + static int iio_trig_sample_remove(struct iio_sw_trigger *swt) + { + /* + * 这会废弃iio_trig_sample_probe中的操作 + */ + } + + static const struct iio_sw_trigger_ops iio_trig_sample_ops = { + .probe = iio_trig_sample_probe, + .remove = iio_trig_sample_remove, + }; + + static struct iio_sw_trigger_type iio_trig_sample = { + .name = "trig-sample", + .owner = THIS_MODULE, + .ops = &iio_trig_sample_ops, + }; + +module_iio_sw_trigger_driver(iio_trig_sample); + +每种触发器类型在/config/iio/triggers下都有其自己的目录。 加载iio-trig-sample +模块将创建“ trig-sample”触发器类型目录/config/iio/triggers/trig-sample. + +我们支持以下中断源(触发器类型) + + * hrtimer,使用高分辨率定时器作为中断源 + +3.1 Hrtimer触发器创建与销毁 +--------------------------- + +加载iio-trig-hrtimer模块将注册hrtimer触发器类型,从而允许用户在 +/config/iio/triggers/hrtimer下创建hrtimer触发器。 + +例如:: + + $ mkdir /config/iio/triggers/hrtimer/instance1 + $ rmdir /config/iio/triggers/hrtimer/instance1 + +每个触发器可以具有一个或多个独特的触发器类型的属性。 + +3.2 "hrtimer" 触发器类型属性 +---------------------------- + +"hrtimer”触发器类型没有来自/config dir的任何可配置属性。 From 9ea800c0113d377e71324b668115f4ae24835931 Mon Sep 17 00:00:00 2001 From: Yanteng Si Date: Thu, 21 Jan 2021 11:33:01 +0800 Subject: [PATCH 246/414] docs/zh_CN: add iio ep93xx_adc.rst translation This patch translates Documentation/iio/ep93xx_adc.rst into Chinese. Signed-off-by: Yanteng Si Reviewed-by: Alex Shi Reviewed-by: Huacai Chen Link: https://lore.kernel.org/r/20210121033302.558935-2-siyanteng@loongson.cn Signed-off-by: Jonathan Corbet --- .../translations/zh_CN/iio/ep93xx_adc.rst | 46 +++++++++++++++++++ 1 file changed, 46 insertions(+) create mode 100644 Documentation/translations/zh_CN/iio/ep93xx_adc.rst diff --git a/Documentation/translations/zh_CN/iio/ep93xx_adc.rst b/Documentation/translations/zh_CN/iio/ep93xx_adc.rst new file mode 100644 index 000000000000..7e91d2197867 --- /dev/null +++ b/Documentation/translations/zh_CN/iio/ep93xx_adc.rst @@ -0,0 +1,46 @@ +.. include:: ../disclaimer-zh_CN.rst + +:Original: :doc:`../../../iio/ep93xx_adc` +:Translator: Yanteng Si + +.. _cn_iio_ep93xx_adc: + + +================================== +思睿逻辑 EP93xx 模拟数字转换器驱动 +================================== + +1. 概述 +======= + +该驱动同时适用于具有5通道模拟数字转换器的低端 (EP9301, Ep9302) 设备和10通道 +触摸屏/模拟数字转换器的高端设备(EP9307, EP9312, EP9315)。 + +2. 通道编号 +=========== + +EP9301和EP9302数据表定义了通道0..4的编号方案。虽然EP9307, EP9312和EP9315多 +了3个通道(一共8个),但是编号并没有定义。所以说最后三个通道是随机编号的。 + +如果ep93xx_adc是IIO设备0,您将在以下位置找到条目 +/sys/bus/iio/devices/iio:device0/: + + +-----------------+---------------+ + | sysfs 入口 | ball/pin 名称 | + +=================+===============+ + | in_voltage0_raw | YM | + +-----------------+---------------+ + | in_voltage1_raw | SXP | + +-----------------+---------------+ + | in_voltage2_raw | SXM | + +-----------------+---------------+ + | in_voltage3_raw | SYP | + +-----------------+---------------+ + | in_voltage4_raw | SYM | + +-----------------+---------------+ + | in_voltage5_raw | XP | + +-----------------+---------------+ + | in_voltage6_raw | XM | + +-----------------+---------------+ + | in_voltage7_raw | YP | + +-----------------+---------------+ From 7720357d16a7cfe4c9efb596e7d47fc80236eaa8 Mon Sep 17 00:00:00 2001 From: Yanteng Si Date: Thu, 21 Jan 2021 11:33:02 +0800 Subject: [PATCH 247/414] docs: zh_CN: add iio index.rst translation This patch translates Documentation/iio/index.rst into Chinese. Signed-off-by: Yanteng Si Reviewed-by: Alex Shi Reviewed-by: Huacai Chen Link: https://lore.kernel.org/r/20210121033302.558935-3-siyanteng@loongson.cn Signed-off-by: Jonathan Corbet --- .../translations/zh_CN/iio/index.rst | 20 +++++++++++++++++++ 1 file changed, 20 insertions(+) create mode 100644 Documentation/translations/zh_CN/iio/index.rst diff --git a/Documentation/translations/zh_CN/iio/index.rst b/Documentation/translations/zh_CN/iio/index.rst new file mode 100644 index 000000000000..7087076a10f6 --- /dev/null +++ b/Documentation/translations/zh_CN/iio/index.rst @@ -0,0 +1,20 @@ +.. SPDX-License-Identifier: GPL-2.0 + +.. include:: ../disclaimer-zh_CN.rst + +:Original: :doc:`../../../iio/index` +:Translator: Yanteng Si + +.. _cn_iio_index: + + +======== +工业 I/O +======== + +.. toctree:: + :maxdepth: 1 + + iio_configfs + + ep93xx_adc From 798eb4cc64492c302d80a258a71a0802cf24be5a Mon Sep 17 00:00:00 2001 From: Milan Lakhani Date: Wed, 20 Jan 2021 13:31:51 +0000 Subject: [PATCH 248/414] docs: Update DTB format references There were two references to devicetree/booting-without-of.rst (which has been removed) for DTB format information, and devicetree/usage-model.rst pointed to https://elinux.org/Device_Tree_Usage. Change all three of these references to https://www.devicetree.org/specifications/. Signed-off-by: Milan Lakhani Link: https://lore.kernel.org/r/1611149511-4990-1-git-send-email-milan.lakhani@codethink.co.uk Signed-off-by: Jonathan Corbet --- Documentation/devicetree/usage-model.rst | 2 +- Documentation/translations/zh_CN/arm/Booting | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/Documentation/devicetree/usage-model.rst b/Documentation/devicetree/usage-model.rst index e1b42dc63f01..1eb83496ca1e 100644 --- a/Documentation/devicetree/usage-model.rst +++ b/Documentation/devicetree/usage-model.rst @@ -12,7 +12,7 @@ This article describes how Linux uses the device tree. An overview of the device tree data format can be found on the device tree usage page at devicetree.org\ [1]_. -.. [1] https://elinux.org/Device_Tree_Usage +.. [1] https://www.devicetree.org/specifications/ The "Open Firmware Device Tree", or simply Device Tree (DT), is a data structure and language for describing hardware. More specifically, it diff --git a/Documentation/translations/zh_CN/arm/Booting b/Documentation/translations/zh_CN/arm/Booting index c3d26ce5f6de..5ecea0767893 100644 --- a/Documentation/translations/zh_CN/arm/Booting +++ b/Documentation/translations/zh_CN/arm/Booting @@ -124,7 +124,7 @@ bootloader 必须传递一个系统内存的位置和最小值,以及根文件 bootloader 必须以 64bit 地址对齐的形式加载一个设备树映像(dtb)到系统 RAM 中,并用启动数据初始化它。dtb 格式在文档 -Documentation/devicetree/booting-without-of.rst 中。内核将会在 +https://www.devicetree.org/specifications/ 中。内核将会在 dtb 物理地址处查找 dtb 魔数值(0xd00dfeed),以确定 dtb 是否已经代替 标签列表被传递进来。 From c66cb171bc308c64083f3bb173152db68e06e79f Mon Sep 17 00:00:00 2001 From: Eric Curtin Date: Wed, 20 Jan 2021 13:26:47 +0000 Subject: [PATCH 249/414] Update Documentation/admin-guide/sysctl/fs.rst max_user_watches for epoll should say 1/25, rather than 1/32 Signed-off-by: Eric Curtin Link: https://lore.kernel.org/r/20210120132648.19046-1-ericcurtin17@gmail.com Signed-off-by: Jonathan Corbet --- Documentation/admin-guide/sysctl/fs.rst | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/Documentation/admin-guide/sysctl/fs.rst b/Documentation/admin-guide/sysctl/fs.rst index f48277a0a850..2a501c9ddc55 100644 --- a/Documentation/admin-guide/sysctl/fs.rst +++ b/Documentation/admin-guide/sysctl/fs.rst @@ -380,5 +380,5 @@ This configuration option sets the maximum number of "watches" that are allowed for each user. Each "watch" costs roughly 90 bytes on a 32bit kernel, and roughly 160 bytes on a 64bit one. -The current default value for max_user_watches is the 1/32 of the available -low memory, divided for the "watch" cost in bytes. +The current default value for max_user_watches is the 1/25 (4%) of the +available low memory, divided for the "watch" cost in bytes. From 06a755d6269c072ed0c9b84227eaf33113dc243f Mon Sep 17 00:00:00 2001 From: Michal Wajdeczko Date: Mon, 18 Jan 2021 12:08:13 +0100 Subject: [PATCH 250/414] scripts/kernel-doc: add internal hyperlink to DOC: sections While DOC: section titles are not converted into RST headings sections and are only decorated with strong emphasis markup, nothing stops us from generating internal hyperlinks for them, to mimic implicit hyperlinks to RST headings. Signed-off-by: Michal Wajdeczko Cc: Jonathan Corbet Cc: Jani Nikula Cc: Chris Wilson Link: https://lore.kernel.org/r/20210118110813.1490-1-michal.wajdeczko@intel.com Signed-off-by: Jonathan Corbet --- scripts/kernel-doc | 1 + 1 file changed, 1 insertion(+) diff --git a/scripts/kernel-doc b/scripts/kernel-doc index a9a92e623dbc..e046e16e4411 100755 --- a/scripts/kernel-doc +++ b/scripts/kernel-doc @@ -836,6 +836,7 @@ sub output_blockhead_rst(%) { next if (defined($nosymbol_table{$section})); if ($output_selection != OUTPUT_INCLUDE) { + print ".. _$section:\n\n"; print "**$section**\n\n"; } print_lineno($section_start_lines{$section}); From 20ccc8dd38a391daba3a5a5dbd5443855100c517 Mon Sep 17 00:00:00 2001 From: Peter Hutterer Date: Wed, 13 Jan 2021 09:03:10 +1000 Subject: [PATCH 251/414] Documentation: input: define ABS_PRESSURE/ABS_MT_PRESSURE resolution as grams ABS_PRESSURE and ABS_MT_PRESSURE on touch devices usually represent contact size (as a finger flattens with higher pressure the contact size increases) and userspace translates the kernel pressure value back into contact size. For example, libinput has pressure thresholds when a touch is considered a palm (palm == large contact area -> high pressure). The values themselves are on an arbitrary scale and device-specific. On pressurepads however, the pressure axis may represent the real physical pressure. Pressurepads are touchpads without a hinge but an actual pressure sensor underneath the device instead, for example the Lenovo Yoga 9i. A high-enough pressure is converted to a button click by the firmware. Microsoft does not require a pressure axis to be present, see [1], so as seen from userspace most pressurepads are identical to clickpads - one button and INPUT_PROP_BUTTONPAD set. However, pressurepads that export the pressure axis break userspace because that axis no longer represents contact size, resulting in inconsistent touch tracking, e.g. [2]. Userspace needs to know when a pressure axis represents real pressure and the best way to do so is to define what the resolution field means. Userspace can then treat data with a pressure resolution as true pressure. This patch documents that the pressure resolution is in units/gram. This allows for fine-grained detail and tops out at roughly ~2000t, enough for the devices we're dealing with. Grams is not a scientific pressure unit but the alternative is: - Pascal: defined as force per area and area is unreliable on many devices and seems like the wrong option here anyway, especially for devices with a single pressure sensor only. - Newton: defined as mass * distance/acceleration and for the purposes of a pressure axis, the distance is tricky to interpret and we get the data to calculate acceleration from event timestamps anyway. For the purposes of touch devices and digitizers, grams seems the best choice and the easiest to interpret. Bonus side effect: we can use the existing hwdb infrastructure in userspace to fix devices that advertise false pressure. [1] https://docs.microsoft.com/en-us/windows-hardware/design/component-guidelines/windows-precision-touchpad-required-hid-top-level-collections#windows-precision-touchpad-input-reports [2] https://gitlab.freedesktop.org/libinput/libinput/-/issues/562 Signed-off-by: Peter Hutterer Acked-by: Benjamin Tissoires Link: https://lore.kernel.org/r/20210112230310.GA149342@jelly Signed-off-by: Jonathan Corbet --- Documentation/input/event-codes.rst | 15 +++++++++++++++ Documentation/input/multi-touch-protocol.rst | 4 ++++ 2 files changed, 19 insertions(+) diff --git a/Documentation/input/event-codes.rst b/Documentation/input/event-codes.rst index b24b5343f5eb..3118fc1c1e26 100644 --- a/Documentation/input/event-codes.rst +++ b/Documentation/input/event-codes.rst @@ -236,6 +236,21 @@ A few EV_ABS codes have special meanings: - Used to describe multitouch input events. Please see multi-touch-protocol.txt for details. +* ABS_PRESSURE/ABS_MT_PRESSURE: + + - For touch devices, many devices converted contact size into pressure. + A finger flattens with pressure, causing a larger contact area and thus + pressure and contact size are directly related. This is not the case + for other devices, for example digitizers and touchpads with a true + pressure sensor ("pressure pads"). + + A device should set the resolution of the axis to indicate whether the + pressure is in measurable units. If the resolution is zero, the + pressure data is in arbitrary units. If the resolution is nonzero, the + pressure data is in units/gram. For example, a value of 10 with a + resolution of 1 represents 10 gram, a value of 10 with a resolution on + 1000 represents 10 microgram. + EV_SW ----- diff --git a/Documentation/input/multi-touch-protocol.rst b/Documentation/input/multi-touch-protocol.rst index 307fe22d9668..21c1e6a22888 100644 --- a/Documentation/input/multi-touch-protocol.rst +++ b/Documentation/input/multi-touch-protocol.rst @@ -260,6 +260,10 @@ ABS_MT_PRESSURE of TOUCH and WIDTH for pressure-based devices or any device with a spatial signal intensity distribution. + If the resolution is zero, the pressure data is in arbitrary units. + If the resolution is nonzero, the pressure data is in units/gram. See + :ref:`input-event-codes` for details. + ABS_MT_DISTANCE The distance, in surface units, between the contact and the surface. Zero distance means the contact is touching the surface. A positive number means From 28a77185f1cd0650b664f54614143aaaa3a7a615 Mon Sep 17 00:00:00 2001 From: Lu Baolu Date: Tue, 26 Jan 2021 16:07:29 +0800 Subject: [PATCH 252/414] iommu/vt-d: Clear PRQ overflow only when PRQ is empty It is incorrect to always clear PRO when it's set w/o first checking whether the overflow condition has been cleared. Current code assumes that if an overflow condition occurs it must have been cleared by earlier loop. However since the code runs in a threaded context, the overflow condition could occur even after setting the head to the tail under some extreme condition. To be sane, we should read both head/tail again when seeing a pending PRO and only clear PRO after all pending PRs have been handled. Suggested-by: Kevin Tian Signed-off-by: Lu Baolu Link: https://lore.kernel.org/linux-iommu/MWHPR11MB18862D2EA5BD432BF22D99A48CA09@MWHPR11MB1886.namprd11.prod.outlook.com/ Link: https://lore.kernel.org/r/20210126080730.2232859-2-baolu.lu@linux.intel.com Signed-off-by: Joerg Roedel --- drivers/iommu/intel/svm.c | 13 +++++++++++-- 1 file changed, 11 insertions(+), 2 deletions(-) diff --git a/drivers/iommu/intel/svm.c b/drivers/iommu/intel/svm.c index 033b25886e57..d7c98c5fa4e7 100644 --- a/drivers/iommu/intel/svm.c +++ b/drivers/iommu/intel/svm.c @@ -1042,8 +1042,17 @@ static irqreturn_t prq_event_thread(int irq, void *d) * Clear the page request overflow bit and wake up all threads that * are waiting for the completion of this handling. */ - if (readl(iommu->reg + DMAR_PRS_REG) & DMA_PRS_PRO) - writel(DMA_PRS_PRO, iommu->reg + DMAR_PRS_REG); + if (readl(iommu->reg + DMAR_PRS_REG) & DMA_PRS_PRO) { + pr_info_ratelimited("IOMMU: %s: PRQ overflow detected\n", + iommu->name); + head = dmar_readq(iommu->reg + DMAR_PQH_REG) & PRQ_RING_MASK; + tail = dmar_readq(iommu->reg + DMAR_PQT_REG) & PRQ_RING_MASK; + if (head == tail) { + writel(DMA_PRS_PRO, iommu->reg + DMAR_PRS_REG); + pr_info_ratelimited("IOMMU: %s: PRQ overflow cleared", + iommu->name); + } + } if (!completion_done(&iommu->prq_complete)) complete(&iommu->prq_complete); From 3aa7c62cb7d7986063f352d96e921ee2bf2d9749 Mon Sep 17 00:00:00 2001 From: Lu Baolu Date: Tue, 26 Jan 2021 16:07:30 +0800 Subject: [PATCH 253/414] iommu/vt-d: Use INVALID response code instead of FAILURE The VT-d IOMMU response RESPONSE_FAILURE for a page request in below cases: - When it gets a Page_Request with no PASID; - When it gets a Page_Request with PASID that is not in use for this device. This is allowed by the spec, but IOMMU driver doesn't support such cases today. When the device receives RESPONSE_FAILURE, it sends the device state machine to HALT state. Now if we try to unload the driver, it hangs since the device doesn't send any outbound transactions to host when the driver is trying to clear things up. The only possible responses would be for invalidation requests. Let's use RESPONSE_INVALID instead for now, so that the device state machine doesn't enter HALT state. Suggested-by: Ashok Raj Signed-off-by: Lu Baolu Link: https://lore.kernel.org/r/20210126080730.2232859-3-baolu.lu@linux.intel.com Signed-off-by: Joerg Roedel --- drivers/iommu/intel/svm.c | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) diff --git a/drivers/iommu/intel/svm.c b/drivers/iommu/intel/svm.c index d7c98c5fa4e7..574a7e657a9a 100644 --- a/drivers/iommu/intel/svm.c +++ b/drivers/iommu/intel/svm.c @@ -911,10 +911,8 @@ static irqreturn_t prq_event_thread(int irq, void *d) u64 address; handled = 1; - req = &iommu->prq[head / sizeof(*req)]; - - result = QI_RESP_FAILURE; + result = QI_RESP_INVALID; address = (u64)req->addr << VTD_PAGE_SHIFT; if (!req->pasid_present) { pr_err("%s: Page request without PASID: %08llx %08llx\n", @@ -952,7 +950,6 @@ static irqreturn_t prq_event_thread(int irq, void *d) rcu_read_unlock(); } - result = QI_RESP_INVALID; /* Since we're using init_mm.pgd directly, we should never take * any faults on kernel addresses. */ if (!svm->mm) From 815cdd86038b809ef9fc3d42aed1f248414df3ce Mon Sep 17 00:00:00 2001 From: Yoshihiro Shimoda Date: Thu, 28 Jan 2021 22:02:59 +0900 Subject: [PATCH 254/414] iommu/ipmmu-vmsa: Refactor ipmmu_of_xlate() Refactor ipmmu_of_xlate() to improve readability/scalability. Signed-off-by: Yoshihiro Shimoda Link: https://lore.kernel.org/r/1611838980-4940-2-git-send-email-yoshihiro.shimoda.uh@renesas.com Signed-off-by: Joerg Roedel --- drivers/iommu/ipmmu-vmsa.c | 51 ++++++++++++++------------------------ 1 file changed, 19 insertions(+), 32 deletions(-) diff --git a/drivers/iommu/ipmmu-vmsa.c b/drivers/iommu/ipmmu-vmsa.c index d71f10257f15..96aee98889be 100644 --- a/drivers/iommu/ipmmu-vmsa.c +++ b/drivers/iommu/ipmmu-vmsa.c @@ -734,54 +734,41 @@ static int ipmmu_init_platform_device(struct device *dev, return 0; } -static const struct soc_device_attribute soc_rcar_gen3[] = { +static const struct soc_device_attribute soc_needs_opt_in[] = { + { .family = "R-Car Gen3", }, + { .family = "RZ/G2", }, + { /* sentinel */ } +}; + +static const struct soc_device_attribute soc_denylist[] = { { .soc_id = "r8a774a1", }, - { .soc_id = "r8a774b1", }, - { .soc_id = "r8a774c0", }, - { .soc_id = "r8a774e1", }, - { .soc_id = "r8a7795", }, - { .soc_id = "r8a77961", }, + { .soc_id = "r8a7795", .revision = "ES1.*" }, + { .soc_id = "r8a7795", .revision = "ES2.*" }, { .soc_id = "r8a7796", }, - { .soc_id = "r8a77965", }, - { .soc_id = "r8a77970", }, - { .soc_id = "r8a77990", }, - { .soc_id = "r8a77995", }, { /* sentinel */ } }; -static const struct soc_device_attribute soc_rcar_gen3_whitelist[] = { - { .soc_id = "r8a774b1", }, - { .soc_id = "r8a774c0", }, - { .soc_id = "r8a774e1", }, - { .soc_id = "r8a7795", .revision = "ES3.*" }, - { .soc_id = "r8a77961", }, - { .soc_id = "r8a77965", }, - { .soc_id = "r8a77990", }, - { .soc_id = "r8a77995", }, - { /* sentinel */ } +static const char * const devices_allowlist[] = { }; -static const char * const rcar_gen3_slave_whitelist[] = { -}; - -static bool ipmmu_slave_whitelist(struct device *dev) +static bool ipmmu_device_is_allowed(struct device *dev) { unsigned int i; /* - * For R-Car Gen3 use a white list to opt-in slave devices. + * R-Car Gen3 and RZ/G2 use the allow list to opt-in devices. * For Other SoCs, this returns true anyway. */ - if (!soc_device_match(soc_rcar_gen3)) + if (!soc_device_match(soc_needs_opt_in)) return true; - /* Check whether this R-Car Gen3 can use the IPMMU correctly or not */ - if (!soc_device_match(soc_rcar_gen3_whitelist)) + /* Check whether this SoC can use the IPMMU correctly or not */ + if (soc_device_match(soc_denylist)) return false; - /* Check whether this slave device can work with the IPMMU */ - for (i = 0; i < ARRAY_SIZE(rcar_gen3_slave_whitelist); i++) { - if (!strcmp(dev_name(dev), rcar_gen3_slave_whitelist[i])) + /* Check whether this device can work with the IPMMU */ + for (i = 0; i < ARRAY_SIZE(devices_allowlist); i++) { + if (!strcmp(dev_name(dev), devices_allowlist[i])) return true; } @@ -792,7 +779,7 @@ static bool ipmmu_slave_whitelist(struct device *dev) static int ipmmu_of_xlate(struct device *dev, struct of_phandle_args *spec) { - if (!ipmmu_slave_whitelist(dev)) + if (!ipmmu_device_is_allowed(dev)) return -ENODEV; iommu_fwspec_add_ids(dev, spec->args, 1); From cec0813da56aa5fcca8da839c76f58c8273a4c66 Mon Sep 17 00:00:00 2001 From: Yoshihiro Shimoda Date: Thu, 28 Jan 2021 22:03:00 +0900 Subject: [PATCH 255/414] iommu/ipmmu-vmsa: Allow SDHI devices Add SDHI devices into devices_allowlist. Signed-off-by: Yoshihiro Shimoda Link: https://lore.kernel.org/r/1611838980-4940-3-git-send-email-yoshihiro.shimoda.uh@renesas.com Signed-off-by: Joerg Roedel --- drivers/iommu/ipmmu-vmsa.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/drivers/iommu/ipmmu-vmsa.c b/drivers/iommu/ipmmu-vmsa.c index 96aee98889be..eaaec0a55cc6 100644 --- a/drivers/iommu/ipmmu-vmsa.c +++ b/drivers/iommu/ipmmu-vmsa.c @@ -749,6 +749,10 @@ static const struct soc_device_attribute soc_denylist[] = { }; static const char * const devices_allowlist[] = { + "ee100000.mmc", + "ee120000.mmc", + "ee140000.mmc", + "ee160000.mmc" }; static bool ipmmu_device_is_allowed(struct device *dev) From bca28426805dc3a87b3b0d2fd528caf1a3e1b119 Mon Sep 17 00:00:00 2001 From: Yong Wu Date: Mon, 11 Jan 2021 19:18:42 +0800 Subject: [PATCH 256/414] dt-bindings: iommu: mediatek: Convert IOMMU to DT schema Convert MediaTek IOMMU to DT schema. Signed-off-by: Yong Wu Reviewed-by: Rob Herring Reviewed-by: Tomasz Figa Link: https://lore.kernel.org/r/20210111111914.22211-2-yong.wu@mediatek.com Signed-off-by: Will Deacon --- .../bindings/iommu/mediatek,iommu.txt | 105 ----------- .../bindings/iommu/mediatek,iommu.yaml | 167 ++++++++++++++++++ 2 files changed, 167 insertions(+), 105 deletions(-) delete mode 100644 Documentation/devicetree/bindings/iommu/mediatek,iommu.txt create mode 100644 Documentation/devicetree/bindings/iommu/mediatek,iommu.yaml diff --git a/Documentation/devicetree/bindings/iommu/mediatek,iommu.txt b/Documentation/devicetree/bindings/iommu/mediatek,iommu.txt deleted file mode 100644 index ac949f7fe3d4..000000000000 --- a/Documentation/devicetree/bindings/iommu/mediatek,iommu.txt +++ /dev/null @@ -1,105 +0,0 @@ -* Mediatek IOMMU Architecture Implementation - - Some Mediatek SOCs contain a Multimedia Memory Management Unit (M4U), and -this M4U have two generations of HW architecture. Generation one uses flat -pagetable, and only supports 4K size page mapping. Generation two uses the -ARM Short-Descriptor translation table format for address translation. - - About the M4U Hardware Block Diagram, please check below: - - EMI (External Memory Interface) - | - m4u (Multimedia Memory Management Unit) - | - +--------+ - | | - gals0-rx gals1-rx (Global Async Local Sync rx) - | | - | | - gals0-tx gals1-tx (Global Async Local Sync tx) - | | Some SoCs may have GALS. - +--------+ - | - SMI Common(Smart Multimedia Interface Common) - | - +----------------+------- - | | - | gals-rx There may be GALS in some larbs. - | | - | | - | gals-tx - | | - SMI larb0 SMI larb1 ... SoCs have several SMI local arbiter(larb). - (display) (vdec) - | | - | | - +-----+-----+ +----+----+ - | | | | | | - | | |... | | | ... There are different ports in each larb. - | | | | | | -OVL0 RDMA0 WDMA0 MC PP VLD - - As above, The Multimedia HW will go through SMI and M4U while it -access EMI. SMI is a bridge between m4u and the Multimedia HW. It contain -smi local arbiter and smi common. It will control whether the Multimedia -HW should go though the m4u for translation or bypass it and talk -directly with EMI. And also SMI help control the power domain and clocks for -each local arbiter. - Normally we specify a local arbiter(larb) for each multimedia HW -like display, video decode, and camera. And there are different ports -in each larb. Take a example, There are many ports like MC, PP, VLD in the -video decode local arbiter, all these ports are according to the video HW. - In some SoCs, there may be a GALS(Global Async Local Sync) module between -smi-common and m4u, and additional GALS module between smi-larb and -smi-common. GALS can been seen as a "asynchronous fifo" which could help -synchronize for the modules in different clock frequency. - -Required properties: -- compatible : must be one of the following string: - "mediatek,mt2701-m4u" for mt2701 which uses generation one m4u HW. - "mediatek,mt2712-m4u" for mt2712 which uses generation two m4u HW. - "mediatek,mt6779-m4u" for mt6779 which uses generation two m4u HW. - "mediatek,mt7623-m4u", "mediatek,mt2701-m4u" for mt7623 which uses - generation one m4u HW. - "mediatek,mt8167-m4u" for mt8167 which uses generation two m4u HW. - "mediatek,mt8173-m4u" for mt8173 which uses generation two m4u HW. - "mediatek,mt8183-m4u" for mt8183 which uses generation two m4u HW. -- reg : m4u register base and size. -- interrupts : the interrupt of m4u. -- clocks : must contain one entry for each clock-names. -- clock-names : Only 1 optional clock: - - "bclk": the block clock of m4u. - Here is the list which require this "bclk": - - mt2701, mt2712, mt7623 and mt8173. - Note that m4u use the EMI clock which always has been enabled before kernel - if there is no this "bclk". -- mediatek,larbs : List of phandle to the local arbiters in the current Socs. - Refer to bindings/memory-controllers/mediatek,smi-larb.txt. It must sort - according to the local arbiter index, like larb0, larb1, larb2... -- iommu-cells : must be 1. This is the mtk_m4u_id according to the HW. - Specifies the mtk_m4u_id as defined in - dt-binding/memory/mt2701-larb-port.h for mt2701, mt7623 - dt-binding/memory/mt2712-larb-port.h for mt2712, - dt-binding/memory/mt6779-larb-port.h for mt6779, - dt-binding/memory/mt8167-larb-port.h for mt8167, - dt-binding/memory/mt8173-larb-port.h for mt8173, and - dt-binding/memory/mt8183-larb-port.h for mt8183. - -Example: - iommu: iommu@10205000 { - compatible = "mediatek,mt8173-m4u"; - reg = <0 0x10205000 0 0x1000>; - interrupts = ; - clocks = <&infracfg CLK_INFRA_M4U>; - clock-names = "bclk"; - mediatek,larbs = <&larb0 &larb1 &larb2 &larb3 &larb4 &larb5>; - #iommu-cells = <1>; - }; - -Example for a client device: - display { - compatible = "mediatek,mt8173-disp"; - iommus = <&iommu M4U_PORT_DISP_OVL0>, - <&iommu M4U_PORT_DISP_RDMA0>; - ... - }; diff --git a/Documentation/devicetree/bindings/iommu/mediatek,iommu.yaml b/Documentation/devicetree/bindings/iommu/mediatek,iommu.yaml new file mode 100644 index 000000000000..b9946809fc2b --- /dev/null +++ b/Documentation/devicetree/bindings/iommu/mediatek,iommu.yaml @@ -0,0 +1,167 @@ +# SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause) +%YAML 1.2 +--- +$id: http://devicetree.org/schemas/iommu/mediatek,iommu.yaml# +$schema: http://devicetree.org/meta-schemas/core.yaml# + +title: MediaTek IOMMU Architecture Implementation + +maintainers: + - Yong Wu + +description: |+ + Some MediaTek SOCs contain a Multimedia Memory Management Unit (M4U), and + this M4U have two generations of HW architecture. Generation one uses flat + pagetable, and only supports 4K size page mapping. Generation two uses the + ARM Short-Descriptor translation table format for address translation. + + About the M4U Hardware Block Diagram, please check below: + + EMI (External Memory Interface) + | + m4u (Multimedia Memory Management Unit) + | + +--------+ + | | + gals0-rx gals1-rx (Global Async Local Sync rx) + | | + | | + gals0-tx gals1-tx (Global Async Local Sync tx) + | | Some SoCs may have GALS. + +--------+ + | + SMI Common(Smart Multimedia Interface Common) + | + +----------------+------- + | | + | gals-rx There may be GALS in some larbs. + | | + | | + | gals-tx + | | + SMI larb0 SMI larb1 ... SoCs have several SMI local arbiter(larb). + (display) (vdec) + | | + | | + +-----+-----+ +----+----+ + | | | | | | + | | |... | | | ... There are different ports in each larb. + | | | | | | + OVL0 RDMA0 WDMA0 MC PP VLD + + As above, The Multimedia HW will go through SMI and M4U while it + access EMI. SMI is a bridge between m4u and the Multimedia HW. It contain + smi local arbiter and smi common. It will control whether the Multimedia + HW should go though the m4u for translation or bypass it and talk + directly with EMI. And also SMI help control the power domain and clocks for + each local arbiter. + + Normally we specify a local arbiter(larb) for each multimedia HW + like display, video decode, and camera. And there are different ports + in each larb. Take a example, There are many ports like MC, PP, VLD in the + video decode local arbiter, all these ports are according to the video HW. + + In some SoCs, there may be a GALS(Global Async Local Sync) module between + smi-common and m4u, and additional GALS module between smi-larb and + smi-common. GALS can been seen as a "asynchronous fifo" which could help + synchronize for the modules in different clock frequency. + +properties: + compatible: + oneOf: + - enum: + - mediatek,mt2701-m4u # generation one + - mediatek,mt2712-m4u # generation two + - mediatek,mt6779-m4u # generation two + - mediatek,mt8167-m4u # generation two + - mediatek,mt8173-m4u # generation two + - mediatek,mt8183-m4u # generation two + + - description: mt7623 generation one + items: + - const: mediatek,mt7623-m4u + - const: mediatek,mt2701-m4u + + reg: + maxItems: 1 + + interrupts: + maxItems: 1 + + clocks: + items: + - description: bclk is the block clock. + + clock-names: + items: + - const: bclk + + mediatek,larbs: + $ref: /schemas/types.yaml#/definitions/phandle-array + minItems: 1 + maxItems: 16 + description: | + List of phandle to the local arbiters in the current Socs. + Refer to bindings/memory-controllers/mediatek,smi-larb.yaml. It must sort + according to the local arbiter index, like larb0, larb1, larb2... + + '#iommu-cells': + const: 1 + description: | + This is the mtk_m4u_id according to the HW. Specifies the mtk_m4u_id as + defined in + dt-binding/memory/mt2701-larb-port.h for mt2701 and mt7623, + dt-binding/memory/mt2712-larb-port.h for mt2712, + dt-binding/memory/mt6779-larb-port.h for mt6779, + dt-binding/memory/mt8167-larb-port.h for mt8167, + dt-binding/memory/mt8173-larb-port.h for mt8173, + dt-binding/memory/mt8183-larb-port.h for mt8183. + +required: + - compatible + - reg + - interrupts + - mediatek,larbs + - '#iommu-cells' + +allOf: + - if: + properties: + compatible: + contains: + enum: + - mediatek,mt2701-m4u + - mediatek,mt2712-m4u + - mediatek,mt8173-m4u + + then: + required: + - clocks + +additionalProperties: false + +examples: + - | + #include + #include + + iommu: iommu@10205000 { + compatible = "mediatek,mt8173-m4u"; + reg = <0x10205000 0x1000>; + interrupts = ; + clocks = <&infracfg CLK_INFRA_M4U>; + clock-names = "bclk"; + mediatek,larbs = <&larb0 &larb1 &larb2 + &larb3 &larb4 &larb5>; + #iommu-cells = <1>; + }; + + - | + #include + + /* Example for a client device */ + display { + compatible = "mediatek,mt8173-disp"; + iommus = <&iommu M4U_PORT_DISP_OVL0>, + <&iommu M4U_PORT_DISP_RDMA0>; + }; From 5cf482f2f7a658fe4f74e97ed7e6e7f8263df03f Mon Sep 17 00:00:00 2001 From: Yong Wu Date: Mon, 11 Jan 2021 19:18:43 +0800 Subject: [PATCH 257/414] dt-bindings: memory: mediatek: Add a common memory header file Put all the macros about smi larb/port togethers. Signed-off-by: Yong Wu Acked-by: Rob Herring Acked-by: Krzysztof Kozlowski Reviewed-by: Tomasz Figa Link: https://lore.kernel.org/r/20210111111914.22211-3-yong.wu@mediatek.com Signed-off-by: Will Deacon --- include/dt-bindings/memory/mt2712-larb-port.h | 2 +- include/dt-bindings/memory/mt6779-larb-port.h | 2 +- include/dt-bindings/memory/mt8167-larb-port.h | 2 +- include/dt-bindings/memory/mt8173-larb-port.h | 2 +- include/dt-bindings/memory/mt8183-larb-port.h | 2 +- include/dt-bindings/memory/mtk-memory-port.h | 15 +++++++++++++++ 6 files changed, 20 insertions(+), 5 deletions(-) create mode 100644 include/dt-bindings/memory/mtk-memory-port.h diff --git a/include/dt-bindings/memory/mt2712-larb-port.h b/include/dt-bindings/memory/mt2712-larb-port.h index 6f9aa7349cef..83e8070b4c1c 100644 --- a/include/dt-bindings/memory/mt2712-larb-port.h +++ b/include/dt-bindings/memory/mt2712-larb-port.h @@ -6,7 +6,7 @@ #ifndef __DTS_IOMMU_PORT_MT2712_H #define __DTS_IOMMU_PORT_MT2712_H -#define MTK_M4U_ID(larb, port) (((larb) << 5) | (port)) +#include #define M4U_LARB0_ID 0 #define M4U_LARB1_ID 1 diff --git a/include/dt-bindings/memory/mt6779-larb-port.h b/include/dt-bindings/memory/mt6779-larb-port.h index 2ad0899fbf2f..91b0be285f41 100644 --- a/include/dt-bindings/memory/mt6779-larb-port.h +++ b/include/dt-bindings/memory/mt6779-larb-port.h @@ -7,7 +7,7 @@ #ifndef _DTS_IOMMU_PORT_MT6779_H_ #define _DTS_IOMMU_PORT_MT6779_H_ -#define MTK_M4U_ID(larb, port) (((larb) << 5) | (port)) +#include #define M4U_LARB0_ID 0 #define M4U_LARB1_ID 1 diff --git a/include/dt-bindings/memory/mt8167-larb-port.h b/include/dt-bindings/memory/mt8167-larb-port.h index 000fb299a408..13925c4fee00 100644 --- a/include/dt-bindings/memory/mt8167-larb-port.h +++ b/include/dt-bindings/memory/mt8167-larb-port.h @@ -8,7 +8,7 @@ #ifndef __DTS_IOMMU_PORT_MT8167_H #define __DTS_IOMMU_PORT_MT8167_H -#define MTK_M4U_ID(larb, port) (((larb) << 5) | (port)) +#include #define M4U_LARB0_ID 0 #define M4U_LARB1_ID 1 diff --git a/include/dt-bindings/memory/mt8173-larb-port.h b/include/dt-bindings/memory/mt8173-larb-port.h index 9f31ccfeca21..c2379b3236d6 100644 --- a/include/dt-bindings/memory/mt8173-larb-port.h +++ b/include/dt-bindings/memory/mt8173-larb-port.h @@ -6,7 +6,7 @@ #ifndef __DTS_IOMMU_PORT_MT8173_H #define __DTS_IOMMU_PORT_MT8173_H -#define MTK_M4U_ID(larb, port) (((larb) << 5) | (port)) +#include #define M4U_LARB0_ID 0 #define M4U_LARB1_ID 1 diff --git a/include/dt-bindings/memory/mt8183-larb-port.h b/include/dt-bindings/memory/mt8183-larb-port.h index 2c579f305162..de8bf81b5d9e 100644 --- a/include/dt-bindings/memory/mt8183-larb-port.h +++ b/include/dt-bindings/memory/mt8183-larb-port.h @@ -6,7 +6,7 @@ #ifndef __DTS_IOMMU_PORT_MT8183_H #define __DTS_IOMMU_PORT_MT8183_H -#define MTK_M4U_ID(larb, port) (((larb) << 5) | (port)) +#include #define M4U_LARB0_ID 0 #define M4U_LARB1_ID 1 diff --git a/include/dt-bindings/memory/mtk-memory-port.h b/include/dt-bindings/memory/mtk-memory-port.h new file mode 100644 index 000000000000..53354cf4f6e3 --- /dev/null +++ b/include/dt-bindings/memory/mtk-memory-port.h @@ -0,0 +1,15 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ +/* + * Copyright (c) 2020 MediaTek Inc. + * Author: Yong Wu + */ +#ifndef __DT_BINDINGS_MEMORY_MTK_MEMORY_PORT_H_ +#define __DT_BINDINGS_MEMORY_MTK_MEMORY_PORT_H_ + +#define MTK_LARB_NR_MAX 16 + +#define MTK_M4U_ID(larb, port) (((larb) << 5) | (port)) +#define MTK_M4U_TO_LARB(id) (((id) >> 5) & 0xf) +#define MTK_M4U_TO_PORT(id) ((id) & 0x1f) + +#endif From ca49a4b4c9895a873213ae93abae5855e8d226c6 Mon Sep 17 00:00:00 2001 From: Yong Wu Date: Mon, 11 Jan 2021 19:18:44 +0800 Subject: [PATCH 258/414] dt-bindings: memory: mediatek: Extend LARB_NR_MAX to 32 Extend the max larb number definition as mt8192 has larb_nr over 16. Signed-off-by: Yong Wu Acked-by: Rob Herring Acked-by: Krzysztof Kozlowski Reviewed-by: Tomasz Figa Link: https://lore.kernel.org/r/20210111111914.22211-4-yong.wu@mediatek.com Signed-off-by: Will Deacon --- Documentation/devicetree/bindings/iommu/mediatek,iommu.yaml | 2 +- include/dt-bindings/memory/mtk-memory-port.h | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/Documentation/devicetree/bindings/iommu/mediatek,iommu.yaml b/Documentation/devicetree/bindings/iommu/mediatek,iommu.yaml index b9946809fc2b..ba6626347381 100644 --- a/Documentation/devicetree/bindings/iommu/mediatek,iommu.yaml +++ b/Documentation/devicetree/bindings/iommu/mediatek,iommu.yaml @@ -99,7 +99,7 @@ properties: mediatek,larbs: $ref: /schemas/types.yaml#/definitions/phandle-array minItems: 1 - maxItems: 16 + maxItems: 32 description: | List of phandle to the local arbiters in the current Socs. Refer to bindings/memory-controllers/mediatek,smi-larb.yaml. It must sort diff --git a/include/dt-bindings/memory/mtk-memory-port.h b/include/dt-bindings/memory/mtk-memory-port.h index 53354cf4f6e3..7d64103209af 100644 --- a/include/dt-bindings/memory/mtk-memory-port.h +++ b/include/dt-bindings/memory/mtk-memory-port.h @@ -6,10 +6,10 @@ #ifndef __DT_BINDINGS_MEMORY_MTK_MEMORY_PORT_H_ #define __DT_BINDINGS_MEMORY_MTK_MEMORY_PORT_H_ -#define MTK_LARB_NR_MAX 16 +#define MTK_LARB_NR_MAX 32 #define MTK_M4U_ID(larb, port) (((larb) << 5) | (port)) -#define MTK_M4U_TO_LARB(id) (((id) >> 5) & 0xf) +#define MTK_M4U_TO_LARB(id) (((id) >> 5) & 0x1f) #define MTK_M4U_TO_PORT(id) ((id) & 0x1f) #endif From ddd3e349b8aad5b814d17fc3bcf7a5a90af6d296 Mon Sep 17 00:00:00 2001 From: Yong Wu Date: Mon, 11 Jan 2021 19:18:45 +0800 Subject: [PATCH 259/414] dt-bindings: memory: mediatek: Rename header guard for SMI header file Only rename the header guard for all the SoC larb port header file. No funtional change. Suggested-by: Krzysztof Kozlowski Signed-off-by: Yong Wu Acked-by: Krzysztof Kozlowski Acked-by: Rob Herring Reviewed-by: Tomasz Figa Link: https://lore.kernel.org/r/20210111111914.22211-5-yong.wu@mediatek.com Signed-off-by: Will Deacon --- include/dt-bindings/memory/mt2701-larb-port.h | 4 ++-- include/dt-bindings/memory/mt2712-larb-port.h | 4 ++-- include/dt-bindings/memory/mt6779-larb-port.h | 4 ++-- include/dt-bindings/memory/mt8167-larb-port.h | 4 ++-- include/dt-bindings/memory/mt8173-larb-port.h | 4 ++-- include/dt-bindings/memory/mt8183-larb-port.h | 4 ++-- 6 files changed, 12 insertions(+), 12 deletions(-) diff --git a/include/dt-bindings/memory/mt2701-larb-port.h b/include/dt-bindings/memory/mt2701-larb-port.h index 2d85c2ec6cfd..25d03526f142 100644 --- a/include/dt-bindings/memory/mt2701-larb-port.h +++ b/include/dt-bindings/memory/mt2701-larb-port.h @@ -4,8 +4,8 @@ * Author: Honghui Zhang */ -#ifndef _MT2701_LARB_PORT_H_ -#define _MT2701_LARB_PORT_H_ +#ifndef _DT_BINDINGS_MEMORY_MT2701_LARB_PORT_H_ +#define _DT_BINDINGS_MEMORY_MT2701_LARB_PORT_H_ /* * Mediatek m4u generation 1 such as mt2701 has flat m4u port numbers, diff --git a/include/dt-bindings/memory/mt2712-larb-port.h b/include/dt-bindings/memory/mt2712-larb-port.h index 83e8070b4c1c..e41a2841bcff 100644 --- a/include/dt-bindings/memory/mt2712-larb-port.h +++ b/include/dt-bindings/memory/mt2712-larb-port.h @@ -3,8 +3,8 @@ * Copyright (c) 2017 MediaTek Inc. * Author: Yong Wu */ -#ifndef __DTS_IOMMU_PORT_MT2712_H -#define __DTS_IOMMU_PORT_MT2712_H +#ifndef _DT_BINDINGS_MEMORY_MT2712_LARB_PORT_H_ +#define _DT_BINDINGS_MEMORY_MT2712_LARB_PORT_H_ #include diff --git a/include/dt-bindings/memory/mt6779-larb-port.h b/include/dt-bindings/memory/mt6779-larb-port.h index 91b0be285f41..3fb438a96e35 100644 --- a/include/dt-bindings/memory/mt6779-larb-port.h +++ b/include/dt-bindings/memory/mt6779-larb-port.h @@ -4,8 +4,8 @@ * Author: Chao Hao */ -#ifndef _DTS_IOMMU_PORT_MT6779_H_ -#define _DTS_IOMMU_PORT_MT6779_H_ +#ifndef _DT_BINDINGS_MEMORY_MT6779_LARB_PORT_H_ +#define _DT_BINDINGS_MEMORY_MT6779_LARB_PORT_H_ #include diff --git a/include/dt-bindings/memory/mt8167-larb-port.h b/include/dt-bindings/memory/mt8167-larb-port.h index 13925c4fee00..aae57d4824ca 100644 --- a/include/dt-bindings/memory/mt8167-larb-port.h +++ b/include/dt-bindings/memory/mt8167-larb-port.h @@ -5,8 +5,8 @@ * Author: Honghui Zhang * Author: Fabien Parent */ -#ifndef __DTS_IOMMU_PORT_MT8167_H -#define __DTS_IOMMU_PORT_MT8167_H +#ifndef _DT_BINDINGS_MEMORY_MT8167_LARB_PORT_H_ +#define _DT_BINDINGS_MEMORY_MT8167_LARB_PORT_H_ #include diff --git a/include/dt-bindings/memory/mt8173-larb-port.h b/include/dt-bindings/memory/mt8173-larb-port.h index c2379b3236d6..167a7fc51868 100644 --- a/include/dt-bindings/memory/mt8173-larb-port.h +++ b/include/dt-bindings/memory/mt8173-larb-port.h @@ -3,8 +3,8 @@ * Copyright (c) 2015-2016 MediaTek Inc. * Author: Yong Wu */ -#ifndef __DTS_IOMMU_PORT_MT8173_H -#define __DTS_IOMMU_PORT_MT8173_H +#ifndef _DT_BINDINGS_MEMORY_MT8173_LARB_PORT_H_ +#define _DT_BINDINGS_MEMORY_MT8173_LARB_PORT_H_ #include diff --git a/include/dt-bindings/memory/mt8183-larb-port.h b/include/dt-bindings/memory/mt8183-larb-port.h index de8bf81b5d9e..36abdf0ce5a2 100644 --- a/include/dt-bindings/memory/mt8183-larb-port.h +++ b/include/dt-bindings/memory/mt8183-larb-port.h @@ -3,8 +3,8 @@ * Copyright (c) 2018 MediaTek Inc. * Author: Yong Wu */ -#ifndef __DTS_IOMMU_PORT_MT8183_H -#define __DTS_IOMMU_PORT_MT8183_H +#ifndef _DT_BINDINGS_MEMORY_MT8183_LARB_PORT_H_ +#define _DT_BINDINGS_MEMORY_MT8183_LARB_PORT_H_ #include From fc3734698a435b301183acc8332f0a5fba868bc3 Mon Sep 17 00:00:00 2001 From: Yong Wu Date: Mon, 11 Jan 2021 19:18:46 +0800 Subject: [PATCH 260/414] dt-bindings: mediatek: Add binding for mt8192 IOMMU This patch adds decriptions for mt8192 IOMMU and SMI. mt8192 also is MTK IOMMU gen2 which uses ARM Short-Descriptor translation table format. The M4U-SMI HW diagram is as below: EMI | M4U | ------------ SMI Common ------------ | +-------+------+------+----------------------+-------+ | | | | ...... | | | | | | | | larb0 larb1 larb2 larb4 ...... larb19 larb20 disp0 disp1 mdp vdec IPE IPE All the connections are HW fixed, SW can NOT adjust it. mt8192 M4U support 0~16GB iova range. we preassign different engines into different iova ranges: domain-id module iova-range larbs 0 disp 0 ~ 4G larb0/1 1 vcodec 4G ~ 8G larb4/5/7 2 cam/mdp 8G ~ 12G larb2/9/11/13/14/16/17/18/19/20 3 CCU0 0x4000_0000 ~ 0x43ff_ffff larb13: port 9/10 4 CCU1 0x4400_0000 ~ 0x47ff_ffff larb14: port 4/5 The iova range for CCU0/1(camera control unit) is HW requirement. Signed-off-by: Yong Wu Reviewed-by: Rob Herring Acked-by: Krzysztof Kozlowski Reviewed-by: Tomasz Figa Link: https://lore.kernel.org/r/20210111111914.22211-6-yong.wu@mediatek.com Signed-off-by: Will Deacon --- .../bindings/iommu/mediatek,iommu.yaml | 18 +- include/dt-bindings/memory/mt8192-larb-port.h | 243 ++++++++++++++++++ 2 files changed, 260 insertions(+), 1 deletion(-) create mode 100644 include/dt-bindings/memory/mt8192-larb-port.h diff --git a/Documentation/devicetree/bindings/iommu/mediatek,iommu.yaml b/Documentation/devicetree/bindings/iommu/mediatek,iommu.yaml index ba6626347381..0f26fe14c8e2 100644 --- a/Documentation/devicetree/bindings/iommu/mediatek,iommu.yaml +++ b/Documentation/devicetree/bindings/iommu/mediatek,iommu.yaml @@ -76,6 +76,7 @@ properties: - mediatek,mt8167-m4u # generation two - mediatek,mt8173-m4u # generation two - mediatek,mt8183-m4u # generation two + - mediatek,mt8192-m4u # generation two - description: mt7623 generation one items: @@ -115,7 +116,11 @@ properties: dt-binding/memory/mt6779-larb-port.h for mt6779, dt-binding/memory/mt8167-larb-port.h for mt8167, dt-binding/memory/mt8173-larb-port.h for mt8173, - dt-binding/memory/mt8183-larb-port.h for mt8183. + dt-binding/memory/mt8183-larb-port.h for mt8183, + dt-binding/memory/mt8192-larb-port.h for mt8192. + + power-domains: + maxItems: 1 required: - compatible @@ -133,11 +138,22 @@ allOf: - mediatek,mt2701-m4u - mediatek,mt2712-m4u - mediatek,mt8173-m4u + - mediatek,mt8192-m4u then: required: - clocks + - if: + properties: + compatible: + enum: + - mediatek,mt8192-m4u + + then: + required: + - power-domains + additionalProperties: false examples: diff --git a/include/dt-bindings/memory/mt8192-larb-port.h b/include/dt-bindings/memory/mt8192-larb-port.h new file mode 100644 index 000000000000..23035a52c675 --- /dev/null +++ b/include/dt-bindings/memory/mt8192-larb-port.h @@ -0,0 +1,243 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ +/* + * Copyright (c) 2020 MediaTek Inc. + * + * Author: Chao Hao + * Author: Yong Wu + */ +#ifndef _DT_BINDINGS_MEMORY_MT8192_LARB_PORT_H_ +#define _DT_BINDINGS_MEMORY_MT8192_LARB_PORT_H_ + +#include + +/* + * MM IOMMU supports 16GB dma address. + * + * The address will preassign like this: + * + * modules dma-address-region larbs-ports + * disp 0 ~ 4G larb0/1 + * vcodec 4G ~ 8G larb4/5/7 + * cam/mdp 8G ~ 12G larb2/9/11/13/14/16/17/18/19/20 + * CCU0 0x4000_0000 ~ 0x43ff_ffff larb13: port 9/10 + * CCU1 0x4400_0000 ~ 0x47ff_ffff larb14: port 4/5 + * + * larb3/6/8/10/12/15 is null. + */ + +/* larb0 */ +#define M4U_PORT_L0_DISP_POSTMASK0 MTK_M4U_ID(0, 0) +#define M4U_PORT_L0_OVL_RDMA0_HDR MTK_M4U_ID(0, 1) +#define M4U_PORT_L0_OVL_RDMA0 MTK_M4U_ID(0, 2) +#define M4U_PORT_L0_DISP_RDMA0 MTK_M4U_ID(0, 3) +#define M4U_PORT_L0_DISP_WDMA0 MTK_M4U_ID(0, 4) +#define M4U_PORT_L0_DISP_FAKE0 MTK_M4U_ID(0, 5) + +/* larb1 */ +#define M4U_PORT_L1_OVL_2L_RDMA0_HDR MTK_M4U_ID(1, 0) +#define M4U_PORT_L1_OVL_2L_RDMA2_HDR MTK_M4U_ID(1, 1) +#define M4U_PORT_L1_OVL_2L_RDMA0 MTK_M4U_ID(1, 2) +#define M4U_PORT_L1_OVL_2L_RDMA2 MTK_M4U_ID(1, 3) +#define M4U_PORT_L1_DISP_MDP_RDMA4 MTK_M4U_ID(1, 4) +#define M4U_PORT_L1_DISP_RDMA4 MTK_M4U_ID(1, 5) +#define M4U_PORT_L1_DISP_UFBC_WDMA0 MTK_M4U_ID(1, 6) +#define M4U_PORT_L1_DISP_FAKE1 MTK_M4U_ID(1, 7) + +/* larb2 */ +#define M4U_PORT_L2_MDP_RDMA0 MTK_M4U_ID(2, 0) +#define M4U_PORT_L2_MDP_RDMA1 MTK_M4U_ID(2, 1) +#define M4U_PORT_L2_MDP_WROT0 MTK_M4U_ID(2, 2) +#define M4U_PORT_L2_MDP_WROT1 MTK_M4U_ID(2, 3) +#define M4U_PORT_L2_MDP_DISP_FAKE0 MTK_M4U_ID(2, 4) + +/* larb3: null */ + +/* larb4 */ +#define M4U_PORT_L4_VDEC_MC_EXT MTK_M4U_ID(4, 0) +#define M4U_PORT_L4_VDEC_UFO_EXT MTK_M4U_ID(4, 1) +#define M4U_PORT_L4_VDEC_PP_EXT MTK_M4U_ID(4, 2) +#define M4U_PORT_L4_VDEC_PRED_RD_EXT MTK_M4U_ID(4, 3) +#define M4U_PORT_L4_VDEC_PRED_WR_EXT MTK_M4U_ID(4, 4) +#define M4U_PORT_L4_VDEC_PPWRAP_EXT MTK_M4U_ID(4, 5) +#define M4U_PORT_L4_VDEC_TILE_EXT MTK_M4U_ID(4, 6) +#define M4U_PORT_L4_VDEC_VLD_EXT MTK_M4U_ID(4, 7) +#define M4U_PORT_L4_VDEC_VLD2_EXT MTK_M4U_ID(4, 8) +#define M4U_PORT_L4_VDEC_AVC_MV_EXT MTK_M4U_ID(4, 9) +#define M4U_PORT_L4_VDEC_RG_CTRL_DMA_EXT MTK_M4U_ID(4, 10) + +/* larb5 */ +#define M4U_PORT_L5_VDEC_LAT0_VLD_EXT MTK_M4U_ID(5, 0) +#define M4U_PORT_L5_VDEC_LAT0_VLD2_EXT MTK_M4U_ID(5, 1) +#define M4U_PORT_L5_VDEC_LAT0_AVC_MV_EXT MTK_M4U_ID(5, 2) +#define M4U_PORT_L5_VDEC_LAT0_PRED_RD_EXT MTK_M4U_ID(5, 3) +#define M4U_PORT_L5_VDEC_LAT0_TILE_EXT MTK_M4U_ID(5, 4) +#define M4U_PORT_L5_VDEC_LAT0_WDMA_EXT MTK_M4U_ID(5, 5) +#define M4U_PORT_L5_VDEC_LAT0_RG_CTRL_DMA_EXT MTK_M4U_ID(5, 6) +#define M4U_PORT_L5_VDEC_UFO_ENC_EXT MTK_M4U_ID(5, 7) + +/* larb6: null */ + +/* larb7 */ +#define M4U_PORT_L7_VENC_RCPU MTK_M4U_ID(7, 0) +#define M4U_PORT_L7_VENC_REC MTK_M4U_ID(7, 1) +#define M4U_PORT_L7_VENC_BSDMA MTK_M4U_ID(7, 2) +#define M4U_PORT_L7_VENC_SV_COMV MTK_M4U_ID(7, 3) +#define M4U_PORT_L7_VENC_RD_COMV MTK_M4U_ID(7, 4) +#define M4U_PORT_L7_VENC_CUR_LUMA MTK_M4U_ID(7, 5) +#define M4U_PORT_L7_VENC_CUR_CHROMA MTK_M4U_ID(7, 6) +#define M4U_PORT_L7_VENC_REF_LUMA MTK_M4U_ID(7, 7) +#define M4U_PORT_L7_VENC_REF_CHROMA MTK_M4U_ID(7, 8) +#define M4U_PORT_L7_JPGENC_Y_RDMA MTK_M4U_ID(7, 9) +#define M4U_PORT_L7_JPGENC_Q_RDMA MTK_M4U_ID(7, 10) +#define M4U_PORT_L7_JPGENC_C_TABLE MTK_M4U_ID(7, 11) +#define M4U_PORT_L7_JPGENC_BSDMA MTK_M4U_ID(7, 12) +#define M4U_PORT_L7_VENC_SUB_R_LUMA MTK_M4U_ID(7, 13) +#define M4U_PORT_L7_VENC_SUB_W_LUMA MTK_M4U_ID(7, 14) + +/* larb8: null */ + +/* larb9 */ +#define M4U_PORT_L9_IMG_IMGI_D1 MTK_M4U_ID(9, 0) +#define M4U_PORT_L9_IMG_IMGBI_D1 MTK_M4U_ID(9, 1) +#define M4U_PORT_L9_IMG_DMGI_D1 MTK_M4U_ID(9, 2) +#define M4U_PORT_L9_IMG_DEPI_D1 MTK_M4U_ID(9, 3) +#define M4U_PORT_L9_IMG_ICE_D1 MTK_M4U_ID(9, 4) +#define M4U_PORT_L9_IMG_SMTI_D1 MTK_M4U_ID(9, 5) +#define M4U_PORT_L9_IMG_SMTO_D2 MTK_M4U_ID(9, 6) +#define M4U_PORT_L9_IMG_SMTO_D1 MTK_M4U_ID(9, 7) +#define M4U_PORT_L9_IMG_CRZO_D1 MTK_M4U_ID(9, 8) +#define M4U_PORT_L9_IMG_IMG3O_D1 MTK_M4U_ID(9, 9) +#define M4U_PORT_L9_IMG_VIPI_D1 MTK_M4U_ID(9, 10) +#define M4U_PORT_L9_IMG_SMTI_D5 MTK_M4U_ID(9, 11) +#define M4U_PORT_L9_IMG_TIMGO_D1 MTK_M4U_ID(9, 12) +#define M4U_PORT_L9_IMG_UFBC_W0 MTK_M4U_ID(9, 13) +#define M4U_PORT_L9_IMG_UFBC_R0 MTK_M4U_ID(9, 14) + +/* larb10: null */ + +/* larb11 */ +#define M4U_PORT_L11_IMG_IMGI_D1 MTK_M4U_ID(11, 0) +#define M4U_PORT_L11_IMG_IMGBI_D1 MTK_M4U_ID(11, 1) +#define M4U_PORT_L11_IMG_DMGI_D1 MTK_M4U_ID(11, 2) +#define M4U_PORT_L11_IMG_DEPI_D1 MTK_M4U_ID(11, 3) +#define M4U_PORT_L11_IMG_ICE_D1 MTK_M4U_ID(11, 4) +#define M4U_PORT_L11_IMG_SMTI_D1 MTK_M4U_ID(11, 5) +#define M4U_PORT_L11_IMG_SMTO_D2 MTK_M4U_ID(11, 6) +#define M4U_PORT_L11_IMG_SMTO_D1 MTK_M4U_ID(11, 7) +#define M4U_PORT_L11_IMG_CRZO_D1 MTK_M4U_ID(11, 8) +#define M4U_PORT_L11_IMG_IMG3O_D1 MTK_M4U_ID(11, 9) +#define M4U_PORT_L11_IMG_VIPI_D1 MTK_M4U_ID(11, 10) +#define M4U_PORT_L11_IMG_SMTI_D5 MTK_M4U_ID(11, 11) +#define M4U_PORT_L11_IMG_TIMGO_D1 MTK_M4U_ID(11, 12) +#define M4U_PORT_L11_IMG_UFBC_W0 MTK_M4U_ID(11, 13) +#define M4U_PORT_L11_IMG_UFBC_R0 MTK_M4U_ID(11, 14) +#define M4U_PORT_L11_IMG_WPE_RDMA1 MTK_M4U_ID(11, 15) +#define M4U_PORT_L11_IMG_WPE_RDMA0 MTK_M4U_ID(11, 16) +#define M4U_PORT_L11_IMG_WPE_WDMA MTK_M4U_ID(11, 17) +#define M4U_PORT_L11_IMG_MFB_RDMA0 MTK_M4U_ID(11, 18) +#define M4U_PORT_L11_IMG_MFB_RDMA1 MTK_M4U_ID(11, 19) +#define M4U_PORT_L11_IMG_MFB_RDMA2 MTK_M4U_ID(11, 20) +#define M4U_PORT_L11_IMG_MFB_RDMA3 MTK_M4U_ID(11, 21) +#define M4U_PORT_L11_IMG_MFB_RDMA4 MTK_M4U_ID(11, 22) +#define M4U_PORT_L11_IMG_MFB_RDMA5 MTK_M4U_ID(11, 23) +#define M4U_PORT_L11_IMG_MFB_WDMA0 MTK_M4U_ID(11, 24) +#define M4U_PORT_L11_IMG_MFB_WDMA1 MTK_M4U_ID(11, 25) + +/* larb12: null */ + +/* larb13 */ +#define M4U_PORT_L13_CAM_MRAWI MTK_M4U_ID(13, 0) +#define M4U_PORT_L13_CAM_MRAWO0 MTK_M4U_ID(13, 1) +#define M4U_PORT_L13_CAM_MRAWO1 MTK_M4U_ID(13, 2) +#define M4U_PORT_L13_CAM_CAMSV1 MTK_M4U_ID(13, 3) +#define M4U_PORT_L13_CAM_CAMSV2 MTK_M4U_ID(13, 4) +#define M4U_PORT_L13_CAM_CAMSV3 MTK_M4U_ID(13, 5) +#define M4U_PORT_L13_CAM_CAMSV4 MTK_M4U_ID(13, 6) +#define M4U_PORT_L13_CAM_CAMSV5 MTK_M4U_ID(13, 7) +#define M4U_PORT_L13_CAM_CAMSV6 MTK_M4U_ID(13, 8) +#define M4U_PORT_L13_CAM_CCUI MTK_M4U_ID(13, 9) +#define M4U_PORT_L13_CAM_CCUO MTK_M4U_ID(13, 10) +#define M4U_PORT_L13_CAM_FAKE MTK_M4U_ID(13, 11) + +/* larb14 */ +#define M4U_PORT_L14_CAM_RESERVE1 MTK_M4U_ID(14, 0) +#define M4U_PORT_L14_CAM_RESERVE2 MTK_M4U_ID(14, 1) +#define M4U_PORT_L14_CAM_RESERVE3 MTK_M4U_ID(14, 2) +#define M4U_PORT_L14_CAM_CAMSV0 MTK_M4U_ID(14, 3) +#define M4U_PORT_L14_CAM_CCUI MTK_M4U_ID(14, 4) +#define M4U_PORT_L14_CAM_CCUO MTK_M4U_ID(14, 5) + +/* larb15: null */ + +/* larb16 */ +#define M4U_PORT_L16_CAM_IMGO_R1_A MTK_M4U_ID(16, 0) +#define M4U_PORT_L16_CAM_RRZO_R1_A MTK_M4U_ID(16, 1) +#define M4U_PORT_L16_CAM_CQI_R1_A MTK_M4U_ID(16, 2) +#define M4U_PORT_L16_CAM_BPCI_R1_A MTK_M4U_ID(16, 3) +#define M4U_PORT_L16_CAM_YUVO_R1_A MTK_M4U_ID(16, 4) +#define M4U_PORT_L16_CAM_UFDI_R2_A MTK_M4U_ID(16, 5) +#define M4U_PORT_L16_CAM_RAWI_R2_A MTK_M4U_ID(16, 6) +#define M4U_PORT_L16_CAM_RAWI_R3_A MTK_M4U_ID(16, 7) +#define M4U_PORT_L16_CAM_AAO_R1_A MTK_M4U_ID(16, 8) +#define M4U_PORT_L16_CAM_AFO_R1_A MTK_M4U_ID(16, 9) +#define M4U_PORT_L16_CAM_FLKO_R1_A MTK_M4U_ID(16, 10) +#define M4U_PORT_L16_CAM_LCESO_R1_A MTK_M4U_ID(16, 11) +#define M4U_PORT_L16_CAM_CRZO_R1_A MTK_M4U_ID(16, 12) +#define M4U_PORT_L16_CAM_LTMSO_R1_A MTK_M4U_ID(16, 13) +#define M4U_PORT_L16_CAM_RSSO_R1_A MTK_M4U_ID(16, 14) +#define M4U_PORT_L16_CAM_AAHO_R1_A MTK_M4U_ID(16, 15) +#define M4U_PORT_L16_CAM_LSCI_R1_A MTK_M4U_ID(16, 16) + +/* larb17 */ +#define M4U_PORT_L17_CAM_IMGO_R1_B MTK_M4U_ID(17, 0) +#define M4U_PORT_L17_CAM_RRZO_R1_B MTK_M4U_ID(17, 1) +#define M4U_PORT_L17_CAM_CQI_R1_B MTK_M4U_ID(17, 2) +#define M4U_PORT_L17_CAM_BPCI_R1_B MTK_M4U_ID(17, 3) +#define M4U_PORT_L17_CAM_YUVO_R1_B MTK_M4U_ID(17, 4) +#define M4U_PORT_L17_CAM_UFDI_R2_B MTK_M4U_ID(17, 5) +#define M4U_PORT_L17_CAM_RAWI_R2_B MTK_M4U_ID(17, 6) +#define M4U_PORT_L17_CAM_RAWI_R3_B MTK_M4U_ID(17, 7) +#define M4U_PORT_L17_CAM_AAO_R1_B MTK_M4U_ID(17, 8) +#define M4U_PORT_L17_CAM_AFO_R1_B MTK_M4U_ID(17, 9) +#define M4U_PORT_L17_CAM_FLKO_R1_B MTK_M4U_ID(17, 10) +#define M4U_PORT_L17_CAM_LCESO_R1_B MTK_M4U_ID(17, 11) +#define M4U_PORT_L17_CAM_CRZO_R1_B MTK_M4U_ID(17, 12) +#define M4U_PORT_L17_CAM_LTMSO_R1_B MTK_M4U_ID(17, 13) +#define M4U_PORT_L17_CAM_RSSO_R1_B MTK_M4U_ID(17, 14) +#define M4U_PORT_L17_CAM_AAHO_R1_B MTK_M4U_ID(17, 15) +#define M4U_PORT_L17_CAM_LSCI_R1_B MTK_M4U_ID(17, 16) + +/* larb18 */ +#define M4U_PORT_L18_CAM_IMGO_R1_C MTK_M4U_ID(18, 0) +#define M4U_PORT_L18_CAM_RRZO_R1_C MTK_M4U_ID(18, 1) +#define M4U_PORT_L18_CAM_CQI_R1_C MTK_M4U_ID(18, 2) +#define M4U_PORT_L18_CAM_BPCI_R1_C MTK_M4U_ID(18, 3) +#define M4U_PORT_L18_CAM_YUVO_R1_C MTK_M4U_ID(18, 4) +#define M4U_PORT_L18_CAM_UFDI_R2_C MTK_M4U_ID(18, 5) +#define M4U_PORT_L18_CAM_RAWI_R2_C MTK_M4U_ID(18, 6) +#define M4U_PORT_L18_CAM_RAWI_R3_C MTK_M4U_ID(18, 7) +#define M4U_PORT_L18_CAM_AAO_R1_C MTK_M4U_ID(18, 8) +#define M4U_PORT_L18_CAM_AFO_R1_C MTK_M4U_ID(18, 9) +#define M4U_PORT_L18_CAM_FLKO_R1_C MTK_M4U_ID(18, 10) +#define M4U_PORT_L18_CAM_LCESO_R1_C MTK_M4U_ID(18, 11) +#define M4U_PORT_L18_CAM_CRZO_R1_C MTK_M4U_ID(18, 12) +#define M4U_PORT_L18_CAM_LTMSO_R1_C MTK_M4U_ID(18, 13) +#define M4U_PORT_L18_CAM_RSSO_R1_C MTK_M4U_ID(18, 14) +#define M4U_PORT_L18_CAM_AAHO_R1_C MTK_M4U_ID(18, 15) +#define M4U_PORT_L18_CAM_LSCI_R1_C MTK_M4U_ID(18, 16) + +/* larb19 */ +#define M4U_PORT_L19_IPE_DVS_RDMA MTK_M4U_ID(19, 0) +#define M4U_PORT_L19_IPE_DVS_WDMA MTK_M4U_ID(19, 1) +#define M4U_PORT_L19_IPE_DVP_RDMA MTK_M4U_ID(19, 2) +#define M4U_PORT_L19_IPE_DVP_WDMA MTK_M4U_ID(19, 3) + +/* larb20 */ +#define M4U_PORT_L20_IPE_FDVT_RDA MTK_M4U_ID(20, 0) +#define M4U_PORT_L20_IPE_FDVT_RDB MTK_M4U_ID(20, 1) +#define M4U_PORT_L20_IPE_FDVT_WRA MTK_M4U_ID(20, 2) +#define M4U_PORT_L20_IPE_FDVT_WRB MTK_M4U_ID(20, 3) +#define M4U_PORT_L20_IPE_RSC_RDMA0 MTK_M4U_ID(20, 4) +#define M4U_PORT_L20_IPE_RSC_WDMA MTK_M4U_ID(20, 5) + +#endif From 66a28915541549f0fe0f4c705dab77443f22b5d6 Mon Sep 17 00:00:00 2001 From: Yong Wu Date: Mon, 11 Jan 2021 19:18:49 +0800 Subject: [PATCH 261/414] iommu/mediatek: Use the common mtk-memory-port.h Use the common memory header(larb-port) in the source code. Signed-off-by: Yong Wu Acked-by: Krzysztof Kozlowski Reviewed-by: Tomasz Figa Link: https://lore.kernel.org/r/20210111111914.22211-9-yong.wu@mediatek.com Signed-off-by: Will Deacon --- drivers/iommu/mtk_iommu.c | 7 ------- drivers/iommu/mtk_iommu.h | 1 + drivers/memory/mtk-smi.c | 1 + include/soc/mediatek/smi.h | 2 -- 4 files changed, 2 insertions(+), 9 deletions(-) diff --git a/drivers/iommu/mtk_iommu.c b/drivers/iommu/mtk_iommu.c index 86ab577c9520..f594971dbeb2 100644 --- a/drivers/iommu/mtk_iommu.c +++ b/drivers/iommu/mtk_iommu.c @@ -103,13 +103,6 @@ #define MTK_PROTECT_PA_ALIGN 256 -/* - * Get the local arbiter ID and the portid within the larb arbiter - * from mtk_m4u_id which is defined by MTK_M4U_ID. - */ -#define MTK_M4U_TO_LARB(id) (((id) >> 5) & 0xf) -#define MTK_M4U_TO_PORT(id) ((id) & 0x1f) - #define HAS_4GB_MODE BIT(0) /* HW will use the EMI clock if there isn't the "bclk". */ #define HAS_BCLK BIT(1) diff --git a/drivers/iommu/mtk_iommu.h b/drivers/iommu/mtk_iommu.h index df32b3e3408b..c1584dea66cb 100644 --- a/drivers/iommu/mtk_iommu.h +++ b/drivers/iommu/mtk_iommu.h @@ -17,6 +17,7 @@ #include #include #include +#include #define MTK_LARB_COM_MAX 8 #define MTK_LARB_SUBCOM_MAX 4 diff --git a/drivers/memory/mtk-smi.c b/drivers/memory/mtk-smi.c index ac350f8d1e20..89f92fa2afa5 100644 --- a/drivers/memory/mtk-smi.c +++ b/drivers/memory/mtk-smi.c @@ -15,6 +15,7 @@ #include #include #include +#include /* mt8173 */ #define SMI_LARB_MMU_EN 0xf00 diff --git a/include/soc/mediatek/smi.h b/include/soc/mediatek/smi.h index 5a34b87d89e3..9371bf572ab8 100644 --- a/include/soc/mediatek/smi.h +++ b/include/soc/mediatek/smi.h @@ -11,8 +11,6 @@ #ifdef CONFIG_MTK_SMI -#define MTK_LARB_NR_MAX 16 - #define MTK_SMI_MMU_EN(port) BIT(port) struct mtk_smi_larb_iommu { From 859da21112520829638a82f458ac2c8c2637e17d Mon Sep 17 00:00:00 2001 From: Yong Wu Date: Mon, 11 Jan 2021 19:18:50 +0800 Subject: [PATCH 262/414] iommu/io-pgtable-arm-v7s: Use ias to check the valid iova in unmap Use the ias for the valid iova checking in arm_v7s_unmap. This is a preparing patch for supporting iova 34bit for MediaTek. Signed-off-by: Yong Wu Reviewed-by: Robin Murphy Reviewed-by: Tomasz Figa Link: https://lore.kernel.org/r/20210111111914.22211-10-yong.wu@mediatek.com Signed-off-by: Will Deacon --- drivers/iommu/io-pgtable-arm-v7s.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/iommu/io-pgtable-arm-v7s.c b/drivers/iommu/io-pgtable-arm-v7s.c index 929cb1518db1..03f35065acff 100644 --- a/drivers/iommu/io-pgtable-arm-v7s.c +++ b/drivers/iommu/io-pgtable-arm-v7s.c @@ -711,7 +711,7 @@ static size_t arm_v7s_unmap(struct io_pgtable_ops *ops, unsigned long iova, { struct arm_v7s_io_pgtable *data = io_pgtable_ops_to_data(ops); - if (WARN_ON(upper_32_bits(iova))) + if (WARN_ON(iova >= (1ULL << data->iop.cfg.ias))) return 0; return __arm_v7s_unmap(data, gather, iova, size, 1, data->pgd); From 40596d2f2b6075f6c33180b2f55c814ff4885475 Mon Sep 17 00:00:00 2001 From: Yong Wu Date: Mon, 11 Jan 2021 19:18:51 +0800 Subject: [PATCH 263/414] iommu/io-pgtable-arm-v7s: Extend PA34 for MediaTek MediaTek extend the bit5 in lvl1 and lvl2 descriptor as PA34. Signed-off-by: Yong Wu Acked-by: Will Deacon Reviewed-by: Robin Murphy Reviewed-by: Tomasz Figa Link: https://lore.kernel.org/r/20210111111914.22211-11-yong.wu@mediatek.com Signed-off-by: Will Deacon --- drivers/iommu/io-pgtable-arm-v7s.c | 9 +++++++-- drivers/iommu/mtk_iommu.c | 2 +- include/linux/io-pgtable.h | 4 ++-- 3 files changed, 10 insertions(+), 5 deletions(-) diff --git a/drivers/iommu/io-pgtable-arm-v7s.c b/drivers/iommu/io-pgtable-arm-v7s.c index 03f35065acff..b1b2ff069040 100644 --- a/drivers/iommu/io-pgtable-arm-v7s.c +++ b/drivers/iommu/io-pgtable-arm-v7s.c @@ -112,9 +112,10 @@ #define ARM_V7S_TEX_MASK 0x7 #define ARM_V7S_ATTR_TEX(val) (((val) & ARM_V7S_TEX_MASK) << ARM_V7S_TEX_SHIFT) -/* MediaTek extend the two bits for PA 32bit/33bit */ +/* MediaTek extend the bits below for PA 32bit/33bit/34bit */ #define ARM_V7S_ATTR_MTK_PA_BIT32 BIT(9) #define ARM_V7S_ATTR_MTK_PA_BIT33 BIT(4) +#define ARM_V7S_ATTR_MTK_PA_BIT34 BIT(5) /* *well, except for TEX on level 2 large pages, of course :( */ #define ARM_V7S_CONT_PAGE_TEX_SHIFT 6 @@ -194,6 +195,8 @@ static arm_v7s_iopte paddr_to_iopte(phys_addr_t paddr, int lvl, pte |= ARM_V7S_ATTR_MTK_PA_BIT32; if (paddr & BIT_ULL(33)) pte |= ARM_V7S_ATTR_MTK_PA_BIT33; + if (paddr & BIT_ULL(34)) + pte |= ARM_V7S_ATTR_MTK_PA_BIT34; return pte; } @@ -218,6 +221,8 @@ static phys_addr_t iopte_to_paddr(arm_v7s_iopte pte, int lvl, paddr |= BIT_ULL(32); if (pte & ARM_V7S_ATTR_MTK_PA_BIT33) paddr |= BIT_ULL(33); + if (pte & ARM_V7S_ATTR_MTK_PA_BIT34) + paddr |= BIT_ULL(34); return paddr; } @@ -748,7 +753,7 @@ static struct io_pgtable *arm_v7s_alloc_pgtable(struct io_pgtable_cfg *cfg, if (cfg->ias > ARM_V7S_ADDR_BITS) return NULL; - if (cfg->oas > (arm_v7s_is_mtk_enabled(cfg) ? 34 : ARM_V7S_ADDR_BITS)) + if (cfg->oas > (arm_v7s_is_mtk_enabled(cfg) ? 35 : ARM_V7S_ADDR_BITS)) return NULL; if (cfg->quirks & ~(IO_PGTABLE_QUIRK_ARM_NS | diff --git a/drivers/iommu/mtk_iommu.c b/drivers/iommu/mtk_iommu.c index f594971dbeb2..485f3b6d1a21 100644 --- a/drivers/iommu/mtk_iommu.c +++ b/drivers/iommu/mtk_iommu.c @@ -300,7 +300,7 @@ static int mtk_iommu_domain_finalise(struct mtk_iommu_domain *dom) IO_PGTABLE_QUIRK_ARM_MTK_EXT, .pgsize_bitmap = mtk_iommu_ops.pgsize_bitmap, .ias = 32, - .oas = 34, + .oas = 35, .iommu_dev = data->dev, }; diff --git a/include/linux/io-pgtable.h b/include/linux/io-pgtable.h index 06753323a15e..d69234a4a65e 100644 --- a/include/linux/io-pgtable.h +++ b/include/linux/io-pgtable.h @@ -69,8 +69,8 @@ struct io_pgtable_cfg { * format, and/or requires some format-specific default value. * * IO_PGTABLE_QUIRK_ARM_MTK_EXT: (ARM v7s format) MediaTek IOMMUs extend - * to support up to 34 bits PA where the bit32 and bit33 are - * encoded in the bit9 and bit4 of the PTE respectively. + * to support up to 35 bits PA where the bit32, bit33 and bit34 are + * encoded in the bit9, bit4 and bit5 of the PTE respectively. * * IO_PGTABLE_QUIRK_NON_STRICT: Skip issuing synchronous leaf TLBIs * on unmap, for DMA domains using the flush queue mechanism for From 00ab6f2d61583e072a0cced0420ee1134d853a35 Mon Sep 17 00:00:00 2001 From: Yong Wu Date: Mon, 11 Jan 2021 19:18:52 +0800 Subject: [PATCH 264/414] iommu/io-pgtable-arm-v7s: Clarify LVL_SHIFT/BITS macro The current _ARM_V7S_LVL_BITS/ARM_V7S_LVL_SHIFT use a formula to calculate the corresponding value for level1 and level2 to pretend the code sane. Actually their level1 and level2 values are different from each other. This patch only clarify the two macro. No functional change. Suggested-by: Robin Murphy Signed-off-by: Yong Wu Reviewed-by: Robin Murphy Reviewed-by: Tomasz Figa Link: https://lore.kernel.org/r/20210111111914.22211-12-yong.wu@mediatek.com Signed-off-by: Will Deacon --- drivers/iommu/io-pgtable-arm-v7s.c | 8 +++----- 1 file changed, 3 insertions(+), 5 deletions(-) diff --git a/drivers/iommu/io-pgtable-arm-v7s.c b/drivers/iommu/io-pgtable-arm-v7s.c index b1b2ff069040..a129988cc1a6 100644 --- a/drivers/iommu/io-pgtable-arm-v7s.c +++ b/drivers/iommu/io-pgtable-arm-v7s.c @@ -44,13 +44,11 @@ /* * We have 32 bits total; 12 bits resolved at level 1, 8 bits at level 2, - * and 12 bits in a page. With some carefully-chosen coefficients we can - * hide the ugly inconsistencies behind these macros and at least let the - * rest of the code pretend to be somewhat sane. + * and 12 bits in a page. */ #define ARM_V7S_ADDR_BITS 32 -#define _ARM_V7S_LVL_BITS(lvl) (16 - (lvl) * 4) -#define ARM_V7S_LVL_SHIFT(lvl) (ARM_V7S_ADDR_BITS - (4 + 8 * (lvl))) +#define _ARM_V7S_LVL_BITS(lvl) ((lvl) == 1 ? 12 : 8) +#define ARM_V7S_LVL_SHIFT(lvl) ((lvl) == 1 ? 20 : 12) #define ARM_V7S_TABLE_SHIFT 10 #define ARM_V7S_PTES_PER_LVL(lvl) (1 << _ARM_V7S_LVL_BITS(lvl)) From 468ea0bfaecd097c223e13400d8e8fbae75e1f68 Mon Sep 17 00:00:00 2001 From: Yong Wu Date: Mon, 11 Jan 2021 19:18:53 +0800 Subject: [PATCH 265/414] iommu/io-pgtable-arm-v7s: Add cfg as a param in some macros Add "cfg" as a parameter for some macros. This is a preparing patch for mediatek extend the lvl1 pgtable. No functional change. Signed-off-by: Yong Wu Acked-by: Will Deacon Reviewed-by: Robin Murphy Reviewed-by: Tomasz Figa Link: https://lore.kernel.org/r/20210111111914.22211-13-yong.wu@mediatek.com Signed-off-by: Will Deacon --- drivers/iommu/io-pgtable-arm-v7s.c | 36 +++++++++++++++--------------- 1 file changed, 18 insertions(+), 18 deletions(-) diff --git a/drivers/iommu/io-pgtable-arm-v7s.c b/drivers/iommu/io-pgtable-arm-v7s.c index a129988cc1a6..2e161f0a65f0 100644 --- a/drivers/iommu/io-pgtable-arm-v7s.c +++ b/drivers/iommu/io-pgtable-arm-v7s.c @@ -47,21 +47,21 @@ * and 12 bits in a page. */ #define ARM_V7S_ADDR_BITS 32 -#define _ARM_V7S_LVL_BITS(lvl) ((lvl) == 1 ? 12 : 8) +#define _ARM_V7S_LVL_BITS(lvl, cfg) ((lvl) == 1 ? 12 : 8) #define ARM_V7S_LVL_SHIFT(lvl) ((lvl) == 1 ? 20 : 12) #define ARM_V7S_TABLE_SHIFT 10 -#define ARM_V7S_PTES_PER_LVL(lvl) (1 << _ARM_V7S_LVL_BITS(lvl)) -#define ARM_V7S_TABLE_SIZE(lvl) \ - (ARM_V7S_PTES_PER_LVL(lvl) * sizeof(arm_v7s_iopte)) +#define ARM_V7S_PTES_PER_LVL(lvl, cfg) (1 << _ARM_V7S_LVL_BITS(lvl, cfg)) +#define ARM_V7S_TABLE_SIZE(lvl, cfg) \ + (ARM_V7S_PTES_PER_LVL(lvl, cfg) * sizeof(arm_v7s_iopte)) #define ARM_V7S_BLOCK_SIZE(lvl) (1UL << ARM_V7S_LVL_SHIFT(lvl)) #define ARM_V7S_LVL_MASK(lvl) ((u32)(~0U << ARM_V7S_LVL_SHIFT(lvl))) #define ARM_V7S_TABLE_MASK ((u32)(~0U << ARM_V7S_TABLE_SHIFT)) -#define _ARM_V7S_IDX_MASK(lvl) (ARM_V7S_PTES_PER_LVL(lvl) - 1) -#define ARM_V7S_LVL_IDX(addr, lvl) ({ \ +#define _ARM_V7S_IDX_MASK(lvl, cfg) (ARM_V7S_PTES_PER_LVL(lvl, cfg) - 1) +#define ARM_V7S_LVL_IDX(addr, lvl, cfg) ({ \ int _l = lvl; \ - ((u32)(addr) >> ARM_V7S_LVL_SHIFT(_l)) & _ARM_V7S_IDX_MASK(_l); \ + ((u32)(addr) >> ARM_V7S_LVL_SHIFT(_l)) & _ARM_V7S_IDX_MASK(_l, cfg); \ }) /* @@ -237,7 +237,7 @@ static void *__arm_v7s_alloc_table(int lvl, gfp_t gfp, struct device *dev = cfg->iommu_dev; phys_addr_t phys; dma_addr_t dma; - size_t size = ARM_V7S_TABLE_SIZE(lvl); + size_t size = ARM_V7S_TABLE_SIZE(lvl, cfg); void *table = NULL; if (lvl == 1) @@ -283,7 +283,7 @@ static void __arm_v7s_free_table(void *table, int lvl, { struct io_pgtable_cfg *cfg = &data->iop.cfg; struct device *dev = cfg->iommu_dev; - size_t size = ARM_V7S_TABLE_SIZE(lvl); + size_t size = ARM_V7S_TABLE_SIZE(lvl, cfg); if (!cfg->coherent_walk) dma_unmap_single(dev, __arm_v7s_dma_addr(table), size, @@ -427,7 +427,7 @@ static int arm_v7s_init_pte(struct arm_v7s_io_pgtable *data, arm_v7s_iopte *tblp; size_t sz = ARM_V7S_BLOCK_SIZE(lvl); - tblp = ptep - ARM_V7S_LVL_IDX(iova, lvl); + tblp = ptep - ARM_V7S_LVL_IDX(iova, lvl, cfg); if (WARN_ON(__arm_v7s_unmap(data, NULL, iova + i * sz, sz, lvl, tblp) != sz)) return -EINVAL; @@ -480,7 +480,7 @@ static int __arm_v7s_map(struct arm_v7s_io_pgtable *data, unsigned long iova, int num_entries = size >> ARM_V7S_LVL_SHIFT(lvl); /* Find our entry at the current level */ - ptep += ARM_V7S_LVL_IDX(iova, lvl); + ptep += ARM_V7S_LVL_IDX(iova, lvl, cfg); /* If we can install a leaf entry at this level, then do so */ if (num_entries) @@ -547,7 +547,7 @@ static void arm_v7s_free_pgtable(struct io_pgtable *iop) struct arm_v7s_io_pgtable *data = io_pgtable_to_data(iop); int i; - for (i = 0; i < ARM_V7S_PTES_PER_LVL(1); i++) { + for (i = 0; i < ARM_V7S_PTES_PER_LVL(1, &data->iop.cfg); i++) { arm_v7s_iopte pte = data->pgd[i]; if (ARM_V7S_PTE_IS_TABLE(pte, 1)) @@ -599,9 +599,9 @@ static size_t arm_v7s_split_blk_unmap(struct arm_v7s_io_pgtable *data, if (!tablep) return 0; /* Bytes unmapped */ - num_ptes = ARM_V7S_PTES_PER_LVL(2); + num_ptes = ARM_V7S_PTES_PER_LVL(2, cfg); num_entries = size >> ARM_V7S_LVL_SHIFT(2); - unmap_idx = ARM_V7S_LVL_IDX(iova, 2); + unmap_idx = ARM_V7S_LVL_IDX(iova, 2, cfg); pte = arm_v7s_prot_to_pte(arm_v7s_pte_to_prot(blk_pte, 1), 2, cfg); if (num_entries > 1) @@ -643,7 +643,7 @@ static size_t __arm_v7s_unmap(struct arm_v7s_io_pgtable *data, if (WARN_ON(lvl > 2)) return 0; - idx = ARM_V7S_LVL_IDX(iova, lvl); + idx = ARM_V7S_LVL_IDX(iova, lvl, &iop->cfg); ptep += idx; do { pte[i] = READ_ONCE(ptep[i]); @@ -729,7 +729,7 @@ static phys_addr_t arm_v7s_iova_to_phys(struct io_pgtable_ops *ops, u32 mask; do { - ptep += ARM_V7S_LVL_IDX(iova, ++lvl); + ptep += ARM_V7S_LVL_IDX(iova, ++lvl, &data->iop.cfg); pte = READ_ONCE(*ptep); ptep = iopte_deref(pte, lvl, data); } while (ARM_V7S_PTE_IS_TABLE(pte, lvl)); @@ -771,8 +771,8 @@ static struct io_pgtable *arm_v7s_alloc_pgtable(struct io_pgtable_cfg *cfg, spin_lock_init(&data->split_lock); data->l2_tables = kmem_cache_create("io-pgtable_armv7s_l2", - ARM_V7S_TABLE_SIZE(2), - ARM_V7S_TABLE_SIZE(2), + ARM_V7S_TABLE_SIZE(2, cfg), + ARM_V7S_TABLE_SIZE(2, cfg), ARM_V7S_TABLE_SLAB_FLAGS, NULL); if (!data->l2_tables) goto out_free_data; From f3a8a46d714fd67b856f09e4c783363076ebac62 Mon Sep 17 00:00:00 2001 From: Yong Wu Date: Mon, 11 Jan 2021 19:18:54 +0800 Subject: [PATCH 266/414] iommu/io-pgtable-arm-v7s: Quad lvl1 pgtable for MediaTek The standard input iova bits is 32. MediaTek quad the lvl1 pagetable (4 * lvl1). No change for lvl2 pagetable. Then the iova bits can reach 34bit. Signed-off-by: Yong Wu Reviewed-by: Robin Murphy Reviewed-by: Tomasz Figa Link: https://lore.kernel.org/r/20210111111914.22211-14-yong.wu@mediatek.com Signed-off-by: Will Deacon --- drivers/iommu/io-pgtable-arm-v7s.c | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/drivers/iommu/io-pgtable-arm-v7s.c b/drivers/iommu/io-pgtable-arm-v7s.c index 2e161f0a65f0..d4004bcf333a 100644 --- a/drivers/iommu/io-pgtable-arm-v7s.c +++ b/drivers/iommu/io-pgtable-arm-v7s.c @@ -45,9 +45,10 @@ /* * We have 32 bits total; 12 bits resolved at level 1, 8 bits at level 2, * and 12 bits in a page. + * MediaTek extend 2 bits to reach 34bits, 14 bits at lvl1 and 8 bits at lvl2. */ #define ARM_V7S_ADDR_BITS 32 -#define _ARM_V7S_LVL_BITS(lvl, cfg) ((lvl) == 1 ? 12 : 8) +#define _ARM_V7S_LVL_BITS(lvl, cfg) ((lvl) == 1 ? ((cfg)->ias - 20) : 8) #define ARM_V7S_LVL_SHIFT(lvl) ((lvl) == 1 ? 20 : 12) #define ARM_V7S_TABLE_SHIFT 10 @@ -61,7 +62,7 @@ #define _ARM_V7S_IDX_MASK(lvl, cfg) (ARM_V7S_PTES_PER_LVL(lvl, cfg) - 1) #define ARM_V7S_LVL_IDX(addr, lvl, cfg) ({ \ int _l = lvl; \ - ((u32)(addr) >> ARM_V7S_LVL_SHIFT(_l)) & _ARM_V7S_IDX_MASK(_l, cfg); \ + ((addr) >> ARM_V7S_LVL_SHIFT(_l)) & _ARM_V7S_IDX_MASK(_l, cfg); \ }) /* @@ -748,7 +749,7 @@ static struct io_pgtable *arm_v7s_alloc_pgtable(struct io_pgtable_cfg *cfg, { struct arm_v7s_io_pgtable *data; - if (cfg->ias > ARM_V7S_ADDR_BITS) + if (cfg->ias > (arm_v7s_is_mtk_enabled(cfg) ? 34 : ARM_V7S_ADDR_BITS)) return NULL; if (cfg->oas > (arm_v7s_is_mtk_enabled(cfg) ? 35 : ARM_V7S_ADDR_BITS)) From 2f317da4332a145be76ccd49d0a38803b499c39c Mon Sep 17 00:00:00 2001 From: Yong Wu Date: Mon, 11 Jan 2021 19:18:55 +0800 Subject: [PATCH 267/414] iommu/mediatek: Add a flag for iova 34bits case Add a HW flag for if the HW support 34bit IOVA. the previous SoC still use 32bit. normally the lvl1 pgtable size is 16KB when ias == 32. if ias == 34, lvl1 pgtable size is 16KB * 4. The purpose of this patch is to save 16KB*3 continuous memory for the previous SoC. Signed-off-by: Yong Wu Reviewed-by: Tomasz Figa Link: https://lore.kernel.org/r/20210111111914.22211-15-yong.wu@mediatek.com Signed-off-by: Will Deacon --- drivers/iommu/mtk_iommu.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/iommu/mtk_iommu.c b/drivers/iommu/mtk_iommu.c index 485f3b6d1a21..bf1277d58121 100644 --- a/drivers/iommu/mtk_iommu.c +++ b/drivers/iommu/mtk_iommu.c @@ -112,6 +112,7 @@ #define HAS_SUB_COMM BIT(5) #define WR_THROT_EN BIT(6) #define HAS_LEGACY_IVRP_PADDR BIT(7) +#define IOVA_34_EN BIT(8) #define MTK_IOMMU_HAS_FLAG(pdata, _x) \ ((((pdata)->flags) & (_x)) == (_x)) @@ -299,7 +300,7 @@ static int mtk_iommu_domain_finalise(struct mtk_iommu_domain *dom) IO_PGTABLE_QUIRK_NO_PERMS | IO_PGTABLE_QUIRK_ARM_MTK_EXT, .pgsize_bitmap = mtk_iommu_ops.pgsize_bitmap, - .ias = 32, + .ias = MTK_IOMMU_HAS_FLAG(data->plat_data, IOVA_34_EN) ? 34 : 32, .oas = 35, .iommu_dev = data->dev, }; From 9bdfe4c175c8a9ea4959f7ad087e3d66d3917bc8 Mon Sep 17 00:00:00 2001 From: Yong Wu Date: Mon, 11 Jan 2021 19:18:56 +0800 Subject: [PATCH 268/414] iommu/mediatek: Update oas for v7s This patch only updates oas in different SoCs. If the SoC supports 4GB-mode and current dram size is 4GB, the oas is 33. otherwise, it's still 32. In the lastest SoC, the oas is 35bits. Signed-off-by: Yong Wu Reviewed-by: Tomasz Figa Link: https://lore.kernel.org/r/20210111111914.22211-16-yong.wu@mediatek.com Signed-off-by: Will Deacon --- drivers/iommu/mtk_iommu.c | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/drivers/iommu/mtk_iommu.c b/drivers/iommu/mtk_iommu.c index bf1277d58121..1c4af574f5f7 100644 --- a/drivers/iommu/mtk_iommu.c +++ b/drivers/iommu/mtk_iommu.c @@ -301,10 +301,14 @@ static int mtk_iommu_domain_finalise(struct mtk_iommu_domain *dom) IO_PGTABLE_QUIRK_ARM_MTK_EXT, .pgsize_bitmap = mtk_iommu_ops.pgsize_bitmap, .ias = MTK_IOMMU_HAS_FLAG(data->plat_data, IOVA_34_EN) ? 34 : 32, - .oas = 35, .iommu_dev = data->dev, }; + if (MTK_IOMMU_HAS_FLAG(data->plat_data, HAS_4GB_MODE)) + dom->cfg.oas = data->enable_4GB ? 33 : 32; + else + dom->cfg.oas = 35; + dom->iop = alloc_io_pgtable_ops(ARM_V7S, &dom->cfg, data); if (!dom->iop) { dev_err(data->dev, "Failed to alloc io pgtable\n"); From 7f37a91dc85e37ed06422c6e460724375a86f40c Mon Sep 17 00:00:00 2001 From: Yong Wu Date: Mon, 11 Jan 2021 19:18:57 +0800 Subject: [PATCH 269/414] iommu/mediatek: Move hw_init into attach_device In attach device, it will update the pagetable base address register. Move the hw_init function also here. Then it only need call pm_runtime_get/put one time here if m4u has power domain. Signed-off-by: Yong Wu Reviewed-by: Tomasz Figa Link: https://lore.kernel.org/r/20210111111914.22211-17-yong.wu@mediatek.com Signed-off-by: Will Deacon --- drivers/iommu/mtk_iommu.c | 13 +++++++------ 1 file changed, 7 insertions(+), 6 deletions(-) diff --git a/drivers/iommu/mtk_iommu.c b/drivers/iommu/mtk_iommu.c index 1c4af574f5f7..87c4626c9072 100644 --- a/drivers/iommu/mtk_iommu.c +++ b/drivers/iommu/mtk_iommu.c @@ -126,6 +126,8 @@ struct mtk_iommu_domain { static const struct iommu_ops mtk_iommu_ops; +static int mtk_iommu_hw_init(const struct mtk_iommu_data *data); + /* * In M4U 4GB mode, the physical address is remapped as below: * @@ -364,12 +366,15 @@ static int mtk_iommu_attach_device(struct iommu_domain *domain, { struct mtk_iommu_data *data = dev_iommu_priv_get(dev); struct mtk_iommu_domain *dom = to_mtk_domain(domain); + int ret; if (!data) return -ENODEV; - /* Update the pgtable base address register of the M4U HW */ - if (!data->m4u_dom) { + if (!data->m4u_dom) { /* Initialize the M4U HW */ + ret = mtk_iommu_hw_init(data); + if (ret) + return ret; data->m4u_dom = dom; writel(dom->cfg.arm_v7s_cfg.ttbr & MMU_PT_ADDR_MASK, data->base + REG_MMU_PT_BASE_ADDR); @@ -724,10 +729,6 @@ static int mtk_iommu_probe(struct platform_device *pdev) platform_set_drvdata(pdev, data); - ret = mtk_iommu_hw_init(data); - if (ret) - return ret; - ret = iommu_device_sysfs_add(&data->iommu, dev, NULL, "mtk-iommu.%pa", &ioaddr); if (ret) From 986d9ec5f176ff1a539e849d57b3d6ecc937c4a6 Mon Sep 17 00:00:00 2001 From: Yong Wu Date: Mon, 11 Jan 2021 19:18:58 +0800 Subject: [PATCH 270/414] iommu/mediatek: Add error handle for mtk_iommu_probe In the original code, we lack the error handle. This patch adds them. Signed-off-by: Yong Wu Reviewed-by: Tomasz Figa Link: https://lore.kernel.org/r/20210111111914.22211-18-yong.wu@mediatek.com Signed-off-by: Will Deacon --- drivers/iommu/mtk_iommu.c | 23 +++++++++++++++++++---- 1 file changed, 19 insertions(+), 4 deletions(-) diff --git a/drivers/iommu/mtk_iommu.c b/drivers/iommu/mtk_iommu.c index 87c4626c9072..189165e7a2ab 100644 --- a/drivers/iommu/mtk_iommu.c +++ b/drivers/iommu/mtk_iommu.c @@ -739,15 +739,30 @@ static int mtk_iommu_probe(struct platform_device *pdev) ret = iommu_device_register(&data->iommu); if (ret) - return ret; + goto out_sysfs_remove; spin_lock_init(&data->tlb_lock); list_add_tail(&data->list, &m4ulist); - if (!iommu_present(&platform_bus_type)) - bus_set_iommu(&platform_bus_type, &mtk_iommu_ops); + if (!iommu_present(&platform_bus_type)) { + ret = bus_set_iommu(&platform_bus_type, &mtk_iommu_ops); + if (ret) + goto out_list_del; + } - return component_master_add_with_match(dev, &mtk_iommu_com_ops, match); + ret = component_master_add_with_match(dev, &mtk_iommu_com_ops, match); + if (ret) + goto out_bus_set_null; + return ret; + +out_bus_set_null: + bus_set_iommu(&platform_bus_type, NULL); +out_list_del: + list_del(&data->list); + iommu_device_unregister(&data->iommu); +out_sysfs_remove: + iommu_device_sysfs_remove(&data->iommu); + return ret; } static int mtk_iommu_remove(struct platform_device *pdev) From baf94e6ebff9622f60c4a87ff59c85bf756b7e20 Mon Sep 17 00:00:00 2001 From: Yong Wu Date: Mon, 11 Jan 2021 19:18:59 +0800 Subject: [PATCH 271/414] iommu/mediatek: Add device link for smi-common and m4u In the lastest SoC, M4U has its special power domain. thus, If the engine begin to work, it should help enable the power for M4U firstly. Currently if the engine work, it always enable the power/clocks for smi-larbs/smi-common. This patch adds device_link for smi-common and M4U. then, if smi-common power is enabled, the M4U power also is powered on automatically. Normally M4U connect with several smi-larbs and their smi-common always are the same, In this patch it get smi-common dev from the last smi-larb device, then add the device_link. Signed-off-by: Yong Wu Reviewed-by: Tomasz Figa Link: https://lore.kernel.org/r/20210111111914.22211-19-yong.wu@mediatek.com Signed-off-by: Will Deacon --- drivers/iommu/mtk_iommu.c | 27 ++++++++++++++++++++++++--- drivers/iommu/mtk_iommu.h | 1 + 2 files changed, 25 insertions(+), 3 deletions(-) diff --git a/drivers/iommu/mtk_iommu.c b/drivers/iommu/mtk_iommu.c index 189165e7a2ab..0fe7c1617dc3 100644 --- a/drivers/iommu/mtk_iommu.c +++ b/drivers/iommu/mtk_iommu.c @@ -20,6 +20,7 @@ #include #include #include +#include #include #include #include @@ -633,6 +634,9 @@ static int mtk_iommu_probe(struct platform_device *pdev) { struct mtk_iommu_data *data; struct device *dev = &pdev->dev; + struct device_node *larbnode, *smicomm_node; + struct platform_device *plarbdev; + struct device_link *link; struct resource *res; resource_size_t ioaddr; struct component_match *match = NULL; @@ -699,8 +703,6 @@ static int mtk_iommu_probe(struct platform_device *pdev) return larb_nr; for (i = 0; i < larb_nr; i++) { - struct device_node *larbnode; - struct platform_device *plarbdev; u32 id; larbnode = of_parse_phandle(dev->of_node, "mediatek,larbs", i); @@ -727,12 +729,28 @@ static int mtk_iommu_probe(struct platform_device *pdev) compare_of, larbnode); } + /* Get smi-common dev from the last larb. */ + smicomm_node = of_parse_phandle(larbnode, "mediatek,smi", 0); + if (!smicomm_node) + return -EINVAL; + + plarbdev = of_find_device_by_node(smicomm_node); + of_node_put(smicomm_node); + data->smicomm_dev = &plarbdev->dev; + + link = device_link_add(data->smicomm_dev, dev, + DL_FLAG_STATELESS | DL_FLAG_PM_RUNTIME); + if (!link) { + dev_err(dev, "Unable link %s.\n", dev_name(data->smicomm_dev)); + return -EINVAL; + } + platform_set_drvdata(pdev, data); ret = iommu_device_sysfs_add(&data->iommu, dev, NULL, "mtk-iommu.%pa", &ioaddr); if (ret) - return ret; + goto out_link_remove; iommu_device_set_ops(&data->iommu, &mtk_iommu_ops); iommu_device_set_fwnode(&data->iommu, &pdev->dev.of_node->fwnode); @@ -762,6 +780,8 @@ static int mtk_iommu_probe(struct platform_device *pdev) iommu_device_unregister(&data->iommu); out_sysfs_remove: iommu_device_sysfs_remove(&data->iommu); +out_link_remove: + device_link_remove(data->smicomm_dev, dev); return ret; } @@ -776,6 +796,7 @@ static int mtk_iommu_remove(struct platform_device *pdev) bus_set_iommu(&platform_bus_type, NULL); clk_disable_unprepare(data->bclk); + device_link_remove(data->smicomm_dev, &pdev->dev); devm_free_irq(&pdev->dev, data->irq, data); component_master_del(&pdev->dev, &mtk_iommu_com_ops); return 0; diff --git a/drivers/iommu/mtk_iommu.h b/drivers/iommu/mtk_iommu.h index c1584dea66cb..a9b79e118f02 100644 --- a/drivers/iommu/mtk_iommu.h +++ b/drivers/iommu/mtk_iommu.h @@ -68,6 +68,7 @@ struct mtk_iommu_data { struct iommu_device iommu; const struct mtk_iommu_plat_data *plat_data; + struct device *smicomm_dev; struct dma_iommu_mapping *mapping; /* For mtk_iommu_v1.c */ From 34665c7929fc27351ee3f45554e8991a6fd6e284 Mon Sep 17 00:00:00 2001 From: Yong Wu Date: Mon, 11 Jan 2021 19:19:00 +0800 Subject: [PATCH 272/414] iommu/mediatek: Add pm runtime callback In pm runtime case, all the registers backup/restore and bclk are controlled in the pm_runtime callback, Rename the original suspend/resume to the runtime_suspend/resume. Use pm_runtime_force_suspend/resume as the normal suspend/resume. iommu should suspend after iommu consumer devices, thus use _LATE_. Signed-off-by: Yong Wu Reviewed-by: Tomasz Figa Link: https://lore.kernel.org/r/20210111111914.22211-20-yong.wu@mediatek.com Signed-off-by: Will Deacon --- drivers/iommu/mtk_iommu.c | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/drivers/iommu/mtk_iommu.c b/drivers/iommu/mtk_iommu.c index 0fe7c1617dc3..3682137b789a 100644 --- a/drivers/iommu/mtk_iommu.c +++ b/drivers/iommu/mtk_iommu.c @@ -802,7 +802,7 @@ static int mtk_iommu_remove(struct platform_device *pdev) return 0; } -static int __maybe_unused mtk_iommu_suspend(struct device *dev) +static int __maybe_unused mtk_iommu_runtime_suspend(struct device *dev) { struct mtk_iommu_data *data = dev_get_drvdata(dev); struct mtk_iommu_suspend_reg *reg = &data->reg; @@ -820,7 +820,7 @@ static int __maybe_unused mtk_iommu_suspend(struct device *dev) return 0; } -static int __maybe_unused mtk_iommu_resume(struct device *dev) +static int __maybe_unused mtk_iommu_runtime_resume(struct device *dev) { struct mtk_iommu_data *data = dev_get_drvdata(dev); struct mtk_iommu_suspend_reg *reg = &data->reg; @@ -848,7 +848,9 @@ static int __maybe_unused mtk_iommu_resume(struct device *dev) } static const struct dev_pm_ops mtk_iommu_pm_ops = { - SET_NOIRQ_SYSTEM_SLEEP_PM_OPS(mtk_iommu_suspend, mtk_iommu_resume) + SET_RUNTIME_PM_OPS(mtk_iommu_runtime_suspend, mtk_iommu_runtime_resume, NULL) + SET_LATE_SYSTEM_SLEEP_PM_OPS(pm_runtime_force_suspend, + pm_runtime_force_resume) }; static const struct mtk_iommu_plat_data mt2712_data = { From c0b57581b73be7b43f39e0dff201c93413f6a668 Mon Sep 17 00:00:00 2001 From: Yong Wu Date: Mon, 11 Jan 2021 19:19:01 +0800 Subject: [PATCH 273/414] iommu/mediatek: Add power-domain operation In the previous SoC, the M4U HW is in the EMI power domain which is always on. the latest M4U is in the display power domain which may be turned on/off, thus we have to add pm_runtime interface for it. When the engine work, the engine always enable the power and clocks for smi-larb/smi-common, then the M4U's power will always be powered on automatically via the device link with smi-common. Note: we don't enable the M4U power in iommu_map/unmap for tlb flush. If its power already is on, of course it is ok. if the power is off, the main tlb will be reset while M4U power on, thus the tlb flush while m4u power off is unnecessary, just skip it. Therefore, we increase the ref_count for pm when pm status is ACTIVE, otherwise, skip it. Meanwhile, the tlb_flush_range is called so often, thus, update pm ref_count while the SoC has power-domain to avoid touch the dev->power.lock. and the tlb_flush_all only is called when boot, so no need check if the SoC has power-domain to keep code clean. There will be one case that pm runctime status is not expected when tlb flush. After boot, the display may call dma_alloc_attrs before it call pm_runtime_get(disp-dev), then the m4u's pm status is not active inside the dma_alloc_attrs. Since it only happens after boot, the tlb is clean at that time, I also think this is ok. Signed-off-by: Yong Wu Reviewed-by: Tomasz Figa Link: https://lore.kernel.org/r/20210111111914.22211-21-yong.wu@mediatek.com Signed-off-by: Will Deacon --- drivers/iommu/mtk_iommu.c | 41 +++++++++++++++++++++++++++++++++------ 1 file changed, 35 insertions(+), 6 deletions(-) diff --git a/drivers/iommu/mtk_iommu.c b/drivers/iommu/mtk_iommu.c index 3682137b789a..b9c63c8de33e 100644 --- a/drivers/iommu/mtk_iommu.c +++ b/drivers/iommu/mtk_iommu.c @@ -182,10 +182,15 @@ static struct mtk_iommu_domain *to_mtk_domain(struct iommu_domain *dom) static void mtk_iommu_tlb_flush_all(struct mtk_iommu_data *data) { for_each_m4u(data) { + if (pm_runtime_get_if_in_use(data->dev) <= 0) + continue; + writel_relaxed(F_INVLD_EN1 | F_INVLD_EN0, data->base + data->plat_data->inv_sel_reg); writel_relaxed(F_ALL_INVLD, data->base + REG_MMU_INVALIDATE); wmb(); /* Make sure the tlb flush all done */ + + pm_runtime_put(data->dev); } } @@ -193,11 +198,17 @@ static void mtk_iommu_tlb_flush_range_sync(unsigned long iova, size_t size, size_t granule, struct mtk_iommu_data *data) { + bool has_pm = !!data->dev->pm_domain; unsigned long flags; int ret; u32 tmp; for_each_m4u(data) { + if (has_pm) { + if (pm_runtime_get_if_in_use(data->dev) <= 0) + continue; + } + spin_lock_irqsave(&data->tlb_lock, flags); writel_relaxed(F_INVLD_EN1 | F_INVLD_EN0, data->base + data->plat_data->inv_sel_reg); @@ -219,6 +230,9 @@ static void mtk_iommu_tlb_flush_range_sync(unsigned long iova, size_t size, /* Clear the CPE status */ writel_relaxed(0, data->base + REG_MMU_CPE_DONE); spin_unlock_irqrestore(&data->tlb_lock, flags); + + if (has_pm) + pm_runtime_put(data->dev); } } @@ -367,18 +381,27 @@ static int mtk_iommu_attach_device(struct iommu_domain *domain, { struct mtk_iommu_data *data = dev_iommu_priv_get(dev); struct mtk_iommu_domain *dom = to_mtk_domain(domain); + struct device *m4udev = data->dev; int ret; if (!data) return -ENODEV; if (!data->m4u_dom) { /* Initialize the M4U HW */ - ret = mtk_iommu_hw_init(data); - if (ret) + ret = pm_runtime_resume_and_get(m4udev); + if (ret < 0) return ret; + + ret = mtk_iommu_hw_init(data); + if (ret) { + pm_runtime_put(m4udev); + return ret; + } data->m4u_dom = dom; writel(dom->cfg.arm_v7s_cfg.ttbr & MMU_PT_ADDR_MASK, data->base + REG_MMU_PT_BASE_ADDR); + + pm_runtime_put(m4udev); } mtk_iommu_config(data, dev, true); @@ -738,11 +761,13 @@ static int mtk_iommu_probe(struct platform_device *pdev) of_node_put(smicomm_node); data->smicomm_dev = &plarbdev->dev; + pm_runtime_enable(dev); + link = device_link_add(data->smicomm_dev, dev, DL_FLAG_STATELESS | DL_FLAG_PM_RUNTIME); if (!link) { dev_err(dev, "Unable link %s.\n", dev_name(data->smicomm_dev)); - return -EINVAL; + goto out_runtime_disable; } platform_set_drvdata(pdev, data); @@ -782,6 +807,8 @@ static int mtk_iommu_probe(struct platform_device *pdev) iommu_device_sysfs_remove(&data->iommu); out_link_remove: device_link_remove(data->smicomm_dev, dev); +out_runtime_disable: + pm_runtime_disable(dev); return ret; } @@ -797,6 +824,7 @@ static int mtk_iommu_remove(struct platform_device *pdev) clk_disable_unprepare(data->bclk); device_link_remove(data->smicomm_dev, &pdev->dev); + pm_runtime_disable(&pdev->dev); devm_free_irq(&pdev->dev, data->irq, data); component_master_del(&pdev->dev, &mtk_iommu_com_ops); return 0; @@ -828,6 +856,9 @@ static int __maybe_unused mtk_iommu_runtime_resume(struct device *dev) void __iomem *base = data->base; int ret; + /* Avoid first resume to affect the default value of registers below. */ + if (!m4u_dom) + return 0; ret = clk_prepare_enable(data->bclk); if (ret) { dev_err(data->dev, "Failed to enable clk(%d) in resume\n", ret); @@ -841,9 +872,7 @@ static int __maybe_unused mtk_iommu_runtime_resume(struct device *dev) writel_relaxed(reg->int_main_control, base + REG_MMU_INT_MAIN_CONTROL); writel_relaxed(reg->ivrp_paddr, base + REG_MMU_IVRP_PADDR); writel_relaxed(reg->vld_pa_rng, base + REG_MMU_VLD_PA_RNG); - if (m4u_dom) - writel(m4u_dom->cfg.arm_v7s_cfg.ttbr & MMU_PT_ADDR_MASK, - base + REG_MMU_PT_BASE_ADDR); + writel(m4u_dom->cfg.arm_v7s_cfg.ttbr & MMU_PT_ADDR_MASK, base + REG_MMU_PT_BASE_ADDR); return 0; } From bfed873114c5fbb8982113abbab17cde7788dcba Mon Sep 17 00:00:00 2001 From: Yong Wu Date: Mon, 11 Jan 2021 19:19:02 +0800 Subject: [PATCH 274/414] iommu/mediatek: Support up to 34bit iova in tlb flush If the iova is 34bit, the iova[32][33] is the bit0/1 in the tlb flush register. Add a new macro for this. In the macro, since (iova + size - 1) may be end with 0xfff, then the bit0/1 always is 1, thus add a mask. Signed-off-by: Yong Wu Reviewed-by: Tomasz Figa Link: https://lore.kernel.org/r/20210111111914.22211-22-yong.wu@mediatek.com Signed-off-by: Will Deacon --- drivers/iommu/mtk_iommu.c | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) diff --git a/drivers/iommu/mtk_iommu.c b/drivers/iommu/mtk_iommu.c index b9c63c8de33e..468be7ca62e4 100644 --- a/drivers/iommu/mtk_iommu.c +++ b/drivers/iommu/mtk_iommu.c @@ -129,6 +129,11 @@ static const struct iommu_ops mtk_iommu_ops; static int mtk_iommu_hw_init(const struct mtk_iommu_data *data); +#define MTK_IOMMU_TLB_ADDR(iova) ({ \ + dma_addr_t _addr = iova; \ + ((lower_32_bits(_addr) & GENMASK(31, 12)) | upper_32_bits(_addr));\ +}) + /* * In M4U 4GB mode, the physical address is remapped as below: * @@ -213,8 +218,9 @@ static void mtk_iommu_tlb_flush_range_sync(unsigned long iova, size_t size, writel_relaxed(F_INVLD_EN1 | F_INVLD_EN0, data->base + data->plat_data->inv_sel_reg); - writel_relaxed(iova, data->base + REG_MMU_INVLD_START_A); - writel_relaxed(iova + size - 1, + writel_relaxed(MTK_IOMMU_TLB_ADDR(iova), + data->base + REG_MMU_INVLD_START_A); + writel_relaxed(MTK_IOMMU_TLB_ADDR(iova + size - 1), data->base + REG_MMU_INVLD_END_A); writel_relaxed(F_MMU_INV_RANGE, data->base + REG_MMU_INVALIDATE); From ef0f0986b670cb1627bff055102cab70a7d3852a Mon Sep 17 00:00:00 2001 From: Yong Wu Date: Mon, 11 Jan 2021 19:19:03 +0800 Subject: [PATCH 275/414] iommu/mediatek: Support report iova 34bit translation fault in ISR If the iova is over 32bit, the fault status register bit is a little different. Signed-off-by: Yong Wu Reviewed-by: Tomasz Figa Link: https://lore.kernel.org/r/20210111111914.22211-23-yong.wu@mediatek.com Signed-off-by: Will Deacon --- drivers/iommu/mtk_iommu.c | 17 +++++++++++++++-- 1 file changed, 15 insertions(+), 2 deletions(-) diff --git a/drivers/iommu/mtk_iommu.c b/drivers/iommu/mtk_iommu.c index 468be7ca62e4..f3666b0d7577 100644 --- a/drivers/iommu/mtk_iommu.c +++ b/drivers/iommu/mtk_iommu.c @@ -3,6 +3,7 @@ * Copyright (c) 2015-2016 MediaTek Inc. * Author: Yong Wu */ +#include #include #include #include @@ -89,6 +90,9 @@ #define F_REG_MMU1_FAULT_MASK GENMASK(13, 7) #define REG_MMU0_FAULT_VA 0x13c +#define F_MMU_INVAL_VA_31_12_MASK GENMASK(31, 12) +#define F_MMU_INVAL_VA_34_32_MASK GENMASK(11, 9) +#define F_MMU_INVAL_PA_34_32_MASK GENMASK(8, 6) #define F_MMU_FAULT_VA_WRITE_BIT BIT(1) #define F_MMU_FAULT_VA_LAYER_BIT BIT(0) @@ -246,8 +250,9 @@ static irqreturn_t mtk_iommu_isr(int irq, void *dev_id) { struct mtk_iommu_data *data = dev_id; struct mtk_iommu_domain *dom = data->m4u_dom; - u32 int_state, regval, fault_iova, fault_pa; unsigned int fault_larb, fault_port, sub_comm = 0; + u32 int_state, regval, va34_32, pa34_32; + u64 fault_iova, fault_pa; bool layer, write; /* Read error info from registers */ @@ -263,6 +268,14 @@ static irqreturn_t mtk_iommu_isr(int irq, void *dev_id) } layer = fault_iova & F_MMU_FAULT_VA_LAYER_BIT; write = fault_iova & F_MMU_FAULT_VA_WRITE_BIT; + if (MTK_IOMMU_HAS_FLAG(data->plat_data, IOVA_34_EN)) { + va34_32 = FIELD_GET(F_MMU_INVAL_VA_34_32_MASK, fault_iova); + pa34_32 = FIELD_GET(F_MMU_INVAL_PA_34_32_MASK, fault_iova); + fault_iova = fault_iova & F_MMU_INVAL_VA_31_12_MASK; + fault_iova |= (u64)va34_32 << 32; + fault_pa |= (u64)pa34_32 << 32; + } + fault_port = F_MMU_INT_ID_PORT_ID(regval); if (MTK_IOMMU_HAS_FLAG(data->plat_data, HAS_SUB_COMM)) { fault_larb = F_MMU_INT_ID_COMM_ID(regval); @@ -276,7 +289,7 @@ static irqreturn_t mtk_iommu_isr(int irq, void *dev_id) write ? IOMMU_FAULT_WRITE : IOMMU_FAULT_READ)) { dev_err_ratelimited( data->dev, - "fault type=0x%x iova=0x%x pa=0x%x larb=%d port=%d layer=%d %s\n", + "fault type=0x%x iova=0x%llx pa=0x%llx larb=%d port=%d layer=%d %s\n", int_state, fault_iova, fault_pa, fault_larb, fault_port, layer, write ? "write" : "read"); } From 08500c43d4f70c1e8be6beda8a2aea0ecfb845d8 Mon Sep 17 00:00:00 2001 From: Yong Wu Date: Mon, 11 Jan 2021 19:19:04 +0800 Subject: [PATCH 276/414] iommu/mediatek: Adjust the structure Add "struct mtk_iommu_data *" in the "struct mtk_iommu_domain", reduce the call mtk_iommu_get_m4u_data(). No functional change. Signed-off-by: Yong Wu Reviewed-by: Tomasz Figa Link: https://lore.kernel.org/r/20210111111914.22211-24-yong.wu@mediatek.com Signed-off-by: Will Deacon --- drivers/iommu/mtk_iommu.c | 20 +++++++++++--------- 1 file changed, 11 insertions(+), 9 deletions(-) diff --git a/drivers/iommu/mtk_iommu.c b/drivers/iommu/mtk_iommu.c index f3666b0d7577..f1941608ccb7 100644 --- a/drivers/iommu/mtk_iommu.c +++ b/drivers/iommu/mtk_iommu.c @@ -126,6 +126,7 @@ struct mtk_iommu_domain { struct io_pgtable_cfg cfg; struct io_pgtable_ops *iop; + struct mtk_iommu_data *data; struct iommu_domain domain; }; @@ -351,6 +352,7 @@ static int mtk_iommu_domain_finalise(struct mtk_iommu_domain *dom) return -EINVAL; } + dom->data = data; /* Update our support page sizes bitmap */ dom->domain.pgsize_bitmap = dom->cfg.pgsize_bitmap; return 0; @@ -442,10 +444,9 @@ static int mtk_iommu_map(struct iommu_domain *domain, unsigned long iova, phys_addr_t paddr, size_t size, int prot, gfp_t gfp) { struct mtk_iommu_domain *dom = to_mtk_domain(domain); - struct mtk_iommu_data *data = mtk_iommu_get_m4u_data(); /* The "4GB mode" M4U physically can not use the lower remap of Dram. */ - if (data->enable_4GB) + if (dom->data->enable_4GB) paddr |= BIT_ULL(32); /* Synchronize with the tlb_lock */ @@ -468,36 +469,37 @@ static size_t mtk_iommu_unmap(struct iommu_domain *domain, static void mtk_iommu_flush_iotlb_all(struct iommu_domain *domain) { - mtk_iommu_tlb_flush_all(mtk_iommu_get_m4u_data()); + struct mtk_iommu_domain *dom = to_mtk_domain(domain); + + mtk_iommu_tlb_flush_all(dom->data); } static void mtk_iommu_iotlb_sync(struct iommu_domain *domain, struct iommu_iotlb_gather *gather) { - struct mtk_iommu_data *data = mtk_iommu_get_m4u_data(); + struct mtk_iommu_domain *dom = to_mtk_domain(domain); size_t length = gather->end - gather->start + 1; mtk_iommu_tlb_flush_range_sync(gather->start, length, gather->pgsize, - data); + dom->data); } static void mtk_iommu_sync_map(struct iommu_domain *domain, unsigned long iova, size_t size) { - struct mtk_iommu_data *data = mtk_iommu_get_m4u_data(); + struct mtk_iommu_domain *dom = to_mtk_domain(domain); - mtk_iommu_tlb_flush_range_sync(iova, size, size, data); + mtk_iommu_tlb_flush_range_sync(iova, size, size, dom->data); } static phys_addr_t mtk_iommu_iova_to_phys(struct iommu_domain *domain, dma_addr_t iova) { struct mtk_iommu_domain *dom = to_mtk_domain(domain); - struct mtk_iommu_data *data = mtk_iommu_get_m4u_data(); phys_addr_t pa; pa = dom->iop->iova_to_phys(dom->iop, iova); - if (data->enable_4GB && pa >= MTK_IOMMU_4GB_MODE_REMAP_BASE) + if (dom->data->enable_4GB && pa >= MTK_IOMMU_4GB_MODE_REMAP_BASE) pa &= ~BIT_ULL(32); return pa; From 4f956c97d26be65aea580e943f470ec70f7b0bb6 Mon Sep 17 00:00:00 2001 From: Yong Wu Date: Mon, 11 Jan 2021 19:19:05 +0800 Subject: [PATCH 277/414] iommu/mediatek: Move domain_finalise into attach_device Currently domain_alloc only has a parameter(type), We have no chance to input some special data. This patch moves the domain_finalise into attach_device which has the device information, then could update the domain's geometry.aperture ranges for each a device. Strictly, I should use the data from mtk_iommu_get_m4u_data as the parameter of mtk_iommu_domain_finalise in this patch. but dom->data only is used in tlb ops in which the data is get from the m4u_list, thus it is ok here. Signed-off-by: Yong Wu Reviewed-by: Tomasz Figa Link: https://lore.kernel.org/r/20210111111914.22211-25-yong.wu@mediatek.com Signed-off-by: Will Deacon --- drivers/iommu/mtk_iommu.c | 31 ++++++++++++------------------- 1 file changed, 12 insertions(+), 19 deletions(-) diff --git a/drivers/iommu/mtk_iommu.c b/drivers/iommu/mtk_iommu.c index f1941608ccb7..d321d09ac4c2 100644 --- a/drivers/iommu/mtk_iommu.c +++ b/drivers/iommu/mtk_iommu.c @@ -328,10 +328,9 @@ static void mtk_iommu_config(struct mtk_iommu_data *data, } } -static int mtk_iommu_domain_finalise(struct mtk_iommu_domain *dom) +static int mtk_iommu_domain_finalise(struct mtk_iommu_domain *dom, + struct mtk_iommu_data *data) { - struct mtk_iommu_data *data = mtk_iommu_get_m4u_data(); - dom->cfg = (struct io_pgtable_cfg) { .quirks = IO_PGTABLE_QUIRK_ARM_NS | IO_PGTABLE_QUIRK_NO_PERMS | @@ -352,7 +351,6 @@ static int mtk_iommu_domain_finalise(struct mtk_iommu_domain *dom) return -EINVAL; } - dom->data = data; /* Update our support page sizes bitmap */ dom->domain.pgsize_bitmap = dom->cfg.pgsize_bitmap; return 0; @@ -369,30 +367,19 @@ static struct iommu_domain *mtk_iommu_domain_alloc(unsigned type) if (!dom) return NULL; - if (iommu_get_dma_cookie(&dom->domain)) - goto free_dom; - - if (mtk_iommu_domain_finalise(dom)) - goto put_dma_cookie; + if (iommu_get_dma_cookie(&dom->domain)) { + kfree(dom); + return NULL; + } dom->domain.geometry.aperture_start = 0; dom->domain.geometry.aperture_end = DMA_BIT_MASK(32); dom->domain.geometry.force_aperture = true; - return &dom->domain; - -put_dma_cookie: - iommu_put_dma_cookie(&dom->domain); -free_dom: - kfree(dom); - return NULL; } static void mtk_iommu_domain_free(struct iommu_domain *domain) { - struct mtk_iommu_domain *dom = to_mtk_domain(domain); - - free_io_pgtable_ops(dom->iop); iommu_put_dma_cookie(domain); kfree(to_mtk_domain(domain)); } @@ -408,6 +395,12 @@ static int mtk_iommu_attach_device(struct iommu_domain *domain, if (!data) return -ENODEV; + if (!dom->data) { + if (mtk_iommu_domain_finalise(dom, data)) + return -ENODEV; + dom->data = data; + } + if (!data->m4u_dom) { /* Initialize the M4U HW */ ret = pm_runtime_resume_and_get(m4udev); if (ret < 0) From b7875eb9458983ade0c780276b21cfbc0c22d70b Mon Sep 17 00:00:00 2001 From: Yong Wu Date: Mon, 11 Jan 2021 19:19:06 +0800 Subject: [PATCH 278/414] iommu/mediatek: Move geometry.aperture updating into domain_finalise Move the domain geometry.aperture updating into domain_finalise. This is a preparing patch for updating the domain region. We know the detailed iova region in the attach_device. Signed-off-by: Yong Wu Reviewed-by: Tomasz Figa Link: https://lore.kernel.org/r/20210111111914.22211-26-yong.wu@mediatek.com Signed-off-by: Will Deacon --- drivers/iommu/mtk_iommu.c | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/drivers/iommu/mtk_iommu.c b/drivers/iommu/mtk_iommu.c index d321d09ac4c2..309b06d5e1f9 100644 --- a/drivers/iommu/mtk_iommu.c +++ b/drivers/iommu/mtk_iommu.c @@ -353,6 +353,10 @@ static int mtk_iommu_domain_finalise(struct mtk_iommu_domain *dom, /* Update our support page sizes bitmap */ dom->domain.pgsize_bitmap = dom->cfg.pgsize_bitmap; + + dom->domain.geometry.aperture_start = 0; + dom->domain.geometry.aperture_end = DMA_BIT_MASK(32); + dom->domain.geometry.force_aperture = true; return 0; } @@ -372,9 +376,6 @@ static struct iommu_domain *mtk_iommu_domain_alloc(unsigned type) return NULL; } - dom->domain.geometry.aperture_start = 0; - dom->domain.geometry.aperture_end = DMA_BIT_MASK(32); - dom->domain.geometry.force_aperture = true; return &dom->domain; } From 585e58f498a2855dc7a8d351df34dd645447f337 Mon Sep 17 00:00:00 2001 From: Yong Wu Date: Mon, 11 Jan 2021 19:19:07 +0800 Subject: [PATCH 279/414] iommu/mediatek: Add iova_region structure Add a new structure for the iova_region. Each a region will be a independent iommu domain. For the previous SoC, there is single iova region(0~4G). For the SoC that need support multi-domains, there will be several regions. Signed-off-by: Yong Wu Reviewed-by: Tomasz Figa Link: https://lore.kernel.org/r/20210111111914.22211-27-yong.wu@mediatek.com Signed-off-by: Will Deacon --- drivers/iommu/mtk_iommu.c | 19 +++++++++++++++++++ drivers/iommu/mtk_iommu.h | 5 +++++ 2 files changed, 24 insertions(+) diff --git a/drivers/iommu/mtk_iommu.c b/drivers/iommu/mtk_iommu.c index 309b06d5e1f9..6875ca1225f0 100644 --- a/drivers/iommu/mtk_iommu.c +++ b/drivers/iommu/mtk_iommu.c @@ -167,6 +167,15 @@ static LIST_HEAD(m4ulist); /* List all the M4U HWs */ #define for_each_m4u(data) list_for_each_entry(data, &m4ulist, list) +struct mtk_iommu_iova_region { + dma_addr_t iova_base; + unsigned long long size; +}; + +static const struct mtk_iommu_iova_region single_domain[] = { + {.iova_base = 0, .size = SZ_4G}, +}; + /* * There may be 1 or 2 M4U HWs, But we always expect they are in the same domain * for the performance. @@ -901,6 +910,8 @@ static const struct mtk_iommu_plat_data mt2712_data = { .m4u_plat = M4U_MT2712, .flags = HAS_4GB_MODE | HAS_BCLK | HAS_VLD_PA_RNG, .inv_sel_reg = REG_MMU_INV_SEL_GEN1, + .iova_region = single_domain, + .iova_region_nr = ARRAY_SIZE(single_domain), .larbid_remap = {{0}, {1}, {2}, {3}, {4}, {5}, {6}, {7}}, }; @@ -908,6 +919,8 @@ static const struct mtk_iommu_plat_data mt6779_data = { .m4u_plat = M4U_MT6779, .flags = HAS_SUB_COMM | OUT_ORDER_WR_EN | WR_THROT_EN, .inv_sel_reg = REG_MMU_INV_SEL_GEN2, + .iova_region = single_domain, + .iova_region_nr = ARRAY_SIZE(single_domain), .larbid_remap = {{0}, {1}, {2}, {3}, {5}, {7, 8}, {10}, {9}}, }; @@ -915,6 +928,8 @@ static const struct mtk_iommu_plat_data mt8167_data = { .m4u_plat = M4U_MT8167, .flags = RESET_AXI | HAS_LEGACY_IVRP_PADDR, .inv_sel_reg = REG_MMU_INV_SEL_GEN1, + .iova_region = single_domain, + .iova_region_nr = ARRAY_SIZE(single_domain), .larbid_remap = {{0}, {1}, {2}}, /* Linear mapping. */ }; @@ -923,6 +938,8 @@ static const struct mtk_iommu_plat_data mt8173_data = { .flags = HAS_4GB_MODE | HAS_BCLK | RESET_AXI | HAS_LEGACY_IVRP_PADDR, .inv_sel_reg = REG_MMU_INV_SEL_GEN1, + .iova_region = single_domain, + .iova_region_nr = ARRAY_SIZE(single_domain), .larbid_remap = {{0}, {1}, {2}, {3}, {4}, {5}}, /* Linear mapping. */ }; @@ -930,6 +947,8 @@ static const struct mtk_iommu_plat_data mt8183_data = { .m4u_plat = M4U_MT8183, .flags = RESET_AXI, .inv_sel_reg = REG_MMU_INV_SEL_GEN1, + .iova_region = single_domain, + .iova_region_nr = ARRAY_SIZE(single_domain), .larbid_remap = {{0}, {4}, {5}, {6}, {7}, {2}, {3}, {1}}, }; diff --git a/drivers/iommu/mtk_iommu.h b/drivers/iommu/mtk_iommu.h index a9b79e118f02..118170af1974 100644 --- a/drivers/iommu/mtk_iommu.h +++ b/drivers/iommu/mtk_iommu.h @@ -45,10 +45,15 @@ enum mtk_iommu_plat { M4U_MT8183, }; +struct mtk_iommu_iova_region; + struct mtk_iommu_plat_data { enum mtk_iommu_plat m4u_plat; u32 flags; u32 inv_sel_reg; + + unsigned int iova_region_nr; + const struct mtk_iommu_iova_region *iova_region; unsigned char larbid_remap[MTK_LARB_COM_MAX][MTK_LARB_SUBCOM_MAX]; }; From 803cf9e5a6aa64e94f9c554190bc0031929f6857 Mon Sep 17 00:00:00 2001 From: Yong Wu Date: Mon, 11 Jan 2021 19:19:08 +0800 Subject: [PATCH 280/414] iommu/mediatek: Add get_domain_id from dev->dma_range_map Add a new interface _get_domain_id from dev->dma_range_map, The iommu consumer device will use dma-ranges in dtsi node to indicate its dma address region requirement. In this iommu driver, we will get the requirement and decide which iova domain it should locate. In the lastest SoC, there will be several iova-regions(domains), we will compare and calculate which domain is right. If the start/end of device requirement equal some region. it is best fit of course. If it is inside some region, it is also ok. the iova requirement of a device should not be inside two or more regions. Signed-off-by: Yong Wu Reviewed-by: Tomasz Figa Link: https://lore.kernel.org/r/20210111111914.22211-28-yong.wu@mediatek.com Signed-off-by: Will Deacon --- drivers/iommu/mtk_iommu.c | 42 ++++++++++++++++++++++++++++++++++++++- 1 file changed, 41 insertions(+), 1 deletion(-) diff --git a/drivers/iommu/mtk_iommu.c b/drivers/iommu/mtk_iommu.c index 6875ca1225f0..8fc17158bc28 100644 --- a/drivers/iommu/mtk_iommu.c +++ b/drivers/iommu/mtk_iommu.c @@ -8,6 +8,7 @@ #include #include #include +#include #include #include #include @@ -314,6 +315,36 @@ static irqreturn_t mtk_iommu_isr(int irq, void *dev_id) return IRQ_HANDLED; } +static int mtk_iommu_get_domain_id(struct device *dev, + const struct mtk_iommu_plat_data *plat_data) +{ + const struct mtk_iommu_iova_region *rgn = plat_data->iova_region; + const struct bus_dma_region *dma_rgn = dev->dma_range_map; + int i, candidate = -1; + dma_addr_t dma_end; + + if (!dma_rgn || plat_data->iova_region_nr == 1) + return 0; + + dma_end = dma_rgn->dma_start + dma_rgn->size - 1; + for (i = 0; i < plat_data->iova_region_nr; i++, rgn++) { + /* Best fit. */ + if (dma_rgn->dma_start == rgn->iova_base && + dma_end == rgn->iova_base + rgn->size - 1) + return i; + /* ok if it is inside this region. */ + if (dma_rgn->dma_start >= rgn->iova_base && + dma_end < rgn->iova_base + rgn->size) + candidate = i; + } + + if (candidate >= 0) + return candidate; + dev_err(dev, "Can NOT find the iommu domain id(%pad 0x%llx).\n", + &dma_rgn->dma_start, dma_rgn->size); + return -EINVAL; +} + static void mtk_iommu_config(struct mtk_iommu_data *data, struct device *dev, bool enable) { @@ -400,11 +431,15 @@ static int mtk_iommu_attach_device(struct iommu_domain *domain, struct mtk_iommu_data *data = dev_iommu_priv_get(dev); struct mtk_iommu_domain *dom = to_mtk_domain(domain); struct device *m4udev = data->dev; - int ret; + int ret, domid; if (!data) return -ENODEV; + domid = mtk_iommu_get_domain_id(dev, data->plat_data); + if (domid < 0) + return domid; + if (!dom->data) { if (mtk_iommu_domain_finalise(dom, data)) return -ENODEV; @@ -534,10 +569,15 @@ static void mtk_iommu_release_device(struct device *dev) static struct iommu_group *mtk_iommu_device_group(struct device *dev) { struct mtk_iommu_data *data = mtk_iommu_get_m4u_data(); + int domid; if (!data) return ERR_PTR(-ENODEV); + domid = mtk_iommu_get_domain_id(dev, data->plat_data); + if (domid < 0) + return ERR_PTR(domid); + /* All the client devices are in the same m4u iommu-group */ if (!data->m4u_group) { data->m4u_group = iommu_group_alloc(); From c3045f39244e90c4c45a404e35aa66403ca68815 Mon Sep 17 00:00:00 2001 From: Yong Wu Date: Mon, 11 Jan 2021 19:19:09 +0800 Subject: [PATCH 281/414] iommu/mediatek: Support for multi domains Some HW IP(ex: CCU) require the special iova range. That means the iova got from dma_alloc_attrs for that devices must locate in his special range. In this patch, we prepare a iommu group(domain) for each a iova range requirement. Meanwhile we still use one pagetable which support 16GB iova. After this patch, If the iova range of a master is over 4G, the master should: a) Declare its special dma-ranges in its dtsi node. For example, If we preassign the iova 4G-8G for vcodec, then the vcodec dtsi node should add this: /* * iova start at 0x1_0000_0000, pa still start at 0x4000_0000 * size is 0x1_0000_0000. */ dma-ranges = <0x1 0x0 0x0 0x40000000 0x1 0x0>; /* 4G ~ 8G */ Note: we don't have a actual bus concept here. the master doesn't have its special parent node, thus this dma-ranges can only be put in the master's node. b) Update the dma_mask: dma_set_mask_and_coherent(dev, DMA_BIT_MASK(33)); Signed-off-by: Yong Wu Reviewed-by: Tomasz Figa Link: https://lore.kernel.org/r/20210111111914.22211-29-yong.wu@mediatek.com Signed-off-by: Will Deacon --- drivers/iommu/mtk_iommu.c | 37 ++++++++++++++++++++++++++----------- drivers/iommu/mtk_iommu.h | 4 +++- 2 files changed, 29 insertions(+), 12 deletions(-) diff --git a/drivers/iommu/mtk_iommu.c b/drivers/iommu/mtk_iommu.c index 8fc17158bc28..b42fd2535b77 100644 --- a/drivers/iommu/mtk_iommu.c +++ b/drivers/iommu/mtk_iommu.c @@ -369,8 +369,19 @@ static void mtk_iommu_config(struct mtk_iommu_data *data, } static int mtk_iommu_domain_finalise(struct mtk_iommu_domain *dom, - struct mtk_iommu_data *data) + struct mtk_iommu_data *data, + unsigned int domid) { + const struct mtk_iommu_iova_region *region; + + /* Use the exist domain as there is only one pgtable here. */ + if (data->m4u_dom) { + dom->iop = data->m4u_dom->iop; + dom->cfg = data->m4u_dom->cfg; + dom->domain.pgsize_bitmap = data->m4u_dom->cfg.pgsize_bitmap; + goto update_iova_region; + } + dom->cfg = (struct io_pgtable_cfg) { .quirks = IO_PGTABLE_QUIRK_ARM_NS | IO_PGTABLE_QUIRK_NO_PERMS | @@ -394,8 +405,11 @@ static int mtk_iommu_domain_finalise(struct mtk_iommu_domain *dom, /* Update our support page sizes bitmap */ dom->domain.pgsize_bitmap = dom->cfg.pgsize_bitmap; - dom->domain.geometry.aperture_start = 0; - dom->domain.geometry.aperture_end = DMA_BIT_MASK(32); +update_iova_region: + /* Update the iova region for this domain */ + region = data->plat_data->iova_region + domid; + dom->domain.geometry.aperture_start = region->iova_base; + dom->domain.geometry.aperture_end = region->iova_base + region->size - 1; dom->domain.geometry.force_aperture = true; return 0; } @@ -441,7 +455,7 @@ static int mtk_iommu_attach_device(struct iommu_domain *domain, return domid; if (!dom->data) { - if (mtk_iommu_domain_finalise(dom, data)) + if (mtk_iommu_domain_finalise(dom, data, domid)) return -ENODEV; dom->data = data; } @@ -569,6 +583,7 @@ static void mtk_iommu_release_device(struct device *dev) static struct iommu_group *mtk_iommu_device_group(struct device *dev) { struct mtk_iommu_data *data = mtk_iommu_get_m4u_data(); + struct iommu_group *group; int domid; if (!data) @@ -578,15 +593,15 @@ static struct iommu_group *mtk_iommu_device_group(struct device *dev) if (domid < 0) return ERR_PTR(domid); - /* All the client devices are in the same m4u iommu-group */ - if (!data->m4u_group) { - data->m4u_group = iommu_group_alloc(); - if (IS_ERR(data->m4u_group)) - dev_err(dev, "Failed to allocate M4U IOMMU group\n"); + group = data->m4u_group[domid]; + if (!group) { + group = iommu_group_alloc(); + if (!IS_ERR(group)) + data->m4u_group[domid] = group; } else { - iommu_group_ref_get(data->m4u_group); + iommu_group_ref_get(group); } - return data->m4u_group; + return group; } static int mtk_iommu_of_xlate(struct device *dev, struct of_phandle_args *args) diff --git a/drivers/iommu/mtk_iommu.h b/drivers/iommu/mtk_iommu.h index 118170af1974..6f2168e3222d 100644 --- a/drivers/iommu/mtk_iommu.h +++ b/drivers/iommu/mtk_iommu.h @@ -22,6 +22,8 @@ #define MTK_LARB_COM_MAX 8 #define MTK_LARB_SUBCOM_MAX 4 +#define MTK_IOMMU_GROUP_MAX 8 + struct mtk_iommu_suspend_reg { union { u32 standard_axi_mode;/* v1 */ @@ -67,7 +69,7 @@ struct mtk_iommu_data { phys_addr_t protect_base; /* protect memory base */ struct mtk_iommu_suspend_reg reg; struct mtk_iommu_domain *m4u_dom; - struct iommu_group *m4u_group; + struct iommu_group *m4u_group[MTK_IOMMU_GROUP_MAX]; bool enable_4GB; spinlock_t tlb_lock; /* lock for tlb range flush */ From ab1d5281a62bafbd611fffdce7eab6eb9577aa0d Mon Sep 17 00:00:00 2001 From: Yong Wu Date: Mon, 11 Jan 2021 19:19:10 +0800 Subject: [PATCH 282/414] iommu/mediatek: Add iova reserved function For multiple iommu_domains, we need to reserve some iova regions. Take a example, If the default iova region is 0 ~ 4G, but the 0x4000_0000 ~ 0x43ff_ffff is only for the special CCU0 domain. Thus we should exclude this region for the default iova region. Signed-off-by: Anan sun Signed-off-by: Chao Hao Signed-off-by: Yong Wu Reviewed-by: Tomasz Figa Link: https://lore.kernel.org/r/20210111111914.22211-30-yong.wu@mediatek.com Signed-off-by: Will Deacon --- drivers/iommu/mtk_iommu.c | 31 +++++++++++++++++++++++++++++++ 1 file changed, 31 insertions(+) diff --git a/drivers/iommu/mtk_iommu.c b/drivers/iommu/mtk_iommu.c index b42fd2535b77..764dc0b93477 100644 --- a/drivers/iommu/mtk_iommu.c +++ b/drivers/iommu/mtk_iommu.c @@ -626,6 +626,35 @@ static int mtk_iommu_of_xlate(struct device *dev, struct of_phandle_args *args) return iommu_fwspec_add_ids(dev, args->args, 1); } +static void mtk_iommu_get_resv_regions(struct device *dev, + struct list_head *head) +{ + struct mtk_iommu_data *data = dev_iommu_priv_get(dev); + unsigned int domid = mtk_iommu_get_domain_id(dev, data->plat_data), i; + const struct mtk_iommu_iova_region *resv, *curdom; + struct iommu_resv_region *region; + int prot = IOMMU_WRITE | IOMMU_READ; + + if (domid < 0) + return; + curdom = data->plat_data->iova_region + domid; + for (i = 0; i < data->plat_data->iova_region_nr; i++) { + resv = data->plat_data->iova_region + i; + + /* Only reserve when the region is inside the current domain */ + if (resv->iova_base <= curdom->iova_base || + resv->iova_base + resv->size >= curdom->iova_base + curdom->size) + continue; + + region = iommu_alloc_resv_region(resv->iova_base, resv->size, + prot, IOMMU_RESV_RESERVED); + if (!region) + return; + + list_add_tail(®ion->list, head); + } +} + static const struct iommu_ops mtk_iommu_ops = { .domain_alloc = mtk_iommu_domain_alloc, .domain_free = mtk_iommu_domain_free, @@ -641,6 +670,8 @@ static const struct iommu_ops mtk_iommu_ops = { .release_device = mtk_iommu_release_device, .device_group = mtk_iommu_device_group, .of_xlate = mtk_iommu_of_xlate, + .get_resv_regions = mtk_iommu_get_resv_regions, + .put_resv_regions = generic_iommu_put_resv_regions, .pgsize_bitmap = SZ_4K | SZ_64K | SZ_1M | SZ_16M, }; From 8d2c749e5252466d8fc1917101f137966c61fd1c Mon Sep 17 00:00:00 2001 From: Yong Wu Date: Mon, 11 Jan 2021 19:19:11 +0800 Subject: [PATCH 283/414] iommu/mediatek: Support master use iova over 32bit After extending v7s, our pagetable already support iova reach 16GB(34bit). the master got the iova via dma_alloc_attrs may reach 34bits, but its HW register still is 32bit. then how to set the bit32/bit33 iova? this depend on a SMI larb setting(bank_sel). we separate whole 16GB iova to four banks: bank: 0: 0~4G; 1: 4~8G; 2: 8-12G; 3: 12-16G; The bank number is (iova >> 32). We will preassign which bank the larbs belong to. currently we don't have a interface for master to adjust its bank number. Each a bank is a iova_region which is a independent iommu-domain. the iova range for each iommu-domain can't cross 4G. Signed-off-by: Yong Wu Acked-by: Krzysztof Kozlowski #for memory part Reviewed-by: Tomasz Figa Link: https://lore.kernel.org/r/20210111111914.22211-31-yong.wu@mediatek.com Signed-off-by: Will Deacon --- drivers/iommu/mtk_iommu.c | 18 ++++++++++++------ drivers/memory/mtk-smi.c | 7 +++++++ include/soc/mediatek/smi.h | 1 + 3 files changed, 20 insertions(+), 6 deletions(-) diff --git a/drivers/iommu/mtk_iommu.c b/drivers/iommu/mtk_iommu.c index 764dc0b93477..7403a7cb90ea 100644 --- a/drivers/iommu/mtk_iommu.c +++ b/drivers/iommu/mtk_iommu.c @@ -345,21 +345,27 @@ static int mtk_iommu_get_domain_id(struct device *dev, return -EINVAL; } -static void mtk_iommu_config(struct mtk_iommu_data *data, - struct device *dev, bool enable) +static void mtk_iommu_config(struct mtk_iommu_data *data, struct device *dev, + bool enable, unsigned int domid) { struct mtk_smi_larb_iommu *larb_mmu; unsigned int larbid, portid; struct iommu_fwspec *fwspec = dev_iommu_fwspec_get(dev); + const struct mtk_iommu_iova_region *region; int i; for (i = 0; i < fwspec->num_ids; ++i) { larbid = MTK_M4U_TO_LARB(fwspec->ids[i]); portid = MTK_M4U_TO_PORT(fwspec->ids[i]); + larb_mmu = &data->larb_imu[larbid]; - dev_dbg(dev, "%s iommu port: %d\n", - enable ? "enable" : "disable", portid); + region = data->plat_data->iova_region + domid; + larb_mmu->bank[portid] = upper_32_bits(region->iova_base); + + dev_dbg(dev, "%s iommu for larb(%s) port %d dom %d bank %d.\n", + enable ? "enable" : "disable", dev_name(larb_mmu->dev), + portid, domid, larb_mmu->bank[portid]); if (enable) larb_mmu->mmu |= MTK_SMI_MMU_EN(portid); @@ -477,7 +483,7 @@ static int mtk_iommu_attach_device(struct iommu_domain *domain, pm_runtime_put(m4udev); } - mtk_iommu_config(data, dev, true); + mtk_iommu_config(data, dev, true, domid); return 0; } @@ -489,7 +495,7 @@ static void mtk_iommu_detach_device(struct iommu_domain *domain, if (!data) return; - mtk_iommu_config(data, dev, false); + mtk_iommu_config(data, dev, false, 0); } static int mtk_iommu_map(struct iommu_domain *domain, unsigned long iova, diff --git a/drivers/memory/mtk-smi.c b/drivers/memory/mtk-smi.c index 89f92fa2afa5..fae61c5fbd70 100644 --- a/drivers/memory/mtk-smi.c +++ b/drivers/memory/mtk-smi.c @@ -44,6 +44,10 @@ /* mt2712 */ #define SMI_LARB_NONSEC_CON(id) (0x380 + ((id) * 4)) #define F_MMU_EN BIT(0) +#define BANK_SEL(id) ({ \ + u32 _id = (id) & 0x3; \ + (_id << 8 | _id << 10 | _id << 12 | _id << 14); \ +}) /* SMI COMMON */ #define SMI_BUS_SEL 0x220 @@ -88,6 +92,7 @@ struct mtk_smi_larb { /* larb: local arbiter */ const struct mtk_smi_larb_gen *larb_gen; int larbid; u32 *mmu; + unsigned char *bank; }; static int mtk_smi_clk_enable(const struct mtk_smi *smi) @@ -154,6 +159,7 @@ mtk_smi_larb_bind(struct device *dev, struct device *master, void *data) if (dev == larb_mmu[i].dev) { larb->larbid = i; larb->mmu = &larb_mmu[i].mmu; + larb->bank = larb_mmu[i].bank; return 0; } } @@ -172,6 +178,7 @@ static void mtk_smi_larb_config_port_gen2_general(struct device *dev) for_each_set_bit(i, (unsigned long *)larb->mmu, 32) { reg = readl_relaxed(larb->base + SMI_LARB_NONSEC_CON(i)); reg |= F_MMU_EN; + reg |= BANK_SEL(larb->bank[i]); writel(reg, larb->base + SMI_LARB_NONSEC_CON(i)); } } diff --git a/include/soc/mediatek/smi.h b/include/soc/mediatek/smi.h index 9371bf572ab8..4cf445dbbdaa 100644 --- a/include/soc/mediatek/smi.h +++ b/include/soc/mediatek/smi.h @@ -16,6 +16,7 @@ struct mtk_smi_larb_iommu { struct device *dev; unsigned int mmu; + unsigned char bank[32]; }; /* From 23357572bec6b7f900d855f6dda9c77bb6d1c2de Mon Sep 17 00:00:00 2001 From: Yong Wu Date: Mon, 11 Jan 2021 19:19:12 +0800 Subject: [PATCH 284/414] iommu/mediatek: Remove unnecessary check in attach_device This priv_data is set in the of_xlate. if of_xlate failed, it should not enter attach_device. remove the unnecessary check. Signed-off-by: Yong Wu Reviewed-by: Tomasz Figa Link: https://lore.kernel.org/r/20210111111914.22211-32-yong.wu@mediatek.com Signed-off-by: Will Deacon --- drivers/iommu/mtk_iommu.c | 6 ------ 1 file changed, 6 deletions(-) diff --git a/drivers/iommu/mtk_iommu.c b/drivers/iommu/mtk_iommu.c index 7403a7cb90ea..084fb4394ffc 100644 --- a/drivers/iommu/mtk_iommu.c +++ b/drivers/iommu/mtk_iommu.c @@ -453,9 +453,6 @@ static int mtk_iommu_attach_device(struct iommu_domain *domain, struct device *m4udev = data->dev; int ret, domid; - if (!data) - return -ENODEV; - domid = mtk_iommu_get_domain_id(dev, data->plat_data); if (domid < 0) return domid; @@ -492,9 +489,6 @@ static void mtk_iommu_detach_device(struct iommu_domain *domain, { struct mtk_iommu_data *data = dev_iommu_priv_get(dev); - if (!data) - return; - mtk_iommu_config(data, dev, false, 0); } From 9e3489e06fb94a7908c3c8f098c191ff711a8182 Mon Sep 17 00:00:00 2001 From: Yong Wu Date: Mon, 11 Jan 2021 19:19:13 +0800 Subject: [PATCH 285/414] iommu/mediatek: Add mt8192 support Add mt8192 iommu support. Signed-off-by: Yong Wu Reviewed-by: Tomasz Figa Link: https://lore.kernel.org/r/20210111111914.22211-33-yong.wu@mediatek.com Signed-off-by: Will Deacon --- drivers/iommu/mtk_iommu.c | 22 ++++++++++++++++++++++ drivers/iommu/mtk_iommu.h | 1 + 2 files changed, 23 insertions(+) diff --git a/drivers/iommu/mtk_iommu.c b/drivers/iommu/mtk_iommu.c index 084fb4394ffc..0ad14a7604b1 100644 --- a/drivers/iommu/mtk_iommu.c +++ b/drivers/iommu/mtk_iommu.c @@ -177,6 +177,16 @@ static const struct mtk_iommu_iova_region single_domain[] = { {.iova_base = 0, .size = SZ_4G}, }; +static const struct mtk_iommu_iova_region mt8192_multi_dom[] = { + { .iova_base = 0x0, .size = SZ_4G}, /* disp: 0 ~ 4G */ + #if IS_ENABLED(CONFIG_ARCH_DMA_ADDR_T_64BIT) + { .iova_base = SZ_4G, .size = SZ_4G}, /* vdec: 4G ~ 8G */ + { .iova_base = SZ_4G * 2, .size = SZ_4G}, /* CAM/MDP: 8G ~ 12G */ + { .iova_base = 0x240000000ULL, .size = 0x4000000}, /* CCU0 */ + { .iova_base = 0x244000000ULL, .size = 0x4000000}, /* CCU1 */ + #endif +}; + /* * There may be 1 or 2 M4U HWs, But we always expect they are in the same domain * for the performance. @@ -1038,12 +1048,24 @@ static const struct mtk_iommu_plat_data mt8183_data = { .larbid_remap = {{0}, {4}, {5}, {6}, {7}, {2}, {3}, {1}}, }; +static const struct mtk_iommu_plat_data mt8192_data = { + .m4u_plat = M4U_MT8192, + .flags = HAS_BCLK | HAS_SUB_COMM | OUT_ORDER_WR_EN | + WR_THROT_EN | IOVA_34_EN, + .inv_sel_reg = REG_MMU_INV_SEL_GEN2, + .iova_region = mt8192_multi_dom, + .iova_region_nr = ARRAY_SIZE(mt8192_multi_dom), + .larbid_remap = {{0}, {1}, {4, 5}, {7}, {2}, {9, 11, 19, 20}, + {0, 14, 16}, {0, 13, 18, 17}}, +}; + static const struct of_device_id mtk_iommu_of_ids[] = { { .compatible = "mediatek,mt2712-m4u", .data = &mt2712_data}, { .compatible = "mediatek,mt6779-m4u", .data = &mt6779_data}, { .compatible = "mediatek,mt8167-m4u", .data = &mt8167_data}, { .compatible = "mediatek,mt8173-m4u", .data = &mt8173_data}, { .compatible = "mediatek,mt8183-m4u", .data = &mt8183_data}, + { .compatible = "mediatek,mt8192-m4u", .data = &mt8192_data}, {} }; diff --git a/drivers/iommu/mtk_iommu.h b/drivers/iommu/mtk_iommu.h index 6f2168e3222d..f81fa8862ed0 100644 --- a/drivers/iommu/mtk_iommu.h +++ b/drivers/iommu/mtk_iommu.h @@ -45,6 +45,7 @@ enum mtk_iommu_plat { M4U_MT8167, M4U_MT8173, M4U_MT8183, + M4U_MT8192, }; struct mtk_iommu_iova_region; From 6af4873852c471c910f06e6a695dfd2b1741ccab Mon Sep 17 00:00:00 2001 From: Yong Wu Date: Mon, 11 Jan 2021 19:19:14 +0800 Subject: [PATCH 286/414] MAINTAINERS: Add entry for MediaTek IOMMU I am the author of MediaTek iommu driver, and will to maintain and develop it further. Add myself to cover these items. Signed-off-by: Yong Wu Reviewed-by: Chun-Kuang Hu Reviewed-by: Tomasz Figa Link: https://lore.kernel.org/r/20210111111914.22211-34-yong.wu@mediatek.com Signed-off-by: Will Deacon --- MAINTAINERS | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/MAINTAINERS b/MAINTAINERS index 546aa66428c9..35bc20398139 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -11182,6 +11182,15 @@ S: Maintained F: Documentation/devicetree/bindings/i2c/i2c-mt65xx.txt F: drivers/i2c/busses/i2c-mt65xx.c +MEDIATEK IOMMU DRIVER +M: Yong Wu +L: iommu@lists.linux-foundation.org +L: linux-mediatek@lists.infradead.org (moderated for non-subscribers) +S: Supported +F: Documentation/devicetree/bindings/iommu/mediatek* +F: drivers/iommu/mtk-iommu* +F: include/dt-bindings/memory/mt*-port.h + MEDIATEK JPEG DRIVER M: Rick Chang M: Bin Liu From 34eb9359c111fd17ef8fb1ba671102438a7810c6 Mon Sep 17 00:00:00 2001 From: John Garry Date: Mon, 1 Feb 2021 18:24:02 +0800 Subject: [PATCH 287/414] driver/perf: Remove ARM_SMMU_V3_PMU dependency on ARM_SMMU_V3 The ARM_SMMU_V3_PMU dependency on ARM_SMMU_V3_PMU was added with the idea that a SMMUv3 PMCG would only exist on a system with an associated SMMUv3. However it is not the job of Kconfig to make these sorts of decisions (even if it were true), so remove the dependency. Signed-off-by: John Garry Reviewed-by: Robin Murphy Link: https://lore.kernel.org/r/1612175042-56866-1-git-send-email-john.garry@huawei.com Signed-off-by: Will Deacon --- drivers/perf/Kconfig | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/perf/Kconfig b/drivers/perf/Kconfig index 3075cf171f78..77522e5efe11 100644 --- a/drivers/perf/Kconfig +++ b/drivers/perf/Kconfig @@ -62,7 +62,7 @@ config ARM_PMU_ACPI config ARM_SMMU_V3_PMU tristate "ARM SMMUv3 Performance Monitors Extension" - depends on ARM64 && ACPI && ARM_SMMU_V3 + depends on ARM64 && ACPI help Provides support for the ARM SMMUv3 Performance Monitor Counter Groups (PMCG), which provide monitoring of transactions passing From f546ff0c0c07969f2892db10f1fe029f841ddf10 Mon Sep 17 00:00:00 2001 From: Jonathan Corbet Date: Mon, 1 Feb 2021 16:26:25 -0700 Subject: [PATCH 288/414] Move our minimum Sphinx version to 1.7 As promised, drop support for some ancient sphinx releases, along with a lot of the cruft that was required to make that support work. Signed-off-by: Jonathan Corbet --- Documentation/conf.py | 75 ++--------------------------- Documentation/sphinx/cdomain.py | 8 +-- Documentation/sphinx/kernel_abi.py | 27 ++--------- Documentation/sphinx/kernel_feat.py | 25 ++-------- Documentation/sphinx/kerneldoc.py | 26 ++-------- Documentation/sphinx/kernellog.py | 26 +++------- 6 files changed, 21 insertions(+), 166 deletions(-) diff --git a/Documentation/conf.py b/Documentation/conf.py index 6a767294887e..5bd45d5fb0a0 100644 --- a/Documentation/conf.py +++ b/Documentation/conf.py @@ -31,7 +31,7 @@ from load_config import loadConfig # -- General configuration ------------------------------------------------ # If your documentation needs a minimal Sphinx version, state it here. -needs_sphinx = '1.3' +needs_sphinx = '1.7' # Add any Sphinx extension module names here, as strings. They can be # extensions coming with Sphinx (named 'sphinx.ext.*') or your custom @@ -112,19 +112,12 @@ if major >= 3: else: extensions.append('cdomain') - if major == 1 and minor < 7: - sys.stderr.write('WARNING: Sphinx 1.7 or greater will be required as of ' - 'the 5.12 release\n') # Ensure that autosectionlabel will produce unique names autosectionlabel_prefix_document = True autosectionlabel_maxdepth = 2 -# The name of the math extension changed on Sphinx 1.4 -if (major == 1 and minor > 3) or (major > 1): - extensions.append("sphinx.ext.imgmath") -else: - extensions.append("sphinx.ext.pngmath") +extensions.append("sphinx.ext.imgmath") # Add any paths that contain templates here, relative to this directory. templates_path = ['_templates'] @@ -375,71 +368,9 @@ if cjk_cmd.find("Noto Sans CJK SC") >= 0: ''' # Fix reference escape troubles with Sphinx 1.4.x -if major == 1 and minor > 3: +if major == 1: latex_elements['preamble'] += '\\renewcommand*{\\DUrole}[2]{ #2 }\n' -if major == 1 and minor <= 4: - latex_elements['preamble'] += '\\usepackage[margin=0.5in, top=1in, bottom=1in]{geometry}' -elif major == 1 and (minor > 5 or (minor == 5 and patch >= 3)): - latex_elements['sphinxsetup'] = 'hmargin=0.5in, vmargin=1in' - latex_elements['preamble'] += '\\fvset{fontsize=auto}\n' - -# Customize notice background colors on Sphinx < 1.6: -if major == 1 and minor < 6: - latex_elements['preamble'] += ''' - \\usepackage{ifthen} - - % Put notes in color and let them be inside a table - \\definecolor{NoteColor}{RGB}{204,255,255} - \\definecolor{WarningColor}{RGB}{255,204,204} - \\definecolor{AttentionColor}{RGB}{255,255,204} - \\definecolor{ImportantColor}{RGB}{192,255,204} - \\definecolor{OtherColor}{RGB}{204,204,204} - \\newlength{\\mynoticelength} - \\makeatletter\\newenvironment{coloredbox}[1]{% - \\setlength{\\fboxrule}{1pt} - \\setlength{\\fboxsep}{7pt} - \\setlength{\\mynoticelength}{\\linewidth} - \\addtolength{\\mynoticelength}{-2\\fboxsep} - \\addtolength{\\mynoticelength}{-2\\fboxrule} - \\begin{lrbox}{\\@tempboxa}\\begin{minipage}{\\mynoticelength}}{\\end{minipage}\\end{lrbox}% - \\ifthenelse% - {\\equal{\\py@noticetype}{note}}% - {\\colorbox{NoteColor}{\\usebox{\\@tempboxa}}}% - {% - \\ifthenelse% - {\\equal{\\py@noticetype}{warning}}% - {\\colorbox{WarningColor}{\\usebox{\\@tempboxa}}}% - {% - \\ifthenelse% - {\\equal{\\py@noticetype}{attention}}% - {\\colorbox{AttentionColor}{\\usebox{\\@tempboxa}}}% - {% - \\ifthenelse% - {\\equal{\\py@noticetype}{important}}% - {\\colorbox{ImportantColor}{\\usebox{\\@tempboxa}}}% - {\\colorbox{OtherColor}{\\usebox{\\@tempboxa}}}% - }% - }% - }% - }\\makeatother - - \\makeatletter - \\renewenvironment{notice}[2]{% - \\def\\py@noticetype{#1} - \\begin{coloredbox}{#1} - \\bf\\it - \\par\\strong{#2} - \\csname py@noticestart@#1\\endcsname - } - { - \\csname py@noticeend@\\py@noticetype\\endcsname - \\end{coloredbox} - } - \\makeatother - - ''' - # With Sphinx 1.6, it is possible to change the Bg color directly # by using: # \definecolor{sphinxnoteBgColor}{RGB}{204,255,255} diff --git a/Documentation/sphinx/cdomain.py b/Documentation/sphinx/cdomain.py index 014a5229e57a..ca8ac9e59ded 100644 --- a/Documentation/sphinx/cdomain.py +++ b/Documentation/sphinx/cdomain.py @@ -236,13 +236,7 @@ class CObject(Base_CObject): indextext = self.get_index_text(name) if indextext: - if major == 1 and minor < 4: - # indexnode's tuple changed in 1.4 - # https://github.com/sphinx-doc/sphinx/commit/e6a5a3a92e938fcd75866b4227db9e0524d58f7c - self.indexnode['entries'].append( - ('single', indextext, targetname, '')) - else: - self.indexnode['entries'].append( + self.indexnode['entries'].append( ('single', indextext, targetname, '', None)) class CDomain(Base_CDomain): diff --git a/Documentation/sphinx/kernel_abi.py b/Documentation/sphinx/kernel_abi.py index f3da859c9878..efe760e410c4 100644 --- a/Documentation/sphinx/kernel_abi.py +++ b/Documentation/sphinx/kernel_abi.py @@ -45,17 +45,7 @@ from docutils import nodes, statemachine from docutils.statemachine import ViewList from docutils.parsers.rst import directives, Directive from docutils.utils.error_reporting import ErrorString - -# -# AutodocReporter is only good up to Sphinx 1.7 -# -import sphinx - -Use_SSI = sphinx.__version__[:3] >= '1.7' -if Use_SSI: - from sphinx.util.docutils import switch_source_input -else: - from sphinx.ext.autodoc import AutodocReporter +from sphinx.util.docutils import switch_source_input __version__ = '1.0' @@ -179,16 +169,5 @@ class KernelCmd(Directive): return node.children def do_parse(self, content, node): - if Use_SSI: - with switch_source_input(self.state, content): - self.state.nested_parse(content, 0, node, match_titles=1) - else: - buf = self.state.memo.title_styles, self.state.memo.section_level, self.state.memo.reporter - - self.state.memo.title_styles = [] - self.state.memo.section_level = 0 - self.state.memo.reporter = AutodocReporter(content, self.state.memo.reporter) - try: - self.state.nested_parse(content, 0, node, match_titles=1) - finally: - self.state.memo.title_styles, self.state.memo.section_level, self.state.memo.reporter = buf + with switch_source_input(self.state, content): + self.state.nested_parse(content, 0, node, match_titles=1) diff --git a/Documentation/sphinx/kernel_feat.py b/Documentation/sphinx/kernel_feat.py index 2fee04f1dedd..c91ea2b27697 100644 --- a/Documentation/sphinx/kernel_feat.py +++ b/Documentation/sphinx/kernel_feat.py @@ -42,17 +42,7 @@ from docutils import nodes, statemachine from docutils.statemachine import ViewList from docutils.parsers.rst import directives, Directive from docutils.utils.error_reporting import ErrorString - -# -# AutodocReporter is only good up to Sphinx 1.7 -# -import sphinx - -Use_SSI = sphinx.__version__[:3] >= '1.7' -if Use_SSI: - from sphinx.util.docutils import switch_source_input -else: - from sphinx.ext.autodoc import AutodocReporter +from sphinx.util.docutils import switch_source_input __version__ = '1.0' @@ -154,16 +144,7 @@ class KernelFeat(Directive): buf = self.state.memo.title_styles, self.state.memo.section_level, self.state.memo.reporter - if Use_SSI: - with switch_source_input(self.state, content): - self.state.nested_parse(content, 0, node, match_titles=1) - else: - self.state.memo.title_styles = [] - self.state.memo.section_level = 0 - self.state.memo.reporter = AutodocReporter(content, self.state.memo.reporter) - try: - self.state.nested_parse(content, 0, node, match_titles=1) - finally: - self.state.memo.title_styles, self.state.memo.section_level, self.state.memo.reporter = buf + with switch_source_input(self.state, content): + self.state.nested_parse(content, 0, node, match_titles=1) return node.children diff --git a/Documentation/sphinx/kerneldoc.py b/Documentation/sphinx/kerneldoc.py index e9857ab904f1..8189c33b9dda 100644 --- a/Documentation/sphinx/kerneldoc.py +++ b/Documentation/sphinx/kerneldoc.py @@ -37,18 +37,8 @@ import glob from docutils import nodes, statemachine from docutils.statemachine import ViewList from docutils.parsers.rst import directives, Directive - -# -# AutodocReporter is only good up to Sphinx 1.7 -# import sphinx - -Use_SSI = sphinx.__version__[:3] >= '1.7' -if Use_SSI: - from sphinx.util.docutils import switch_source_input -else: - from sphinx.ext.autodoc import AutodocReporter - +from sphinx.util.docutils import switch_source_input import kernellog __version__ = '1.0' @@ -163,18 +153,8 @@ class KernelDocDirective(Directive): return [nodes.error(None, nodes.paragraph(text = "kernel-doc missing"))] def do_parse(self, result, node): - if Use_SSI: - with switch_source_input(self.state, result): - self.state.nested_parse(result, 0, node, match_titles=1) - else: - save = self.state.memo.title_styles, self.state.memo.section_level, self.state.memo.reporter - self.state.memo.reporter = AutodocReporter(result, self.state.memo.reporter) - self.state.memo.title_styles, self.state.memo.section_level = [], 0 - try: - self.state.nested_parse(result, 0, node, match_titles=1) - finally: - self.state.memo.title_styles, self.state.memo.section_level, self.state.memo.reporter = save - + with switch_source_input(self.state, result): + self.state.nested_parse(result, 0, node, match_titles=1) def setup(app): app.add_config_value('kerneldoc_bin', None, 'env') diff --git a/Documentation/sphinx/kernellog.py b/Documentation/sphinx/kernellog.py index 8ac7d274f542..0bc00c138cad 100644 --- a/Documentation/sphinx/kernellog.py +++ b/Documentation/sphinx/kernellog.py @@ -4,29 +4,19 @@ # only goes back to 1.6. So here's a wrapper layer to keep around for # as long as we support 1.4. # +# We don't support 1.4 anymore, but we'll keep the wrappers around until +# we change all the code to not use them anymore :) +# import sphinx +from sphinx.util import logging -if sphinx.__version__[:3] >= '1.6': - UseLogging = True - from sphinx.util import logging - logger = logging.getLogger('kerneldoc') -else: - UseLogging = False +logger = logging.getLogger('kerneldoc') def warn(app, message): - if UseLogging: - logger.warning(message) - else: - app.warn(message) + logger.warning(message) def verbose(app, message): - if UseLogging: - logger.verbose(message) - else: - app.verbose(message) + logger.verbose(message) def info(app, message): - if UseLogging: - logger.info(message) - else: - app.info(message) + logger.info(message) From 4217e5074f33d855873370378d427e329b60a7b4 Mon Sep 17 00:00:00 2001 From: Jonathan Corbet Date: Mon, 1 Feb 2021 17:17:14 -0700 Subject: [PATCH 289/414] Docs: drop Python 2 support The kernel build system as a whole is dropping support for Python 2, so we should do the same. The effects are rather small, especially considering that much of the deleted code was not doing anything under any version of Python anyway. Signed-off-by: Jonathan Corbet --- Documentation/sphinx/kfigure.py | 14 +------------- Documentation/sphinx/maintainers_include.py | 2 -- Documentation/sphinx/requirements.txt | 1 - Documentation/sphinx/rstFlatTable.py | 10 ---------- scripts/sphinx-pre-install | 4 ++-- 5 files changed, 3 insertions(+), 28 deletions(-) diff --git a/Documentation/sphinx/kfigure.py b/Documentation/sphinx/kfigure.py index 788704886eec..3c78828330be 100644 --- a/Documentation/sphinx/kfigure.py +++ b/Documentation/sphinx/kfigure.py @@ -49,26 +49,14 @@ import os from os import path import subprocess from hashlib import sha1 -import sys - from docutils import nodes from docutils.statemachine import ViewList from docutils.parsers.rst import directives from docutils.parsers.rst.directives import images import sphinx - from sphinx.util.nodes import clean_astext -from six import iteritems - import kernellog -PY3 = sys.version_info[0] == 3 - -if PY3: - _unicode = str -else: - _unicode = unicode - # Get Sphinx version major, minor, patch = sphinx.version_info[:3] if major == 1 and minor > 3: @@ -540,7 +528,7 @@ def add_kernel_figure_to_std_domain(app, doctree): docname = app.env.docname labels = std.data["labels"] - for name, explicit in iteritems(doctree.nametypes): + for name, explicit in doctree.nametypes.items(): if not explicit: continue labelid = doctree.nameids[name] diff --git a/Documentation/sphinx/maintainers_include.py b/Documentation/sphinx/maintainers_include.py index dc8fed48d3c2..328b3631a585 100755 --- a/Documentation/sphinx/maintainers_include.py +++ b/Documentation/sphinx/maintainers_include.py @@ -61,8 +61,6 @@ class MaintainersInclude(Include): field_content = "" for line in open(path): - if sys.version_info.major == 2: - line = unicode(line, 'utf-8') # Have we reached the end of the preformatted Descriptions text? if descriptions and line.startswith('Maintainers'): descriptions = False diff --git a/Documentation/sphinx/requirements.txt b/Documentation/sphinx/requirements.txt index 5030d346d23b..489f6626de67 100644 --- a/Documentation/sphinx/requirements.txt +++ b/Documentation/sphinx/requirements.txt @@ -1,4 +1,3 @@ docutils Sphinx==2.4.4 sphinx_rtd_theme -six diff --git a/Documentation/sphinx/rstFlatTable.py b/Documentation/sphinx/rstFlatTable.py index 2019a55f6b18..a3eea0bbe6ba 100755 --- a/Documentation/sphinx/rstFlatTable.py +++ b/Documentation/sphinx/rstFlatTable.py @@ -42,8 +42,6 @@ u""" # imports # ============================================================================== -import sys - from docutils import nodes from docutils.parsers.rst import directives, roles from docutils.parsers.rst.directives.tables import Table @@ -55,14 +53,6 @@ from docutils.utils import SystemMessagePropagation __version__ = '1.0' -PY3 = sys.version_info[0] == 3 -PY2 = sys.version_info[0] == 2 - -if PY3: - # pylint: disable=C0103, W0622 - unicode = str - basestring = str - # ============================================================================== def setup(app): # ============================================================================== diff --git a/scripts/sphinx-pre-install b/scripts/sphinx-pre-install index 828a8615a918..b5f9fd5b2880 100755 --- a/scripts/sphinx-pre-install +++ b/scripts/sphinx-pre-install @@ -728,8 +728,8 @@ sub check_needs() $need_virtualenv = 1; } if ($1 < 3) { - # Complain if it finds python2 (or worse) - printf "Warning: python$1 support is deprecated. Use it with caution!\n"; + # Fail if it finds python2 (or worse) + die "Python 3 is required to build the kernel docs\n"; } } else { die "Warning: couldn't identify $python_cmd version!"; From e1ed66ac300373a8d7e8d95fd86ca522f36602d9 Mon Sep 17 00:00:00 2001 From: Lu Baolu Date: Sat, 30 Jan 2021 23:19:07 +0800 Subject: [PATCH 290/414] iommu/vt-d: Fix compile error [-Werror=implicit-function-declaration] trace_qi_submit() could be used when interrupt remapping is supported, but DMA remapping is not. In this case, the following compile error occurs. ../drivers/iommu/intel/dmar.c: In function 'qi_submit_sync': ../drivers/iommu/intel/dmar.c:1311:3: error: implicit declaration of function 'trace_qi_submit'; did you mean 'ftrace_nmi_exit'? [-Werror=implicit-function-declaration] trace_qi_submit(iommu, desc[i].qw0, desc[i].qw1, ^~~~~~~~~~~~~~~ ftrace_nmi_exit Fixes: f2dd871799ba5 ("iommu/vt-d: Add qi_submit trace event") Reported-by: kernel test robot Reported-by: Randy Dunlap Signed-off-by: Lu Baolu Link: https://lore.kernel.org/r/20210130151907.3929148-1-baolu.lu@linux.intel.com Signed-off-by: Joerg Roedel --- drivers/iommu/intel/Makefile | 2 +- drivers/iommu/intel/iommu.c | 1 - include/trace/events/intel_iommu.h | 2 -- 3 files changed, 1 insertion(+), 4 deletions(-) diff --git a/drivers/iommu/intel/Makefile b/drivers/iommu/intel/Makefile index fb8e1e8c8029..ae570810a35e 100644 --- a/drivers/iommu/intel/Makefile +++ b/drivers/iommu/intel/Makefile @@ -1,7 +1,7 @@ # SPDX-License-Identifier: GPL-2.0 obj-$(CONFIG_DMAR_TABLE) += dmar.o obj-$(CONFIG_INTEL_IOMMU) += iommu.o pasid.o -obj-$(CONFIG_INTEL_IOMMU) += trace.o +obj-$(CONFIG_DMAR_TABLE) += trace.o obj-$(CONFIG_INTEL_IOMMU_DEBUGFS) += debugfs.o obj-$(CONFIG_INTEL_IOMMU_SVM) += svm.o obj-$(CONFIG_IRQ_REMAP) += irq_remapping.o diff --git a/drivers/iommu/intel/iommu.c b/drivers/iommu/intel/iommu.c index be85af612bc1..87d0ef79ba6a 100644 --- a/drivers/iommu/intel/iommu.c +++ b/drivers/iommu/intel/iommu.c @@ -44,7 +44,6 @@ #include #include #include -#include #include "../irq_remapping.h" #include "pasid.h" diff --git a/include/trace/events/intel_iommu.h b/include/trace/events/intel_iommu.h index aad2ff0c1e2e..e801f4910522 100644 --- a/include/trace/events/intel_iommu.h +++ b/include/trace/events/intel_iommu.h @@ -6,7 +6,6 @@ * * Author: Lu Baolu */ -#ifdef CONFIG_INTEL_IOMMU #undef TRACE_SYSTEM #define TRACE_SYSTEM intel_iommu @@ -176,4 +175,3 @@ TRACE_EVENT(qi_submit, /* This part must be outside protection */ #include -#endif /* CONFIG_INTEL_IOMMU */ From b8437a3ef8c485903d05d1f261328aaf0c0a6cc2 Mon Sep 17 00:00:00 2001 From: Douglas Anderson Date: Mon, 1 Feb 2021 17:06:23 -0800 Subject: [PATCH 291/414] iommu: Properly pass gfp_t in _iommu_map() to avoid atomic sleeping Sleeping while atomic = bad. Let's fix an obvious typo to try to avoid it. The warning that was seen (on a downstream kernel with the problematic patch backported): BUG: sleeping function called from invalid context at mm/page_alloc.c:4726 in_atomic(): 1, irqs_disabled(): 0, non_block: 0, pid: 9, name: ksoftirqd/0 CPU: 0 PID: 9 Comm: ksoftirqd/0 Not tainted 5.4.93-12508-gc10c93e28e39 #1 Call trace: dump_backtrace+0x0/0x154 show_stack+0x20/0x2c dump_stack+0xa0/0xfc ___might_sleep+0x11c/0x12c __might_sleep+0x50/0x84 __alloc_pages_nodemask+0xf8/0x2bc __arm_lpae_alloc_pages+0x48/0x1b4 __arm_lpae_map+0x124/0x274 __arm_lpae_map+0x1cc/0x274 arm_lpae_map+0x140/0x170 arm_smmu_map+0x78/0xbc __iommu_map+0xd4/0x210 _iommu_map+0x4c/0x84 iommu_map_atomic+0x44/0x58 __iommu_dma_map+0x8c/0xc4 iommu_dma_map_page+0xac/0xf0 Fixes: d8c1df02ac7f ("iommu: Move iotlb_sync_map out from __iommu_map") Signed-off-by: Douglas Anderson Reviewed-by: Yong Wu Acked-by: Will Deacon Link: https://lore.kernel.org/r/20210201170611.1.I64a7b62579287d668d7c89e105dcedf45d641063@changeid Signed-off-by: Joerg Roedel --- drivers/iommu/iommu.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/iommu/iommu.c b/drivers/iommu/iommu.c index 3d099a31ddca..2b06b01850d5 100644 --- a/drivers/iommu/iommu.c +++ b/drivers/iommu/iommu.c @@ -2441,7 +2441,7 @@ static int _iommu_map(struct iommu_domain *domain, unsigned long iova, const struct iommu_ops *ops = domain->ops; int ret; - ret = __iommu_map(domain, iova, paddr, size, prot, GFP_KERNEL); + ret = __iommu_map(domain, iova, paddr, size, prot, gfp); if (ret == 0 && ops->iotlb_sync_map) ops->iotlb_sync_map(domain, iova, size); From d286ac1d05210695c312b9018b3aa7c2048e9aca Mon Sep 17 00:00:00 2001 From: Parav Pandit Date: Wed, 27 Jan 2021 17:00:07 +0200 Subject: [PATCH 292/414] IB/mlx5: Return appropriate error code instead of ENOMEM When mlx5_ib_stage_init_init() fails, return the error code related to failure instead of -ENOMEM. Fixes: 16c1975f1032 ("IB/mlx5: Create profile infrastructure to add and remove stages") Link: https://lore.kernel.org/r/20210127150010.1876121-8-leon@kernel.org Signed-off-by: Parav Pandit Signed-off-by: Leon Romanovsky Signed-off-by: Jason Gunthorpe --- drivers/infiniband/hw/mlx5/main.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/drivers/infiniband/hw/mlx5/main.c b/drivers/infiniband/hw/mlx5/main.c index ecbb8443b0ec..477c90bc5b0e 100644 --- a/drivers/infiniband/hw/mlx5/main.c +++ b/drivers/infiniband/hw/mlx5/main.c @@ -3998,8 +3998,7 @@ static int mlx5_ib_stage_init_init(struct mlx5_ib_dev *dev) err_mp: mlx5_ib_cleanup_multiport_master(dev); - - return -ENOMEM; + return err; } static int mlx5_ib_enable_driver(struct ib_device *dev) From 131be26750379592f0dd6244b2a90bbb504a10bb Mon Sep 17 00:00:00 2001 From: Parav Pandit Date: Wed, 27 Jan 2021 17:00:08 +0200 Subject: [PATCH 293/414] IB/cm: Avoid a loop when device has 255 ports When RDMA device has 255 ports, loop iterator i overflows. Due to which cm_add_one() port iterator loops infinitely. Use core provided port iterator to avoid the infinite loop. Fixes: a977049dacde ("[PATCH] IB: Add the kernel CM implementation") Link: https://lore.kernel.org/r/20210127150010.1876121-9-leon@kernel.org Signed-off-by: Mark Bloch Signed-off-by: Parav Pandit Signed-off-by: Leon Romanovsky Signed-off-by: Jason Gunthorpe --- drivers/infiniband/core/cm.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/drivers/infiniband/core/cm.c b/drivers/infiniband/core/cm.c index 98165589c8ab..be996dba040c 100644 --- a/drivers/infiniband/core/cm.c +++ b/drivers/infiniband/core/cm.c @@ -4333,7 +4333,7 @@ static int cm_add_one(struct ib_device *ib_device) unsigned long flags; int ret; int count = 0; - u8 i; + unsigned int i; cm_dev = kzalloc(struct_size(cm_dev, port, ib_device->phys_port_cnt), GFP_KERNEL); @@ -4345,7 +4345,7 @@ static int cm_add_one(struct ib_device *ib_device) cm_dev->going_down = 0; set_bit(IB_MGMT_METHOD_SEND, reg_req.method_mask); - for (i = 1; i <= ib_device->phys_port_cnt; i++) { + rdma_for_each_port (ib_device, i) { if (!rdma_cap_ib_cm(ib_device, i)) continue; @@ -4431,7 +4431,7 @@ static void cm_remove_one(struct ib_device *ib_device, void *client_data) .clr_port_cap_mask = IB_PORT_CM_SUP }; unsigned long flags; - int i; + unsigned int i; write_lock_irqsave(&cm.device_lock, flags); list_del(&cm_dev->list); @@ -4441,7 +4441,7 @@ static void cm_remove_one(struct ib_device *ib_device, void *client_data) cm_dev->going_down = 1; spin_unlock_irq(&cm.lock); - for (i = 1; i <= ib_device->phys_port_cnt; i++) { + rdma_for_each_port (ib_device, i) { if (!rdma_cap_ib_cm(ib_device, i)) continue; From 6504c772551e809b4cc21fa720d6bba703d5c199 Mon Sep 17 00:00:00 2001 From: Parav Pandit Date: Wed, 27 Jan 2021 17:00:09 +0200 Subject: [PATCH 294/414] IB/mlx4: Use port iterator and validation APIs Use IB core provided routine to check if the port is valid or not and to iterate over IB ports. Link: https://lore.kernel.org/r/20210127150010.1876121-10-leon@kernel.org Signed-off-by: Parav Pandit Signed-off-by: Leon Romanovsky Signed-off-by: Jason Gunthorpe --- drivers/infiniband/hw/mlx4/main.c | 2 +- drivers/infiniband/hw/mlx4/sysfs.c | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/drivers/infiniband/hw/mlx4/main.c b/drivers/infiniband/hw/mlx4/main.c index e3cd402c079a..f26a0d920842 100644 --- a/drivers/infiniband/hw/mlx4/main.c +++ b/drivers/infiniband/hw/mlx4/main.c @@ -1699,7 +1699,7 @@ static struct ib_flow *mlx4_ib_create_flow(struct ib_qp *qp, struct mlx4_dev *dev = (to_mdev(qp->device))->dev; int is_bonded = mlx4_is_bonded(dev); - if (flow_attr->port < 1 || flow_attr->port > qp->device->phys_port_cnt) + if (!rdma_is_port_valid(qp->device, flow_attr->port)) return ERR_PTR(-EINVAL); if (flow_attr->flags & ~IB_FLOW_ATTR_FLAGS_DONT_TRAP) diff --git a/drivers/infiniband/hw/mlx4/sysfs.c b/drivers/infiniband/hw/mlx4/sysfs.c index 1b5891130aab..24ee79aa2122 100644 --- a/drivers/infiniband/hw/mlx4/sysfs.c +++ b/drivers/infiniband/hw/mlx4/sysfs.c @@ -798,7 +798,7 @@ static void unregister_pkey_tree(struct mlx4_ib_dev *device) int mlx4_ib_device_register_sysfs(struct mlx4_ib_dev *dev) { - int i; + unsigned int i; int ret = 0; if (!mlx4_is_master(dev->dev)) @@ -817,7 +817,7 @@ int mlx4_ib_device_register_sysfs(struct mlx4_ib_dev *dev) goto err_ports; } - for (i = 1; i <= dev->ib_dev.phys_port_cnt; ++i) { + rdma_for_each_port(&dev->ib_dev, i) { ret = add_port_entries(dev, i); if (ret) goto err_add_entries; From 904f4f647ec3e5b94c58d0484c6e94332293bc01 Mon Sep 17 00:00:00 2001 From: Parav Pandit Date: Wed, 27 Jan 2021 17:00:10 +0200 Subject: [PATCH 295/414] IB/core: Use valid port number to check link layer IB HCA port starts from 1. Use IB core provided port iterator API to avoid any assumption with start port number. Link: https://lore.kernel.org/r/20210127150010.1876121-11-leon@kernel.org Signed-off-by: Parav Pandit Signed-off-by: Leon Romanovsky Signed-off-by: Jason Gunthorpe --- drivers/infiniband/core/verbs.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/infiniband/core/verbs.c b/drivers/infiniband/core/verbs.c index 9137a25bb521..28464c58738c 100644 --- a/drivers/infiniband/core/verbs.c +++ b/drivers/infiniband/core/verbs.c @@ -2248,7 +2248,7 @@ static bool is_valid_mcast_lid(struct ib_qp *qp, u16 lid) struct ib_qp_init_attr init_attr = {}; struct ib_qp_attr attr = {}; int num_eth_ports = 0; - int port; + unsigned int port; /* If QP state >= init, it is assigned to a port and we can check this * port only. @@ -2263,7 +2263,7 @@ static bool is_valid_mcast_lid(struct ib_qp *qp, u16 lid) } /* Can't get a quick answer, iterate over all ports */ - for (port = 0; port < qp->device->phys_port_cnt; port++) + rdma_for_each_port(qp->device, port) if (rdma_port_get_link_layer(qp->device, port) != IB_LINK_LAYER_INFINIBAND) num_eth_ports++; From d6fd59e14ed2975d9b372876c45a09d76d1e70c9 Mon Sep 17 00:00:00 2001 From: Parav Pandit Date: Wed, 27 Jan 2021 17:00:03 +0200 Subject: [PATCH 296/414] IB/mlx5: Support default partition key for representor port Representor port has only one default pkey. Hence have simpler query pkey callback or it. Link: https://lore.kernel.org/r/20210127150010.1876121-4-leon@kernel.org Signed-off-by: Parav Pandit Signed-off-by: Leon Romanovsky Signed-off-by: Jason Gunthorpe --- drivers/infiniband/hw/mlx5/main.c | 11 +++++++++++ 1 file changed, 11 insertions(+) diff --git a/drivers/infiniband/hw/mlx5/main.c b/drivers/infiniband/hw/mlx5/main.c index 477c90bc5b0e..40fb86db3376 100644 --- a/drivers/infiniband/hw/mlx5/main.c +++ b/drivers/infiniband/hw/mlx5/main.c @@ -1400,6 +1400,16 @@ static int mlx5_ib_rep_query_port(struct ib_device *ibdev, u8 port, return ret; } +static int mlx5_ib_rep_query_pkey(struct ib_device *ibdev, u8 port, u16 index, + u16 *pkey) +{ + /* Default special Pkey for representor device port as per the + * IB specification 1.3 section 10.9.1.2. + */ + *pkey = 0xffff; + return 0; +} + static int mlx5_ib_query_gid(struct ib_device *ibdev, u8 port, int index, union ib_gid *gid) { @@ -4209,6 +4219,7 @@ static int mlx5_ib_stage_non_default_cb(struct mlx5_ib_dev *dev) static const struct ib_device_ops mlx5_ib_dev_port_rep_ops = { .get_port_immutable = mlx5_port_rep_immutable, .query_port = mlx5_ib_rep_query_port, + .query_pkey = mlx5_ib_rep_query_pkey, }; static int mlx5_ib_stage_raw_eth_non_default_cb(struct mlx5_ib_dev *dev) From ed8188a0c1f0f49739c727a53df1174826c1a80b Mon Sep 17 00:00:00 2001 From: Bjorn Helgaas Date: Thu, 4 Feb 2021 09:43:55 +0800 Subject: [PATCH 297/414] iommu/vt-d: Fix 'physical' typos Fix misspellings of "physical". Signed-off-by: Bjorn Helgaas Signed-off-by: Lu Baolu Link: https://lore.kernel.org/linux-iommu/20210126211738.2920789-1-helgaas@kernel.org Link: https://lore.kernel.org/r/20210204014401.2846425-2-baolu.lu@linux.intel.com Signed-off-by: Joerg Roedel --- include/linux/intel-iommu.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/include/linux/intel-iommu.h b/include/linux/intel-iommu.h index ecb35fdff03e..f8ab154c979d 100644 --- a/include/linux/intel-iommu.h +++ b/include/linux/intel-iommu.h @@ -664,7 +664,7 @@ static inline struct dmar_domain *to_dmar_domain(struct iommu_domain *dom) * 7: super page * 8-10: available * 11: snoop behavior - * 12-63: Host physcial address + * 12-63: Host physical address */ struct dma_pte { u64 val; From ad3d19029979b19378ece2011fc8ce07be98c905 Mon Sep 17 00:00:00 2001 From: Kyung Min Park Date: Thu, 4 Feb 2021 09:43:56 +0800 Subject: [PATCH 298/414] iommu/vt-d: Audit IOMMU Capabilities and add helper functions Audit IOMMU Capability/Extended Capability and check if the IOMMUs have the consistent value for features. Report out or scale to the lowest supported when IOMMU features have incompatibility among IOMMUs. Report out features when below features are mismatched: - First Level 5 Level Paging Support (FL5LP) - First Level 1 GByte Page Support (FL1GP) - Read Draining (DRD) - Write Draining (DWD) - Page Selective Invalidation (PSI) - Zero Length Read (ZLR) - Caching Mode (CM) - Protected High/Low-Memory Region (PHMR/PLMR) - Required Write-Buffer Flushing (RWBF) - Advanced Fault Logging (AFL) - RID-PASID Support (RPS) - Scalable Mode Page Walk Coherency (SMPWC) - First Level Translation Support (FLTS) - Second Level Translation Support (SLTS) - No Write Flag Support (NWFS) - Second Level Accessed/Dirty Support (SLADS) - Virtual Command Support (VCS) - Scalable Mode Translation Support (SMTS) - Device TLB Invalidation Throttle (DIT) - Page Drain Support (PDS) - Process Address Space ID Support (PASID) - Extended Accessed Flag Support (EAFS) - Supervisor Request Support (SRS) - Execute Request Support (ERS) - Page Request Support (PRS) - Nested Translation Support (NEST) - Snoop Control (SC) - Pass Through (PT) - Device TLB Support (DT) - Queued Invalidation (QI) - Page walk Coherency (C) Set capability to the lowest supported when below features are mismatched: - Maximum Address Mask Value (MAMV) - Number of Fault Recording Registers (NFR) - Second Level Large Page Support (SLLPS) - Fault Recording Offset (FRO) - Maximum Guest Address Width (MGAW) - Supported Adjusted Guest Address Width (SAGAW) - Number of Domains supported (NDOMS) - Pasid Size Supported (PSS) - Maximum Handle Mask Value (MHMV) - IOTLB Register Offset (IRO) Signed-off-by: Kyung Min Park Signed-off-by: Lu Baolu Link: https://lore.kernel.org/r/20210130184452.31711-1-kyung.min.park@intel.com Link: https://lore.kernel.org/r/20210204014401.2846425-3-baolu.lu@linux.intel.com Signed-off-by: Joerg Roedel --- drivers/iommu/intel/Makefile | 2 +- drivers/iommu/intel/cap_audit.c | 185 ++++++++++++++++++++++++++++ drivers/iommu/intel/cap_audit.h | 110 +++++++++++++++++ drivers/iommu/intel/iommu.c | 9 ++ drivers/iommu/intel/irq_remapping.c | 8 ++ include/linux/intel-iommu.h | 39 +++--- 6 files changed, 334 insertions(+), 19 deletions(-) create mode 100644 drivers/iommu/intel/cap_audit.c create mode 100644 drivers/iommu/intel/cap_audit.h diff --git a/drivers/iommu/intel/Makefile b/drivers/iommu/intel/Makefile index ae570810a35e..ae236ec7d219 100644 --- a/drivers/iommu/intel/Makefile +++ b/drivers/iommu/intel/Makefile @@ -1,7 +1,7 @@ # SPDX-License-Identifier: GPL-2.0 obj-$(CONFIG_DMAR_TABLE) += dmar.o obj-$(CONFIG_INTEL_IOMMU) += iommu.o pasid.o -obj-$(CONFIG_DMAR_TABLE) += trace.o +obj-$(CONFIG_DMAR_TABLE) += trace.o cap_audit.o obj-$(CONFIG_INTEL_IOMMU_DEBUGFS) += debugfs.o obj-$(CONFIG_INTEL_IOMMU_SVM) += svm.o obj-$(CONFIG_IRQ_REMAP) += irq_remapping.o diff --git a/drivers/iommu/intel/cap_audit.c b/drivers/iommu/intel/cap_audit.c new file mode 100644 index 000000000000..049bc0c76a00 --- /dev/null +++ b/drivers/iommu/intel/cap_audit.c @@ -0,0 +1,185 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * cap_audit.c - audit iommu capabilities for boot time and hot plug + * + * Copyright (C) 2021 Intel Corporation + * + * Author: Kyung Min Park + * Lu Baolu + */ + +#define pr_fmt(fmt) "DMAR: " fmt + +#include +#include "cap_audit.h" + +static u64 intel_iommu_cap_sanity; +static u64 intel_iommu_ecap_sanity; + +static inline void check_irq_capabilities(struct intel_iommu *a, + struct intel_iommu *b) +{ + CHECK_FEATURE_MISMATCH(a, b, cap, pi_support, CAP_PI_MASK); + CHECK_FEATURE_MISMATCH(a, b, ecap, eim_support, ECAP_EIM_MASK); +} + +static inline void check_dmar_capabilities(struct intel_iommu *a, + struct intel_iommu *b) +{ + MINIMAL_FEATURE_IOMMU(b, cap, CAP_MAMV_MASK); + MINIMAL_FEATURE_IOMMU(b, cap, CAP_NFR_MASK); + MINIMAL_FEATURE_IOMMU(b, cap, CAP_SLLPS_MASK); + MINIMAL_FEATURE_IOMMU(b, cap, CAP_FRO_MASK); + MINIMAL_FEATURE_IOMMU(b, cap, CAP_MGAW_MASK); + MINIMAL_FEATURE_IOMMU(b, cap, CAP_SAGAW_MASK); + MINIMAL_FEATURE_IOMMU(b, cap, CAP_NDOMS_MASK); + MINIMAL_FEATURE_IOMMU(b, ecap, ECAP_PSS_MASK); + MINIMAL_FEATURE_IOMMU(b, ecap, ECAP_MHMV_MASK); + MINIMAL_FEATURE_IOMMU(b, ecap, ECAP_IRO_MASK); + + CHECK_FEATURE_MISMATCH(a, b, cap, 5lp_support, CAP_FL5LP_MASK); + CHECK_FEATURE_MISMATCH(a, b, cap, fl1gp_support, CAP_FL1GP_MASK); + CHECK_FEATURE_MISMATCH(a, b, cap, read_drain, CAP_RD_MASK); + CHECK_FEATURE_MISMATCH(a, b, cap, write_drain, CAP_WD_MASK); + CHECK_FEATURE_MISMATCH(a, b, cap, pgsel_inv, CAP_PSI_MASK); + CHECK_FEATURE_MISMATCH(a, b, cap, zlr, CAP_ZLR_MASK); + CHECK_FEATURE_MISMATCH(a, b, cap, caching_mode, CAP_CM_MASK); + CHECK_FEATURE_MISMATCH(a, b, cap, phmr, CAP_PHMR_MASK); + CHECK_FEATURE_MISMATCH(a, b, cap, plmr, CAP_PLMR_MASK); + CHECK_FEATURE_MISMATCH(a, b, cap, rwbf, CAP_RWBF_MASK); + CHECK_FEATURE_MISMATCH(a, b, cap, afl, CAP_AFL_MASK); + CHECK_FEATURE_MISMATCH(a, b, ecap, rps, ECAP_RPS_MASK); + CHECK_FEATURE_MISMATCH(a, b, ecap, smpwc, ECAP_SMPWC_MASK); + CHECK_FEATURE_MISMATCH(a, b, ecap, flts, ECAP_FLTS_MASK); + CHECK_FEATURE_MISMATCH(a, b, ecap, slts, ECAP_SLTS_MASK); + CHECK_FEATURE_MISMATCH(a, b, ecap, nwfs, ECAP_NWFS_MASK); + CHECK_FEATURE_MISMATCH(a, b, ecap, slads, ECAP_SLADS_MASK); + CHECK_FEATURE_MISMATCH(a, b, ecap, vcs, ECAP_VCS_MASK); + CHECK_FEATURE_MISMATCH(a, b, ecap, smts, ECAP_SMTS_MASK); + CHECK_FEATURE_MISMATCH(a, b, ecap, pds, ECAP_PDS_MASK); + CHECK_FEATURE_MISMATCH(a, b, ecap, dit, ECAP_DIT_MASK); + CHECK_FEATURE_MISMATCH(a, b, ecap, pasid, ECAP_PASID_MASK); + CHECK_FEATURE_MISMATCH(a, b, ecap, eafs, ECAP_EAFS_MASK); + CHECK_FEATURE_MISMATCH(a, b, ecap, srs, ECAP_SRS_MASK); + CHECK_FEATURE_MISMATCH(a, b, ecap, ers, ECAP_ERS_MASK); + CHECK_FEATURE_MISMATCH(a, b, ecap, prs, ECAP_PRS_MASK); + CHECK_FEATURE_MISMATCH(a, b, ecap, nest, ECAP_NEST_MASK); + CHECK_FEATURE_MISMATCH(a, b, ecap, mts, ECAP_MTS_MASK); + CHECK_FEATURE_MISMATCH(a, b, ecap, sc_support, ECAP_SC_MASK); + CHECK_FEATURE_MISMATCH(a, b, ecap, pass_through, ECAP_PT_MASK); + CHECK_FEATURE_MISMATCH(a, b, ecap, dev_iotlb_support, ECAP_DT_MASK); + CHECK_FEATURE_MISMATCH(a, b, ecap, qis, ECAP_QI_MASK); + CHECK_FEATURE_MISMATCH(a, b, ecap, coherent, ECAP_C_MASK); +} + +static int cap_audit_hotplug(struct intel_iommu *iommu, enum cap_audit_type type) +{ + bool mismatch = false; + u64 old_cap = intel_iommu_cap_sanity; + u64 old_ecap = intel_iommu_ecap_sanity; + + if (type == CAP_AUDIT_HOTPLUG_IRQR) { + CHECK_FEATURE_MISMATCH_HOTPLUG(iommu, cap, pi_support, CAP_PI_MASK); + CHECK_FEATURE_MISMATCH_HOTPLUG(iommu, ecap, eim_support, ECAP_EIM_MASK); + goto out; + } + + CHECK_FEATURE_MISMATCH_HOTPLUG(iommu, cap, 5lp_support, CAP_FL5LP_MASK); + CHECK_FEATURE_MISMATCH_HOTPLUG(iommu, cap, fl1gp_support, CAP_FL1GP_MASK); + CHECK_FEATURE_MISMATCH_HOTPLUG(iommu, cap, read_drain, CAP_RD_MASK); + CHECK_FEATURE_MISMATCH_HOTPLUG(iommu, cap, write_drain, CAP_WD_MASK); + CHECK_FEATURE_MISMATCH_HOTPLUG(iommu, cap, pgsel_inv, CAP_PSI_MASK); + CHECK_FEATURE_MISMATCH_HOTPLUG(iommu, cap, zlr, CAP_ZLR_MASK); + CHECK_FEATURE_MISMATCH_HOTPLUG(iommu, cap, caching_mode, CAP_CM_MASK); + CHECK_FEATURE_MISMATCH_HOTPLUG(iommu, cap, phmr, CAP_PHMR_MASK); + CHECK_FEATURE_MISMATCH_HOTPLUG(iommu, cap, plmr, CAP_PLMR_MASK); + CHECK_FEATURE_MISMATCH_HOTPLUG(iommu, cap, rwbf, CAP_RWBF_MASK); + CHECK_FEATURE_MISMATCH_HOTPLUG(iommu, cap, afl, CAP_AFL_MASK); + CHECK_FEATURE_MISMATCH_HOTPLUG(iommu, ecap, rps, ECAP_RPS_MASK); + CHECK_FEATURE_MISMATCH_HOTPLUG(iommu, ecap, smpwc, ECAP_SMPWC_MASK); + CHECK_FEATURE_MISMATCH_HOTPLUG(iommu, ecap, flts, ECAP_FLTS_MASK); + CHECK_FEATURE_MISMATCH_HOTPLUG(iommu, ecap, slts, ECAP_SLTS_MASK); + CHECK_FEATURE_MISMATCH_HOTPLUG(iommu, ecap, nwfs, ECAP_NWFS_MASK); + CHECK_FEATURE_MISMATCH_HOTPLUG(iommu, ecap, slads, ECAP_SLADS_MASK); + CHECK_FEATURE_MISMATCH_HOTPLUG(iommu, ecap, vcs, ECAP_VCS_MASK); + CHECK_FEATURE_MISMATCH_HOTPLUG(iommu, ecap, smts, ECAP_SMTS_MASK); + CHECK_FEATURE_MISMATCH_HOTPLUG(iommu, ecap, pds, ECAP_PDS_MASK); + CHECK_FEATURE_MISMATCH_HOTPLUG(iommu, ecap, dit, ECAP_DIT_MASK); + CHECK_FEATURE_MISMATCH_HOTPLUG(iommu, ecap, pasid, ECAP_PASID_MASK); + CHECK_FEATURE_MISMATCH_HOTPLUG(iommu, ecap, eafs, ECAP_EAFS_MASK); + CHECK_FEATURE_MISMATCH_HOTPLUG(iommu, ecap, srs, ECAP_SRS_MASK); + CHECK_FEATURE_MISMATCH_HOTPLUG(iommu, ecap, ers, ECAP_ERS_MASK); + CHECK_FEATURE_MISMATCH_HOTPLUG(iommu, ecap, prs, ECAP_PRS_MASK); + CHECK_FEATURE_MISMATCH_HOTPLUG(iommu, ecap, nest, ECAP_NEST_MASK); + CHECK_FEATURE_MISMATCH_HOTPLUG(iommu, ecap, mts, ECAP_MTS_MASK); + CHECK_FEATURE_MISMATCH_HOTPLUG(iommu, ecap, sc_support, ECAP_SC_MASK); + CHECK_FEATURE_MISMATCH_HOTPLUG(iommu, ecap, pass_through, ECAP_PT_MASK); + CHECK_FEATURE_MISMATCH_HOTPLUG(iommu, ecap, dev_iotlb_support, ECAP_DT_MASK); + CHECK_FEATURE_MISMATCH_HOTPLUG(iommu, ecap, qis, ECAP_QI_MASK); + CHECK_FEATURE_MISMATCH_HOTPLUG(iommu, ecap, coherent, ECAP_C_MASK); + + /* Abort hot plug if the hot plug iommu feature is smaller than global */ + MINIMAL_FEATURE_HOTPLUG(iommu, cap, max_amask_val, CAP_MAMV_MASK, mismatch); + MINIMAL_FEATURE_HOTPLUG(iommu, cap, num_fault_regs, CAP_NFR_MASK, mismatch); + MINIMAL_FEATURE_HOTPLUG(iommu, cap, super_page_val, CAP_SLLPS_MASK, mismatch); + MINIMAL_FEATURE_HOTPLUG(iommu, cap, fault_reg_offset, CAP_FRO_MASK, mismatch); + MINIMAL_FEATURE_HOTPLUG(iommu, cap, mgaw, CAP_MGAW_MASK, mismatch); + MINIMAL_FEATURE_HOTPLUG(iommu, cap, sagaw, CAP_SAGAW_MASK, mismatch); + MINIMAL_FEATURE_HOTPLUG(iommu, cap, ndoms, CAP_NDOMS_MASK, mismatch); + MINIMAL_FEATURE_HOTPLUG(iommu, ecap, pss, ECAP_PSS_MASK, mismatch); + MINIMAL_FEATURE_HOTPLUG(iommu, ecap, max_handle_mask, ECAP_MHMV_MASK, mismatch); + MINIMAL_FEATURE_HOTPLUG(iommu, ecap, iotlb_offset, ECAP_IRO_MASK, mismatch); + +out: + if (mismatch) { + intel_iommu_cap_sanity = old_cap; + intel_iommu_ecap_sanity = old_ecap; + return -EFAULT; + } + + return 0; +} + +static int cap_audit_static(struct intel_iommu *iommu, enum cap_audit_type type) +{ + struct dmar_drhd_unit *d; + struct intel_iommu *i; + + rcu_read_lock(); + if (list_empty(&dmar_drhd_units)) + goto out; + + for_each_active_iommu(i, d) { + if (!iommu) { + intel_iommu_ecap_sanity = i->ecap; + intel_iommu_cap_sanity = i->cap; + iommu = i; + continue; + } + + if (type == CAP_AUDIT_STATIC_DMAR) + check_dmar_capabilities(iommu, i); + else + check_irq_capabilities(iommu, i); + } + +out: + rcu_read_unlock(); + return 0; +} + +int intel_cap_audit(enum cap_audit_type type, struct intel_iommu *iommu) +{ + switch (type) { + case CAP_AUDIT_STATIC_DMAR: + case CAP_AUDIT_STATIC_IRQR: + return cap_audit_static(iommu, type); + case CAP_AUDIT_HOTPLUG_DMAR: + case CAP_AUDIT_HOTPLUG_IRQR: + return cap_audit_hotplug(iommu, type); + default: + break; + } + + return -EFAULT; +} diff --git a/drivers/iommu/intel/cap_audit.h b/drivers/iommu/intel/cap_audit.h new file mode 100644 index 000000000000..a6a1530441b7 --- /dev/null +++ b/drivers/iommu/intel/cap_audit.h @@ -0,0 +1,110 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * cap_audit.h - audit iommu capabilities header + * + * Copyright (C) 2021 Intel Corporation + * + * Author: Kyung Min Park + */ + +/* + * Capability Register Mask + */ +#define CAP_FL5LP_MASK BIT_ULL(60) +#define CAP_PI_MASK BIT_ULL(59) +#define CAP_FL1GP_MASK BIT_ULL(56) +#define CAP_RD_MASK BIT_ULL(55) +#define CAP_WD_MASK BIT_ULL(54) +#define CAP_MAMV_MASK GENMASK_ULL(53, 48) +#define CAP_NFR_MASK GENMASK_ULL(47, 40) +#define CAP_PSI_MASK BIT_ULL(39) +#define CAP_SLLPS_MASK GENMASK_ULL(37, 34) +#define CAP_FRO_MASK GENMASK_ULL(33, 24) +#define CAP_ZLR_MASK BIT_ULL(22) +#define CAP_MGAW_MASK GENMASK_ULL(21, 16) +#define CAP_SAGAW_MASK GENMASK_ULL(12, 8) +#define CAP_CM_MASK BIT_ULL(7) +#define CAP_PHMR_MASK BIT_ULL(6) +#define CAP_PLMR_MASK BIT_ULL(5) +#define CAP_RWBF_MASK BIT_ULL(4) +#define CAP_AFL_MASK BIT_ULL(3) +#define CAP_NDOMS_MASK GENMASK_ULL(2, 0) + +/* + * Extended Capability Register Mask + */ +#define ECAP_RPS_MASK BIT_ULL(49) +#define ECAP_SMPWC_MASK BIT_ULL(48) +#define ECAP_FLTS_MASK BIT_ULL(47) +#define ECAP_SLTS_MASK BIT_ULL(46) +#define ECAP_SLADS_MASK BIT_ULL(45) +#define ECAP_VCS_MASK BIT_ULL(44) +#define ECAP_SMTS_MASK BIT_ULL(43) +#define ECAP_PDS_MASK BIT_ULL(42) +#define ECAP_DIT_MASK BIT_ULL(41) +#define ECAP_PASID_MASK BIT_ULL(40) +#define ECAP_PSS_MASK GENMASK_ULL(39, 35) +#define ECAP_EAFS_MASK BIT_ULL(34) +#define ECAP_NWFS_MASK BIT_ULL(33) +#define ECAP_SRS_MASK BIT_ULL(31) +#define ECAP_ERS_MASK BIT_ULL(30) +#define ECAP_PRS_MASK BIT_ULL(29) +#define ECAP_NEST_MASK BIT_ULL(26) +#define ECAP_MTS_MASK BIT_ULL(25) +#define ECAP_MHMV_MASK GENMASK_ULL(23, 20) +#define ECAP_IRO_MASK GENMASK_ULL(17, 8) +#define ECAP_SC_MASK BIT_ULL(7) +#define ECAP_PT_MASK BIT_ULL(6) +#define ECAP_EIM_MASK BIT_ULL(4) +#define ECAP_DT_MASK BIT_ULL(2) +#define ECAP_QI_MASK BIT_ULL(1) +#define ECAP_C_MASK BIT_ULL(0) + +/* + * u64 intel_iommu_cap_sanity, intel_iommu_ecap_sanity will be adjusted as each + * IOMMU gets audited. + */ +#define DO_CHECK_FEATURE_MISMATCH(a, b, cap, feature, MASK) \ +do { \ + if (cap##_##feature(a) != cap##_##feature(b)) { \ + intel_iommu_##cap##_sanity &= ~(MASK); \ + pr_info("IOMMU feature %s inconsistent", #feature); \ + } \ +} while (0) + +#define CHECK_FEATURE_MISMATCH(a, b, cap, feature, MASK) \ + DO_CHECK_FEATURE_MISMATCH((a)->cap, (b)->cap, cap, feature, MASK) + +#define CHECK_FEATURE_MISMATCH_HOTPLUG(b, cap, feature, MASK) \ +do { \ + if (cap##_##feature(intel_iommu_##cap##_sanity)) \ + DO_CHECK_FEATURE_MISMATCH(intel_iommu_##cap##_sanity, \ + (b)->cap, cap, feature, MASK); \ +} while (0) + +#define MINIMAL_FEATURE_IOMMU(iommu, cap, MASK) \ +do { \ + u64 min_feature = intel_iommu_##cap##_sanity & (MASK); \ + min_feature = min_t(u64, min_feature, (iommu)->cap & (MASK)); \ + intel_iommu_##cap##_sanity = (intel_iommu_##cap##_sanity & ~(MASK)) | \ + min_feature; \ +} while (0) + +#define MINIMAL_FEATURE_HOTPLUG(iommu, cap, feature, MASK, mismatch) \ +do { \ + if ((intel_iommu_##cap##_sanity & (MASK)) > \ + (cap##_##feature((iommu)->cap))) \ + mismatch = true; \ + else \ + (iommu)->cap = ((iommu)->cap & ~(MASK)) | \ + (intel_iommu_##cap##_sanity & (MASK)); \ +} while (0) + +enum cap_audit_type { + CAP_AUDIT_STATIC_DMAR, + CAP_AUDIT_STATIC_IRQR, + CAP_AUDIT_HOTPLUG_DMAR, + CAP_AUDIT_HOTPLUG_IRQR, +}; + +int intel_cap_audit(enum cap_audit_type type, struct intel_iommu *iommu); diff --git a/drivers/iommu/intel/iommu.c b/drivers/iommu/intel/iommu.c index 87d0ef79ba6a..de82d5da01f9 100644 --- a/drivers/iommu/intel/iommu.c +++ b/drivers/iommu/intel/iommu.c @@ -47,6 +47,7 @@ #include "../irq_remapping.h" #include "pasid.h" +#include "cap_audit.h" #define ROOT_SIZE VTD_PAGE_SIZE #define CONTEXT_SIZE VTD_PAGE_SIZE @@ -3206,6 +3207,10 @@ static int __init init_dmars(void) goto error; } + ret = intel_cap_audit(CAP_AUDIT_STATIC_DMAR, NULL); + if (ret) + goto free_iommu; + for_each_iommu(iommu, drhd) { if (drhd->ignored) { iommu_disable_translation(iommu); @@ -3757,6 +3762,10 @@ static int intel_iommu_add(struct dmar_drhd_unit *dmaru) if (g_iommus[iommu->seq_id]) return 0; + ret = intel_cap_audit(CAP_AUDIT_HOTPLUG_DMAR, iommu); + if (ret) + goto out; + if (hw_pass_through && !ecap_pass_through(iommu->ecap)) { pr_warn("%s: Doesn't support hardware pass through.\n", iommu->name); diff --git a/drivers/iommu/intel/irq_remapping.c b/drivers/iommu/intel/irq_remapping.c index 685200a5cff0..611ef5243cb6 100644 --- a/drivers/iommu/intel/irq_remapping.c +++ b/drivers/iommu/intel/irq_remapping.c @@ -22,6 +22,7 @@ #include #include "../irq_remapping.h" +#include "cap_audit.h" enum irq_mode { IRQ_REMAPPING, @@ -734,6 +735,9 @@ static int __init intel_prepare_irq_remapping(void) if (dmar_table_init() < 0) return -ENODEV; + if (intel_cap_audit(CAP_AUDIT_STATIC_IRQR, NULL)) + goto error; + if (!dmar_ir_support()) return -ENODEV; @@ -1439,6 +1443,10 @@ static int dmar_ir_add(struct dmar_drhd_unit *dmaru, struct intel_iommu *iommu) int ret; int eim = x2apic_enabled(); + ret = intel_cap_audit(CAP_AUDIT_HOTPLUG_IRQR, iommu); + if (ret) + return ret; + if (eim && !ecap_eim_support(iommu->ecap)) { pr_info("DRHD %Lx: EIM not supported by DRHD, ecap %Lx\n", iommu->reg_phys, iommu->ecap); diff --git a/include/linux/intel-iommu.h b/include/linux/intel-iommu.h index f8ab154c979d..1bc46b88711a 100644 --- a/include/linux/intel-iommu.h +++ b/include/linux/intel-iommu.h @@ -170,34 +170,37 @@ * Extended Capability Register */ +#define ecap_rps(e) (((e) >> 49) & 0x1) #define ecap_smpwc(e) (((e) >> 48) & 0x1) #define ecap_flts(e) (((e) >> 47) & 0x1) #define ecap_slts(e) (((e) >> 46) & 0x1) +#define ecap_slads(e) (((e) >> 45) & 0x1) #define ecap_vcs(e) (((e) >> 44) & 0x1) #define ecap_smts(e) (((e) >> 43) & 0x1) -#define ecap_dit(e) ((e >> 41) & 0x1) -#define ecap_pasid(e) ((e >> 40) & 0x1) -#define ecap_pss(e) ((e >> 35) & 0x1f) -#define ecap_eafs(e) ((e >> 34) & 0x1) -#define ecap_nwfs(e) ((e >> 33) & 0x1) -#define ecap_srs(e) ((e >> 31) & 0x1) -#define ecap_ers(e) ((e >> 30) & 0x1) -#define ecap_prs(e) ((e >> 29) & 0x1) -#define ecap_broken_pasid(e) ((e >> 28) & 0x1) -#define ecap_dis(e) ((e >> 27) & 0x1) -#define ecap_nest(e) ((e >> 26) & 0x1) -#define ecap_mts(e) ((e >> 25) & 0x1) -#define ecap_ecs(e) ((e >> 24) & 0x1) +#define ecap_dit(e) (((e) >> 41) & 0x1) +#define ecap_pds(e) (((e) >> 42) & 0x1) +#define ecap_pasid(e) (((e) >> 40) & 0x1) +#define ecap_pss(e) (((e) >> 35) & 0x1f) +#define ecap_eafs(e) (((e) >> 34) & 0x1) +#define ecap_nwfs(e) (((e) >> 33) & 0x1) +#define ecap_srs(e) (((e) >> 31) & 0x1) +#define ecap_ers(e) (((e) >> 30) & 0x1) +#define ecap_prs(e) (((e) >> 29) & 0x1) +#define ecap_broken_pasid(e) (((e) >> 28) & 0x1) +#define ecap_dis(e) (((e) >> 27) & 0x1) +#define ecap_nest(e) (((e) >> 26) & 0x1) +#define ecap_mts(e) (((e) >> 25) & 0x1) +#define ecap_ecs(e) (((e) >> 24) & 0x1) #define ecap_iotlb_offset(e) ((((e) >> 8) & 0x3ff) * 16) #define ecap_max_iotlb_offset(e) (ecap_iotlb_offset(e) + 16) #define ecap_coherent(e) ((e) & 0x1) #define ecap_qis(e) ((e) & 0x2) -#define ecap_pass_through(e) ((e >> 6) & 0x1) -#define ecap_eim_support(e) ((e >> 4) & 0x1) -#define ecap_ir_support(e) ((e >> 3) & 0x1) +#define ecap_pass_through(e) (((e) >> 6) & 0x1) +#define ecap_eim_support(e) (((e) >> 4) & 0x1) +#define ecap_ir_support(e) (((e) >> 3) & 0x1) #define ecap_dev_iotlb_support(e) (((e) >> 2) & 0x1) -#define ecap_max_handle_mask(e) ((e >> 20) & 0xf) -#define ecap_sc_support(e) ((e >> 7) & 0x1) /* Snooping Control */ +#define ecap_max_handle_mask(e) (((e) >> 20) & 0xf) +#define ecap_sc_support(e) (((e) >> 7) & 0x1) /* Snooping Control */ /* Virtual command interface capability */ #define vccap_pasid(v) (((v) & DMA_VCS_PAS)) /* PASID allocation */ From 010bf5659e01b0a169e8e6b9e6a8b7e62209470d Mon Sep 17 00:00:00 2001 From: Kyung Min Park Date: Thu, 4 Feb 2021 09:43:57 +0800 Subject: [PATCH 299/414] iommu/vt-d: Move capability check code to cap_audit files Move IOMMU capability check and sanity check code to cap_audit files. Also implement some helper functions for sanity checks. Signed-off-by: Kyung Min Park Signed-off-by: Lu Baolu Link: https://lore.kernel.org/r/20210130184452.31711-1-kyung.min.park@intel.com Link: https://lore.kernel.org/r/20210204014401.2846425-4-baolu.lu@linux.intel.com Signed-off-by: Joerg Roedel --- drivers/iommu/intel/cap_audit.c | 20 +++++++++ drivers/iommu/intel/cap_audit.h | 20 +++++++++ drivers/iommu/intel/iommu.c | 76 +-------------------------------- 3 files changed, 42 insertions(+), 74 deletions(-) diff --git a/drivers/iommu/intel/cap_audit.c b/drivers/iommu/intel/cap_audit.c index 049bc0c76a00..b12e421a2f1a 100644 --- a/drivers/iommu/intel/cap_audit.c +++ b/drivers/iommu/intel/cap_audit.c @@ -183,3 +183,23 @@ int intel_cap_audit(enum cap_audit_type type, struct intel_iommu *iommu) return -EFAULT; } + +bool intel_cap_smts_sanity(void) +{ + return ecap_smts(intel_iommu_ecap_sanity); +} + +bool intel_cap_pasid_sanity(void) +{ + return ecap_pasid(intel_iommu_ecap_sanity); +} + +bool intel_cap_nest_sanity(void) +{ + return ecap_nest(intel_iommu_ecap_sanity); +} + +bool intel_cap_flts_sanity(void) +{ + return ecap_flts(intel_iommu_ecap_sanity); +} diff --git a/drivers/iommu/intel/cap_audit.h b/drivers/iommu/intel/cap_audit.h index a6a1530441b7..74cfccae0e81 100644 --- a/drivers/iommu/intel/cap_audit.h +++ b/drivers/iommu/intel/cap_audit.h @@ -107,4 +107,24 @@ enum cap_audit_type { CAP_AUDIT_HOTPLUG_IRQR, }; +bool intel_cap_smts_sanity(void); +bool intel_cap_pasid_sanity(void); +bool intel_cap_nest_sanity(void); +bool intel_cap_flts_sanity(void); + +static inline bool scalable_mode_support(void) +{ + return (intel_iommu_sm && intel_cap_smts_sanity()); +} + +static inline bool pasid_mode_support(void) +{ + return scalable_mode_support() && intel_cap_pasid_sanity(); +} + +static inline bool nested_mode_support(void) +{ + return scalable_mode_support() && intel_cap_nest_sanity(); +} + int intel_cap_audit(enum cap_audit_type type, struct intel_iommu *iommu); diff --git a/drivers/iommu/intel/iommu.c b/drivers/iommu/intel/iommu.c index de82d5da01f9..7dfe681acc3d 100644 --- a/drivers/iommu/intel/iommu.c +++ b/drivers/iommu/intel/iommu.c @@ -1864,25 +1864,7 @@ static void free_dmar_iommu(struct intel_iommu *iommu) */ static bool first_level_by_default(void) { - struct dmar_drhd_unit *drhd; - struct intel_iommu *iommu; - static int first_level_support = -1; - - if (likely(first_level_support != -1)) - return first_level_support; - - first_level_support = 1; - - rcu_read_lock(); - for_each_active_iommu(iommu, drhd) { - if (!sm_supported(iommu) || !ecap_flts(iommu->ecap)) { - first_level_support = 0; - break; - } - } - rcu_read_unlock(); - - return first_level_support; + return scalable_mode_support() && intel_cap_flts_sanity(); } static struct dmar_domain *alloc_domain(int flags) @@ -5058,60 +5040,6 @@ static phys_addr_t intel_iommu_iova_to_phys(struct iommu_domain *domain, return phys; } -static inline bool scalable_mode_support(void) -{ - struct dmar_drhd_unit *drhd; - struct intel_iommu *iommu; - bool ret = true; - - rcu_read_lock(); - for_each_active_iommu(iommu, drhd) { - if (!sm_supported(iommu)) { - ret = false; - break; - } - } - rcu_read_unlock(); - - return ret; -} - -static inline bool iommu_pasid_support(void) -{ - struct dmar_drhd_unit *drhd; - struct intel_iommu *iommu; - bool ret = true; - - rcu_read_lock(); - for_each_active_iommu(iommu, drhd) { - if (!pasid_supported(iommu)) { - ret = false; - break; - } - } - rcu_read_unlock(); - - return ret; -} - -static inline bool nested_mode_support(void) -{ - struct dmar_drhd_unit *drhd; - struct intel_iommu *iommu; - bool ret = true; - - rcu_read_lock(); - for_each_active_iommu(iommu, drhd) { - if (!sm_supported(iommu) || !ecap_nest(iommu->ecap)) { - ret = false; - break; - } - } - rcu_read_unlock(); - - return ret; -} - static bool intel_iommu_capable(enum iommu_cap cap) { if (cap == IOMMU_CAP_CACHE_COHERENCY) @@ -5352,7 +5280,7 @@ intel_iommu_dev_has_feat(struct device *dev, enum iommu_dev_features feat) int ret; if (!dev_is_pci(dev) || dmar_disabled || - !scalable_mode_support() || !iommu_pasid_support()) + !scalable_mode_support() || !pasid_mode_support()) return false; ret = pci_pasid_features(to_pci_dev(dev)); From 933fcd01e97e2ba29880dd5f1239365e40094950 Mon Sep 17 00:00:00 2001 From: Lu Baolu Date: Thu, 4 Feb 2021 09:43:58 +0800 Subject: [PATCH 300/414] iommu/vt-d: Add iotlb_sync_map callback Some Intel VT-d hardware implementations don't support memory coherency for page table walk (presented by the Page-Walk-coherency bit in the ecap register), so that software must flush the corresponding CPU cache lines explicitly after each page table entry update. The iommu_map_sg() code iterates through the given scatter-gather list and invokes iommu_map() for each element in the scatter-gather list, which calls into the vendor IOMMU driver through iommu_ops callback. As the result, a single sg mapping may lead to multiple cache line flushes, which leads to the degradation of I/O performance after the commit ("iommu/vt-d: Convert intel iommu driver to the iommu ops"). Fix this by adding iotlb_sync_map callback and centralizing the clflush operations after all sg mappings. Fixes: c588072bba6b5 ("iommu/vt-d: Convert intel iommu driver to the iommu ops") Reported-by: Chuck Lever Link: https://lore.kernel.org/linux-iommu/D81314ED-5673-44A6-B597-090E3CB83EB0@oracle.com/ Signed-off-by: Lu Baolu Cc: Robin Murphy [ cel: removed @first_pte, which is no longer used ] Signed-off-by: Chuck Lever Link: https://lore.kernel.org/linux-iommu/161177763962.1311.15577661784296014186.stgit@manet.1015granger.net Link: https://lore.kernel.org/r/20210204014401.2846425-5-baolu.lu@linux.intel.com Signed-off-by: Joerg Roedel --- drivers/iommu/intel/iommu.c | 90 ++++++++++++++++++++++++------------- 1 file changed, 60 insertions(+), 30 deletions(-) diff --git a/drivers/iommu/intel/iommu.c b/drivers/iommu/intel/iommu.c index 7dfe681acc3d..761c1b251c1e 100644 --- a/drivers/iommu/intel/iommu.c +++ b/drivers/iommu/intel/iommu.c @@ -2283,9 +2283,9 @@ static int __domain_mapping(struct dmar_domain *domain, unsigned long iov_pfn, unsigned long phys_pfn, unsigned long nr_pages, int prot) { - struct dma_pte *first_pte = NULL, *pte = NULL; unsigned int largepage_lvl = 0; unsigned long lvl_pages = 0; + struct dma_pte *pte = NULL; phys_addr_t pteval; u64 attr; @@ -2314,7 +2314,7 @@ __domain_mapping(struct dmar_domain *domain, unsigned long iov_pfn, largepage_lvl = hardware_largepage_caps(domain, iov_pfn, phys_pfn, nr_pages); - first_pte = pte = pfn_to_dma_pte(domain, iov_pfn, &largepage_lvl); + pte = pfn_to_dma_pte(domain, iov_pfn, &largepage_lvl); if (!pte) return -ENOMEM; /* It is large page*/ @@ -2375,34 +2375,14 @@ __domain_mapping(struct dmar_domain *domain, unsigned long iov_pfn, * recalculate 'pte' and switch back to smaller pages for the * end of the mapping, if the trailing size is not enough to * use another superpage (i.e. nr_pages < lvl_pages). + * + * We leave clflush for the leaf pte changes to iotlb_sync_map() + * callback. */ pte++; if (!nr_pages || first_pte_in_page(pte) || - (largepage_lvl > 1 && nr_pages < lvl_pages)) { - domain_flush_cache(domain, first_pte, - (void *)pte - (void *)first_pte); + (largepage_lvl > 1 && nr_pages < lvl_pages)) pte = NULL; - } - } - - return 0; -} - -static int -domain_mapping(struct dmar_domain *domain, unsigned long iov_pfn, - unsigned long phys_pfn, unsigned long nr_pages, int prot) -{ - int iommu_id, ret; - struct intel_iommu *iommu; - - /* Do the real mapping first */ - ret = __domain_mapping(domain, iov_pfn, phys_pfn, nr_pages, prot); - if (ret) - return ret; - - for_each_domain_iommu(iommu_id, domain) { - iommu = g_iommus[iommu_id]; - __mapping_notify_one(iommu, domain, iov_pfn, nr_pages); } return 0; @@ -4943,7 +4923,6 @@ static int intel_iommu_map(struct iommu_domain *domain, struct dmar_domain *dmar_domain = to_dmar_domain(domain); u64 max_addr; int prot = 0; - int ret; if (iommu_prot & IOMMU_READ) prot |= DMA_PTE_READ; @@ -4969,9 +4948,8 @@ static int intel_iommu_map(struct iommu_domain *domain, /* Round up size to next multiple of PAGE_SIZE, if it and the low bits of hpa would take us onto the next page */ size = aligned_nrpages(hpa, size); - ret = domain_mapping(dmar_domain, iova >> VTD_PAGE_SHIFT, - hpa >> VTD_PAGE_SHIFT, size, prot); - return ret; + return __domain_mapping(dmar_domain, iova >> VTD_PAGE_SHIFT, + hpa >> VTD_PAGE_SHIFT, size, prot); } static size_t intel_iommu_unmap(struct iommu_domain *domain, @@ -5424,6 +5402,57 @@ static bool risky_device(struct pci_dev *pdev) return false; } +static void clflush_sync_map(struct dmar_domain *domain, unsigned long clf_pfn, + unsigned long clf_pages) +{ + struct dma_pte *first_pte = NULL, *pte = NULL; + unsigned long lvl_pages = 0; + int level = 0; + + while (clf_pages > 0) { + if (!pte) { + level = 0; + pte = pfn_to_dma_pte(domain, clf_pfn, &level); + if (WARN_ON(!pte)) + return; + first_pte = pte; + lvl_pages = lvl_to_nr_pages(level); + } + + if (WARN_ON(!lvl_pages || clf_pages < lvl_pages)) + return; + + clf_pages -= lvl_pages; + clf_pfn += lvl_pages; + pte++; + + if (!clf_pages || first_pte_in_page(pte) || + (level > 1 && clf_pages < lvl_pages)) { + domain_flush_cache(domain, first_pte, + (void *)pte - (void *)first_pte); + pte = NULL; + } + } +} + +static void intel_iommu_iotlb_sync_map(struct iommu_domain *domain, + unsigned long iova, size_t size) +{ + struct dmar_domain *dmar_domain = to_dmar_domain(domain); + unsigned long pages = aligned_nrpages(iova, size); + unsigned long pfn = iova >> VTD_PAGE_SHIFT; + struct intel_iommu *iommu; + int iommu_id; + + if (!dmar_domain->iommu_coherency) + clflush_sync_map(dmar_domain, pfn, pages); + + for_each_domain_iommu(iommu_id, dmar_domain) { + iommu = g_iommus[iommu_id]; + __mapping_notify_one(iommu, dmar_domain, pfn, pages); + } +} + const struct iommu_ops intel_iommu_ops = { .capable = intel_iommu_capable, .domain_alloc = intel_iommu_domain_alloc, @@ -5436,6 +5465,7 @@ const struct iommu_ops intel_iommu_ops = { .aux_detach_dev = intel_iommu_aux_detach_device, .aux_get_pasid = intel_iommu_aux_get_pasid, .map = intel_iommu_map, + .iotlb_sync_map = intel_iommu_iotlb_sync_map, .unmap = intel_iommu_unmap, .flush_iotlb_all = intel_flush_iotlb_all, .iotlb_sync = intel_iommu_tlb_sync, From 81d3c75bb3c32ca61712e093b8dce89d73c22150 Mon Sep 17 00:00:00 2001 From: Yian Chen Date: Thu, 4 Feb 2021 09:43:59 +0800 Subject: [PATCH 301/414] iommu/vt-d: Add new enum value and structure for SATC Starting from Intel Platform VT-d v3.2, BIOS may provide new remapping structure SATC for SOC integrated devices, according to section 8.8 of Intel VT-d architecture specification v3.2. The SATC structure reports a list of the devices that require ATS for normal device operation. It is a functional requirement that these devices will not work without OS enabling ATS capability. This patch introduces the new enum value and structure to represent the remapping information. Kernel should parse the information from the reporting structure and enable ATC for the devices as needed. Signed-off-by: Yian Chen Signed-off-by: Lu Baolu Link: https://lore.kernel.org/r/20210203093329.1617808-1-baolu.lu@linux.intel.com Link: https://lore.kernel.org/r/20210204014401.2846425-6-baolu.lu@linux.intel.com Signed-off-by: Joerg Roedel --- include/acpi/actbl1.h | 11 ++++++++++- 1 file changed, 10 insertions(+), 1 deletion(-) diff --git a/include/acpi/actbl1.h b/include/acpi/actbl1.h index 43549547ed3e..c38e08cf1b9e 100644 --- a/include/acpi/actbl1.h +++ b/include/acpi/actbl1.h @@ -514,7 +514,8 @@ enum acpi_dmar_type { ACPI_DMAR_TYPE_ROOT_ATS = 2, ACPI_DMAR_TYPE_HARDWARE_AFFINITY = 3, ACPI_DMAR_TYPE_NAMESPACE = 4, - ACPI_DMAR_TYPE_RESERVED = 5 /* 5 and greater are reserved */ + ACPI_DMAR_TYPE_SATC = 5, + ACPI_DMAR_TYPE_RESERVED = 6 /* 6 and greater are reserved */ }; /* DMAR Device Scope structure */ @@ -607,6 +608,14 @@ struct acpi_dmar_andd { char device_name[1]; }; +/* 5: SOC Integrated Address Translation Cache Reporting Structure */ + +struct acpi_dmar_satc { + struct acpi_dmar_header header; + u8 flags; + u8 reserved; + u16 segment; +}; /******************************************************************************* * * DRTM - Dynamic Root of Trust for Measurement table From 31a75cbbb9274cf8185f402904bf11386917870b Mon Sep 17 00:00:00 2001 From: Yian Chen Date: Thu, 4 Feb 2021 09:44:00 +0800 Subject: [PATCH 302/414] iommu/vt-d: Parse SATC reporting structure Software should parse every SATC table and all devices in the tables reported by the BIOS and keep the information in kernel list for further reference. Signed-off-by: Yian Chen Signed-off-by: Lu Baolu Link: https://lore.kernel.org/r/20210203093329.1617808-1-baolu.lu@linux.intel.com Link: https://lore.kernel.org/r/20210204014401.2846425-7-baolu.lu@linux.intel.com Signed-off-by: Joerg Roedel --- drivers/iommu/intel/dmar.c | 8 ++++ drivers/iommu/intel/iommu.c | 89 +++++++++++++++++++++++++++++++++++++ include/linux/dmar.h | 2 + 3 files changed, 99 insertions(+) diff --git a/drivers/iommu/intel/dmar.c b/drivers/iommu/intel/dmar.c index bd51f33642e0..6b8fce1c85c4 100644 --- a/drivers/iommu/intel/dmar.c +++ b/drivers/iommu/intel/dmar.c @@ -526,6 +526,7 @@ dmar_table_print_dmar_entry(struct acpi_dmar_header *header) struct acpi_dmar_reserved_memory *rmrr; struct acpi_dmar_atsr *atsr; struct acpi_dmar_rhsa *rhsa; + struct acpi_dmar_satc *satc; switch (header->type) { case ACPI_DMAR_TYPE_HARDWARE_UNIT: @@ -555,6 +556,10 @@ dmar_table_print_dmar_entry(struct acpi_dmar_header *header) /* We don't print this here because we need to sanity-check it first. So print it in dmar_parse_one_andd() instead. */ break; + case ACPI_DMAR_TYPE_SATC: + satc = container_of(header, struct acpi_dmar_satc, header); + pr_info("SATC flags: 0x%x\n", satc->flags); + break; } } @@ -642,6 +647,7 @@ parse_dmar_table(void) .cb[ACPI_DMAR_TYPE_ROOT_ATS] = &dmar_parse_one_atsr, .cb[ACPI_DMAR_TYPE_HARDWARE_AFFINITY] = &dmar_parse_one_rhsa, .cb[ACPI_DMAR_TYPE_NAMESPACE] = &dmar_parse_one_andd, + .cb[ACPI_DMAR_TYPE_SATC] = &dmar_parse_one_satc, }; /* @@ -2077,6 +2083,7 @@ static guid_t dmar_hp_guid = #define DMAR_DSM_FUNC_DRHD 1 #define DMAR_DSM_FUNC_ATSR 2 #define DMAR_DSM_FUNC_RHSA 3 +#define DMAR_DSM_FUNC_SATC 4 static inline bool dmar_detect_dsm(acpi_handle handle, int func) { @@ -2094,6 +2101,7 @@ static int dmar_walk_dsm_resource(acpi_handle handle, int func, [DMAR_DSM_FUNC_DRHD] = ACPI_DMAR_TYPE_HARDWARE_UNIT, [DMAR_DSM_FUNC_ATSR] = ACPI_DMAR_TYPE_ROOT_ATS, [DMAR_DSM_FUNC_RHSA] = ACPI_DMAR_TYPE_HARDWARE_AFFINITY, + [DMAR_DSM_FUNC_SATC] = ACPI_DMAR_TYPE_SATC, }; if (!dmar_detect_dsm(handle, func)) diff --git a/drivers/iommu/intel/iommu.c b/drivers/iommu/intel/iommu.c index 761c1b251c1e..384cc546632a 100644 --- a/drivers/iommu/intel/iommu.c +++ b/drivers/iommu/intel/iommu.c @@ -316,8 +316,18 @@ struct dmar_atsr_unit { u8 include_all:1; /* include all ports */ }; +struct dmar_satc_unit { + struct list_head list; /* list of SATC units */ + struct acpi_dmar_header *hdr; /* ACPI header */ + struct dmar_dev_scope *devices; /* target devices */ + struct intel_iommu *iommu; /* the corresponding iommu */ + int devices_cnt; /* target device count */ + u8 atc_required:1; /* ATS is required */ +}; + static LIST_HEAD(dmar_atsr_units); static LIST_HEAD(dmar_rmrr_units); +static LIST_HEAD(dmar_satc_units); #define for_each_rmrr_units(rmrr) \ list_for_each_entry(rmrr, &dmar_rmrr_units, list) @@ -3716,6 +3726,57 @@ int dmar_check_one_atsr(struct acpi_dmar_header *hdr, void *arg) return 0; } +static struct dmar_satc_unit *dmar_find_satc(struct acpi_dmar_satc *satc) +{ + struct dmar_satc_unit *satcu; + struct acpi_dmar_satc *tmp; + + list_for_each_entry_rcu(satcu, &dmar_satc_units, list, + dmar_rcu_check()) { + tmp = (struct acpi_dmar_satc *)satcu->hdr; + if (satc->segment != tmp->segment) + continue; + if (satc->header.length != tmp->header.length) + continue; + if (memcmp(satc, tmp, satc->header.length) == 0) + return satcu; + } + + return NULL; +} + +int dmar_parse_one_satc(struct acpi_dmar_header *hdr, void *arg) +{ + struct acpi_dmar_satc *satc; + struct dmar_satc_unit *satcu; + + if (system_state >= SYSTEM_RUNNING && !intel_iommu_enabled) + return 0; + + satc = container_of(hdr, struct acpi_dmar_satc, header); + satcu = dmar_find_satc(satc); + if (satcu) + return 0; + + satcu = kzalloc(sizeof(*satcu) + hdr->length, GFP_KERNEL); + if (!satcu) + return -ENOMEM; + + satcu->hdr = (void *)(satcu + 1); + memcpy(satcu->hdr, hdr, hdr->length); + satcu->atc_required = satc->flags & 0x1; + satcu->devices = dmar_alloc_dev_scope((void *)(satc + 1), + (void *)satc + satc->header.length, + &satcu->devices_cnt); + if (satcu->devices_cnt && !satcu->devices) { + kfree(satcu); + return -ENOMEM; + } + list_add_rcu(&satcu->list, &dmar_satc_units); + + return 0; +} + static int intel_iommu_add(struct dmar_drhd_unit *dmaru) { int sp, ret; @@ -3823,6 +3884,7 @@ static void intel_iommu_free_dmars(void) { struct dmar_rmrr_unit *rmrru, *rmrr_n; struct dmar_atsr_unit *atsru, *atsr_n; + struct dmar_satc_unit *satcu, *satc_n; list_for_each_entry_safe(rmrru, rmrr_n, &dmar_rmrr_units, list) { list_del(&rmrru->list); @@ -3834,6 +3896,11 @@ static void intel_iommu_free_dmars(void) list_del(&atsru->list); intel_iommu_free_atsr(atsru); } + list_for_each_entry_safe(satcu, satc_n, &dmar_satc_units, list) { + list_del(&satcu->list); + dmar_free_dev_scope(&satcu->devices, &satcu->devices_cnt); + kfree(satcu); + } } int dmar_find_matched_atsr_unit(struct pci_dev *dev) @@ -3885,8 +3952,10 @@ int dmar_iommu_notify_scope_dev(struct dmar_pci_notify_info *info) int ret; struct dmar_rmrr_unit *rmrru; struct dmar_atsr_unit *atsru; + struct dmar_satc_unit *satcu; struct acpi_dmar_atsr *atsr; struct acpi_dmar_reserved_memory *rmrr; + struct acpi_dmar_satc *satc; if (!intel_iommu_enabled && system_state >= SYSTEM_RUNNING) return 0; @@ -3927,6 +3996,23 @@ int dmar_iommu_notify_scope_dev(struct dmar_pci_notify_info *info) break; } } + list_for_each_entry(satcu, &dmar_satc_units, list) { + satc = container_of(satcu->hdr, struct acpi_dmar_satc, header); + if (info->event == BUS_NOTIFY_ADD_DEVICE) { + ret = dmar_insert_dev_scope(info, (void *)(satc + 1), + (void *)satc + satc->header.length, + satc->segment, satcu->devices, + satcu->devices_cnt); + if (ret > 0) + break; + else if (ret < 0) + return ret; + } else if (info->event == BUS_NOTIFY_REMOVED_DEVICE) { + if (dmar_remove_dev_scope(info, satc->segment, + satcu->devices, satcu->devices_cnt)) + break; + } + } return 0; } @@ -4270,6 +4356,9 @@ int __init intel_iommu_init(void) if (list_empty(&dmar_atsr_units)) pr_info("No ATSR found\n"); + if (list_empty(&dmar_satc_units)) + pr_info("No SATC found\n"); + if (dmar_map_gfx) intel_iommu_gfx_mapped = 1; diff --git a/include/linux/dmar.h b/include/linux/dmar.h index 65565820328a..e04436a7ff27 100644 --- a/include/linux/dmar.h +++ b/include/linux/dmar.h @@ -138,6 +138,7 @@ extern void intel_iommu_shutdown(void); extern int dmar_parse_one_rmrr(struct acpi_dmar_header *header, void *arg); extern int dmar_parse_one_atsr(struct acpi_dmar_header *header, void *arg); extern int dmar_check_one_atsr(struct acpi_dmar_header *hdr, void *arg); +extern int dmar_parse_one_satc(struct acpi_dmar_header *hdr, void *arg); extern int dmar_release_one_atsr(struct acpi_dmar_header *hdr, void *arg); extern int dmar_iommu_hotplug(struct dmar_drhd_unit *dmaru, bool insert); extern int dmar_iommu_notify_scope_dev(struct dmar_pci_notify_info *info); @@ -149,6 +150,7 @@ static inline void intel_iommu_shutdown(void) { } #define dmar_parse_one_atsr dmar_res_noop #define dmar_check_one_atsr dmar_res_noop #define dmar_release_one_atsr dmar_res_noop +#define dmar_parse_one_satc dmar_res_noop static inline int dmar_iommu_notify_scope_dev(struct dmar_pci_notify_info *info) { From 2da2687b5116b0b791b14e56ff089c549986e48e Mon Sep 17 00:00:00 2001 From: Lukas Bulwahn Date: Mon, 25 Jan 2021 15:48:47 +0100 Subject: [PATCH 303/414] kgdb: rectify kernel-doc for kgdb_unregister_io_module() The command 'find ./kernel/debug/ | xargs ./scripts/kernel-doc -none' reported a typo in the kernel-doc of kgdb_unregister_io_module(). Rectify the kernel-doc, such that no issues remain for ./kernel/debug/. Signed-off-by: Lukas Bulwahn Link: https://lore.kernel.org/r/20210125144847.21896-1-lukas.bulwahn@gmail.com Signed-off-by: Daniel Thompson --- kernel/debug/debug_core.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/kernel/debug/debug_core.c b/kernel/debug/debug_core.c index af6e8b4fb359..7f22c1c0ffe8 100644 --- a/kernel/debug/debug_core.c +++ b/kernel/debug/debug_core.c @@ -1166,7 +1166,7 @@ int kgdb_register_io_module(struct kgdb_io *new_dbg_io_ops) EXPORT_SYMBOL_GPL(kgdb_register_io_module); /** - * kkgdb_unregister_io_module - unregister KGDB IO module + * kgdb_unregister_io_module - unregister KGDB IO module * @old_dbg_io_ops: the io ops vector * * Unregister it with the KGDB core. From cbd026e1d84bd7ca18fd76883f7733ac72d44000 Mon Sep 17 00:00:00 2001 From: wengjianfeng Date: Wed, 3 Feb 2021 16:10:34 +0800 Subject: [PATCH 304/414] kernel: debug: fix typo issue change 'regster' to 'register'. Signed-off-by: wengjianfeng Link: https://lore.kernel.org/r/20210203081034.9004-1-samirweng1979@163.com Signed-off-by: Daniel Thompson --- kernel/debug/gdbstub.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/kernel/debug/gdbstub.c b/kernel/debug/gdbstub.c index a77df59d9ca5..e149a0ac9e9e 100644 --- a/kernel/debug/gdbstub.c +++ b/kernel/debug/gdbstub.c @@ -595,7 +595,7 @@ static char *gdb_hex_reg_helper(int regnum, char *out) dbg_reg_def[i].size); } -/* Handle the 'p' individual regster get */ +/* Handle the 'p' individual register get */ static void gdb_cmd_reg_get(struct kgdb_state *ks) { unsigned long regnum; @@ -610,7 +610,7 @@ static void gdb_cmd_reg_get(struct kgdb_state *ks) gdb_hex_reg_helper(regnum, remcom_out_buffer); } -/* Handle the 'P' individual regster set */ +/* Handle the 'P' individual register set */ static void gdb_cmd_reg_set(struct kgdb_state *ks) { unsigned long regnum; From 0759d8072843fe621b4d7abb31a7b7bc84ae4159 Mon Sep 17 00:00:00 2001 From: Stephen Zhang Date: Thu, 4 Feb 2021 20:07:09 +0800 Subject: [PATCH 305/414] kdb: kdb_support: Fix debugging information problem There are several common patterns. 0: kdb_printf("...",...); which is the normal one. 1: kdb_printf("%s: "...,__func__,...) We could improve '1' to this : #define kdb_func_printf(format, args...) \ kdb_printf("%s: " format, __func__, ## args) 2: if(KDB_DEBUG(AR)) kdb_printf("%s "...,__func__,...); We could improve '2' to this : #define kdb_dbg_printf(mask, format, args...) \ do { \ if (KDB_DEBUG(mask)) \ kdb_func_printf(format, ## args); \ } while (0) In addition, we changed the format code of size_t to %zu. Signed-off-by: Stephen Zhang Link: https://lore.kernel.org/r/1612440429-6391-1-git-send-email-stephenzhangzsd@gmail.com Reviewed-by: Douglas Anderson [daniel.thompson@linaro.org: Minor typo and line length fixes in the patch description] Signed-off-by: Daniel Thompson --- kernel/debug/kdb/kdb_private.h | 10 +++++++ kernel/debug/kdb/kdb_support.c | 53 +++++++++++++--------------------- 2 files changed, 30 insertions(+), 33 deletions(-) diff --git a/kernel/debug/kdb/kdb_private.h b/kernel/debug/kdb/kdb_private.h index a4281fb99299..0a56d35f6eea 100644 --- a/kernel/debug/kdb/kdb_private.h +++ b/kernel/debug/kdb/kdb_private.h @@ -254,4 +254,14 @@ extern char kdb_prompt_str[]; #define KDB_WORD_SIZE ((int)sizeof(unsigned long)) #endif /* CONFIG_KGDB_KDB */ + +#define kdb_func_printf(format, args...) \ + kdb_printf("%s: " format, __func__, ## args) + +#define kdb_dbg_printf(mask, format, args...) \ + do { \ + if (KDB_DEBUG(mask)) \ + kdb_func_printf(format, ## args); \ + } while (0) + #endif /* !_KDBPRIVATE_H */ diff --git a/kernel/debug/kdb/kdb_support.c b/kernel/debug/kdb/kdb_support.c index 6226502ce049..f7c1885abeb6 100644 --- a/kernel/debug/kdb/kdb_support.c +++ b/kernel/debug/kdb/kdb_support.c @@ -39,20 +39,15 @@ */ int kdbgetsymval(const char *symname, kdb_symtab_t *symtab) { - if (KDB_DEBUG(AR)) - kdb_printf("kdbgetsymval: symname=%s, symtab=%px\n", symname, - symtab); + kdb_dbg_printf(AR, "symname=%s, symtab=%px\n", symname, symtab); memset(symtab, 0, sizeof(*symtab)); symtab->sym_start = kallsyms_lookup_name(symname); if (symtab->sym_start) { - if (KDB_DEBUG(AR)) - kdb_printf("kdbgetsymval: returns 1, " - "symtab->sym_start=0x%lx\n", - symtab->sym_start); + kdb_dbg_printf(AR, "returns 1, symtab->sym_start=0x%lx\n", + symtab->sym_start); return 1; } - if (KDB_DEBUG(AR)) - kdb_printf("kdbgetsymval: returns 0\n"); + kdb_dbg_printf(AR, "returns 0\n"); return 0; } EXPORT_SYMBOL(kdbgetsymval); @@ -87,16 +82,14 @@ int kdbnearsym(unsigned long addr, kdb_symtab_t *symtab) #define knt1_size 128 /* must be >= kallsyms table size */ char *knt1 = NULL; - if (KDB_DEBUG(AR)) - kdb_printf("kdbnearsym: addr=0x%lx, symtab=%px\n", addr, symtab); + kdb_dbg_printf(AR, "addr=0x%lx, symtab=%px\n", addr, symtab); memset(symtab, 0, sizeof(*symtab)); if (addr < 4096) goto out; knt1 = debug_kmalloc(knt1_size, GFP_ATOMIC); if (!knt1) { - kdb_printf("kdbnearsym: addr=0x%lx cannot kmalloc knt1\n", - addr); + kdb_func_printf("addr=0x%lx cannot kmalloc knt1\n", addr); goto out; } symtab->sym_name = kallsyms_lookup(addr, &symbolsize , &offset, @@ -147,11 +140,8 @@ int kdbnearsym(unsigned long addr, kdb_symtab_t *symtab) if (symtab->mod_name == NULL) symtab->mod_name = "kernel"; - if (KDB_DEBUG(AR)) - kdb_printf("kdbnearsym: returns %d symtab->sym_start=0x%lx, " - "symtab->mod_name=%px, symtab->sym_name=%px (%s)\n", ret, - symtab->sym_start, symtab->mod_name, symtab->sym_name, - symtab->sym_name); + kdb_dbg_printf(AR, "returns %d symtab->sym_start=0x%lx, symtab->mod_name=%px, symtab->sym_name=%px (%s)\n", + ret, symtab->sym_start, symtab->mod_name, symtab->sym_name, symtab->sym_name); out: debug_kfree(knt1); @@ -328,7 +318,7 @@ int kdb_getarea_size(void *res, unsigned long addr, size_t size) int ret = copy_from_kernel_nofault((char *)res, (char *)addr, size); if (ret) { if (!KDB_STATE(SUPPRESS)) { - kdb_printf("kdb_getarea: Bad address 0x%lx\n", addr); + kdb_func_printf("Bad address 0x%lx\n", addr); KDB_STATE_SET(SUPPRESS); } ret = KDB_BADADDR; @@ -353,7 +343,7 @@ int kdb_putarea_size(unsigned long addr, void *res, size_t size) int ret = copy_from_kernel_nofault((char *)addr, (char *)res, size); if (ret) { if (!KDB_STATE(SUPPRESS)) { - kdb_printf("kdb_putarea: Bad address 0x%lx\n", addr); + kdb_func_printf("Bad address 0x%lx\n", addr); KDB_STATE_SET(SUPPRESS); } ret = KDB_BADADDR; @@ -435,7 +425,7 @@ int kdb_getphysword(unsigned long *word, unsigned long addr, size_t size) fallthrough; default: diag = KDB_BADWIDTH; - kdb_printf("kdb_getphysword: bad width %ld\n", (long) size); + kdb_func_printf("bad width %zu\n", size); } return diag; } @@ -484,7 +474,7 @@ int kdb_getword(unsigned long *word, unsigned long addr, size_t size) fallthrough; default: diag = KDB_BADWIDTH; - kdb_printf("kdb_getword: bad width %ld\n", (long) size); + kdb_func_printf("bad width %zu\n", size); } return diag; } @@ -528,7 +518,7 @@ int kdb_putword(unsigned long addr, unsigned long word, size_t size) fallthrough; default: diag = KDB_BADWIDTH; - kdb_printf("kdb_putword: bad width %ld\n", (long) size); + kdb_func_printf("bad width %zu\n", size); } return diag; } @@ -602,8 +592,7 @@ unsigned long kdb_task_state_string(const char *s) res = ~0UL; break; default: - kdb_printf("%s: unknown flag '%c' ignored\n", - __func__, *s); + kdb_func_printf("unknown flag '%c' ignored\n", *s); break; } ++s; @@ -884,18 +873,16 @@ void debug_kusage(void) if (!debug_kusage_one_time) goto out; debug_kusage_one_time = 0; - kdb_printf("%s: debug_kmalloc memory leak dah_first %d\n", - __func__, dah_first); + kdb_func_printf("debug_kmalloc memory leak dah_first %d\n", dah_first); if (dah_first) { h_used = (struct debug_alloc_header *)debug_alloc_pool; - kdb_printf("%s: h_used %px size %d\n", __func__, h_used, - h_used->size); + kdb_func_printf("h_used %px size %d\n", h_used, h_used->size); } do { h_used = (struct debug_alloc_header *) ((char *)h_free + dah_overhead + h_free->size); - kdb_printf("%s: h_used %px size %d caller %px\n", - __func__, h_used, h_used->size, h_used->caller); + kdb_func_printf("h_used %px size %d caller %px\n", + h_used, h_used->size, h_used->caller); h_free = (struct debug_alloc_header *) (debug_alloc_pool + h_free->next); } while (h_free->next); @@ -903,8 +890,8 @@ void debug_kusage(void) ((char *)h_free + dah_overhead + h_free->size); if ((char *)h_used - debug_alloc_pool != sizeof(debug_alloc_pool_aligned)) - kdb_printf("%s: h_used %px size %d caller %px\n", - __func__, h_used, h_used->size, h_used->caller); + kdb_func_printf("h_used %px size %d caller %px\n", + h_used, h_used->size, h_used->caller); out: spin_unlock(&dap_lock); } From 78f101a1b25848a364c632237ee6a7a6ec468235 Mon Sep 17 00:00:00 2001 From: Borislav Petkov Date: Tue, 22 Dec 2020 14:05:55 +0100 Subject: [PATCH 306/414] Documentation/submitting-patches: Add blurb about backtraces in commit messages Document that backtraces in commit messages should be trimmed down to the useful information only. This has been carved out from a tip subsystem handbook patchset by Thomas Gleixner: https://lkml.kernel.org/r/20181107171010.421878737@linutronix.de and incorporates follow-on comments. Signed-off-by: Borislav Petkov Signed-off-by: Jonathan Corbet --- Documentation/process/submitting-patches.rst | 20 ++++++++++++++++++++ 1 file changed, 20 insertions(+) diff --git a/Documentation/process/submitting-patches.rst b/Documentation/process/submitting-patches.rst index 7f48cccc75cd..8c991c863628 100644 --- a/Documentation/process/submitting-patches.rst +++ b/Documentation/process/submitting-patches.rst @@ -684,6 +684,26 @@ generates appropriate diffstats by default.) See more details on the proper patch format in the following references. +Backtraces in commit mesages +^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +Backtraces help document the call chain leading to a problem. However, +not all backtraces are helpful. For example, early boot call chains are +unique and obvious. Copying the full dmesg output verbatim, however, +adds distracting information like timestamps, module lists, register and +stack dumps. + +Therefore, the most useful backtraces should distill the relevant +information from the dump, which makes it easier to focus on the real +issue. Here is an example of a well-trimmed backtrace:: + + unchecked MSR access error: WRMSR to 0xd51 (tried to write 0x0000000000000064) + at rIP: 0xffffffffae059994 (native_write_msr+0x4/0x20) + Call Trace: + mba_wrmsr + update_domains + rdtgroup_mkdir + .. _explicit_in_reply_to: Explicit In-Reply-To headers From 4ba1d726c45d644525883565ff5850ddc7b4a718 Mon Sep 17 00:00:00 2001 From: Randy Dunlap Date: Tue, 2 Feb 2021 19:32:43 -0800 Subject: [PATCH 307/414] Documentation: /proc/loadavg: add 3 more field descriptions Update contents of /proc/loadavg: add 3 more fields. Signed-off-by: Randy Dunlap Cc: linux-doc@vger.kernel.org Link: https://lore.kernel.org/r/fe55b139-bd03-4762-199b-83be873cf7dd@infradead.org Signed-off-by: Jonathan Corbet --- Documentation/filesystems/proc.rst | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/Documentation/filesystems/proc.rst b/Documentation/filesystems/proc.rst index 2fa69f710e2a..9abdba17565e 100644 --- a/Documentation/filesystems/proc.rst +++ b/Documentation/filesystems/proc.rst @@ -687,7 +687,10 @@ files are there, and which are missing. kcore Kernel core image (can be ELF or A.OUT(deprecated in 2.4)) kmsg Kernel messages ksyms Kernel symbol table - loadavg Load average of last 1, 5 & 15 minutes + loadavg Load average of last 1, 5 & 15 minutes; + number of processes currently runnable (running or on ready queue); + total number of processes in system; + last pid created. locks Kernel locks meminfo Memory info misc Miscellaneous From 26606ce072d48ab82f640f75ab9673ee10ab4a5a Mon Sep 17 00:00:00 2001 From: Joe Perches Date: Mon, 24 Aug 2020 21:55:58 -0700 Subject: [PATCH 308/414] coding-style.rst: Avoid comma statements Commas are not how statements are terminated. Always use semicolons and braces if necessary. Signed-off-by: Joe Perches Link: https://lore.kernel.org/r/2a97b738bba335434461a5a918053a49c1fb6af4.1598331148.git.joe@perches.com Signed-off-by: Jonathan Corbet --- Documentation/process/coding-style.rst | 17 +++++++++++++++++ 1 file changed, 17 insertions(+) diff --git a/Documentation/process/coding-style.rst b/Documentation/process/coding-style.rst index 98227226c4e5..a1e061149e0d 100644 --- a/Documentation/process/coding-style.rst +++ b/Documentation/process/coding-style.rst @@ -69,9 +69,26 @@ something to hide: if (condition) do_this; do_something_everytime; +Don't use commas to avoid using braces: + +.. code-block:: c + + if (condition) + do_this(), do_that(); + +Always uses braces for multiple statements: + +.. code-block:: c + + if (condition) { + do_this(); + do_that(); + } + Don't put multiple assignments on a single line either. Kernel coding style is super simple. Avoid tricky expressions. + Outside of comments, documentation and except in Kconfig, spaces are never used for indentation, and the above example is deliberately broken. From dd58e649742a5eabd327d47096f12d3302d908f1 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Andr=C3=A9=20Almeida?= Date: Fri, 29 Jan 2021 22:45:46 -0300 Subject: [PATCH 309/414] docs: Make syscalls' helpers naming consistent MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The documentation explains the need to create internal syscalls' helpers, and that they should be called `kern_xyzzy()`. However, the comment at include/linux/syscalls.h says that they should be named as `ksys_xyzzy()`, and so are all the helpers declared bellow it. Change the documentation to reflect this. Fixes: 819671ff849b ("syscalls: define and explain goal to not call syscalls in the kernel") Signed-off-by: André Almeida Reviewed-by: Dominik Brodowski Link: https://lore.kernel.org/r/20210130014547.123006-1-andrealmeid@collabora.com Signed-off-by: Jonathan Corbet --- Documentation/process/adding-syscalls.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Documentation/process/adding-syscalls.rst b/Documentation/process/adding-syscalls.rst index 02857b5ad2b5..906c47f1a9e5 100644 --- a/Documentation/process/adding-syscalls.rst +++ b/Documentation/process/adding-syscalls.rst @@ -501,7 +501,7 @@ table, but not from elsewhere in the kernel. If the syscall functionality is useful to be used within the kernel, needs to be shared between an old and a new syscall, or needs to be shared between a syscall and its compatibility variant, it should be implemented by means of a "helper" function (such as -``kern_xyzzy()``). This kernel function may then be called within the +``ksys_xyzzy()``). This kernel function may then be called within the syscall stub (``sys_xyzzy()``), the compatibility syscall stub (``compat_sys_xyzzy()``), and/or other kernel code. From 61ffd285bddc8666f23d36f78bf8e5c2e2c92c04 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Andr=C3=A9=20Almeida?= Date: Fri, 29 Jan 2021 22:45:47 -0300 Subject: [PATCH 310/414] Documentation: admin-guide: Update kvm/xen config option MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Since commit 9bba03d4473d ("kconfig: remove 'kvmconfig' and 'xenconfig' shorthands") kvm/xen config shortcuts are not available anymore. Update the file to reflect how they should be used, with the full filename. Signed-off-by: André Almeida Link: https://lore.kernel.org/r/20210130014547.123006-2-andrealmeid@collabora.com Signed-off-by: Jonathan Corbet --- Documentation/admin-guide/README.rst | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/Documentation/admin-guide/README.rst b/Documentation/admin-guide/README.rst index 261b7b4cca1f..35314b63008c 100644 --- a/Documentation/admin-guide/README.rst +++ b/Documentation/admin-guide/README.rst @@ -226,10 +226,11 @@ Configuring the kernel all module options to built in (=y) options. You can also preserve modules by LMC_KEEP. - "make kvmconfig" Enable additional options for kvm guest kernel support. + "make kvm_guest.config" Enable additional options for kvm guest kernel + support. - "make xenconfig" Enable additional options for xen dom0 guest kernel - support. + "make xen.config" Enable additional options for xen dom0 guest kernel + support. "make tinyconfig" Configure the tiniest possible kernel. From 8fa4e9388006bd2964e39cba241d8e59e5641438 Mon Sep 17 00:00:00 2001 From: Flavio Suligoi Date: Fri, 29 Jan 2021 14:20:35 +0100 Subject: [PATCH 311/414] docs: thermal: fix spelling mistakes Signed-off-by: Flavio Suligoi Reviewed-by: Viresh Kumar Link: https://lore.kernel.org/r/20210129132035.16967-1-f.suligoi@asem.it Signed-off-by: Jonathan Corbet --- Documentation/driver-api/thermal/sysfs-api.rst | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/Documentation/driver-api/thermal/sysfs-api.rst b/Documentation/driver-api/thermal/sysfs-api.rst index e7520cb439ac..71da7dc8c0ba 100644 --- a/Documentation/driver-api/thermal/sysfs-api.rst +++ b/Documentation/driver-api/thermal/sysfs-api.rst @@ -54,7 +54,7 @@ temperature) and throttle appropriate devices. trips: the total number of trip points this thermal zone supports. mask: - Bit string: If 'n'th bit is set, then trip point 'n' is writeable. + Bit string: If 'n'th bit is set, then trip point 'n' is writable. devdata: device private data ops: @@ -406,7 +406,7 @@ Thermal cooling device sys I/F, created once it's registered:: |---stats/reset: Writing any value resets the statistics |---stats/time_in_state_ms: Time (msec) spent in various cooling states |---stats/total_trans: Total number of times cooling state is changed - |---stats/trans_table: Cooing state transition table + |---stats/trans_table: Cooling state transition table Then next two dynamic attributes are created/removed in pairs. They represent @@ -779,5 +779,5 @@ emergency poweroff kicks in after the delay has elapsed and shuts down the system. If set to 0 emergency poweroff will not be supported. So a carefully -profiled non-zero positive value is a must for emergerncy poweroff to be +profiled non-zero positive value is a must for emergency poweroff to be triggered. From ea1d838980f4afe457a48773dfe142af58aba8bd Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?N=C3=ADcolas=20F=2E=20R=2E=20A=2E=20Prado?= Date: Thu, 28 Jan 2021 01:01:25 +0000 Subject: [PATCH 312/414] docs: Enable usage of relative paths to docs on automarkup MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Previously, a cross-reference to another document could only be created by writing the full path to the document starting from the Documentation/ directory. Extend this to also allow relative paths to be used. A relative path would be just the path, like ../filename.rst, while the absolute path still needs to start from Documentation, like Documentation/filename.rst. As part of this change, the .rst extension is now required for both types of paths, since not requiring it would cause the regex to be too generic. Suggested-by: Jonathan Corbet Signed-off-by: Nícolas F. R. A. Prado Link: https://lore.kernel.org/r/20210128010028.58541-2-nfraprado@protonmail.com [jc: Tweaked the regex to recognize .txt too] Signed-off-by: Jonathan Corbet --- Documentation/sphinx/automarkup.py | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/Documentation/sphinx/automarkup.py b/Documentation/sphinx/automarkup.py index 953b24b6e2b4..acf5473002f3 100644 --- a/Documentation/sphinx/automarkup.py +++ b/Documentation/sphinx/automarkup.py @@ -51,7 +51,7 @@ RE_typedef = re.compile(r'\b(typedef)\s+([a-zA-Z_]\w+)', flags=ascii_p3) # Detects a reference to a documentation page of the form Documentation/... with # an optional extension # -RE_doc = re.compile(r'\bDocumentation(/[\w\-_/]+)(\.\w+)*') +RE_doc = re.compile(r'(\bDocumentation/)?((\.\./)*[\w\-/]+)\.(rst|txt)') RE_namespace = re.compile(r'^\s*..\s*c:namespace::\s*(\S+)\s*$') @@ -234,7 +234,10 @@ def markup_doc_ref(docname, app, match): # # Go through the dance of getting an xref out of the std domain # - target = match.group(1) + absolute = match.group(1) + target = match.group(2) + if absolute: + target = "/" + target xref = None pxref = addnodes.pending_xref('', refdomain = 'std', reftype = 'doc', reftarget = target, modname = None, From 1e013ff7cb54a0045c78d7426bd5369ed7f82260 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?N=C3=ADcolas=20F=2E=20R=2E=20A=2E=20Prado?= Date: Thu, 28 Jan 2021 01:01:36 +0000 Subject: [PATCH 313/414] docs: Document cross-referencing using relative path MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Update the Cross-referencing section to explain how to create a cross-reference to a document using relative paths and with no additional syntax, by relying on automarkup.py. Signed-off-by: Nícolas F. R. A. Prado Link: https://lore.kernel.org/r/20210128010028.58541-3-nfraprado@protonmail.com Signed-off-by: Jonathan Corbet --- Documentation/doc-guide/sphinx.rst | 24 +++++++++++++++++------- 1 file changed, 17 insertions(+), 7 deletions(-) diff --git a/Documentation/doc-guide/sphinx.rst b/Documentation/doc-guide/sphinx.rst index 36ac2166ad67..ec3e71f56009 100644 --- a/Documentation/doc-guide/sphinx.rst +++ b/Documentation/doc-guide/sphinx.rst @@ -340,16 +340,26 @@ Rendered as: Cross-referencing ----------------- -Cross-referencing from one documentation page to another can be done by passing -the path to the file starting from the Documentation folder. -For example, to cross-reference to this page (the .rst extension is optional):: +Cross-referencing from one documentation page to another can be done simply by +writing the path to the document file, no special syntax required. The path can +be either absolute or relative. For absolute paths, start it with +"Documentation/". For example, to cross-reference to this page, all the +following are valid options, depending on the current document's directory (note +that the ``.rst`` extension is required):: - See Documentation/doc-guide/sphinx.rst. + See Documentation/doc-guide/sphinx.rst. This always works. + Take a look at sphinx.rst, which is at this same directory. + Read ../sphinx.rst, which is one directory above. -If you want to use a relative path, you need to use Sphinx's ``doc`` directive. -For example, referencing this page from the same directory would be done as:: +If you want the link to have a different rendered text other than the document's +title, you need to use Sphinx's ``doc`` role. For example:: - See :doc:`sphinx`. + See :doc:`my custom link text for document sphinx `. + +For most use cases, the former is preferred, as it is cleaner and more suited +for people reading the source files. If you come across a ``:doc:`` usage that +isn't adding any value, please feel free to convert it to just the document +path. For information on cross-referencing to kernel-doc functions or types, see Documentation/doc-guide/kernel-doc.rst. From 7a5661739d4bf23cd356fb13bb1e0790e09df3c5 Mon Sep 17 00:00:00 2001 From: Colin Ian King Date: Thu, 4 Feb 2021 15:00:01 +0000 Subject: [PATCH 314/414] iommu/mediatek: Fix unsigned domid comparison with less than zero Currently the check for domid < 0 is always false because domid is unsigned. Fix this by casting domid to an int before making the comparison. Signed-off-by: Colin Ian King Acked-by: Will Deacon Link: https://lore.kernel.org/r/20210204150001.102672-1-colin.king@canonical.com Addresses-Coverity: ("Unsigned comparison against 0") Signed-off-by: Joerg Roedel --- drivers/iommu/mtk_iommu.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/iommu/mtk_iommu.c b/drivers/iommu/mtk_iommu.c index 0ad14a7604b1..1f262621ef19 100644 --- a/drivers/iommu/mtk_iommu.c +++ b/drivers/iommu/mtk_iommu.c @@ -645,7 +645,7 @@ static void mtk_iommu_get_resv_regions(struct device *dev, struct iommu_resv_region *region; int prot = IOMMU_WRITE | IOMMU_READ; - if (domid < 0) + if ((int)domid < 0) return; curdom = data->plat_data->iova_region + domid; for (i = 0; i < data->plat_data->iova_region_nr; i++) { From b53005d66bb0c3939790d89702333f89ce556c5b Mon Sep 17 00:00:00 2001 From: Weihang Li Date: Fri, 5 Feb 2021 16:37:58 +0800 Subject: [PATCH 315/414] RDMA/pvrdma: Replace spin_lock_irqsave with spin_lock in hard IRQ There is no need to do irqsave and irqrestore in context of hard IRQ. Link: https://lore.kernel.org/r/1612514278-49220-1-git-send-email-liweihang@huawei.com Signed-off-by: Weihang Li Signed-off-by: Jason Gunthorpe --- drivers/infiniband/hw/vmw_pvrdma/pvrdma_main.c | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/drivers/infiniband/hw/vmw_pvrdma/pvrdma_main.c b/drivers/infiniband/hw/vmw_pvrdma/pvrdma_main.c index 00a330909bb3..4b6019e7de67 100644 --- a/drivers/infiniband/hw/vmw_pvrdma/pvrdma_main.c +++ b/drivers/infiniband/hw/vmw_pvrdma/pvrdma_main.c @@ -474,7 +474,6 @@ static irqreturn_t pvrdma_intrx_handler(int irq, void *dev_id) int ring_slots = (dev->dsr->cq_ring_pages.num_pages - 1) * PAGE_SIZE / sizeof(struct pvrdma_cqne); unsigned int head; - unsigned long flags; dev_dbg(&dev->pdev->dev, "interrupt x (completion) handler\n"); @@ -483,11 +482,11 @@ static irqreturn_t pvrdma_intrx_handler(int irq, void *dev_id) struct pvrdma_cq *cq; cqne = get_cqne(dev, head); - spin_lock_irqsave(&dev->cq_tbl_lock, flags); + spin_lock(&dev->cq_tbl_lock); cq = dev->cq_tbl[cqne->info % dev->dsr->caps.max_cq]; if (cq) refcount_inc(&cq->refcnt); - spin_unlock_irqrestore(&dev->cq_tbl_lock, flags); + spin_unlock(&dev->cq_tbl_lock); if (cq && cq->ibcq.comp_handler) cq->ibcq.comp_handler(&cq->ibcq, cq->ibcq.cq_context); From 3ce60f443b143e649aa26cd3f668d645434647ac Mon Sep 17 00:00:00 2001 From: Parav Pandit Date: Wed, 3 Feb 2021 15:01:29 +0200 Subject: [PATCH 316/414] IB/mlx5: Move mlx5_port_caps from mlx5_core_dev to mlx5_ib_dev mlx5_port_caps are RDMA specific capabilities. These are not used by the mlx5_core_device at all. Move them to mlx5_ib_dev where it is used and reduce the scope of it to multiple drivers. Link: https://lore.kernel.org/r/20210203130133.4057329-2-leon@kernel.org Signed-off-by: Parav Pandit Signed-off-by: Leon Romanovsky Signed-off-by: Jason Gunthorpe --- drivers/infiniband/hw/mlx5/mad.c | 8 ++++---- drivers/infiniband/hw/mlx5/main.c | 14 ++++++-------- drivers/infiniband/hw/mlx5/mlx5_ib.h | 8 ++++++++ drivers/infiniband/hw/mlx5/qp.c | 6 +++--- drivers/infiniband/hw/mlx5/wr.c | 2 +- include/linux/mlx5/driver.h | 8 -------- 6 files changed, 22 insertions(+), 24 deletions(-) diff --git a/drivers/infiniband/hw/mlx5/mad.c b/drivers/infiniband/hw/mlx5/mad.c index 53dec6063245..e9d0a5269582 100644 --- a/drivers/infiniband/hw/mlx5/mad.c +++ b/drivers/infiniband/hw/mlx5/mad.c @@ -48,7 +48,7 @@ static bool can_do_mad_ifc(struct mlx5_ib_dev *dev, u8 port_num, if (in_mad->mad_hdr.mgmt_class != IB_MGMT_CLASS_SUBN_LID_ROUTED && in_mad->mad_hdr.mgmt_class != IB_MGMT_CLASS_SUBN_DIRECTED_ROUTE) return true; - return dev->mdev->port_caps[port_num - 1].has_smi; + return dev->port_caps[port_num - 1].has_smi; } static int mlx5_MAD_IFC(struct mlx5_ib_dev *dev, int ignore_mkey, @@ -299,7 +299,7 @@ int mlx5_query_ext_port_caps(struct mlx5_ib_dev *dev, u8 port) packet_error = be16_to_cpu(out_mad->status); - dev->mdev->port_caps[port - 1].ext_port_cap = (!err && !packet_error) ? + dev->port_caps[port - 1].ext_port_cap = (!err && !packet_error) ? MLX_EXT_PORT_CAP_FLAG_EXTENDED_PORT_INFO : 0; out: @@ -549,7 +549,7 @@ int mlx5_query_mad_ifc_port(struct ib_device *ibdev, u8 port, props->port_cap_flags = be32_to_cpup((__be32 *)(out_mad->data + 20)); props->gid_tbl_len = out_mad->data[50]; props->max_msg_sz = 1 << MLX5_CAP_GEN(mdev, log_max_msg); - props->pkey_tbl_len = mdev->port_caps[port - 1].pkey_table_len; + props->pkey_tbl_len = dev->port_caps[port - 1].pkey_table_len; props->bad_pkey_cntr = be16_to_cpup((__be16 *)(out_mad->data + 46)); props->qkey_viol_cntr = be16_to_cpup((__be16 *)(out_mad->data + 48)); props->active_width = out_mad->data[31] & 0xf; @@ -589,7 +589,7 @@ int mlx5_query_mad_ifc_port(struct ib_device *ibdev, u8 port, /* If reported active speed is QDR, check if is FDR-10 */ if (props->active_speed == 4) { - if (mdev->port_caps[port - 1].ext_port_cap & + if (dev->port_caps[port - 1].ext_port_cap & MLX_EXT_PORT_CAP_FLAG_EXTENDED_PORT_INFO) { init_query_mad(in_mad); in_mad->attr_id = MLX5_ATTR_EXTENDED_PORT_INFO; diff --git a/drivers/infiniband/hw/mlx5/main.c b/drivers/infiniband/hw/mlx5/main.c index 40fb86db3376..d2f185ae2b9c 100644 --- a/drivers/infiniband/hw/mlx5/main.c +++ b/drivers/infiniband/hw/mlx5/main.c @@ -2946,8 +2946,8 @@ static int set_has_smi_cap(struct mlx5_ib_dev *dev) int err; int port; - for (port = 1; port <= ARRAY_SIZE(dev->mdev->port_caps); port++) { - dev->mdev->port_caps[port - 1].has_smi = false; + for (port = 1; port <= ARRAY_SIZE(dev->port_caps); port++) { + dev->port_caps[port - 1].has_smi = false; if (MLX5_CAP_GEN(dev->mdev, port_type) == MLX5_CAP_PORT_TYPE_IB) { if (MLX5_CAP_GEN(dev->mdev, ib_virt)) { @@ -2959,10 +2959,10 @@ static int set_has_smi_cap(struct mlx5_ib_dev *dev) port, err); return err; } - dev->mdev->port_caps[port - 1].has_smi = + dev->port_caps[port - 1].has_smi = vport_ctx.has_smi; } else { - dev->mdev->port_caps[port - 1].has_smi = true; + dev->port_caps[port - 1].has_smi = true; } } } @@ -3004,10 +3004,8 @@ static int __get_port_caps(struct mlx5_ib_dev *dev, u8 port) goto out; } - dev->mdev->port_caps[port - 1].pkey_table_len = - dprops->max_pkeys; - dev->mdev->port_caps[port - 1].gid_table_len = - pprops->gid_tbl_len; + dev->port_caps[port - 1].pkey_table_len = dprops->max_pkeys; + dev->port_caps[port - 1].gid_table_len = pprops->gid_tbl_len; mlx5_ib_dbg(dev, "port %d: pkey_table_len %d, gid_table_len %d\n", port, dprops->max_pkeys, pprops->gid_tbl_len); diff --git a/drivers/infiniband/hw/mlx5/mlx5_ib.h b/drivers/infiniband/hw/mlx5/mlx5_ib.h index 2fd2927abe45..c2f91c15b973 100644 --- a/drivers/infiniband/hw/mlx5/mlx5_ib.h +++ b/drivers/infiniband/hw/mlx5/mlx5_ib.h @@ -1036,6 +1036,13 @@ struct mlx5_var_table { u64 num_var_hw_entries; }; +struct mlx5_port_caps { + int gid_table_len; + int pkey_table_len; + bool has_smi; + u8 ext_port_cap; +}; + struct mlx5_ib_dev { struct ib_device ib_dev; struct mlx5_core_dev *mdev; @@ -1096,6 +1103,7 @@ struct mlx5_ib_dev { struct mlx5_var_table var_table; struct xarray sig_mrs; + struct mlx5_port_caps port_caps[MLX5_MAX_PORTS]; }; static inline struct mlx5_ib_cq *to_mibcq(struct mlx5_core_cq *mcq) diff --git a/drivers/infiniband/hw/mlx5/qp.c b/drivers/infiniband/hw/mlx5/qp.c index 88be94215708..5274349dd998 100644 --- a/drivers/infiniband/hw/mlx5/qp.c +++ b/drivers/infiniband/hw/mlx5/qp.c @@ -3177,10 +3177,10 @@ static int mlx5_set_path(struct mlx5_ib_dev *dev, struct mlx5_ib_qp *qp, if (ah_flags & IB_AH_GRH) { if (grh->sgid_index >= - dev->mdev->port_caps[port - 1].gid_table_len) { + dev->port_caps[port - 1].gid_table_len) { pr_err("sgid_index (%u) too large. max is %d\n", grh->sgid_index, - dev->mdev->port_caps[port - 1].gid_table_len); + dev->port_caps[port - 1].gid_table_len); return -EINVAL; } } @@ -4311,7 +4311,7 @@ int mlx5_ib_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr, if (attr_mask & IB_QP_PKEY_INDEX) { port = attr_mask & IB_QP_PORT ? attr->port_num : qp->port; if (attr->pkey_index >= - dev->mdev->port_caps[port - 1].pkey_table_len) { + dev->port_caps[port - 1].pkey_table_len) { mlx5_ib_dbg(dev, "invalid pkey index %d\n", attr->pkey_index); goto out; diff --git a/drivers/infiniband/hw/mlx5/wr.c b/drivers/infiniband/hw/mlx5/wr.c index d6038fb6c50c..cf2852cba45c 100644 --- a/drivers/infiniband/hw/mlx5/wr.c +++ b/drivers/infiniband/hw/mlx5/wr.c @@ -1369,7 +1369,7 @@ int mlx5_ib_post_send(struct ib_qp *ibqp, const struct ib_send_wr *wr, handle_qpt_uc(wr, &seg, &size); break; case IB_QPT_SMI: - if (unlikely(!mdev->port_caps[qp->port - 1].has_smi)) { + if (unlikely(!dev->port_caps[qp->port - 1].has_smi)) { mlx5_ib_warn(dev, "Send SMP MADs is not allowed\n"); err = -EPERM; *bad_wr = wr; diff --git a/include/linux/mlx5/driver.h b/include/linux/mlx5/driver.h index f93bfe7473aa..11558c2e99f0 100644 --- a/include/linux/mlx5/driver.h +++ b/include/linux/mlx5/driver.h @@ -305,13 +305,6 @@ struct mlx5_cmd { struct mlx5_cmd_stats *stats; }; -struct mlx5_port_caps { - int gid_table_len; - int pkey_table_len; - u8 ext_port_cap; - bool has_smi; -}; - struct mlx5_cmd_mailbox { void *buf; dma_addr_t dma; @@ -694,7 +687,6 @@ struct mlx5_core_dev { u8 rev_id; char board_id[MLX5_BOARD_ID_LEN]; struct mlx5_cmd cmd; - struct mlx5_port_caps port_caps[MLX5_MAX_PORTS]; struct { u32 hca_cur[MLX5_CAP_NUM][MLX5_UN_SZ_DW(hca_cap_union)]; u32 hca_max[MLX5_CAP_NUM][MLX5_UN_SZ_DW(hca_cap_union)]; From 2019d70e919f01c43975b8d9ea2803b890eabba9 Mon Sep 17 00:00:00 2001 From: Parav Pandit Date: Wed, 3 Feb 2021 15:01:30 +0200 Subject: [PATCH 317/414] IB/mlx5: Avoid calling query device for reading pkey table length Pkey table length for all the ports of the device is the same. Currently get_ports_cap() reads and stores it for each port by querying the device which reads more than just pkey table length. For representor device ports which can be in range of hundreds, it queries is for each such port and end up returning same value for all the ports. When in representor mode, modify QP accesses pkey port caps for a port index that can be outside of the port_caps table. Hence, simplify the logic to query the max pkey table length only once during device initialization sequence. Link: https://lore.kernel.org/r/20210203130133.4057329-3-leon@kernel.org Signed-off-by: Parav Pandit Signed-off-by: Leon Romanovsky Signed-off-by: Jason Gunthorpe --- drivers/infiniband/hw/mlx5/mad.c | 2 +- drivers/infiniband/hw/mlx5/main.c | 24 ++++++------------------ drivers/infiniband/hw/mlx5/mlx5_ib.h | 2 +- drivers/infiniband/hw/mlx5/qp.c | 17 ++++------------- 4 files changed, 12 insertions(+), 33 deletions(-) diff --git a/drivers/infiniband/hw/mlx5/mad.c b/drivers/infiniband/hw/mlx5/mad.c index e9d0a5269582..cdb47a00e516 100644 --- a/drivers/infiniband/hw/mlx5/mad.c +++ b/drivers/infiniband/hw/mlx5/mad.c @@ -549,7 +549,7 @@ int mlx5_query_mad_ifc_port(struct ib_device *ibdev, u8 port, props->port_cap_flags = be32_to_cpup((__be32 *)(out_mad->data + 20)); props->gid_tbl_len = out_mad->data[50]; props->max_msg_sz = 1 << MLX5_CAP_GEN(mdev, log_max_msg); - props->pkey_tbl_len = dev->port_caps[port - 1].pkey_table_len; + props->pkey_tbl_len = dev->pkey_table_len; props->bad_pkey_cntr = be16_to_cpup((__be16 *)(out_mad->data + 46)); props->qkey_viol_cntr = be16_to_cpup((__be16 *)(out_mad->data + 48)); props->active_width = out_mad->data[31] & 0xf; diff --git a/drivers/infiniband/hw/mlx5/main.c b/drivers/infiniband/hw/mlx5/main.c index d2f185ae2b9c..364dc3e6a2de 100644 --- a/drivers/infiniband/hw/mlx5/main.c +++ b/drivers/infiniband/hw/mlx5/main.c @@ -816,9 +816,7 @@ static int mlx5_ib_query_device(struct ib_device *ibdev, if (err) return err; - err = mlx5_query_max_pkeys(ibdev, &props->max_pkeys); - if (err) - return err; + props->max_pkeys = dev->pkey_table_len; err = mlx5_query_vendor_id(ibdev, &props->vendor_id); if (err) @@ -2979,7 +2977,6 @@ static void get_ext_port_caps(struct mlx5_ib_dev *dev) static int __get_port_caps(struct mlx5_ib_dev *dev, u8 port) { - struct ib_device_attr *dprops = NULL; struct ib_port_attr *pprops = NULL; int err = -ENOMEM; @@ -2987,16 +2984,6 @@ static int __get_port_caps(struct mlx5_ib_dev *dev, u8 port) if (!pprops) goto out; - dprops = kmalloc(sizeof(*dprops), GFP_KERNEL); - if (!dprops) - goto out; - - err = mlx5_ib_query_device(&dev->ib_dev, dprops, NULL); - if (err) { - mlx5_ib_warn(dev, "query_device failed %d\n", err); - goto out; - } - err = mlx5_ib_query_port(&dev->ib_dev, port, pprops); if (err) { mlx5_ib_warn(dev, "query_port %d failed %d\n", @@ -3004,15 +2991,12 @@ static int __get_port_caps(struct mlx5_ib_dev *dev, u8 port) goto out; } - dev->port_caps[port - 1].pkey_table_len = dprops->max_pkeys; dev->port_caps[port - 1].gid_table_len = pprops->gid_tbl_len; mlx5_ib_dbg(dev, "port %d: pkey_table_len %d, gid_table_len %d\n", - port, dprops->max_pkeys, pprops->gid_tbl_len); + port, dev->pkey_table_len, pprops->gid_tbl_len); out: kfree(pprops); - kfree(dprops); - return err; } @@ -3979,6 +3963,10 @@ static int mlx5_ib_stage_init_init(struct mlx5_ib_dev *dev) if (err) goto err_mp; + err = mlx5_query_max_pkeys(&dev->ib_dev, &dev->pkey_table_len); + if (err) + goto err_mp; + if (mlx5_use_mad_ifc(dev)) get_ext_port_caps(dev); diff --git a/drivers/infiniband/hw/mlx5/mlx5_ib.h b/drivers/infiniband/hw/mlx5/mlx5_ib.h index c2f91c15b973..36a92f3c29e3 100644 --- a/drivers/infiniband/hw/mlx5/mlx5_ib.h +++ b/drivers/infiniband/hw/mlx5/mlx5_ib.h @@ -1038,7 +1038,6 @@ struct mlx5_var_table { struct mlx5_port_caps { int gid_table_len; - int pkey_table_len; bool has_smi; u8 ext_port_cap; }; @@ -1104,6 +1103,7 @@ struct mlx5_ib_dev { struct xarray sig_mrs; struct mlx5_port_caps port_caps[MLX5_MAX_PORTS]; + u16 pkey_table_len; }; static inline struct mlx5_ib_cq *to_mibcq(struct mlx5_core_cq *mcq) diff --git a/drivers/infiniband/hw/mlx5/qp.c b/drivers/infiniband/hw/mlx5/qp.c index 5274349dd998..475470237e0b 100644 --- a/drivers/infiniband/hw/mlx5/qp.c +++ b/drivers/infiniband/hw/mlx5/qp.c @@ -4231,7 +4231,6 @@ int mlx5_ib_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr, enum ib_qp_type qp_type; enum ib_qp_state cur_state, new_state; int err = -EINVAL; - int port; if (!mlx5_ib_modify_qp_allowed(dev, qp, ibqp->qp_type)) return -EOPNOTSUPP; @@ -4276,10 +4275,6 @@ int mlx5_ib_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr, cur_state = attr_mask & IB_QP_CUR_STATE ? attr->cur_qp_state : qp->state; new_state = attr_mask & IB_QP_STATE ? attr->qp_state : cur_state; - if (!(cur_state == new_state && cur_state == IB_QPS_RESET)) { - port = attr_mask & IB_QP_PORT ? attr->port_num : qp->port; - } - if (qp->flags & IB_QP_CREATE_SOURCE_QPN) { if (attr_mask & ~(IB_QP_STATE | IB_QP_CUR_STATE)) { mlx5_ib_dbg(dev, "invalid attr_mask 0x%x when underlay QP is used\n", @@ -4308,14 +4303,10 @@ int mlx5_ib_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr, goto out; } - if (attr_mask & IB_QP_PKEY_INDEX) { - port = attr_mask & IB_QP_PORT ? attr->port_num : qp->port; - if (attr->pkey_index >= - dev->port_caps[port - 1].pkey_table_len) { - mlx5_ib_dbg(dev, "invalid pkey index %d\n", - attr->pkey_index); - goto out; - } + if ((attr_mask & IB_QP_PKEY_INDEX) && + attr->pkey_index >= dev->pkey_table_len) { + mlx5_ib_dbg(dev, "invalid pkey index %d\n", attr->pkey_index); + goto out; } if (attr_mask & IB_QP_MAX_QP_RD_ATOMIC && From 7a58779edd75c37352917e8fbc7769efce0e65b6 Mon Sep 17 00:00:00 2001 From: Parav Pandit Date: Wed, 3 Feb 2021 15:01:31 +0200 Subject: [PATCH 318/414] IB/mlx5: Improve query port for representor port Improve query port functionality for representor port as below. 1. RoCE Qkey violation counters are not applicable for representor port. 2. Avoid setting gid_tbl_len twice for representor port. 3. Avoid setting ip_gids and IB_PORT_CM_SUP property for representor port as GID table is empty and CM support is not present in representor mode. Link: https://lore.kernel.org/r/20210203130133.4057329-4-leon@kernel.org Signed-off-by: Parav Pandit Signed-off-by: Leon Romanovsky Signed-off-by: Jason Gunthorpe --- drivers/infiniband/hw/mlx5/main.c | 31 ++++++++++--------------------- 1 file changed, 10 insertions(+), 21 deletions(-) diff --git a/drivers/infiniband/hw/mlx5/main.c b/drivers/infiniband/hw/mlx5/main.c index 364dc3e6a2de..176f2a866f12 100644 --- a/drivers/infiniband/hw/mlx5/main.c +++ b/drivers/infiniband/hw/mlx5/main.c @@ -462,7 +462,6 @@ static int mlx5_query_port_roce(struct ib_device *device, u8 port_num, struct net_device *ndev, *upper; enum ib_mtu ndev_ib_mtu; bool put_mdev = true; - u16 qkey_viol_cntr; u32 eth_prot_oper; u8 mdev_port_num; bool ext; @@ -500,20 +499,22 @@ static int mlx5_query_port_roce(struct ib_device *device, u8 port_num, translate_eth_proto_oper(eth_prot_oper, &props->active_speed, &props->active_width, ext); - props->port_cap_flags |= IB_PORT_CM_SUP; - props->ip_gids = true; + if (!dev->is_rep && mlx5_is_roce_enabled(mdev)) { + u16 qkey_viol_cntr; - props->gid_tbl_len = MLX5_CAP_ROCE(dev->mdev, - roce_address_table_size); + props->port_cap_flags |= IB_PORT_CM_SUP; + props->ip_gids = true; + props->gid_tbl_len = MLX5_CAP_ROCE(dev->mdev, + roce_address_table_size); + mlx5_query_nic_vport_qkey_viol_cntr(mdev, &qkey_viol_cntr); + props->qkey_viol_cntr = qkey_viol_cntr; + } props->max_mtu = IB_MTU_4096; props->max_msg_sz = 1 << MLX5_CAP_GEN(dev->mdev, log_max_msg); props->pkey_tbl_len = 1; props->state = IB_PORT_DOWN; props->phys_state = IB_PORT_PHYS_STATE_DISABLED; - mlx5_query_nic_vport_qkey_viol_cntr(mdev, &qkey_viol_cntr); - props->qkey_viol_cntr = qkey_viol_cntr; - /* If this is a stub query for an unaffiliated port stop here */ if (!put_mdev) goto out; @@ -1383,19 +1384,7 @@ int mlx5_ib_query_port(struct ib_device *ibdev, u8 port, static int mlx5_ib_rep_query_port(struct ib_device *ibdev, u8 port, struct ib_port_attr *props) { - int ret; - - /* Only link layer == ethernet is valid for representors - * and we always use port 1 - */ - ret = mlx5_query_port_roce(ibdev, port, props); - if (ret || !props) - return ret; - - /* We don't support GIDS */ - props->gid_tbl_len = 0; - - return ret; + return mlx5_query_port_roce(ibdev, port, props); } static int mlx5_ib_rep_query_pkey(struct ib_device *ibdev, u8 port, u16 index, From 7416790e22452bfa86de6b55638eacf7780c8f6f Mon Sep 17 00:00:00 2001 From: Parav Pandit Date: Wed, 3 Feb 2021 15:01:32 +0200 Subject: [PATCH 319/414] RDMA/core: Introduce and use API to read port immutable data Currently mlx5 driver caches port GID table length for 2 ports. It is also cached by IB core as port immutable data. When mlx5 representor ports are present, which are usually more than 2, invalid access to port_caps array can happen while validating the GID table length which is only for 2 ports. To avoid this, take help of the IB cores port immutable data by exposing an API to read the port immutable fields. Remove mlx5 driver's internal cache, thereby reduce code and data. Link: https://lore.kernel.org/r/20210203130133.4057329-5-leon@kernel.org Signed-off-by: Parav Pandit Signed-off-by: Leon Romanovsky Signed-off-by: Jason Gunthorpe --- drivers/infiniband/core/device.c | 14 +++++++ drivers/infiniband/hw/mlx5/main.c | 55 +--------------------------- drivers/infiniband/hw/mlx5/mlx5_ib.h | 1 - drivers/infiniband/hw/mlx5/qp.c | 8 ++-- include/rdma/ib_verbs.h | 3 ++ 5 files changed, 23 insertions(+), 58 deletions(-) diff --git a/drivers/infiniband/core/device.c b/drivers/infiniband/core/device.c index c895d7bfa512..051c018fb73c 100644 --- a/drivers/infiniband/core/device.c +++ b/drivers/infiniband/core/device.c @@ -848,6 +848,20 @@ static int setup_port_data(struct ib_device *device) return 0; } +/** + * ib_port_immutable_read() - Read rdma port's immutable data + * @dev - IB device + * @port - port number whose immutable data to read. It starts with index 1 and + * valid upto including rdma_end_port(). + */ +const struct ib_port_immutable* +ib_port_immutable_read(struct ib_device *dev, unsigned int port) +{ + WARN_ON(!rdma_is_port_valid(dev, port)); + return &dev->port_data[port].immutable; +} +EXPORT_SYMBOL(ib_port_immutable_read); + void ib_get_device_fw_str(struct ib_device *dev, char *str) { if (dev->ops.get_dev_fw_str) diff --git a/drivers/infiniband/hw/mlx5/main.c b/drivers/infiniband/hw/mlx5/main.c index 176f2a866f12..1e1e3edcb1d5 100644 --- a/drivers/infiniband/hw/mlx5/main.c +++ b/drivers/infiniband/hw/mlx5/main.c @@ -2964,41 +2964,6 @@ static void get_ext_port_caps(struct mlx5_ib_dev *dev) mlx5_query_ext_port_caps(dev, port); } -static int __get_port_caps(struct mlx5_ib_dev *dev, u8 port) -{ - struct ib_port_attr *pprops = NULL; - int err = -ENOMEM; - - pprops = kzalloc(sizeof(*pprops), GFP_KERNEL); - if (!pprops) - goto out; - - err = mlx5_ib_query_port(&dev->ib_dev, port, pprops); - if (err) { - mlx5_ib_warn(dev, "query_port %d failed %d\n", - port, err); - goto out; - } - - dev->port_caps[port - 1].gid_table_len = pprops->gid_tbl_len; - mlx5_ib_dbg(dev, "port %d: pkey_table_len %d, gid_table_len %d\n", - port, dev->pkey_table_len, pprops->gid_tbl_len); - -out: - kfree(pprops); - return err; -} - -static int get_port_caps(struct mlx5_ib_dev *dev, u8 port) -{ - /* For representors use port 1, is this is the only native - * port - */ - if (dev->is_rep) - return __get_port_caps(dev, 1); - return __get_port_caps(dev, port); -} - static u8 mlx5_get_umr_fence(u8 umr_fence_cap) { switch (umr_fence_cap) { @@ -3472,10 +3437,6 @@ static bool mlx5_ib_bind_slave_port(struct mlx5_ib_dev *ibdev, if (err) goto unbind; - err = get_port_caps(ibdev, mlx5_core_native_port_num(mpi->mdev)); - if (err) - goto unbind; - err = mlx5_add_netdev_notifier(ibdev, port_num); if (err) { mlx5_ib_err(ibdev, "failed adding netdev notifier for port %u\n", @@ -3553,11 +3514,9 @@ static int mlx5_ib_init_multiport_master(struct mlx5_ib_dev *dev) break; } } - if (!bound) { - get_port_caps(dev, i + 1); + if (!bound) mlx5_ib_dbg(dev, "no free port found for port %d\n", i + 1); - } } list_add_tail(&dev->ib_dev_list, &mlx5_ib_dev_list); @@ -3940,18 +3899,6 @@ static int mlx5_ib_stage_init_init(struct mlx5_ib_dev *dev) if (err) goto err_mp; - if (!mlx5_core_mp_enabled(mdev)) { - for (i = 1; i <= dev->num_ports; i++) { - err = get_port_caps(dev, i); - if (err) - break; - } - } else { - err = get_port_caps(dev, mlx5_core_native_port_num(mdev)); - } - if (err) - goto err_mp; - err = mlx5_query_max_pkeys(&dev->ib_dev, &dev->pkey_table_len); if (err) goto err_mp; diff --git a/drivers/infiniband/hw/mlx5/mlx5_ib.h b/drivers/infiniband/hw/mlx5/mlx5_ib.h index 36a92f3c29e3..0f567d570230 100644 --- a/drivers/infiniband/hw/mlx5/mlx5_ib.h +++ b/drivers/infiniband/hw/mlx5/mlx5_ib.h @@ -1037,7 +1037,6 @@ struct mlx5_var_table { }; struct mlx5_port_caps { - int gid_table_len; bool has_smi; u8 ext_port_cap; }; diff --git a/drivers/infiniband/hw/mlx5/qp.c b/drivers/infiniband/hw/mlx5/qp.c index 475470237e0b..358c44c5a8fc 100644 --- a/drivers/infiniband/hw/mlx5/qp.c +++ b/drivers/infiniband/hw/mlx5/qp.c @@ -3176,11 +3176,13 @@ static int mlx5_set_path(struct mlx5_ib_dev *dev, struct mlx5_ib_qp *qp, alt ? attr->alt_pkey_index : attr->pkey_index); if (ah_flags & IB_AH_GRH) { - if (grh->sgid_index >= - dev->port_caps[port - 1].gid_table_len) { + const struct ib_port_immutable *immutable; + + immutable = ib_port_immutable_read(&dev->ib_dev, port); + if (grh->sgid_index >= immutable->gid_tbl_len) { pr_err("sgid_index (%u) too large. max is %d\n", grh->sgid_index, - dev->port_caps[port - 1].gid_table_len); + immutable->gid_tbl_len); return -EINVAL; } } diff --git a/include/rdma/ib_verbs.h b/include/rdma/ib_verbs.h index 62e574c50555..ca28fca5736b 100644 --- a/include/rdma/ib_verbs.h +++ b/include/rdma/ib_verbs.h @@ -4674,4 +4674,7 @@ static inline u32 rdma_calc_flow_label(u32 lqpn, u32 rqpn) return (u32)(v & IB_GRH_FLOWLABEL_MASK); } + +const struct ib_port_immutable* +ib_port_immutable_read(struct ib_device *dev, unsigned int port); #endif /* IB_VERBS_H */ From 131796524fa346553d9a325ed77bbee0b63d0ca9 Mon Sep 17 00:00:00 2001 From: Parav Pandit Date: Wed, 3 Feb 2021 15:01:33 +0200 Subject: [PATCH 320/414] IB/mlx5: Use rdma_for_each_port for port iteration Instead of open coding the loop for port iteration, use rdma_for_each_port macro provided by core. To use such macro, early initialization of phys_port_cnt is needed. Hence, initialize such constant early enough in the init stage. Whichever functions are called with port using rdma_for_each_port(), convert their port type from u8 to unsigned int to match the core API. Link: https://lore.kernel.org/r/20210203130133.4057329-6-leon@kernel.org Signed-off-by: Parav Pandit Signed-off-by: Leon Romanovsky Signed-off-by: Jason Gunthorpe --- drivers/infiniband/hw/mlx5/mad.c | 2 +- drivers/infiniband/hw/mlx5/main.c | 15 ++++++++------- drivers/infiniband/hw/mlx5/mlx5_ib.h | 2 +- 3 files changed, 10 insertions(+), 9 deletions(-) diff --git a/drivers/infiniband/hw/mlx5/mad.c b/drivers/infiniband/hw/mlx5/mad.c index cdb47a00e516..652c6ccf1881 100644 --- a/drivers/infiniband/hw/mlx5/mad.c +++ b/drivers/infiniband/hw/mlx5/mad.c @@ -279,7 +279,7 @@ int mlx5_ib_process_mad(struct ib_device *ibdev, int mad_flags, u8 port_num, return IB_MAD_RESULT_SUCCESS | IB_MAD_RESULT_REPLY; } -int mlx5_query_ext_port_caps(struct mlx5_ib_dev *dev, u8 port) +int mlx5_query_ext_port_caps(struct mlx5_ib_dev *dev, unsigned int port) { struct ib_smp *in_mad = NULL; struct ib_smp *out_mad = NULL; diff --git a/drivers/infiniband/hw/mlx5/main.c b/drivers/infiniband/hw/mlx5/main.c index 1e1e3edcb1d5..ce4a9eba53f9 100644 --- a/drivers/infiniband/hw/mlx5/main.c +++ b/drivers/infiniband/hw/mlx5/main.c @@ -2958,9 +2958,9 @@ static int set_has_smi_cap(struct mlx5_ib_dev *dev) static void get_ext_port_caps(struct mlx5_ib_dev *dev) { - int port; + unsigned int port; - for (port = 1; port <= dev->num_ports; port++) + rdma_for_each_port (&dev->ib_dev, port) mlx5_query_ext_port_caps(dev, port); } @@ -3881,6 +3881,12 @@ static int mlx5_ib_stage_init_init(struct mlx5_ib_dev *dev) int err; int i; + dev->ib_dev.node_type = RDMA_NODE_IB_CA; + dev->ib_dev.local_dma_lkey = 0 /* not supported for now */; + dev->ib_dev.phys_port_cnt = dev->num_ports; + dev->ib_dev.dev.parent = mdev->device; + dev->ib_dev.lag_flags = RDMA_LAG_FLAGS_HASH_ALL_SLAVES; + for (i = 0; i < dev->num_ports; i++) { spin_lock_init(&dev->port[i].mp.mpi_lock); rwlock_init(&dev->port[i].roce.netdev_lock); @@ -3906,12 +3912,7 @@ static int mlx5_ib_stage_init_init(struct mlx5_ib_dev *dev) if (mlx5_use_mad_ifc(dev)) get_ext_port_caps(dev); - dev->ib_dev.node_type = RDMA_NODE_IB_CA; - dev->ib_dev.local_dma_lkey = 0 /* not supported for now */; - dev->ib_dev.phys_port_cnt = dev->num_ports; dev->ib_dev.num_comp_vectors = mlx5_comp_vectors_count(mdev); - dev->ib_dev.dev.parent = mdev->device; - dev->ib_dev.lag_flags = RDMA_LAG_FLAGS_HASH_ALL_SLAVES; err = init_srcu_struct(&dev->odp_srcu); if (err) diff --git a/drivers/infiniband/hw/mlx5/mlx5_ib.h b/drivers/infiniband/hw/mlx5/mlx5_ib.h index 0f567d570230..ab52083634e6 100644 --- a/drivers/infiniband/hw/mlx5/mlx5_ib.h +++ b/drivers/infiniband/hw/mlx5/mlx5_ib.h @@ -1299,7 +1299,7 @@ int mlx5_ib_process_mad(struct ib_device *ibdev, int mad_flags, u8 port_num, size_t *out_mad_size, u16 *out_mad_pkey_index); int mlx5_ib_alloc_xrcd(struct ib_xrcd *xrcd, struct ib_udata *udata); int mlx5_ib_dealloc_xrcd(struct ib_xrcd *xrcd, struct ib_udata *udata); -int mlx5_query_ext_port_caps(struct mlx5_ib_dev *dev, u8 port); +int mlx5_query_ext_port_caps(struct mlx5_ib_dev *dev, unsigned int port); int mlx5_query_mad_ifc_system_image_guid(struct ib_device *ibdev, __be64 *sys_image_guid); int mlx5_query_mad_ifc_max_pkeys(struct ib_device *ibdev, From 7d9ae80e31df57dd3253e1ec514f0000aa588a81 Mon Sep 17 00:00:00 2001 From: Bob Pearson Date: Wed, 27 Jan 2021 15:45:01 -0600 Subject: [PATCH 321/414] RDMA/rxe: Fix coding error in rxe_recv.c check_type_state() in rxe_recv.c is written as if the type bits in the packet opcode were a bit mask which is not correct. This patch corrects this code to compare all 3 type bits to the required type. Fixes: 8700e3e7c485 ("Soft RoCE driver") Link: https://lore.kernel.org/r/20210127214500.3707-1-rpearson@hpe.com Signed-off-by: Bob Pearson Signed-off-by: Jason Gunthorpe --- drivers/infiniband/sw/rxe/rxe_recv.c | 11 ++++++++--- 1 file changed, 8 insertions(+), 3 deletions(-) diff --git a/drivers/infiniband/sw/rxe/rxe_recv.c b/drivers/infiniband/sw/rxe/rxe_recv.c index c9984a28eecc..db0ee5c3962e 100644 --- a/drivers/infiniband/sw/rxe/rxe_recv.c +++ b/drivers/infiniband/sw/rxe/rxe_recv.c @@ -9,21 +9,26 @@ #include "rxe.h" #include "rxe_loc.h" +/* check that QP matches packet opcode type and is in a valid state */ static int check_type_state(struct rxe_dev *rxe, struct rxe_pkt_info *pkt, struct rxe_qp *qp) { + unsigned int pkt_type; + if (unlikely(!qp->valid)) goto err1; + pkt_type = pkt->opcode & 0xe0; + switch (qp_type(qp)) { case IB_QPT_RC: - if (unlikely((pkt->opcode & IB_OPCODE_RC) != 0)) { + if (unlikely(pkt_type != IB_OPCODE_RC)) { pr_warn_ratelimited("bad qp type\n"); goto err1; } break; case IB_QPT_UC: - if (unlikely(!(pkt->opcode & IB_OPCODE_UC))) { + if (unlikely(pkt_type != IB_OPCODE_UC)) { pr_warn_ratelimited("bad qp type\n"); goto err1; } @@ -31,7 +36,7 @@ static int check_type_state(struct rxe_dev *rxe, struct rxe_pkt_info *pkt, case IB_QPT_UD: case IB_QPT_SMI: case IB_QPT_GSI: - if (unlikely(!(pkt->opcode & IB_OPCODE_UD))) { + if (unlikely(pkt_type != IB_OPCODE_UD)) { pr_warn_ratelimited("bad qp type\n"); goto err1; } From e328197423e09094aff48619ebef6671ff64d3b2 Mon Sep 17 00:00:00 2001 From: Bob Pearson Date: Wed, 27 Jan 2021 16:42:04 -0600 Subject: [PATCH 322/414] RDMA/rxe: Remove useless code in rxe_recv.c In check_keys() in rxe_recv.c if ((...) && pkt->mask) { ... } always has pkt->mask non zero since in rxe_udp_encap_recv() pkt->mask is always set to RXE_GRH_MASK (!= 0). There is no obvious reason for this additional test and the original intent is lost. This patch simplifies the expression. Fixes: 8b7b59d030cc ("IB/rxe: remove redudant qpn check") Link: https://lore.kernel.org/r/20210127224203.2812-1-rpearson@hpe.com Signed-off-by: Bob Pearson Signed-off-by: Jason Gunthorpe --- drivers/infiniband/sw/rxe/rxe_recv.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/drivers/infiniband/sw/rxe/rxe_recv.c b/drivers/infiniband/sw/rxe/rxe_recv.c index db0ee5c3962e..0dd163b745fe 100644 --- a/drivers/infiniband/sw/rxe/rxe_recv.c +++ b/drivers/infiniband/sw/rxe/rxe_recv.c @@ -90,8 +90,7 @@ static int check_keys(struct rxe_dev *rxe, struct rxe_pkt_info *pkt, goto err1; } - if ((qp_type(qp) == IB_QPT_UD || qp_type(qp) == IB_QPT_GSI) && - pkt->mask) { + if (qp_type(qp) == IB_QPT_UD || qp_type(qp) == IB_QPT_GSI) { u32 qkey = (qpn == 1) ? GSI_QKEY : qp->attr.qkey; if (unlikely(deth_qkey(pkt) != qkey)) { From 8fc1b7027fc162738d5a85c82410e501a371a404 Mon Sep 17 00:00:00 2001 From: Bob Pearson Date: Thu, 28 Jan 2021 11:47:53 -0600 Subject: [PATCH 323/414] RDMA/rxe: Fix coding error in rxe_rcv_mcast_pkt rxe_rcv_mcast_pkt() in rxe_recv.c can leak SKBs in error path code. The loop over the QPs attached to a multicast group creates new cloned SKBs for all but the last QP in the list and passes the SKB and its clones to rxe_rcv_pkt() for further processing. Any QPs that do not pass some checks are skipped. If the last QP in the list fails the tests the SKB is leaked. This patch checks if the SKB for the last QP was used and if not frees it. Also removes a redundant loop invariant assignment. Fixes: 8700e3e7c485 ("Soft RoCE driver") Fixes: 71abf20b28ff ("RDMA/rxe: Handle skb_clone() failure in rxe_recv.c") Link: https://lore.kernel.org/r/20210128174752.16128-1-rpearson@hpe.com Signed-off-by: Bob Pearson Signed-off-by: Jason Gunthorpe --- drivers/infiniband/sw/rxe/rxe_recv.c | 16 ++++++++++------ 1 file changed, 10 insertions(+), 6 deletions(-) diff --git a/drivers/infiniband/sw/rxe/rxe_recv.c b/drivers/infiniband/sw/rxe/rxe_recv.c index 0dd163b745fe..2bbcea61b780 100644 --- a/drivers/infiniband/sw/rxe/rxe_recv.c +++ b/drivers/infiniband/sw/rxe/rxe_recv.c @@ -256,7 +256,6 @@ static void rxe_rcv_mcast_pkt(struct rxe_dev *rxe, struct sk_buff *skb) list_for_each_entry(mce, &mcg->qp_list, qp_list) { qp = mce->qp; - pkt = SKB_TO_PKT(skb); /* validate qp for incoming packet */ err = check_type_state(rxe, pkt, qp); @@ -268,12 +267,18 @@ static void rxe_rcv_mcast_pkt(struct rxe_dev *rxe, struct sk_buff *skb) continue; /* for all but the last qp create a new clone of the - * skb and pass to the qp. + * skb and pass to the qp. If an error occurs in the + * checks for the last qp in the list we need to + * free the skb since it hasn't been passed on to + * rxe_rcv_pkt() which would free it later. */ - if (mce->qp_list.next != &mcg->qp_list) + if (mce->qp_list.next != &mcg->qp_list) { per_qp_skb = skb_clone(skb, GFP_ATOMIC); - else + } else { per_qp_skb = skb; + /* show we have consumed the skb */ + skb = NULL; + } if (unlikely(!per_qp_skb)) continue; @@ -288,9 +293,8 @@ static void rxe_rcv_mcast_pkt(struct rxe_dev *rxe, struct sk_buff *skb) rxe_drop_ref(mcg); /* drop ref from rxe_pool_get_key. */ - return; - err1: + /* free skb if not consumed */ kfree_skb(skb); } From 5120bf0a5fc15dec210a0fe0f39e4a256bb6e349 Mon Sep 17 00:00:00 2001 From: Bob Pearson Date: Thu, 28 Jan 2021 12:23:02 -0600 Subject: [PATCH 324/414] RDMA/rxe: Correct skb on loopback path rxe_net.c sends packets at the IP layer with skb->data pointing at the IP header but receives packets from a UDP tunnel with skb->data pointing at the UDP header. On the loopback path this was not correctly accounted for. This patch corrects for this by using sbk_pull() to strip the IP header from the skb on received packets. Fixes: 8700e3e7c485 ("Soft RoCE driver") Link: https://lore.kernel.org/r/20210128182301.16859-1-rpearson@hpe.com Signed-off-by: Bob Pearson Signed-off-by: Jason Gunthorpe --- drivers/infiniband/sw/rxe/rxe_net.c | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/drivers/infiniband/sw/rxe/rxe_net.c b/drivers/infiniband/sw/rxe/rxe_net.c index c4b06ced30a7..0d4125b867b7 100644 --- a/drivers/infiniband/sw/rxe/rxe_net.c +++ b/drivers/infiniband/sw/rxe/rxe_net.c @@ -408,6 +408,11 @@ int rxe_send(struct rxe_pkt_info *pkt, struct sk_buff *skb) void rxe_loopback(struct sk_buff *skb) { + if (skb->protocol == htons(ETH_P_IP)) + skb_pull(skb, sizeof(struct iphdr)); + else + skb_pull(skb, sizeof(struct ipv6hdr)); + rxe_rcv(skb); } From a92a90ac62d3284a8deaef43c6ea4dbd5dd878b0 Mon Sep 17 00:00:00 2001 From: Dan Carpenter Date: Fri, 5 Feb 2021 15:46:17 +0300 Subject: [PATCH 325/414] iommu/mediatek: Fix error code in probe() This error path is supposed to return -EINVAL. It used to return directly but we added some clean up and accidentally removed the error code. Also I fixed a typo in the error message. Fixes: c0b57581b73b ("iommu/mediatek: Add power-domain operation") Signed-off-by: Dan Carpenter Reviewed-by: Yong Wu Link: https://lore.kernel.org/r/YB0+GU5akSdu29Vu@mwanda Signed-off-by: Joerg Roedel --- drivers/iommu/mtk_iommu.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/iommu/mtk_iommu.c b/drivers/iommu/mtk_iommu.c index 1f262621ef19..6ecc007f07cd 100644 --- a/drivers/iommu/mtk_iommu.c +++ b/drivers/iommu/mtk_iommu.c @@ -886,7 +886,8 @@ static int mtk_iommu_probe(struct platform_device *pdev) link = device_link_add(data->smicomm_dev, dev, DL_FLAG_STATELESS | DL_FLAG_PM_RUNTIME); if (!link) { - dev_err(dev, "Unable link %s.\n", dev_name(data->smicomm_dev)); + dev_err(dev, "Unable to link %s.\n", dev_name(data->smicomm_dev)); + ret = -EINVAL; goto out_runtime_disable; } From cc6e70bd5b0b2078eb558175db836f1464e23f5c Mon Sep 17 00:00:00 2001 From: Lukas Bulwahn Date: Mon, 8 Feb 2021 07:10:25 +0100 Subject: [PATCH 326/414] MAINTAINERS: repair file pattern in MEDIATEK IOMMU DRIVER Commit 6af4873852c4 ("MAINTAINERS: Add entry for MediaTek IOMMU") mentions the pattern 'drivers/iommu/mtk-iommu*', but the files are actually named with an underscore, not with a hyphen. Hence, ./scripts/get_maintainer.pl --self-test=patterns complains: warning: no file matches F: drivers/iommu/mtk-iommu* Repair this minor typo in the file pattern. Signed-off-by: Lukas Bulwahn Acked-by: Yong Wu Link: https://lore.kernel.org/r/20210208061025.29198-1-lukas.bulwahn@gmail.com Signed-off-by: Joerg Roedel --- MAINTAINERS | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/MAINTAINERS b/MAINTAINERS index b50eb5320189..27e525f36bc5 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -11184,7 +11184,7 @@ L: iommu@lists.linux-foundation.org L: linux-mediatek@lists.infradead.org (moderated for non-subscribers) S: Supported F: Documentation/devicetree/bindings/iommu/mediatek* -F: drivers/iommu/mtk-iommu* +F: drivers/iommu/mtk_iommu* F: include/dt-bindings/memory/mt*-port.h MEDIATEK JPEG DRIVER From 93f7a6d818deef69d0ba652d46bae6fbabbf365c Mon Sep 17 00:00:00 2001 From: Sumit Garg Date: Fri, 22 Jan 2021 16:35:56 +0530 Subject: [PATCH 327/414] kdb: Make memory allocations more robust Currently kdb uses in_interrupt() to determine whether its library code has been called from the kgdb trap handler or from a saner calling context such as driver init. This approach is broken because in_interrupt() alone isn't able to determine kgdb trap handler entry from normal task context. This can happen during normal use of basic features such as breakpoints and can also be trivially reproduced using: echo g > /proc/sysrq-trigger We can improve this by adding check for in_dbg_master() instead which explicitly determines if we are running in debugger context. Cc: stable@vger.kernel.org Signed-off-by: Sumit Garg Link: https://lore.kernel.org/r/1611313556-4004-1-git-send-email-sumit.garg@linaro.org Signed-off-by: Daniel Thompson --- kernel/debug/kdb/kdb_private.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/kernel/debug/kdb/kdb_private.h b/kernel/debug/kdb/kdb_private.h index 0a56d35f6eea..6cb92f7bbbd0 100644 --- a/kernel/debug/kdb/kdb_private.h +++ b/kernel/debug/kdb/kdb_private.h @@ -230,7 +230,7 @@ extern struct task_struct *kdb_curr_task(int); #define kdb_task_has_cpu(p) (task_curr(p)) -#define GFP_KDB (in_interrupt() ? GFP_ATOMIC : GFP_KERNEL) +#define GFP_KDB (in_dbg_master() ? GFP_ATOMIC : GFP_KERNEL) extern void *debug_kmalloc(size_t size, gfp_t flags); extern void debug_kfree(void *); From 899aba891cab1555c9ca16a558769efb177baf44 Mon Sep 17 00:00:00 2001 From: Bob Pearson Date: Thu, 28 Jan 2021 17:33:19 -0600 Subject: [PATCH 328/414] RDMA/rxe: Fix FIXME in rxe_udp_encap_recv() rxe_udp_encap_recv() drops the reference to rxe->ib_dev taken by rxe_get_dev_from_net() which should be held until each received skb is freed. This patch moves the calls to ib_device_put() to each place a received skb is freed. It also takes references to the ib_device for each cloned skb created to process received multicast packets. Fixes: 4c173f596b3f ("RDMA/rxe: Use ib_device_get_by_netdev() instead of open coding") Link: https://lore.kernel.org/r/20210128233318.2591-1-rpearson@hpe.com Signed-off-by: Bob Pearson Signed-off-by: Jason Gunthorpe --- drivers/infiniband/sw/rxe/rxe_comp.c | 49 ++++++++++++---------------- drivers/infiniband/sw/rxe/rxe_net.c | 12 +++---- drivers/infiniband/sw/rxe/rxe_recv.c | 6 ++++ drivers/infiniband/sw/rxe/rxe_resp.c | 3 ++ 4 files changed, 35 insertions(+), 35 deletions(-) diff --git a/drivers/infiniband/sw/rxe/rxe_comp.c b/drivers/infiniband/sw/rxe/rxe_comp.c index 0a1e6393250b..a8ac791a1bb9 100644 --- a/drivers/infiniband/sw/rxe/rxe_comp.c +++ b/drivers/infiniband/sw/rxe/rxe_comp.c @@ -515,6 +515,7 @@ static void rxe_drain_resp_pkts(struct rxe_qp *qp, bool notify) while ((skb = skb_dequeue(&qp->resp_pkts))) { rxe_drop_ref(qp); kfree_skb(skb); + ib_device_put(qp->ibqp.device); } while ((wqe = queue_head(qp->sq.queue))) { @@ -527,6 +528,17 @@ static void rxe_drain_resp_pkts(struct rxe_qp *qp, bool notify) } } +static void free_pkt(struct rxe_pkt_info *pkt) +{ + struct sk_buff *skb = PKT_TO_SKB(pkt); + struct rxe_qp *qp = pkt->qp; + struct ib_device *dev = qp->ibqp.device; + + kfree_skb(skb); + rxe_drop_ref(qp); + ib_device_put(dev); +} + int rxe_completer(void *arg) { struct rxe_qp *qp = (struct rxe_qp *)arg; @@ -624,11 +636,8 @@ int rxe_completer(void *arg) break; case COMPST_DONE: - if (pkt) { - rxe_drop_ref(pkt->qp); - kfree_skb(skb); - skb = NULL; - } + if (pkt) + free_pkt(pkt); goto done; case COMPST_EXIT: @@ -671,12 +680,8 @@ int rxe_completer(void *arg) */ if (qp->comp.started_retry && !qp->comp.timeout_retry) { - if (pkt) { - rxe_drop_ref(pkt->qp); - kfree_skb(skb); - skb = NULL; - } - + if (pkt) + free_pkt(pkt); goto done; } @@ -699,13 +704,8 @@ int rxe_completer(void *arg) qp->comp.started_retry = 1; rxe_run_task(&qp->req.task, 0); } - - if (pkt) { - rxe_drop_ref(pkt->qp); - kfree_skb(skb); - skb = NULL; - } - + if (pkt) + free_pkt(pkt); goto done; } else { @@ -726,9 +726,7 @@ int rxe_completer(void *arg) mod_timer(&qp->rnr_nak_timer, jiffies + rnrnak_jiffies(aeth_syn(pkt) & ~AETH_TYPE_MASK)); - rxe_drop_ref(pkt->qp); - kfree_skb(skb); - skb = NULL; + free_pkt(pkt); goto exit; } else { rxe_counter_inc(rxe, @@ -742,13 +740,8 @@ int rxe_completer(void *arg) WARN_ON_ONCE(wqe->status == IB_WC_SUCCESS); do_complete(qp, wqe); rxe_qp_error(qp); - - if (pkt) { - rxe_drop_ref(pkt->qp); - kfree_skb(skb); - skb = NULL; - } - + if (pkt) + free_pkt(pkt); goto exit; } } diff --git a/drivers/infiniband/sw/rxe/rxe_net.c b/drivers/infiniband/sw/rxe/rxe_net.c index 0d4125b867b7..36d56163afac 100644 --- a/drivers/infiniband/sw/rxe/rxe_net.c +++ b/drivers/infiniband/sw/rxe/rxe_net.c @@ -152,10 +152,14 @@ static struct dst_entry *rxe_find_route(struct net_device *ndev, static int rxe_udp_encap_recv(struct sock *sk, struct sk_buff *skb) { struct udphdr *udph; + struct rxe_dev *rxe; struct net_device *ndev = skb->dev; - struct rxe_dev *rxe = rxe_get_dev_from_net(ndev); struct rxe_pkt_info *pkt = SKB_TO_PKT(skb); + /* takes a reference on rxe->ib_dev + * drop when skb is freed + */ + rxe = rxe_get_dev_from_net(ndev); if (!rxe) goto drop; @@ -174,12 +178,6 @@ static int rxe_udp_encap_recv(struct sock *sk, struct sk_buff *skb) rxe_rcv(skb); - /* - * FIXME: this is in the wrong place, it needs to be done when pkt is - * destroyed - */ - ib_device_put(&rxe->ib_dev); - return 0; drop: kfree_skb(skb); diff --git a/drivers/infiniband/sw/rxe/rxe_recv.c b/drivers/infiniband/sw/rxe/rxe_recv.c index 2bbcea61b780..8a48a33d587b 100644 --- a/drivers/infiniband/sw/rxe/rxe_recv.c +++ b/drivers/infiniband/sw/rxe/rxe_recv.c @@ -274,6 +274,10 @@ static void rxe_rcv_mcast_pkt(struct rxe_dev *rxe, struct sk_buff *skb) */ if (mce->qp_list.next != &mcg->qp_list) { per_qp_skb = skb_clone(skb, GFP_ATOMIC); + if (WARN_ON(!ib_device_try_get(&rxe->ib_dev))) { + kfree_skb(per_qp_skb); + continue; + } } else { per_qp_skb = skb; /* show we have consumed the skb */ @@ -296,6 +300,7 @@ static void rxe_rcv_mcast_pkt(struct rxe_dev *rxe, struct sk_buff *skb) err1: /* free skb if not consumed */ kfree_skb(skb); + ib_device_put(&rxe->ib_dev); } /** @@ -405,4 +410,5 @@ void rxe_rcv(struct sk_buff *skb) rxe_drop_ref(pkt->qp); kfree_skb(skb); + ib_device_put(&rxe->ib_dev); } diff --git a/drivers/infiniband/sw/rxe/rxe_resp.c b/drivers/infiniband/sw/rxe/rxe_resp.c index 5a098083a9d2..5fd26786d79b 100644 --- a/drivers/infiniband/sw/rxe/rxe_resp.c +++ b/drivers/infiniband/sw/rxe/rxe_resp.c @@ -99,6 +99,7 @@ static inline enum resp_states get_req(struct rxe_qp *qp, while ((skb = skb_dequeue(&qp->req_pkts))) { rxe_drop_ref(qp); kfree_skb(skb); + ib_device_put(qp->ibqp.device); } /* go drain recv wr queue */ @@ -1012,6 +1013,7 @@ static enum resp_states cleanup(struct rxe_qp *qp, skb = skb_dequeue(&qp->req_pkts); rxe_drop_ref(qp); kfree_skb(skb); + ib_device_put(qp->ibqp.device); } if (qp->resp.mr) { @@ -1176,6 +1178,7 @@ static void rxe_drain_req_pkts(struct rxe_qp *qp, bool notify) while ((skb = skb_dequeue(&qp->req_pkts))) { rxe_drop_ref(qp); kfree_skb(skb); + ib_device_put(qp->ibqp.device); } if (notify) From cfd607e43da4a20753744f134e201310262b827a Mon Sep 17 00:00:00 2001 From: Daniel Latypov Date: Wed, 2 Dec 2020 11:08:21 -0800 Subject: [PATCH 329/414] kunit: tool: fix unit test cleanup handling * Stop leaking file objects. * Use self.addCleanup() to ensure we call cleanup functions even if setUp() fails. * use mock.patch.stopall instead of more error-prone manual approach Signed-off-by: Daniel Latypov Reviewed-by: David Gow Tested-by: Brendan Higgins Acked-by: Brendan Higgins Signed-off-by: Shuah Khan --- tools/testing/kunit/kunit_tool_test.py | 14 ++++++-------- 1 file changed, 6 insertions(+), 8 deletions(-) diff --git a/tools/testing/kunit/kunit_tool_test.py b/tools/testing/kunit/kunit_tool_test.py index b593f4448e83..9a036e9d4455 100755 --- a/tools/testing/kunit/kunit_tool_test.py +++ b/tools/testing/kunit/kunit_tool_test.py @@ -288,19 +288,17 @@ class StrContains(str): class KUnitMainTest(unittest.TestCase): def setUp(self): path = get_absolute_path('test_data/test_is_test_passed-all_passed.log') - file = open(path) - all_passed_log = file.readlines() - self.print_patch = mock.patch('builtins.print') - self.print_mock = self.print_patch.start() + with open(path) as file: + all_passed_log = file.readlines() + + self.print_mock = mock.patch('builtins.print').start() + self.addCleanup(mock.patch.stopall) + self.linux_source_mock = mock.Mock() self.linux_source_mock.build_reconfig = mock.Mock(return_value=True) self.linux_source_mock.build_um_kernel = mock.Mock(return_value=True) self.linux_source_mock.run_kernel = mock.Mock(return_value=all_passed_log) - def tearDown(self): - self.print_patch.stop() - pass - def test_config_passes_args_pass(self): kunit.main(['config', '--build_dir=.kunit'], self.linux_source_mock) assert self.linux_source_mock.build_reconfig.call_count == 1 From 0b3e68076bb9a8e1b1bd448994b9c57828173d8e Mon Sep 17 00:00:00 2001 From: Daniel Latypov Date: Wed, 2 Dec 2020 11:08:22 -0800 Subject: [PATCH 330/414] kunit: tool: stop using bare asserts in unit test Use self.assertEqual/assertNotEqual() instead. Besides being more appropriate in a unit test, it'll also give a better error message by show the unexpected values. Also * Delete redundant check of exception types. self.assertRaises does this. * s/kall/call. There's no reason to name it this way. * This is probably a misunderstanding from the docs which uses it since `mock.call` is in scope as `call`. Signed-off-by: Daniel Latypov Reviewed-by: David Gow Tested-by: Brendan Higgins Acked-by: Brendan Higgins Signed-off-by: Shuah Khan --- tools/testing/kunit/kunit_tool_test.py | 50 +++++++++++++------------- 1 file changed, 24 insertions(+), 26 deletions(-) diff --git a/tools/testing/kunit/kunit_tool_test.py b/tools/testing/kunit/kunit_tool_test.py index 9a036e9d4455..72f6b4bb1b5a 100755 --- a/tools/testing/kunit/kunit_tool_test.py +++ b/tools/testing/kunit/kunit_tool_test.py @@ -301,26 +301,26 @@ class KUnitMainTest(unittest.TestCase): def test_config_passes_args_pass(self): kunit.main(['config', '--build_dir=.kunit'], self.linux_source_mock) - assert self.linux_source_mock.build_reconfig.call_count == 1 - assert self.linux_source_mock.run_kernel.call_count == 0 + self.assertEqual(self.linux_source_mock.build_reconfig.call_count, 1) + self.assertEqual(self.linux_source_mock.run_kernel.call_count, 0) def test_build_passes_args_pass(self): kunit.main(['build'], self.linux_source_mock) - assert self.linux_source_mock.build_reconfig.call_count == 0 + self.assertEqual(self.linux_source_mock.build_reconfig.call_count, 0) self.linux_source_mock.build_um_kernel.assert_called_once_with(False, 8, '.kunit', None) - assert self.linux_source_mock.run_kernel.call_count == 0 + self.assertEqual(self.linux_source_mock.run_kernel.call_count, 0) def test_exec_passes_args_pass(self): kunit.main(['exec'], self.linux_source_mock) - assert self.linux_source_mock.build_reconfig.call_count == 0 - assert self.linux_source_mock.run_kernel.call_count == 1 + self.assertEqual(self.linux_source_mock.build_reconfig.call_count, 0) + self.assertEqual(self.linux_source_mock.run_kernel.call_count, 1) self.linux_source_mock.run_kernel.assert_called_once_with(build_dir='.kunit', timeout=300) self.print_mock.assert_any_call(StrContains('Testing complete.')) def test_run_passes_args_pass(self): kunit.main(['run'], self.linux_source_mock) - assert self.linux_source_mock.build_reconfig.call_count == 1 - assert self.linux_source_mock.run_kernel.call_count == 1 + self.assertEqual(self.linux_source_mock.build_reconfig.call_count, 1) + self.assertEqual(self.linux_source_mock.run_kernel.call_count, 1) self.linux_source_mock.run_kernel.assert_called_once_with( build_dir='.kunit', timeout=300) self.print_mock.assert_any_call(StrContains('Testing complete.')) @@ -329,35 +329,33 @@ class KUnitMainTest(unittest.TestCase): self.linux_source_mock.run_kernel = mock.Mock(return_value=[]) with self.assertRaises(SystemExit) as e: kunit.main(['exec'], self.linux_source_mock) - assert type(e.exception) == SystemExit - assert e.exception.code == 1 + self.assertEqual(e.exception.code, 1) def test_run_passes_args_fail(self): self.linux_source_mock.run_kernel = mock.Mock(return_value=[]) with self.assertRaises(SystemExit) as e: kunit.main(['run'], self.linux_source_mock) - assert type(e.exception) == SystemExit - assert e.exception.code == 1 - assert self.linux_source_mock.build_reconfig.call_count == 1 - assert self.linux_source_mock.run_kernel.call_count == 1 + self.assertEqual(e.exception.code, 1) + self.assertEqual(self.linux_source_mock.build_reconfig.call_count, 1) + self.assertEqual(self.linux_source_mock.run_kernel.call_count, 1) self.print_mock.assert_any_call(StrContains(' 0 tests run')) def test_exec_raw_output(self): self.linux_source_mock.run_kernel = mock.Mock(return_value=[]) kunit.main(['exec', '--raw_output'], self.linux_source_mock) - assert self.linux_source_mock.run_kernel.call_count == 1 - for kall in self.print_mock.call_args_list: - assert kall != mock.call(StrContains('Testing complete.')) - assert kall != mock.call(StrContains(' 0 tests run')) + self.assertEqual(self.linux_source_mock.run_kernel.call_count, 1) + for call in self.print_mock.call_args_list: + self.assertNotEqual(call, mock.call(StrContains('Testing complete.'))) + self.assertNotEqual(call, mock.call(StrContains(' 0 tests run'))) def test_run_raw_output(self): self.linux_source_mock.run_kernel = mock.Mock(return_value=[]) kunit.main(['run', '--raw_output'], self.linux_source_mock) - assert self.linux_source_mock.build_reconfig.call_count == 1 - assert self.linux_source_mock.run_kernel.call_count == 1 - for kall in self.print_mock.call_args_list: - assert kall != mock.call(StrContains('Testing complete.')) - assert kall != mock.call(StrContains(' 0 tests run')) + self.assertEqual(self.linux_source_mock.build_reconfig.call_count, 1) + self.assertEqual(self.linux_source_mock.run_kernel.call_count, 1) + for call in self.print_mock.call_args_list: + self.assertNotEqual(call, mock.call(StrContains('Testing complete.'))) + self.assertNotEqual(call, mock.call(StrContains(' 0 tests run'))) def test_exec_timeout(self): timeout = 3453 @@ -368,7 +366,7 @@ class KUnitMainTest(unittest.TestCase): def test_run_timeout(self): timeout = 3453 kunit.main(['run', '--timeout', str(timeout)], self.linux_source_mock) - assert self.linux_source_mock.build_reconfig.call_count == 1 + self.assertEqual(self.linux_source_mock.build_reconfig.call_count, 1) self.linux_source_mock.run_kernel.assert_called_once_with( build_dir='.kunit', timeout=timeout) self.print_mock.assert_any_call(StrContains('Testing complete.')) @@ -376,7 +374,7 @@ class KUnitMainTest(unittest.TestCase): def test_run_builddir(self): build_dir = '.kunit' kunit.main(['run', '--build_dir=.kunit'], self.linux_source_mock) - assert self.linux_source_mock.build_reconfig.call_count == 1 + self.assertEqual(self.linux_source_mock.build_reconfig.call_count, 1) self.linux_source_mock.run_kernel.assert_called_once_with( build_dir=build_dir, timeout=300) self.print_mock.assert_any_call(StrContains('Testing complete.')) @@ -384,7 +382,7 @@ class KUnitMainTest(unittest.TestCase): def test_config_builddir(self): build_dir = '.kunit' kunit.main(['config', '--build_dir', build_dir], self.linux_source_mock) - assert self.linux_source_mock.build_reconfig.call_count == 1 + self.assertEqual(self.linux_source_mock.build_reconfig.call_count, 1) def test_build_builddir(self): build_dir = '.kunit' From a3ece0795b9ab234ff196e74606fdef9f463ec5a Mon Sep 17 00:00:00 2001 From: Daniel Latypov Date: Wed, 2 Dec 2020 11:08:23 -0800 Subject: [PATCH 331/414] kunit: tool: use `with open()` in unit test The use of manual open() and .close() calls seems to be an attempt to keep the contents in scope. But Python doesn't restrict variables like that, so we can introduce new variables inside of a `with` and use them outside. Do so to make the code more Pythonic. Signed-off-by: Daniel Latypov Reviewed-by: David Gow Tested-by: Brendan Higgins Acked-by: Brendan Higgins Signed-off-by: Shuah Khan --- tools/testing/kunit/kunit_tool_test.py | 33 +++++++++++--------------- 1 file changed, 14 insertions(+), 19 deletions(-) diff --git a/tools/testing/kunit/kunit_tool_test.py b/tools/testing/kunit/kunit_tool_test.py index 72f6b4bb1b5a..9a36936749f0 100755 --- a/tools/testing/kunit/kunit_tool_test.py +++ b/tools/testing/kunit/kunit_tool_test.py @@ -100,15 +100,14 @@ class KUnitParserTest(unittest.TestCase): def test_output_isolated_correctly(self): log_path = get_absolute_path( 'test_data/test_output_isolated_correctly.log') - file = open(log_path) - result = kunit_parser.isolate_kunit_output(file.readlines()) + with open(log_path) as file: + result = kunit_parser.isolate_kunit_output(file.readlines()) self.assertContains('TAP version 14', result) self.assertContains(' # Subtest: example', result) self.assertContains(' 1..2', result) self.assertContains(' ok 1 - example_simple_test', result) self.assertContains(' ok 2 - example_mock_test', result) self.assertContains('ok 1 - example', result) - file.close() def test_output_with_prefix_isolated_correctly(self): log_path = get_absolute_path( @@ -143,42 +142,39 @@ class KUnitParserTest(unittest.TestCase): def test_parse_successful_test_log(self): all_passed_log = get_absolute_path( 'test_data/test_is_test_passed-all_passed.log') - file = open(all_passed_log) - result = kunit_parser.parse_run_tests(file.readlines()) + with open(all_passed_log) as file: + result = kunit_parser.parse_run_tests(file.readlines()) self.assertEqual( kunit_parser.TestStatus.SUCCESS, result.status) - file.close() def test_parse_failed_test_log(self): failed_log = get_absolute_path( 'test_data/test_is_test_passed-failure.log') - file = open(failed_log) - result = kunit_parser.parse_run_tests(file.readlines()) + with open(failed_log) as file: + result = kunit_parser.parse_run_tests(file.readlines()) self.assertEqual( kunit_parser.TestStatus.FAILURE, result.status) - file.close() def test_no_tests(self): empty_log = get_absolute_path( 'test_data/test_is_test_passed-no_tests_run.log') - file = open(empty_log) - result = kunit_parser.parse_run_tests( - kunit_parser.isolate_kunit_output(file.readlines())) + with open(empty_log) as file: + result = kunit_parser.parse_run_tests( + kunit_parser.isolate_kunit_output(file.readlines())) self.assertEqual(0, len(result.suites)) self.assertEqual( kunit_parser.TestStatus.NO_TESTS, result.status) - file.close() def test_no_kunit_output(self): crash_log = get_absolute_path( 'test_data/test_insufficient_memory.log') - file = open(crash_log) print_mock = mock.patch('builtins.print').start() - result = kunit_parser.parse_run_tests( - kunit_parser.isolate_kunit_output(file.readlines())) + with open(crash_log) as file: + result = kunit_parser.parse_run_tests( + kunit_parser.isolate_kunit_output(file.readlines())) print_mock.assert_any_call(StrContains('no tests run!')) print_mock.stop() file.close() @@ -186,12 +182,11 @@ class KUnitParserTest(unittest.TestCase): def test_crashed_test(self): crashed_log = get_absolute_path( 'test_data/test_is_test_passed-crash.log') - file = open(crashed_log) - result = kunit_parser.parse_run_tests(file.readlines()) + with open(crashed_log) as file: + result = kunit_parser.parse_run_tests(file.readlines()) self.assertEqual( kunit_parser.TestStatus.TEST_CRASHED, result.status) - file.close() def test_ignores_prefix_printk_time(self): prefix_log = get_absolute_path( From cd4a9bc8e0472da94f60f980d325c4825eacd918 Mon Sep 17 00:00:00 2001 From: Daniel Latypov Date: Wed, 2 Dec 2020 11:08:24 -0800 Subject: [PATCH 332/414] minor: kunit: tool: fix unit test so it can run from non-root dir MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Also take this time to rename get_absolute_path() to test_data_path(). 1. the name is currently a lie. It gives relative paths, e.g. if I run from the same dir as the test file, it gives './test_data/' See https://docs.python.org/3/reference/import.html#__file__, which doesn't stipulate that implementations provide absolute paths. 2. it's only used for generating paths to tools/testing/kunit/test_data/ So we can tersen things by making it less general. Cache the absolute path to the test data files per suggestion from [1]. Using relative paths, the tests break because of this code in kunit.py if get_kernel_root_path():         os.chdir(get_kernel_root_path()) [1] https://lore.kernel.org/linux-kselftest/CABVgOSnH0gz7z5JhRCGyG1wg0zDDBTLoSUCoB-gWMeXLgVTo2w@mail.gmail.com/ Fixes: 5578d008d9e0 ("kunit: tool: fix running kunit_tool from outside kernel tree") Signed-off-by: Daniel Latypov Reviewed-by: David Gow Tested-by: Brendan Higgins Acked-by: Brendan Higgins Signed-off-by: Shuah Khan --- tools/testing/kunit/kunit_tool_test.py | 60 +++++++++++--------------- 1 file changed, 24 insertions(+), 36 deletions(-) diff --git a/tools/testing/kunit/kunit_tool_test.py b/tools/testing/kunit/kunit_tool_test.py index 9a36936749f0..888f42129f20 100755 --- a/tools/testing/kunit/kunit_tool_test.py +++ b/tools/testing/kunit/kunit_tool_test.py @@ -21,16 +21,18 @@ import kunit_json import kunit test_tmpdir = '' +abs_test_data_dir = '' def setUpModule(): - global test_tmpdir + global test_tmpdir, abs_test_data_dir test_tmpdir = tempfile.mkdtemp() + abs_test_data_dir = os.path.abspath(os.path.join(os.path.dirname(__file__), 'test_data')) def tearDownModule(): shutil.rmtree(test_tmpdir) -def get_absolute_path(path): - return os.path.join(os.path.dirname(__file__), path) +def test_data_path(path): + return os.path.join(abs_test_data_dir, path) class KconfigTest(unittest.TestCase): @@ -46,8 +48,7 @@ class KconfigTest(unittest.TestCase): def test_read_from_file(self): kconfig = kunit_config.Kconfig() - kconfig_path = get_absolute_path( - 'test_data/test_read_from_file.kconfig') + kconfig_path = test_data_path('test_read_from_file.kconfig') kconfig.read_from_file(kconfig_path) @@ -98,8 +99,7 @@ class KUnitParserTest(unittest.TestCase): str(needle) + '" not found in "' + str(haystack) + '"!') def test_output_isolated_correctly(self): - log_path = get_absolute_path( - 'test_data/test_output_isolated_correctly.log') + log_path = test_data_path('test_output_isolated_correctly.log') with open(log_path) as file: result = kunit_parser.isolate_kunit_output(file.readlines()) self.assertContains('TAP version 14', result) @@ -110,8 +110,7 @@ class KUnitParserTest(unittest.TestCase): self.assertContains('ok 1 - example', result) def test_output_with_prefix_isolated_correctly(self): - log_path = get_absolute_path( - 'test_data/test_pound_sign.log') + log_path = test_data_path('test_pound_sign.log') with open(log_path) as file: result = kunit_parser.isolate_kunit_output(file.readlines()) self.assertContains('TAP version 14', result) @@ -140,8 +139,7 @@ class KUnitParserTest(unittest.TestCase): self.assertContains('ok 3 - string-stream-test', result) def test_parse_successful_test_log(self): - all_passed_log = get_absolute_path( - 'test_data/test_is_test_passed-all_passed.log') + all_passed_log = test_data_path('test_is_test_passed-all_passed.log') with open(all_passed_log) as file: result = kunit_parser.parse_run_tests(file.readlines()) self.assertEqual( @@ -149,8 +147,7 @@ class KUnitParserTest(unittest.TestCase): result.status) def test_parse_failed_test_log(self): - failed_log = get_absolute_path( - 'test_data/test_is_test_passed-failure.log') + failed_log = test_data_path('test_is_test_passed-failure.log') with open(failed_log) as file: result = kunit_parser.parse_run_tests(file.readlines()) self.assertEqual( @@ -158,8 +155,7 @@ class KUnitParserTest(unittest.TestCase): result.status) def test_no_tests(self): - empty_log = get_absolute_path( - 'test_data/test_is_test_passed-no_tests_run.log') + empty_log = test_data_path('test_is_test_passed-no_tests_run.log') with open(empty_log) as file: result = kunit_parser.parse_run_tests( kunit_parser.isolate_kunit_output(file.readlines())) @@ -169,8 +165,7 @@ class KUnitParserTest(unittest.TestCase): result.status) def test_no_kunit_output(self): - crash_log = get_absolute_path( - 'test_data/test_insufficient_memory.log') + crash_log = test_data_path('test_insufficient_memory.log') print_mock = mock.patch('builtins.print').start() with open(crash_log) as file: result = kunit_parser.parse_run_tests( @@ -180,8 +175,7 @@ class KUnitParserTest(unittest.TestCase): file.close() def test_crashed_test(self): - crashed_log = get_absolute_path( - 'test_data/test_is_test_passed-crash.log') + crashed_log = test_data_path('test_is_test_passed-crash.log') with open(crashed_log) as file: result = kunit_parser.parse_run_tests(file.readlines()) self.assertEqual( @@ -189,8 +183,7 @@ class KUnitParserTest(unittest.TestCase): result.status) def test_ignores_prefix_printk_time(self): - prefix_log = get_absolute_path( - 'test_data/test_config_printk_time.log') + prefix_log = test_data_path('test_config_printk_time.log') with open(prefix_log) as file: result = kunit_parser.parse_run_tests(file.readlines()) self.assertEqual( @@ -199,8 +192,7 @@ class KUnitParserTest(unittest.TestCase): self.assertEqual('kunit-resource-test', result.suites[0].name) def test_ignores_multiple_prefixes(self): - prefix_log = get_absolute_path( - 'test_data/test_multiple_prefixes.log') + prefix_log = test_data_path('test_multiple_prefixes.log') with open(prefix_log) as file: result = kunit_parser.parse_run_tests(file.readlines()) self.assertEqual( @@ -209,8 +201,7 @@ class KUnitParserTest(unittest.TestCase): self.assertEqual('kunit-resource-test', result.suites[0].name) def test_prefix_mixed_kernel_output(self): - mixed_prefix_log = get_absolute_path( - 'test_data/test_interrupted_tap_output.log') + mixed_prefix_log = test_data_path('test_interrupted_tap_output.log') with open(mixed_prefix_log) as file: result = kunit_parser.parse_run_tests(file.readlines()) self.assertEqual( @@ -219,7 +210,7 @@ class KUnitParserTest(unittest.TestCase): self.assertEqual('kunit-resource-test', result.suites[0].name) def test_prefix_poundsign(self): - pound_log = get_absolute_path('test_data/test_pound_sign.log') + pound_log = test_data_path('test_pound_sign.log') with open(pound_log) as file: result = kunit_parser.parse_run_tests(file.readlines()) self.assertEqual( @@ -228,7 +219,7 @@ class KUnitParserTest(unittest.TestCase): self.assertEqual('kunit-resource-test', result.suites[0].name) def test_kernel_panic_end(self): - panic_log = get_absolute_path('test_data/test_kernel_panic_interrupt.log') + panic_log = test_data_path('test_kernel_panic_interrupt.log') with open(panic_log) as file: result = kunit_parser.parse_run_tests(file.readlines()) self.assertEqual( @@ -237,7 +228,7 @@ class KUnitParserTest(unittest.TestCase): self.assertEqual('kunit-resource-test', result.suites[0].name) def test_pound_no_prefix(self): - pound_log = get_absolute_path('test_data/test_pound_no_prefix.log') + pound_log = test_data_path('test_pound_no_prefix.log') with open(pound_log) as file: result = kunit_parser.parse_run_tests(file.readlines()) self.assertEqual( @@ -248,7 +239,7 @@ class KUnitParserTest(unittest.TestCase): class KUnitJsonTest(unittest.TestCase): def _json_for(self, log_file): - with(open(get_absolute_path(log_file))) as file: + with open(test_data_path(log_file)) as file: test_result = kunit_parser.parse_run_tests(file) json_obj = kunit_json.get_json_result( test_result=test_result, @@ -258,22 +249,19 @@ class KUnitJsonTest(unittest.TestCase): return json.loads(json_obj) def test_failed_test_json(self): - result = self._json_for( - 'test_data/test_is_test_passed-failure.log') + result = self._json_for('test_is_test_passed-failure.log') self.assertEqual( {'name': 'example_simple_test', 'status': 'FAIL'}, result["sub_groups"][1]["test_cases"][0]) def test_crashed_test_json(self): - result = self._json_for( - 'test_data/test_is_test_passed-crash.log') + result = self._json_for('test_is_test_passed-crash.log') self.assertEqual( {'name': 'example_simple_test', 'status': 'ERROR'}, result["sub_groups"][1]["test_cases"][0]) def test_no_tests_json(self): - result = self._json_for( - 'test_data/test_is_test_passed-no_tests_run.log') + result = self._json_for('test_is_test_passed-no_tests_run.log') self.assertEqual(0, len(result['sub_groups'])) class StrContains(str): @@ -282,7 +270,7 @@ class StrContains(str): class KUnitMainTest(unittest.TestCase): def setUp(self): - path = get_absolute_path('test_data/test_is_test_passed-all_passed.log') + path = test_data_path('test_is_test_passed-all_passed.log') with open(path) as file: all_passed_log = file.readlines() From d3bae4a0b6e1bfbfcff3dbc2a6d96a505e31677e Mon Sep 17 00:00:00 2001 From: Daniel Latypov Date: Tue, 8 Dec 2020 15:21:02 -0800 Subject: [PATCH 333/414] kunit: tool: simplify kconfig is_subset_of() logic Don't use an O(nm) algorithm* and make it more readable by using a dict. *Most obviously, it does a nested for-loop over the entire other config. A bit more subtle, it calls .entries(), which constructs a set from the list for _every_ outer iteration. Signed-off-by: Daniel Latypov Reviewed-by: David Gow Tested-by: Brendan Higgins Acked-by: Brendan Higgins Signed-off-by: Shuah Khan --- tools/testing/kunit/kunit_config.py | 13 ++++++------- 1 file changed, 6 insertions(+), 7 deletions(-) diff --git a/tools/testing/kunit/kunit_config.py b/tools/testing/kunit/kunit_config.py index bdd60230764b..0b550cbd667d 100644 --- a/tools/testing/kunit/kunit_config.py +++ b/tools/testing/kunit/kunit_config.py @@ -41,15 +41,14 @@ class Kconfig(object): self._entries.append(entry) def is_subset_of(self, other: 'Kconfig') -> bool: + other_dict = {e.name: e.value for e in other.entries()} for a in self.entries(): - found = False - for b in other.entries(): - if a.name != b.name: + b = other_dict.get(a.name) + if b is None: + if a.value == 'n': continue - if a.value != b.value: - return False - found = True - if a.value != 'n' and found == False: + return False + elif a.value != b: return False return True From c9ef2d3e3f3b3e56429f56bbea2d16882b054dbe Mon Sep 17 00:00:00 2001 From: Daniel Latypov Date: Tue, 19 Jan 2021 15:52:26 -0800 Subject: [PATCH 334/414] KUnit: Docs: make start.rst example Kconfig follow style.rst The primary change is that we want to encourage people to respect KUNIT_ALL_TESTS to make it easy to run all the relevant tests for a given config. Signed-off-by: Daniel Latypov Reviewed-by: Brendan Higgins Signed-off-by: Shuah Khan --- Documentation/dev-tools/kunit/start.rst | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/Documentation/dev-tools/kunit/start.rst b/Documentation/dev-tools/kunit/start.rst index 454f307813ea..560f27af4619 100644 --- a/Documentation/dev-tools/kunit/start.rst +++ b/Documentation/dev-tools/kunit/start.rst @@ -196,8 +196,9 @@ Now add the following to ``drivers/misc/Kconfig``: .. code-block:: kconfig config MISC_EXAMPLE_TEST - bool "Test for my example" + tristate "Test for my example" if !KUNIT_ALL_TESTS depends on MISC_EXAMPLE && KUNIT=y + default KUNIT_ALL_TESTS and the following to ``drivers/misc/Makefile``: From 7c2b108cbe75f993d5e69d5205a01211fa33417d Mon Sep 17 00:00:00 2001 From: Daniel Latypov Date: Mon, 25 Jan 2021 10:53:33 -0800 Subject: [PATCH 335/414] Documentation: kunit: add tips.rst for small examples ./usage.rst contains fairly long examples and explanations of things like how to fake a class and how to use parameterized tests (and how you could do table-driven tests yourself). It's not exactly necessary information, so we add a new page with more digestible tips like "use kunit_kzalloc() instead of kzalloc() so you don't have to worry about calling kfree() yourself" and the like. Change start.rst to point users to this new page first and let them know that usage.rst is more of optional further reading. Signed-off-by: Daniel Latypov Reviewed-by: Brendan Higgins Signed-off-by: Shuah Khan --- Documentation/dev-tools/kunit/index.rst | 2 + Documentation/dev-tools/kunit/start.rst | 4 +- Documentation/dev-tools/kunit/tips.rst | 115 ++++++++++++++++++++++++ 3 files changed, 120 insertions(+), 1 deletion(-) create mode 100644 Documentation/dev-tools/kunit/tips.rst diff --git a/Documentation/dev-tools/kunit/index.rst b/Documentation/dev-tools/kunit/index.rst index c234a3ab3c34..848478838347 100644 --- a/Documentation/dev-tools/kunit/index.rst +++ b/Documentation/dev-tools/kunit/index.rst @@ -13,6 +13,7 @@ KUnit - Unit Testing for the Linux Kernel api/index style faq + tips What is KUnit? ============== @@ -88,6 +89,7 @@ How do I use it? ================ * :doc:`start` - for new users of KUnit +* :doc:`tips` - for short examples of best practices * :doc:`usage` - for a more detailed explanation of KUnit features * :doc:`api/index` - for the list of KUnit APIs used for testing * :doc:`kunit-tool` - for more information on the kunit_tool helper script diff --git a/Documentation/dev-tools/kunit/start.rst b/Documentation/dev-tools/kunit/start.rst index 560f27af4619..0e65cabe08eb 100644 --- a/Documentation/dev-tools/kunit/start.rst +++ b/Documentation/dev-tools/kunit/start.rst @@ -234,5 +234,7 @@ Congrats! You just wrote your first KUnit test! Next Steps ========== -* Check out the :doc:`usage` page for a more +* Check out the :doc:`tips` page for tips on + writing idiomatic KUnit tests. +* Optional: see the :doc:`usage` page for a more in-depth explanation of KUnit. diff --git a/Documentation/dev-tools/kunit/tips.rst b/Documentation/dev-tools/kunit/tips.rst new file mode 100644 index 000000000000..a6ca0af14098 --- /dev/null +++ b/Documentation/dev-tools/kunit/tips.rst @@ -0,0 +1,115 @@ +.. SPDX-License-Identifier: GPL-2.0 + +============================ +Tips For Writing KUnit Tests +============================ + +Exiting early on failed expectations +------------------------------------ + +``KUNIT_EXPECT_EQ`` and friends will mark the test as failed and continue +execution. In some cases, it's unsafe to continue and you can use the +``KUNIT_ASSERT`` variant to exit on failure. + +.. code-block:: c + + void example_test_user_alloc_function(struct kunit *test) + { + void *object = alloc_some_object_for_me(); + + /* Make sure we got a valid pointer back. */ + KUNIT_ASSERT_NOT_ERR_OR_NULL(test, object); + do_something_with_object(object); + } + +Allocating memory +----------------- + +Where you would use ``kzalloc``, you should prefer ``kunit_kzalloc`` instead. +KUnit will ensure the memory is freed once the test completes. + +This is particularly useful since it lets you use the ``KUNIT_ASSERT_EQ`` +macros to exit early from a test without having to worry about remembering to +call ``kfree``. + +Example: + +.. code-block:: c + + void example_test_allocation(struct kunit *test) + { + char *buffer = kunit_kzalloc(test, 16, GFP_KERNEL); + /* Ensure allocation succeeded. */ + KUNIT_ASSERT_NOT_ERR_OR_NULL(test, buffer); + + KUNIT_ASSERT_STREQ(test, buffer, ""); + } + + +Testing static functions +------------------------ + +If you don't want to expose functions or variables just for testing, one option +is to conditionally ``#include`` the test file at the end of your .c file, e.g. + +.. code-block:: c + + /* In my_file.c */ + + static int do_interesting_thing(); + + #ifdef CONFIG_MY_KUNIT_TEST + #include "my_kunit_test.c" + #endif + +Injecting test-only code +------------------------ + +Similarly to the above, it can be useful to add test-specific logic. + +.. code-block:: c + + /* In my_file.h */ + + #ifdef CONFIG_MY_KUNIT_TEST + /* Defined in my_kunit_test.c */ + void test_only_hook(void); + #else + void test_only_hook(void) { } + #endif + +TODO(dlatypov@google.com): add an example of using ``current->kunit_test`` in +such a hook when it's not only updated for ``CONFIG_KASAN=y``. + +Customizing error messages +-------------------------- + +Each of the ``KUNIT_EXPECT`` and ``KUNIT_ASSERT`` macros have a ``_MSG`` variant. +These take a format string and arguments to provide additional context to the automatically generated error messages. + +.. code-block:: c + + char some_str[41]; + generate_sha1_hex_string(some_str); + + /* Before. Not easy to tell why the test failed. */ + KUNIT_EXPECT_EQ(test, strlen(some_str), 40); + + /* After. Now we see the offending string. */ + KUNIT_EXPECT_EQ_MSG(test, strlen(some_str), 40, "some_str='%s'", some_str); + +Alternatively, one can take full control over the error message by using ``KUNIT_FAIL()``, e.g. + +.. code-block:: c + + /* Before */ + KUNIT_EXPECT_EQ(test, some_setup_function(), 0); + + /* After: full control over the failure message. */ + if (some_setup_function()) + KUNIT_FAIL(test, "Failed to setup thing for testing"); + +Next Steps +========== +* Optional: see the :doc:`usage` page for a more + in-depth explanation of KUnit. From 243180f5924ed27ea417db39feb7f9691777688e Mon Sep 17 00:00:00 2001 From: Daniel Latypov Date: Mon, 1 Feb 2021 12:55:14 -0800 Subject: [PATCH 336/414] kunit: make kunit_tool accept optional path to .kunitconfig fragment Currently running tests via KUnit tool means tweaking a .kunitconfig file, which you'd keep around locally and never commit. This changes makes it so users can pass in a path to a kunitconfig. One of the imagined use cases is having kunitconfig fragments in-tree to formalize interesting sets of tests for features/subsystems, e.g. $ ./tools/testing/kunit/kunit.py run --kunticonfig=fs/ext4/kunitconfig For now, this hypothetical fs/ext4/kunitconfig would contain CONFIG_KUNIT=y CONFIG_EXT4_FS=y CONFIG_EXT4_KUNIT_TESTS=y At the moment, it's not hard to manually whip up this file, but as more and more tests get added, this will get tedious. It also opens the door to documenting how to run all the tests relevant to a specific subsystem or feature as a simple one-liner. This can be seen as an analogue to tools/testing/selftests/*/config But in the case of KUnit, the tests live in the same directory as the code-under-test, so it feels more natural to allow the kunitconfig fragments to live anywhere. (Though, people could create a separate directory if wanted; this patch imposes no restrictions on the path). Signed-off-by: Daniel Latypov Reviewed-by: Brendan Higgins Tested-by: Brendan Higgins Signed-off-by: Shuah Khan --- tools/testing/kunit/kunit.py | 9 +++++--- tools/testing/kunit/kunit_kernel.py | 12 ++++++---- tools/testing/kunit/kunit_tool_test.py | 32 ++++++++++++++++++++++++++ 3 files changed, 46 insertions(+), 7 deletions(-) diff --git a/tools/testing/kunit/kunit.py b/tools/testing/kunit/kunit.py index e808a47c839b..02871a363f76 100755 --- a/tools/testing/kunit/kunit.py +++ b/tools/testing/kunit/kunit.py @@ -182,6 +182,9 @@ def add_common_opts(parser) -> None: parser.add_argument('--alltests', help='Run all KUnit tests through allyesconfig', action='store_true') + parser.add_argument('--kunitconfig', + help='Path to Kconfig fragment that enables KUnit tests', + metavar='kunitconfig') def add_build_opts(parser) -> None: parser.add_argument('--jobs', @@ -256,7 +259,7 @@ def main(argv, linux=None): os.mkdir(cli_args.build_dir) if not linux: - linux = kunit_kernel.LinuxSourceTree(cli_args.build_dir) + linux = kunit_kernel.LinuxSourceTree(cli_args.build_dir, kunitconfig_path=cli_args.kunitconfig) request = KunitRequest(cli_args.raw_output, cli_args.timeout, @@ -274,7 +277,7 @@ def main(argv, linux=None): os.mkdir(cli_args.build_dir) if not linux: - linux = kunit_kernel.LinuxSourceTree(cli_args.build_dir) + linux = kunit_kernel.LinuxSourceTree(cli_args.build_dir, kunitconfig_path=cli_args.kunitconfig) request = KunitConfigRequest(cli_args.build_dir, cli_args.make_options) @@ -286,7 +289,7 @@ def main(argv, linux=None): sys.exit(1) elif cli_args.subcommand == 'build': if not linux: - linux = kunit_kernel.LinuxSourceTree(cli_args.build_dir) + linux = kunit_kernel.LinuxSourceTree(cli_args.build_dir, kunitconfig_path=cli_args.kunitconfig) request = KunitBuildRequest(cli_args.jobs, cli_args.build_dir, diff --git a/tools/testing/kunit/kunit_kernel.py b/tools/testing/kunit/kunit_kernel.py index 2076a5a2d060..0b461663e7d9 100644 --- a/tools/testing/kunit/kunit_kernel.py +++ b/tools/testing/kunit/kunit_kernel.py @@ -123,7 +123,7 @@ def get_outfile_path(build_dir) -> str: class LinuxSourceTree(object): """Represents a Linux kernel source tree with KUnit tests.""" - def __init__(self, build_dir: str, load_config=True, defconfig=DEFAULT_KUNITCONFIG_PATH) -> None: + def __init__(self, build_dir: str, load_config=True, kunitconfig_path='') -> None: signal.signal(signal.SIGINT, self.signal_handler) self._ops = LinuxSourceTreeOperations() @@ -131,9 +131,13 @@ class LinuxSourceTree(object): if not load_config: return - kunitconfig_path = get_kunitconfig_path(build_dir) - if not os.path.exists(kunitconfig_path): - shutil.copyfile(defconfig, kunitconfig_path) + if kunitconfig_path: + if not os.path.exists(kunitconfig_path): + raise ConfigError(f'Specified kunitconfig ({kunitconfig_path}) does not exist') + else: + kunitconfig_path = get_kunitconfig_path(build_dir) + if not os.path.exists(kunitconfig_path): + shutil.copyfile(DEFAULT_KUNITCONFIG_PATH, kunitconfig_path) self._kconfig = kunit_config.Kconfig() self._kconfig.read_from_file(kunitconfig_path) diff --git a/tools/testing/kunit/kunit_tool_test.py b/tools/testing/kunit/kunit_tool_test.py index 888f42129f20..251bb0b4bc2e 100755 --- a/tools/testing/kunit/kunit_tool_test.py +++ b/tools/testing/kunit/kunit_tool_test.py @@ -12,6 +12,7 @@ from unittest import mock import tempfile, shutil # Handling test_tmpdir import json +import signal import os import kunit_config @@ -236,6 +237,23 @@ class KUnitParserTest(unittest.TestCase): result.status) self.assertEqual('kunit-resource-test', result.suites[0].name) +class LinuxSourceTreeTest(unittest.TestCase): + + def setUp(self): + mock.patch.object(signal, 'signal').start() + self.addCleanup(mock.patch.stopall) + + def test_invalid_kunitconfig(self): + with self.assertRaisesRegex(kunit_kernel.ConfigError, 'nonexistent.* does not exist'): + kunit_kernel.LinuxSourceTree('', kunitconfig_path='/nonexistent_file') + + def test_valid_kunitconfig(self): + with tempfile.NamedTemporaryFile('wt') as kunitconfig: + tree = kunit_kernel.LinuxSourceTree('', kunitconfig_path=kunitconfig.name) + + # TODO: add more test cases. + + class KUnitJsonTest(unittest.TestCase): def _json_for(self, log_file): @@ -378,5 +396,19 @@ class KUnitMainTest(unittest.TestCase): self.linux_source_mock.run_kernel.assert_called_once_with(build_dir=build_dir, timeout=300) self.print_mock.assert_any_call(StrContains('Testing complete.')) + @mock.patch.object(kunit_kernel, 'LinuxSourceTree') + def test_run_kunitconfig(self, mock_linux_init): + mock_linux_init.return_value = self.linux_source_mock + kunit.main(['run', '--kunitconfig=mykunitconfig']) + # Just verify that we parsed and initialized it correctly here. + mock_linux_init.assert_called_once_with('.kunit', kunitconfig_path='mykunitconfig') + + @mock.patch.object(kunit_kernel, 'LinuxSourceTree') + def test_config_kunitconfig(self, mock_linux_init): + mock_linux_init.return_value = self.linux_source_mock + kunit.main(['config', '--kunitconfig=mykunitconfig']) + # Just verify that we parsed and initialized it correctly here. + mock_linux_init.assert_called_once_with('.kunit', kunitconfig_path='mykunitconfig') + if __name__ == '__main__': unittest.main() From 65af9b964d72d8d8e88f4f673d4d0e9467197373 Mon Sep 17 00:00:00 2001 From: Daniel Latypov Date: Fri, 5 Feb 2021 14:14:09 -0800 Subject: [PATCH 337/414] kunit: don't show `1 == 1` in failed assertion messages Currently, given something (fairly dystopian) like > KUNIT_EXPECT_EQ(test, 2 + 2, 5) KUnit will prints a failure message like this. > Expected 2 + 2 == 5, but > 2 + 2 == 4 > 5 == 5 With this patch, the output just becomes > Expected 2 + 2 == 5, but > 2 + 2 == 4 This patch is slightly hacky, but it's quite common* to compare an expression to a literal integer value, so this can make KUnit less chatty in many cases. (This patch also fixes variants like KUNIT_EXPECT_GT, LE, et al.). It also allocates an additional string briefly, but given this only happens on test failures, it doesn't seem too bad a tradeoff. Also, in most cases it'll realize the lengths are unequal and bail out before the allocation. We could save the result of the formatted string to avoid wasting this extra work, but it felt cleaner to leave it as-is. Edge case: for something silly and unrealistic like > KUNIT_EXPECT_EQ(test, 4, 5); It'll generate this message with a trailing "but" > Expected 4 == 5, but > It didn't feel worth adding a check up-front to see if both sides are literals to handle this better. *A quick grep suggests 100+ comparisons to an integer literal as the right hand side. Signed-off-by: Daniel Latypov Tested-by: David Gow Reviewed-by: Brendan Higgins Signed-off-by: Shuah Khan --- lib/kunit/assert.c | 39 +++++++++++++++++++++++++++++++++------ 1 file changed, 33 insertions(+), 6 deletions(-) diff --git a/lib/kunit/assert.c b/lib/kunit/assert.c index 33acdaa28a7d..e0ec7d6fed6f 100644 --- a/lib/kunit/assert.c +++ b/lib/kunit/assert.c @@ -85,6 +85,29 @@ void kunit_ptr_not_err_assert_format(const struct kunit_assert *assert, } EXPORT_SYMBOL_GPL(kunit_ptr_not_err_assert_format); +/* Checks if `text` is a literal representing `value`, e.g. "5" and 5 */ +static bool is_literal(struct kunit *test, const char *text, long long value, + gfp_t gfp) +{ + char *buffer; + int len; + bool ret; + + len = snprintf(NULL, 0, "%lld", value); + if (strlen(text) != len) + return false; + + buffer = kunit_kmalloc(test, len+1, gfp); + if (!buffer) + return false; + + snprintf(buffer, len+1, "%lld", value); + ret = strncmp(buffer, text, len) == 0; + + kunit_kfree(test, buffer); + return ret; +} + void kunit_binary_assert_format(const struct kunit_assert *assert, struct string_stream *stream) { @@ -97,12 +120,16 @@ void kunit_binary_assert_format(const struct kunit_assert *assert, binary_assert->left_text, binary_assert->operation, binary_assert->right_text); - string_stream_add(stream, KUNIT_SUBSUBTEST_INDENT "%s == %lld\n", - binary_assert->left_text, - binary_assert->left_value); - string_stream_add(stream, KUNIT_SUBSUBTEST_INDENT "%s == %lld", - binary_assert->right_text, - binary_assert->right_value); + if (!is_literal(stream->test, binary_assert->left_text, + binary_assert->left_value, stream->gfp)) + string_stream_add(stream, KUNIT_SUBSUBTEST_INDENT "%s == %lld\n", + binary_assert->left_text, + binary_assert->left_value); + if (!is_literal(stream->test, binary_assert->right_text, + binary_assert->right_value, stream->gfp)) + string_stream_add(stream, KUNIT_SUBSUBTEST_INDENT "%s == %lld", + binary_assert->right_text, + binary_assert->right_value); kunit_assert_print_msg(assert, stream); } EXPORT_SYMBOL_GPL(kunit_binary_assert_format); From 5d31f71efcb6bce56ca3ab92eed0c8f2dbcc6f9a Mon Sep 17 00:00:00 2001 From: Daniel Latypov Date: Fri, 5 Feb 2021 16:08:52 -0800 Subject: [PATCH 338/414] kunit: add kunit.filter_glob cmdline option to filter suites E.g. specifying this would run suites with "list" in their name. kunit.filter_glob=list* Note: the executor prints out a TAP header that includes the number of suites we intend to run. So unless we want to report empty results for filtered-out suites, we need to do the filtering here in the executor. It's also probably better in the executor since we most likely don't want any filtering to apply to tests built as modules. This code does add a CONFIG_GLOB=y dependency for CONFIG_KUNIT=y. But the code seems light enough that it shouldn't be an issue. For now, we only filter on suite names so we don't have to create copies of the suites themselves, just the array (of arrays) holding them. The name is rather generic since in the future, we could consider extending it to a syntax like: kunit.filter_glob=. E.g. to run all the del list tests kunit.filter_glob=list-kunit-test.*del* But at the moment, it's far easier to manually comment out test cases in test files as opposed to messing with sets of Kconfig entries to select specific suites. So even just doing this makes using kunit far less annoying. Signed-off-by: Daniel Latypov Reviewed-by: Brendan Higgins Signed-off-by: Shuah Khan --- lib/kunit/Kconfig | 1 + lib/kunit/executor.c | 93 +++++++++++++++++++++++++++++++++++++++----- 2 files changed, 85 insertions(+), 9 deletions(-) diff --git a/lib/kunit/Kconfig b/lib/kunit/Kconfig index 00909e6a2443..0b5dfb001bac 100644 --- a/lib/kunit/Kconfig +++ b/lib/kunit/Kconfig @@ -4,6 +4,7 @@ menuconfig KUNIT tristate "KUnit - Enable support for unit tests" + select GLOB if KUNIT=y help Enables support for kernel unit tests (KUnit), a lightweight unit testing and mocking framework for the Linux kernel. These tests are diff --git a/lib/kunit/executor.c b/lib/kunit/executor.c index a95742a4ece7..15832ed44668 100644 --- a/lib/kunit/executor.c +++ b/lib/kunit/executor.c @@ -1,6 +1,8 @@ // SPDX-License-Identifier: GPL-2.0 #include +#include +#include /* * These symbols point to the .kunit_test_suites section and are defined in @@ -11,14 +13,81 @@ extern struct kunit_suite * const * const __kunit_suites_end[]; #if IS_BUILTIN(CONFIG_KUNIT) -static void kunit_print_tap_header(void) +static char *filter_glob; +module_param(filter_glob, charp, 0); +MODULE_PARM_DESC(filter_glob, + "Filter which KUnit test suites run at boot-time, e.g. list*"); + +static struct kunit_suite * const * +kunit_filter_subsuite(struct kunit_suite * const * const subsuite) +{ + int i, n = 0; + struct kunit_suite **filtered; + + n = 0; + for (i = 0; subsuite[i] != NULL; ++i) { + if (glob_match(filter_glob, subsuite[i]->name)) + ++n; + } + + if (n == 0) + return NULL; + + filtered = kmalloc_array(n + 1, sizeof(*filtered), GFP_KERNEL); + if (!filtered) + return NULL; + + n = 0; + for (i = 0; subsuite[i] != NULL; ++i) { + if (glob_match(filter_glob, subsuite[i]->name)) + filtered[n++] = subsuite[i]; + } + filtered[n] = NULL; + + return filtered; +} + +struct suite_set { + struct kunit_suite * const * const *start; + struct kunit_suite * const * const *end; +}; + +static struct suite_set kunit_filter_suites(void) +{ + int i; + struct kunit_suite * const **copy, * const *filtered_subsuite; + struct suite_set filtered; + + const size_t max = __kunit_suites_end - __kunit_suites_start; + + if (!filter_glob) { + filtered.start = __kunit_suites_start; + filtered.end = __kunit_suites_end; + return filtered; + } + + copy = kmalloc_array(max, sizeof(*filtered.start), GFP_KERNEL); + filtered.start = copy; + if (!copy) { /* won't be able to run anything, return an empty set */ + filtered.end = copy; + return filtered; + } + + for (i = 0; i < max; ++i) { + filtered_subsuite = kunit_filter_subsuite(__kunit_suites_start[i]); + if (filtered_subsuite) + *copy++ = filtered_subsuite; + } + filtered.end = copy; + return filtered; +} + +static void kunit_print_tap_header(struct suite_set *suite_set) { struct kunit_suite * const * const *suites, * const *subsuite; int num_of_suites = 0; - for (suites = __kunit_suites_start; - suites < __kunit_suites_end; - suites++) + for (suites = suite_set->start; suites < suite_set->end; suites++) for (subsuite = *suites; *subsuite != NULL; subsuite++) num_of_suites++; @@ -30,12 +99,18 @@ int kunit_run_all_tests(void) { struct kunit_suite * const * const *suites; - kunit_print_tap_header(); + struct suite_set suite_set = kunit_filter_suites(); - for (suites = __kunit_suites_start; - suites < __kunit_suites_end; - suites++) - __kunit_test_suites_init(*suites); + kunit_print_tap_header(&suite_set); + + for (suites = suite_set.start; suites < suite_set.end; suites++) + __kunit_test_suites_init(*suites); + + if (filter_glob) { /* a copy was made of each array */ + for (suites = suite_set.start; suites < suite_set.end; suites++) + kfree(*suites); + kfree(suite_set.start); + } return 0; } From d992880b3d265597c5a16af3775257999492e957 Mon Sep 17 00:00:00 2001 From: Daniel Latypov Date: Fri, 5 Feb 2021 16:08:53 -0800 Subject: [PATCH 339/414] kunit: tool: add support for filtering suites by glob This allows running different subsets of tests, e.g. $ ./tools/testing/kunit/kunit.py build $ ./tools/testing/kunit/kunit.py exec 'list*' $ ./tools/testing/kunit/kunit.py exec 'kunit*' This passes the "kunit_filter.glob" commandline option to the UML kernel, which currently only supports filtering by suite name. Signed-off-by: Daniel Latypov Reviewed-by: Brendan Higgins Signed-off-by: Shuah Khan --- tools/testing/kunit/kunit.py | 21 ++++++++++++++++----- tools/testing/kunit/kunit_kernel.py | 4 +++- tools/testing/kunit/kunit_tool_test.py | 15 +++++++++------ 3 files changed, 28 insertions(+), 12 deletions(-) diff --git a/tools/testing/kunit/kunit.py b/tools/testing/kunit/kunit.py index 02871a363f76..d5144fcb03ac 100755 --- a/tools/testing/kunit/kunit.py +++ b/tools/testing/kunit/kunit.py @@ -28,12 +28,12 @@ KunitBuildRequest = namedtuple('KunitBuildRequest', ['jobs', 'build_dir', 'alltests', 'make_options']) KunitExecRequest = namedtuple('KunitExecRequest', - ['timeout', 'build_dir', 'alltests']) + ['timeout', 'build_dir', 'alltests', 'filter_glob']) KunitParseRequest = namedtuple('KunitParseRequest', ['raw_output', 'input_data', 'build_dir', 'json']) KunitRequest = namedtuple('KunitRequest', ['raw_output','timeout', 'jobs', - 'build_dir', 'alltests', 'json', - 'make_options']) + 'build_dir', 'alltests', 'filter_glob', + 'json', 'make_options']) KernelDirectoryPath = sys.argv[0].split('tools/testing/kunit/')[0] @@ -93,6 +93,7 @@ def exec_tests(linux: kunit_kernel.LinuxSourceTree, test_start = time.time() result = linux.run_kernel( timeout=None if request.alltests else request.timeout, + filter_glob=request.filter_glob, build_dir=request.build_dir) test_end = time.time() @@ -149,7 +150,7 @@ def run_tests(linux: kunit_kernel.LinuxSourceTree, return build_result exec_request = KunitExecRequest(request.timeout, request.build_dir, - request.alltests) + request.alltests, request.filter_glob) exec_result = exec_tests(linux, exec_request) if exec_result.status != KunitStatus.SUCCESS: return exec_result @@ -200,6 +201,14 @@ def add_exec_opts(parser) -> None: type=int, default=300, metavar='timeout') + parser.add_argument('filter_glob', + help='maximum number of seconds to allow for all tests ' + 'to run. This does not include time taken to build the ' + 'tests.', + type=str, + nargs='?', + default='', + metavar='filter_glob') def add_parse_opts(parser) -> None: parser.add_argument('--raw_output', help='don\'t format output from kernel', @@ -266,6 +275,7 @@ def main(argv, linux=None): cli_args.jobs, cli_args.build_dir, cli_args.alltests, + cli_args.filter_glob, cli_args.json, cli_args.make_options) result = run_tests(linux, request) @@ -307,7 +317,8 @@ def main(argv, linux=None): exec_request = KunitExecRequest(cli_args.timeout, cli_args.build_dir, - cli_args.alltests) + cli_args.alltests, + cli_args.filter_glob) exec_result = exec_tests(linux, exec_request) parse_request = KunitParseRequest(cli_args.raw_output, exec_result.result, diff --git a/tools/testing/kunit/kunit_kernel.py b/tools/testing/kunit/kunit_kernel.py index 0b461663e7d9..71a5f5c1750b 100644 --- a/tools/testing/kunit/kunit_kernel.py +++ b/tools/testing/kunit/kunit_kernel.py @@ -203,8 +203,10 @@ class LinuxSourceTree(object): return False return self.validate_config(build_dir) - def run_kernel(self, args=[], build_dir='', timeout=None) -> Iterator[str]: + def run_kernel(self, args=[], build_dir='', filter_glob='', timeout=None) -> Iterator[str]: args.extend(['mem=1G', 'console=tty']) + if filter_glob: + args.append('kunit.filter_glob='+filter_glob) self._ops.linux_bin(args, timeout, build_dir) outfile = get_outfile_path(build_dir) subprocess.call(['stty', 'sane']) diff --git a/tools/testing/kunit/kunit_tool_test.py b/tools/testing/kunit/kunit_tool_test.py index 251bb0b4bc2e..1ad3049e9069 100755 --- a/tools/testing/kunit/kunit_tool_test.py +++ b/tools/testing/kunit/kunit_tool_test.py @@ -315,7 +315,8 @@ class KUnitMainTest(unittest.TestCase): kunit.main(['exec'], self.linux_source_mock) self.assertEqual(self.linux_source_mock.build_reconfig.call_count, 0) self.assertEqual(self.linux_source_mock.run_kernel.call_count, 1) - self.linux_source_mock.run_kernel.assert_called_once_with(build_dir='.kunit', timeout=300) + self.linux_source_mock.run_kernel.assert_called_once_with( + build_dir='.kunit', filter_glob='', timeout=300) self.print_mock.assert_any_call(StrContains('Testing complete.')) def test_run_passes_args_pass(self): @@ -323,7 +324,7 @@ class KUnitMainTest(unittest.TestCase): self.assertEqual(self.linux_source_mock.build_reconfig.call_count, 1) self.assertEqual(self.linux_source_mock.run_kernel.call_count, 1) self.linux_source_mock.run_kernel.assert_called_once_with( - build_dir='.kunit', timeout=300) + build_dir='.kunit', filter_glob='', timeout=300) self.print_mock.assert_any_call(StrContains('Testing complete.')) def test_exec_passes_args_fail(self): @@ -361,7 +362,8 @@ class KUnitMainTest(unittest.TestCase): def test_exec_timeout(self): timeout = 3453 kunit.main(['exec', '--timeout', str(timeout)], self.linux_source_mock) - self.linux_source_mock.run_kernel.assert_called_once_with(build_dir='.kunit', timeout=timeout) + self.linux_source_mock.run_kernel.assert_called_once_with( + build_dir='.kunit', filter_glob='', timeout=timeout) self.print_mock.assert_any_call(StrContains('Testing complete.')) def test_run_timeout(self): @@ -369,7 +371,7 @@ class KUnitMainTest(unittest.TestCase): kunit.main(['run', '--timeout', str(timeout)], self.linux_source_mock) self.assertEqual(self.linux_source_mock.build_reconfig.call_count, 1) self.linux_source_mock.run_kernel.assert_called_once_with( - build_dir='.kunit', timeout=timeout) + build_dir='.kunit', filter_glob='', timeout=timeout) self.print_mock.assert_any_call(StrContains('Testing complete.')) def test_run_builddir(self): @@ -377,7 +379,7 @@ class KUnitMainTest(unittest.TestCase): kunit.main(['run', '--build_dir=.kunit'], self.linux_source_mock) self.assertEqual(self.linux_source_mock.build_reconfig.call_count, 1) self.linux_source_mock.run_kernel.assert_called_once_with( - build_dir=build_dir, timeout=300) + build_dir=build_dir, filter_glob='', timeout=300) self.print_mock.assert_any_call(StrContains('Testing complete.')) def test_config_builddir(self): @@ -393,7 +395,8 @@ class KUnitMainTest(unittest.TestCase): def test_exec_builddir(self): build_dir = '.kunit' kunit.main(['exec', '--build_dir', build_dir], self.linux_source_mock) - self.linux_source_mock.run_kernel.assert_called_once_with(build_dir=build_dir, timeout=300) + self.linux_source_mock.run_kernel.assert_called_once_with( + build_dir=build_dir, filter_glob='', timeout=300) self.print_mock.assert_any_call(StrContains('Testing complete.')) @mock.patch.object(kunit_kernel, 'LinuxSourceTree') From 7af29141a31a2a2350589471c8979ff5f22fb9b7 Mon Sep 17 00:00:00 2001 From: Daniel Latypov Date: Fri, 5 Feb 2021 16:08:54 -0800 Subject: [PATCH 340/414] kunit: tool: fix unintentional statefulness in run_kernel() This is a bug that has been present since the first version of this code. Using [] as a default parameter is dangerous, since it's mutable. Example using the REPL: >>> def bad(param = []): ... param.append(len(param)) ... print(param) ... >>> bad() [0] >>> bad() [0, 1] This wasn't a concern in the past since it would just keep appending the same values to it. E.g. before, `args` would just grow in size like: [mem=1G', 'console=tty'] [mem=1G', 'console=tty', mem=1G', 'console=tty'] But with now filter_glob, this is more dangerous, e.g. run_kernel(filter_glob='my-test*') # default modified here run_kernel() # filter_glob still applies here! That earlier `filter_glob` will affect all subsequent calls that don't specify `args`. Note: currently the kunit tool only calls run_kernel() at most once, so it's not possible to trigger any negative side-effects right now. Fixes: 6ebf5866f2e8 ("kunit: tool: add Python wrappers for running KUnit tests") Signed-off-by: Daniel Latypov Reviewed-by: Brendan Higgins Signed-off-by: Shuah Khan --- tools/testing/kunit/kunit_kernel.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/tools/testing/kunit/kunit_kernel.py b/tools/testing/kunit/kunit_kernel.py index 71a5f5c1750b..f309a33256cd 100644 --- a/tools/testing/kunit/kunit_kernel.py +++ b/tools/testing/kunit/kunit_kernel.py @@ -203,7 +203,9 @@ class LinuxSourceTree(object): return False return self.validate_config(build_dir) - def run_kernel(self, args=[], build_dir='', filter_glob='', timeout=None) -> Iterator[str]: + def run_kernel(self, args=None, build_dir='', filter_glob='', timeout=None) -> Iterator[str]: + if not args: + args = [] args.extend(['mem=1G', 'console=tty']) if filter_glob: args.append('kunit.filter_glob='+filter_glob) From 64ba3d591c9d2be2a9c09e99b00732afe002ad0d Mon Sep 17 00:00:00 2001 From: John Stultz Date: Fri, 29 Jan 2021 03:05:10 +0000 Subject: [PATCH 341/414] kselftests: dmabuf-heaps: Fix Makefile's inclusion of the kernel's usr/include dir Copied in from somewhere else, the makefile was including the kerne's usr/include dir, which caused the asm/ioctl.h file to be used. Unfortunately, that file has different values for _IOC_SIZEBITS and _IOC_WRITE than include/uapi/asm-generic/ioctl.h which then causes the _IOCW macros to give the wrong ioctl numbers, specifically for DMA_BUF_IOCTL_SYNC. This patch simply removes the extra include from the Makefile Cc: Shuah Khan Cc: Brian Starkey Cc: Sumit Semwal Cc: Laura Abbott Cc: Hridya Valsaraju Cc: Suren Baghdasaryan Cc: Sandeep Patil Cc: Daniel Mentz Cc: linux-media@vger.kernel.org Cc: dri-devel@lists.freedesktop.org Cc: linux-kselftest@vger.kernel.org Fixes: a8779927fd86c ("kselftests: Add dma-heap test") Signed-off-by: John Stultz Signed-off-by: Shuah Khan --- tools/testing/selftests/dmabuf-heaps/Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tools/testing/selftests/dmabuf-heaps/Makefile b/tools/testing/selftests/dmabuf-heaps/Makefile index 607c2acd2082..604b43ece15f 100644 --- a/tools/testing/selftests/dmabuf-heaps/Makefile +++ b/tools/testing/selftests/dmabuf-heaps/Makefile @@ -1,5 +1,5 @@ # SPDX-License-Identifier: GPL-2.0 -CFLAGS += -static -O3 -Wl,-no-as-needed -Wall -I../../../../usr/include +CFLAGS += -static -O3 -Wl,-no-as-needed -Wall TEST_GEN_PROGS = dmabuf-heap From 50c65a8342941d30dd5874993052a91c9a52591b Mon Sep 17 00:00:00 2001 From: John Stultz Date: Fri, 29 Jan 2021 03:05:11 +0000 Subject: [PATCH 342/414] kselftests: dmabuf-heaps: Add clearer checks on DMABUF_BEGIN/END_SYNC Add logic to check the dmabuf sync calls succeed. Cc: Shuah Khan Cc: Brian Starkey Cc: Sumit Semwal Cc: Laura Abbott Cc: Hridya Valsaraju Cc: Suren Baghdasaryan Cc: Sandeep Patil Cc: Daniel Mentz Cc: linux-media@vger.kernel.org Cc: dri-devel@lists.freedesktop.org Cc: linux-kselftest@vger.kernel.org Signed-off-by: John Stultz Signed-off-by: Shuah Khan --- .../selftests/dmabuf-heaps/dmabuf-heap.c | 20 ++++++++++++------- 1 file changed, 13 insertions(+), 7 deletions(-) diff --git a/tools/testing/selftests/dmabuf-heaps/dmabuf-heap.c b/tools/testing/selftests/dmabuf-heaps/dmabuf-heap.c index 909da9cdda97..46f6759a8acc 100644 --- a/tools/testing/selftests/dmabuf-heaps/dmabuf-heap.c +++ b/tools/testing/selftests/dmabuf-heaps/dmabuf-heap.c @@ -130,16 +130,13 @@ static int dmabuf_heap_alloc(int fd, size_t len, unsigned int flags, dmabuf_fd); } -static void dmabuf_sync(int fd, int start_stop) +static int dmabuf_sync(int fd, int start_stop) { struct dma_buf_sync sync = { .flags = start_stop | DMA_BUF_SYNC_RW, }; - int ret; - ret = ioctl(fd, DMA_BUF_IOCTL_SYNC, &sync); - if (ret) - printf("sync failed %d\n", errno); + return ioctl(fd, DMA_BUF_IOCTL_SYNC, &sync); } #define ONE_MEG (1024 * 1024) @@ -197,9 +194,18 @@ static int test_alloc_and_import(char *heap_name) } printf("import passed\n"); - dmabuf_sync(dmabuf_fd, DMA_BUF_SYNC_START); + ret = dmabuf_sync(dmabuf_fd, DMA_BUF_SYNC_START); + if (ret < 0) { + printf("Sync start failed!\n"); + goto out; + } + memset(p, 0xff, ONE_MEG); - dmabuf_sync(dmabuf_fd, DMA_BUF_SYNC_END); + ret = dmabuf_sync(dmabuf_fd, DMA_BUF_SYNC_END); + if (ret < 0) { + printf("Sync end failed!\n"); + goto out; + } printf("syncs passed\n"); close_handle(importer_fd, handle); From 1b50e10ee6997c795382570eac94ccc874611d61 Mon Sep 17 00:00:00 2001 From: John Stultz Date: Fri, 29 Jan 2021 03:05:12 +0000 Subject: [PATCH 343/414] kselftests: dmabuf-heaps: Softly fail if don't find a vgem device While testing against a vgem device is helpful for testing importing they aren't always configured in, so don't make it a fatal failure. Cc: Shuah Khan Cc: Brian Starkey Cc: Sumit Semwal Cc: Laura Abbott Cc: Hridya Valsaraju Cc: Suren Baghdasaryan Cc: Sandeep Patil Cc: Daniel Mentz Cc: linux-media@vger.kernel.org Cc: dri-devel@lists.freedesktop.org Cc: linux-kselftest@vger.kernel.org Signed-off-by: John Stultz Signed-off-by: Shuah Khan --- .../testing/selftests/dmabuf-heaps/dmabuf-heap.c | 15 +++++++-------- 1 file changed, 7 insertions(+), 8 deletions(-) diff --git a/tools/testing/selftests/dmabuf-heaps/dmabuf-heap.c b/tools/testing/selftests/dmabuf-heaps/dmabuf-heap.c index 46f6759a8acc..8cedd539c7fb 100644 --- a/tools/testing/selftests/dmabuf-heaps/dmabuf-heap.c +++ b/tools/testing/selftests/dmabuf-heaps/dmabuf-heap.c @@ -184,16 +184,15 @@ static int test_alloc_and_import(char *heap_name) if (importer_fd < 0) { ret = importer_fd; printf("Failed to open vgem\n"); - goto out; + } else { + ret = import_vgem_fd(importer_fd, dmabuf_fd, &handle); + if (ret < 0) { + printf("Failed to import buffer\n"); + goto out; + } + printf("import passed\n"); } - ret = import_vgem_fd(importer_fd, dmabuf_fd, &handle); - if (ret < 0) { - printf("Failed to import buffer\n"); - goto out; - } - printf("import passed\n"); - ret = dmabuf_sync(dmabuf_fd, DMA_BUF_SYNC_START); if (ret < 0) { printf("Sync start failed!\n"); From 06fc1aaea968949d5413722742f74b2502b6e138 Mon Sep 17 00:00:00 2001 From: John Stultz Date: Fri, 29 Jan 2021 03:05:13 +0000 Subject: [PATCH 344/414] kselftests: dmabuf-heaps: Cleanup test output Cleanup the test output so it is a bit easier to read Cc: Shuah Khan Cc: Brian Starkey Cc: Sumit Semwal Cc: Laura Abbott Cc: Hridya Valsaraju Cc: Suren Baghdasaryan Cc: Sandeep Patil Cc: Daniel Mentz Cc: linux-media@vger.kernel.org Cc: dri-devel@lists.freedesktop.org Cc: linux-kselftest@vger.kernel.org Signed-off-by: John Stultz Signed-off-by: Shuah Khan --- .../selftests/dmabuf-heaps/dmabuf-heap.c | 44 +++++++++---------- 1 file changed, 21 insertions(+), 23 deletions(-) diff --git a/tools/testing/selftests/dmabuf-heaps/dmabuf-heap.c b/tools/testing/selftests/dmabuf-heaps/dmabuf-heap.c index 8cedd539c7fb..d179d81e2355 100644 --- a/tools/testing/selftests/dmabuf-heaps/dmabuf-heap.c +++ b/tools/testing/selftests/dmabuf-heaps/dmabuf-heap.c @@ -148,16 +148,14 @@ static int test_alloc_and_import(char *heap_name) void *p = NULL; int ret; - printf("Testing heap: %s\n", heap_name); - heap_fd = dmabuf_heap_open(heap_name); if (heap_fd < 0) return -1; - printf("Allocating 1 MEG\n"); + printf(" Testing allocation and importing: "); ret = dmabuf_heap_alloc(heap_fd, ONE_MEG, 0, &dmabuf_fd); if (ret) { - printf("Allocation Failed!\n"); + printf("FAIL (Allocation Failed!)\n"); ret = -1; goto out; } @@ -169,11 +167,10 @@ static int test_alloc_and_import(char *heap_name) dmabuf_fd, 0); if (p == MAP_FAILED) { - printf("mmap() failed: %m\n"); + printf("FAIL (mmap() failed)\n"); ret = -1; goto out; } - printf("mmap passed\n"); dmabuf_sync(dmabuf_fd, DMA_BUF_SYNC_START); memset(p, 1, ONE_MEG / 2); @@ -183,33 +180,31 @@ static int test_alloc_and_import(char *heap_name) importer_fd = open_vgem(); if (importer_fd < 0) { ret = importer_fd; - printf("Failed to open vgem\n"); + printf("(Could not open vgem - skipping): "); } else { ret = import_vgem_fd(importer_fd, dmabuf_fd, &handle); if (ret < 0) { - printf("Failed to import buffer\n"); + printf("FAIL (Failed to import buffer)\n"); goto out; } - printf("import passed\n"); } ret = dmabuf_sync(dmabuf_fd, DMA_BUF_SYNC_START); if (ret < 0) { - printf("Sync start failed!\n"); + printf("FAIL (DMA_BUF_SYNC_START failed!)\n"); goto out; } memset(p, 0xff, ONE_MEG); ret = dmabuf_sync(dmabuf_fd, DMA_BUF_SYNC_END); if (ret < 0) { - printf("Sync end failed!\n"); + printf("FAIL (DMA_BUF_SYNC_END failed!)\n"); goto out; } - printf("syncs passed\n"); close_handle(importer_fd, handle); ret = 0; - + printf(" OK\n"); out: if (p) munmap(p, ONE_MEG); @@ -297,23 +292,24 @@ static int test_alloc_compat(char *heap_name) if (heap_fd < 0) return -1; - printf("Testing (theoretical)older alloc compat\n"); + printf(" Testing (theoretical)older alloc compat: "); ret = dmabuf_heap_alloc_older(heap_fd, ONE_MEG, 0, &dmabuf_fd); if (ret) { - printf("Older compat allocation failed!\n"); + printf("FAIL (Older compat allocation failed!)\n"); ret = -1; goto out; } close(dmabuf_fd); + printf("OK\n"); - printf("Testing (theoretical)newer alloc compat\n"); + printf(" Testing (theoretical)newer alloc compat: "); ret = dmabuf_heap_alloc_newer(heap_fd, ONE_MEG, 0, &dmabuf_fd); if (ret) { - printf("Newer compat allocation failed!\n"); + printf("FAIL (Newer compat allocation failed!)\n"); ret = -1; goto out; } - printf("Ioctl compatibility tests passed\n"); + printf("OK\n"); out: if (dmabuf_fd >= 0) close(dmabuf_fd); @@ -332,17 +328,17 @@ static int test_alloc_errors(char *heap_name) if (heap_fd < 0) return -1; - printf("Testing expected error cases\n"); + printf(" Testing expected error cases: "); ret = dmabuf_heap_alloc(0, ONE_MEG, 0x111111, &dmabuf_fd); if (!ret) { - printf("Did not see expected error (invalid fd)!\n"); + printf("FAIL (Did not see expected error (invalid fd)!)\n"); ret = -1; goto out; } ret = dmabuf_heap_alloc(heap_fd, ONE_MEG, 0x111111, &dmabuf_fd); if (!ret) { - printf("Did not see expected error (invalid heap flags)!\n"); + printf("FAIL (Did not see expected error (invalid heap flags)!)\n"); ret = -1; goto out; } @@ -350,12 +346,12 @@ static int test_alloc_errors(char *heap_name) ret = dmabuf_heap_alloc_fdflags(heap_fd, ONE_MEG, ~(O_RDWR | O_CLOEXEC), 0, &dmabuf_fd); if (!ret) { - printf("Did not see expected error (invalid fd flags)!\n"); + printf("FAIL (Did not see expected error (invalid fd flags)!)\n"); ret = -1; goto out; } - printf("Expected error checking passed\n"); + printf("OK\n"); ret = 0; out: if (dmabuf_fd >= 0) @@ -384,6 +380,8 @@ int main(void) if (!strncmp(dir->d_name, "..", 3)) continue; + printf("Testing heap: %s\n", dir->d_name); + printf("=======================================\n"); ret = test_alloc_and_import(dir->d_name); if (ret) break; From 1d317c1ca2930759669bf416d04f2fbd3ce99fa9 Mon Sep 17 00:00:00 2001 From: John Stultz Date: Fri, 29 Jan 2021 03:05:14 +0000 Subject: [PATCH 345/414] kselftests: dmabuf-heaps: Add extra checking that allocated buffers are zeroed Add a check to validate that buffers allocated from the heaps are properly zeroed before being given to userland. It is done by allocating a number of buffers, and filling them with a nonzero pattern, then closing and reallocating more buffers and checking that they are all properly zeroed. This is helpful to validate any cached buffers are zeroed before being given back out. Cc: Shuah Khan Cc: Brian Starkey Cc: Sumit Semwal Cc: Laura Abbott Cc: Hridya Valsaraju Cc: Suren Baghdasaryan Cc: Sandeep Patil Cc: Daniel Mentz Cc: linux-media@vger.kernel.org Cc: dri-devel@lists.freedesktop.org Cc: linux-kselftest@vger.kernel.org Signed-off-by: John Stultz Signed-off-by: Shuah Khan --- .../selftests/dmabuf-heaps/dmabuf-heap.c | 86 +++++++++++++++++++ 1 file changed, 86 insertions(+) diff --git a/tools/testing/selftests/dmabuf-heaps/dmabuf-heap.c b/tools/testing/selftests/dmabuf-heaps/dmabuf-heap.c index d179d81e2355..29af27acd40e 100644 --- a/tools/testing/selftests/dmabuf-heaps/dmabuf-heap.c +++ b/tools/testing/selftests/dmabuf-heaps/dmabuf-heap.c @@ -218,6 +218,84 @@ static int test_alloc_and_import(char *heap_name) return ret; } +static int test_alloc_zeroed(char *heap_name, size_t size) +{ + int heap_fd = -1, dmabuf_fd[32]; + int i, j, ret; + void *p = NULL; + char *c; + + printf(" Testing alloced %ldk buffers are zeroed: ", size / 1024); + heap_fd = dmabuf_heap_open(heap_name); + if (heap_fd < 0) + return -1; + + /* Allocate and fill a bunch of buffers */ + for (i = 0; i < 32; i++) { + ret = dmabuf_heap_alloc(heap_fd, size, 0, &dmabuf_fd[i]); + if (ret < 0) { + printf("FAIL (Allocation (%i) failed)\n", i); + goto out; + } + /* mmap and fill with simple pattern */ + p = mmap(NULL, size, PROT_READ | PROT_WRITE, MAP_SHARED, dmabuf_fd[i], 0); + if (p == MAP_FAILED) { + printf("FAIL (mmap() failed!)\n"); + ret = -1; + goto out; + } + dmabuf_sync(dmabuf_fd[i], DMA_BUF_SYNC_START); + memset(p, 0xff, size); + dmabuf_sync(dmabuf_fd[i], DMA_BUF_SYNC_END); + munmap(p, size); + } + /* close them all */ + for (i = 0; i < 32; i++) + close(dmabuf_fd[i]); + + /* Allocate and validate all buffers are zeroed */ + for (i = 0; i < 32; i++) { + ret = dmabuf_heap_alloc(heap_fd, size, 0, &dmabuf_fd[i]); + if (ret < 0) { + printf("FAIL (Allocation (%i) failed)\n", i); + goto out; + } + + /* mmap and validate everything is zero */ + p = mmap(NULL, size, PROT_READ | PROT_WRITE, MAP_SHARED, dmabuf_fd[i], 0); + if (p == MAP_FAILED) { + printf("FAIL (mmap() failed!)\n"); + ret = -1; + goto out; + } + dmabuf_sync(dmabuf_fd[i], DMA_BUF_SYNC_START); + c = (char *)p; + for (j = 0; j < size; j++) { + if (c[j] != 0) { + printf("FAIL (Allocated buffer not zeroed @ %i)\n", j); + break; + } + } + dmabuf_sync(dmabuf_fd[i], DMA_BUF_SYNC_END); + munmap(p, size); + } + /* close them all */ + for (i = 0; i < 32; i++) + close(dmabuf_fd[i]); + + close(heap_fd); + printf("OK\n"); + return 0; + +out: + while (i > 0) { + close(dmabuf_fd[i]); + i--; + } + close(heap_fd); + return ret; +} + /* Test the ioctl version compatibility w/ a smaller structure then expected */ static int dmabuf_heap_alloc_older(int fd, size_t len, unsigned int flags, int *dmabuf_fd) @@ -386,6 +464,14 @@ int main(void) if (ret) break; + ret = test_alloc_zeroed(dir->d_name, 4 * 1024); + if (ret) + break; + + ret = test_alloc_zeroed(dir->d_name, ONE_MEG); + if (ret) + break; + ret = test_alloc_compat(dir->d_name); if (ret) break; From 8a94b4ea28a3a6a506228f426746b1f4df62e625 Mon Sep 17 00:00:00 2001 From: Yang Li Date: Mon, 8 Feb 2021 18:24:00 +0800 Subject: [PATCH 346/414] selftests/ipc: remove unneeded semicolon Eliminate the following coccicheck warning: ./tools/testing/selftests/ipc/msgque.c:72:3-4: Unneeded semicolon ./tools/testing/selftests/ipc/msgque.c:183:2-3: Unneeded semicolon ./tools/testing/selftests/ipc/msgque.c:191:2-3: Unneeded semicolon Signed-off-by: Yang Li Signed-off-by: Shuah Khan --- tools/testing/selftests/ipc/msgque.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/tools/testing/selftests/ipc/msgque.c b/tools/testing/selftests/ipc/msgque.c index 5ec4d9e18806..656c43c24044 100644 --- a/tools/testing/selftests/ipc/msgque.c +++ b/tools/testing/selftests/ipc/msgque.c @@ -69,7 +69,7 @@ int restore_queue(struct msgque_data *msgque) printf("msgsnd failed (%m)\n"); ret = -errno; goto destroy; - }; + } } return 0; @@ -180,7 +180,7 @@ int fill_msgque(struct msgque_data *msgque) IPC_NOWAIT) != 0) { printf("First message send failed (%m)\n"); return -errno; - }; + } msgbuf.mtype = ANOTHER_MSG_TYPE; memcpy(msgbuf.mtext, ANOTHER_TEST_STRING, sizeof(ANOTHER_TEST_STRING)); @@ -188,7 +188,7 @@ int fill_msgque(struct msgque_data *msgque) IPC_NOWAIT) != 0) { printf("Second message send failed (%m)\n"); return -errno; - }; + } return 0; } From 18f6e68548587aa729dcff669321c818665d3def Mon Sep 17 00:00:00 2001 From: Yang Li Date: Sun, 7 Feb 2021 15:42:10 +0800 Subject: [PATCH 347/414] selftests/x86/ldt_gdt: remove unneeded semicolon Eliminate the following coccicheck warning: ./tools/testing/selftests/x86/ldt_gdt.c:610:2-3: Unneeded semicolon Reported-by: Abaci Robot Signed-off-by: Yang Li Signed-off-by: Shuah Khan --- tools/testing/selftests/x86/ldt_gdt.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tools/testing/selftests/x86/ldt_gdt.c b/tools/testing/selftests/x86/ldt_gdt.c index 1aef72df20a1..3a29346e1452 100644 --- a/tools/testing/selftests/x86/ldt_gdt.c +++ b/tools/testing/selftests/x86/ldt_gdt.c @@ -607,7 +607,7 @@ static void do_multicpu_tests(void) failures++; asm volatile ("mov %0, %%ss" : : "rm" (orig_ss)); - }; + } ftx = 100; /* Kill the thread. */ syscall(SYS_futex, &ftx, FUTEX_WAKE, 0, NULL, NULL, 0); From 9dd052474a2645b2a6171d19ad17b05b180d446d Mon Sep 17 00:00:00 2001 From: Lang Cheng Date: Sat, 30 Jan 2021 16:57:59 +0800 Subject: [PATCH 348/414] RDMA/hns: Allocate one more recv SGE for HIP08 The RQ/SRQ of HIP08 needs one special sge to stop receive reliably. So the driver needs to allocate at least one SGE when creating RQ/SRQ and ensure that at least one SGE is filled with the special value during post_recv. Besides, the kernel driver should only do this for kernel ULP. For userspace ULP, the userspace driver will allocate the reserved SGE in buffer, and the kernel driver just needs to pin the corresponding size of memory based on the userspace driver's requirements. Link: https://lore.kernel.org/r/1611997090-48820-2-git-send-email-liweihang@huawei.com Signed-off-by: Lang Cheng Signed-off-by: Weihang Li Signed-off-by: Jason Gunthorpe --- drivers/infiniband/hw/hns/hns_roce_device.h | 4 +++ drivers/infiniband/hw/hns/hns_roce_hw_v2.c | 28 ++++++++++----- drivers/infiniband/hw/hns/hns_roce_hw_v2.h | 3 +- drivers/infiniband/hw/hns/hns_roce_qp.c | 39 ++++++++++++++++---- drivers/infiniband/hw/hns/hns_roce_srq.c | 40 +++++++++++++++++++-- 5 files changed, 94 insertions(+), 20 deletions(-) diff --git a/drivers/infiniband/hw/hns/hns_roce_device.h b/drivers/infiniband/hw/hns/hns_roce_device.h index f62851faf9cc..72961e4edbf2 100644 --- a/drivers/infiniband/hw/hns/hns_roce_device.h +++ b/drivers/infiniband/hw/hns/hns_roce_device.h @@ -65,6 +65,8 @@ #define HNS_ROCE_CQE_WCMD_EMPTY_BIT 0x2 #define HNS_ROCE_MIN_CQE_CNT 16 +#define HNS_ROCE_RESERVED_SGE 1 + #define HNS_ROCE_MAX_IRQ_NUM 128 #define HNS_ROCE_SGE_IN_WQE 2 @@ -395,6 +397,7 @@ struct hns_roce_wq { spinlock_t lock; u32 wqe_cnt; /* WQE num */ u32 max_gs; + u32 rsv_sge; int offset; int wqe_shift; /* WQE size */ u32 head; @@ -498,6 +501,7 @@ struct hns_roce_srq { unsigned long srqn; u32 wqe_cnt; int max_gs; + u32 rsv_sge; int wqe_shift; void __iomem *db_reg_l; diff --git a/drivers/infiniband/hw/hns/hns_roce_hw_v2.c b/drivers/infiniband/hw/hns/hns_roce_hw_v2.c index a5bbfb1c009b..2245d25cf64f 100644 --- a/drivers/infiniband/hw/hns/hns_roce_hw_v2.c +++ b/drivers/infiniband/hw/hns/hns_roce_hw_v2.c @@ -741,6 +741,7 @@ static int hns_roce_v2_post_recv(struct ib_qp *ibqp, unsigned long flags; void *wqe = NULL; u32 wqe_idx; + u32 max_sge; int nreq; int ret; int i; @@ -754,6 +755,7 @@ static int hns_roce_v2_post_recv(struct ib_qp *ibqp, goto out; } + max_sge = hr_qp->rq.max_gs - hr_qp->rq.rsv_sge; for (nreq = 0; wr; ++nreq, wr = wr->next) { if (unlikely(hns_roce_wq_overflow(&hr_qp->rq, nreq, hr_qp->ibqp.recv_cq))) { @@ -764,9 +766,9 @@ static int hns_roce_v2_post_recv(struct ib_qp *ibqp, wqe_idx = (hr_qp->rq.head + nreq) & (hr_qp->rq.wqe_cnt - 1); - if (unlikely(wr->num_sge > hr_qp->rq.max_gs)) { + if (unlikely(wr->num_sge > max_sge)) { ibdev_err(ibdev, "num_sge = %d >= max_sge = %u.\n", - wr->num_sge, hr_qp->rq.max_gs); + wr->num_sge, max_sge); ret = -EINVAL; *bad_wr = wr; goto out; @@ -781,9 +783,10 @@ static int hns_roce_v2_post_recv(struct ib_qp *ibqp, dseg++; } - if (wr->num_sge < hr_qp->rq.max_gs) { + if (hr_qp->rq.rsv_sge) { dseg->lkey = cpu_to_le32(HNS_ROCE_INVALID_LKEY); dseg->addr = 0; + dseg->len = cpu_to_le32(HNS_ROCE_INVALID_SGE_LENGTH); } /* rq support inline data */ @@ -879,6 +882,7 @@ static int hns_roce_v2_post_srq_recv(struct ib_srq *ibsrq, __le32 *srq_idx; int ret = 0; int wqe_idx; + u32 max_sge; void *wqe; int nreq; int i; @@ -886,9 +890,13 @@ static int hns_roce_v2_post_srq_recv(struct ib_srq *ibsrq, spin_lock_irqsave(&srq->lock, flags); ind = srq->head & (srq->wqe_cnt - 1); + max_sge = srq->max_gs - srq->rsv_sge; for (nreq = 0; wr; ++nreq, wr = wr->next) { - if (unlikely(wr->num_sge >= srq->max_gs)) { + if (unlikely(wr->num_sge > max_sge)) { + ibdev_err(&hr_dev->ib_dev, + "srq: num_sge = %d, max_sge = %u.\n", + wr->num_sge, max_sge); ret = -EINVAL; *bad_wr = wr; break; @@ -916,9 +924,9 @@ static int hns_roce_v2_post_srq_recv(struct ib_srq *ibsrq, dseg[i].addr = cpu_to_le64(wr->sg_list[i].addr); } - if (wr->num_sge < srq->max_gs) { - dseg[i].len = 0; - dseg[i].lkey = cpu_to_le32(0x100); + if (srq->rsv_sge) { + dseg[i].len = cpu_to_le32(HNS_ROCE_INVALID_SGE_LENGTH); + dseg[i].lkey = cpu_to_le32(HNS_ROCE_INVALID_LKEY); dseg[i].addr = 0; } @@ -1999,10 +2007,12 @@ static int hns_roce_query_pf_caps(struct hns_roce_dev *hr_dev) caps->max_sq_sg = le16_to_cpu(resp_a->max_sq_sg); caps->max_sq_inline = le16_to_cpu(resp_a->max_sq_inline); caps->max_rq_sg = le16_to_cpu(resp_a->max_rq_sg); + caps->max_rq_sg = roundup_pow_of_two(caps->max_rq_sg); caps->max_extend_sg = le32_to_cpu(resp_a->max_extend_sg); caps->num_qpc_timer = le16_to_cpu(resp_a->num_qpc_timer); caps->num_cqc_timer = le16_to_cpu(resp_a->num_cqc_timer); caps->max_srq_sges = le16_to_cpu(resp_a->max_srq_sges); + caps->max_srq_sges = roundup_pow_of_two(caps->max_srq_sges); caps->num_aeq_vectors = resp_a->num_aeq_vectors; caps->num_other_vectors = resp_a->num_other_vectors; caps->max_sq_desc_sz = resp_a->max_sq_desc_sz; @@ -5071,7 +5081,7 @@ static int hns_roce_v2_query_qp(struct ib_qp *ibqp, struct ib_qp_attr *qp_attr, done: qp_attr->cur_qp_state = qp_attr->qp_state; qp_attr->cap.max_recv_wr = hr_qp->rq.wqe_cnt; - qp_attr->cap.max_recv_sge = hr_qp->rq.max_gs; + qp_attr->cap.max_recv_sge = hr_qp->rq.max_gs - hr_qp->rq.rsv_sge; if (!ibqp->uobject) { qp_attr->cap.max_send_wr = hr_qp->sq.wqe_cnt; @@ -5383,7 +5393,7 @@ static int hns_roce_v2_query_srq(struct ib_srq *ibsrq, struct ib_srq_attr *attr) attr->srq_limit = limit_wl; attr->max_wr = srq->wqe_cnt - 1; - attr->max_sge = srq->max_gs; + attr->max_sge = srq->max_gs - srq->rsv_sge; out: hns_roce_free_cmd_mailbox(hr_dev, mailbox); diff --git a/drivers/infiniband/hw/hns/hns_roce_hw_v2.h b/drivers/infiniband/hw/hns/hns_roce_hw_v2.h index 69bc072a941d..cd9abdd12ffc 100644 --- a/drivers/infiniband/hw/hns/hns_roce_hw_v2.h +++ b/drivers/infiniband/hw/hns/hns_roce_hw_v2.h @@ -96,7 +96,8 @@ #define HNS_ROCE_V2_CQC_TIMER_ENTRY_SZ PAGE_SIZE #define HNS_ROCE_V2_PAGE_SIZE_SUPPORTED 0xFFFFF000 #define HNS_ROCE_V2_MAX_INNER_MTPT_NUM 2 -#define HNS_ROCE_INVALID_LKEY 0x100 +#define HNS_ROCE_INVALID_LKEY 0x0 +#define HNS_ROCE_INVALID_SGE_LENGTH 0x80000000 #define HNS_ROCE_CMQ_TX_TIMEOUT 30000 #define HNS_ROCE_V2_UC_RC_SGE_NUM_IN_WQE 2 #define HNS_ROCE_V2_RSV_QPS 8 diff --git a/drivers/infiniband/hw/hns/hns_roce_qp.c b/drivers/infiniband/hw/hns/hns_roce_qp.c index 9988ca9bd405..8af411f6389e 100644 --- a/drivers/infiniband/hw/hns/hns_roce_qp.c +++ b/drivers/infiniband/hw/hns/hns_roce_qp.c @@ -413,9 +413,32 @@ static void free_qpn(struct hns_roce_dev *hr_dev, struct hns_roce_qp *hr_qp) spin_unlock(&hr_dev->qp_table.bank_lock); } -static int set_rq_size(struct hns_roce_dev *hr_dev, struct ib_qp_cap *cap, - struct hns_roce_qp *hr_qp, int has_rq) +static u32 proc_rq_sge(struct hns_roce_dev *dev, struct hns_roce_qp *hr_qp, + bool user) { + u32 max_sge = dev->caps.max_rq_sg; + + if (dev->pci_dev->revision >= PCI_REVISION_ID_HIP09) + return max_sge; + + /* Reserve SGEs only for HIP08 in kernel; The userspace driver will + * calculate number of max_sge with reserved SGEs when allocating wqe + * buf, so there is no need to do this again in kernel. But the number + * may exceed the capacity of SGEs recorded in the firmware, so the + * kernel driver should just adapt the value accordingly. + */ + if (user) + max_sge = roundup_pow_of_two(max_sge + 1); + else + hr_qp->rq.rsv_sge = 1; + + return max_sge; +} + +static int set_rq_size(struct hns_roce_dev *hr_dev, struct ib_qp_cap *cap, + struct hns_roce_qp *hr_qp, int has_rq, bool user) +{ + u32 max_sge = proc_rq_sge(hr_dev, hr_qp, user); u32 cnt; /* If srq exist, set zero for relative number of rq */ @@ -431,8 +454,9 @@ static int set_rq_size(struct hns_roce_dev *hr_dev, struct ib_qp_cap *cap, /* Check the validity of QP support capacity */ if (!cap->max_recv_wr || cap->max_recv_wr > hr_dev->caps.max_wqes || - cap->max_recv_sge > hr_dev->caps.max_rq_sg) { - ibdev_err(&hr_dev->ib_dev, "RQ config error, depth=%u, sge=%d\n", + cap->max_recv_sge > max_sge) { + ibdev_err(&hr_dev->ib_dev, + "RQ config error, depth = %u, sge = %u\n", cap->max_recv_wr, cap->max_recv_sge); return -EINVAL; } @@ -444,7 +468,8 @@ static int set_rq_size(struct hns_roce_dev *hr_dev, struct ib_qp_cap *cap, return -EINVAL; } - hr_qp->rq.max_gs = roundup_pow_of_two(max(1U, cap->max_recv_sge)); + hr_qp->rq.max_gs = roundup_pow_of_two(max(1U, cap->max_recv_sge) + + hr_qp->rq.rsv_sge); if (hr_dev->caps.max_rq_sg <= HNS_ROCE_SGE_IN_WQE) hr_qp->rq.wqe_shift = ilog2(hr_dev->caps.max_rq_desc_sz); @@ -459,7 +484,7 @@ static int set_rq_size(struct hns_roce_dev *hr_dev, struct ib_qp_cap *cap, hr_qp->rq_inl_buf.wqe_cnt = 0; cap->max_recv_wr = cnt; - cap->max_recv_sge = hr_qp->rq.max_gs; + cap->max_recv_sge = hr_qp->rq.max_gs - hr_qp->rq.rsv_sge; return 0; } @@ -918,7 +943,7 @@ static int set_qp_param(struct hns_roce_dev *hr_dev, struct hns_roce_qp *hr_qp, hr_qp->sq_signal_bits = IB_SIGNAL_REQ_WR; ret = set_rq_size(hr_dev, &init_attr->cap, hr_qp, - hns_roce_qp_has_rq(init_attr)); + hns_roce_qp_has_rq(init_attr), !!udata); if (ret) { ibdev_err(ibdev, "failed to set user RQ size, ret = %d.\n", ret); diff --git a/drivers/infiniband/hw/hns/hns_roce_srq.c b/drivers/infiniband/hw/hns/hns_roce_srq.c index 94038280a3ec..1be68129b863 100644 --- a/drivers/infiniband/hw/hns/hns_roce_srq.c +++ b/drivers/infiniband/hw/hns/hns_roce_srq.c @@ -3,6 +3,7 @@ * Copyright (c) 2018 Hisilicon Limited. */ +#include #include #include "hns_roce_device.h" #include "hns_roce_cmd.h" @@ -277,6 +278,28 @@ static void free_srq_wrid(struct hns_roce_srq *srq) srq->wrid = NULL; } +static u32 proc_srq_sge(struct hns_roce_dev *dev, struct hns_roce_srq *hr_srq, + bool user) +{ + u32 max_sge = dev->caps.max_srq_sges; + + if (dev->pci_dev->revision >= PCI_REVISION_ID_HIP09) + return max_sge; + + /* Reserve SGEs only for HIP08 in kernel; The userspace driver will + * calculate number of max_sge with reserved SGEs when allocating wqe + * buf, so there is no need to do this again in kernel. But the number + * may exceed the capacity of SGEs recorded in the firmware, so the + * kernel driver should just adapt the value accordingly. + */ + if (user) + max_sge = roundup_pow_of_two(max_sge + 1); + else + hr_srq->rsv_sge = 1; + + return max_sge; +} + int hns_roce_create_srq(struct ib_srq *ib_srq, struct ib_srq_init_attr *init_attr, struct ib_udata *udata) @@ -286,6 +309,7 @@ int hns_roce_create_srq(struct ib_srq *ib_srq, struct hns_roce_srq *srq = to_hr_srq(ib_srq); struct ib_device *ibdev = &hr_dev->ib_dev; struct hns_roce_ib_create_srq ucmd = {}; + u32 max_sge; int ret; u32 cqn; @@ -293,16 +317,24 @@ int hns_roce_create_srq(struct ib_srq *ib_srq, init_attr->srq_type != IB_SRQT_XRC) return -EOPNOTSUPP; - /* Check the actual SRQ wqe and SRQ sge num */ + max_sge = proc_srq_sge(hr_dev, srq, !!udata); + if (init_attr->attr.max_wr >= hr_dev->caps.max_srq_wrs || - init_attr->attr.max_sge > hr_dev->caps.max_srq_sges) + init_attr->attr.max_sge > max_sge) { + ibdev_err(&hr_dev->ib_dev, + "SRQ config error, depth = %u, sge = %d\n", + init_attr->attr.max_wr, init_attr->attr.max_sge); return -EINVAL; + } mutex_init(&srq->mutex); spin_lock_init(&srq->lock); srq->wqe_cnt = roundup_pow_of_two(init_attr->attr.max_wr + 1); - srq->max_gs = init_attr->attr.max_sge; + srq->max_gs = + roundup_pow_of_two(init_attr->attr.max_sge + srq->rsv_sge); + init_attr->attr.max_wr = srq->wqe_cnt; + init_attr->attr.max_sge = srq->max_gs; if (udata) { ret = ib_copy_from_udata(&ucmd, udata, @@ -349,6 +381,8 @@ int hns_roce_create_srq(struct ib_srq *ib_srq, srq->event = hns_roce_ib_srq_event; resp.srqn = srq->srqn; + srq->max_gs = init_attr->attr.max_sge; + init_attr->attr.max_sge = srq->max_gs - srq->rsv_sge; if (udata) { ret = ib_copy_to_udata(udata, &resp, From 1620f09b96ec14c1ff1ff64ee0aeabc027c653d5 Mon Sep 17 00:00:00 2001 From: Wenpeng Liang Date: Sat, 30 Jan 2021 16:58:00 +0800 Subject: [PATCH 349/414] RDMA/hns: Bugfix for checking whether the srq is full when post wr If a user posts WR by wr_list, the head pointer of idx_queue won't be updated until all wqes are filled, so the judgment of whether head equals to tail will get a wrong result. Fix above issue and move the head and tail pointer from the srq structure into the idx_queue structure. After idx_queue is filled with wqe idx, the head pointer of it will increase. Fixes: c7bcb13442e1 ("RDMA/hns: Add SRQ support for hip08 kernel mode") Link: https://lore.kernel.org/r/1611997090-48820-3-git-send-email-liweihang@huawei.com Signed-off-by: Wenpeng Liang Signed-off-by: Weihang Li Signed-off-by: Jason Gunthorpe --- drivers/infiniband/hw/hns/hns_roce_device.h | 4 ++-- drivers/infiniband/hw/hns/hns_roce_hw_v2.c | 19 ++++++++++++++----- drivers/infiniband/hw/hns/hns_roce_srq.c | 5 +++-- 3 files changed, 19 insertions(+), 9 deletions(-) diff --git a/drivers/infiniband/hw/hns/hns_roce_device.h b/drivers/infiniband/hw/hns/hns_roce_device.h index 72961e4edbf2..916e031f1e75 100644 --- a/drivers/infiniband/hw/hns/hns_roce_device.h +++ b/drivers/infiniband/hw/hns/hns_roce_device.h @@ -494,6 +494,8 @@ struct hns_roce_idx_que { struct hns_roce_mtr mtr; int entry_shift; unsigned long *bitmap; + u32 head; + u32 tail; }; struct hns_roce_srq { @@ -513,8 +515,6 @@ struct hns_roce_srq { u64 *wrid; struct hns_roce_idx_que idx_que; spinlock_t lock; - u16 head; - u16 tail; struct mutex mutex; void (*event)(struct hns_roce_srq *srq, enum hns_roce_event event); }; diff --git a/drivers/infiniband/hw/hns/hns_roce_hw_v2.c b/drivers/infiniband/hw/hns/hns_roce_hw_v2.c index 2245d25cf64f..b13775a4c512 100644 --- a/drivers/infiniband/hw/hns/hns_roce_hw_v2.c +++ b/drivers/infiniband/hw/hns/hns_roce_hw_v2.c @@ -849,11 +849,20 @@ static void hns_roce_free_srq_wqe(struct hns_roce_srq *srq, int wqe_index) spin_lock(&srq->lock); bitmap_clear(srq->idx_que.bitmap, wqe_index, 1); - srq->tail++; + srq->idx_que.tail++; spin_unlock(&srq->lock); } +int hns_roce_srqwq_overflow(struct hns_roce_srq *srq, int nreq) +{ + struct hns_roce_idx_que *idx_que = &srq->idx_que; + unsigned int cur; + + cur = idx_que->head - idx_que->tail; + return cur + nreq >= srq->wqe_cnt - 1; +} + static int find_empty_entry(struct hns_roce_idx_que *idx_que, unsigned long size) { @@ -889,7 +898,7 @@ static int hns_roce_v2_post_srq_recv(struct ib_srq *ibsrq, spin_lock_irqsave(&srq->lock, flags); - ind = srq->head & (srq->wqe_cnt - 1); + ind = srq->idx_que.head & (srq->wqe_cnt - 1); max_sge = srq->max_gs - srq->rsv_sge; for (nreq = 0; wr; ++nreq, wr = wr->next) { @@ -902,7 +911,7 @@ static int hns_roce_v2_post_srq_recv(struct ib_srq *ibsrq, break; } - if (unlikely(srq->head == srq->tail)) { + if (unlikely(hns_roce_srqwq_overflow(srq, nreq))) { ret = -ENOMEM; *bad_wr = wr; break; @@ -938,7 +947,7 @@ static int hns_roce_v2_post_srq_recv(struct ib_srq *ibsrq, } if (likely(nreq)) { - srq->head += nreq; + srq->idx_que.head += nreq; /* * Make sure that descriptors are written before @@ -950,7 +959,7 @@ static int hns_roce_v2_post_srq_recv(struct ib_srq *ibsrq, cpu_to_le32(HNS_ROCE_V2_SRQ_DB << V2_DB_BYTE_4_CMD_S | (srq->srqn & V2_DB_BYTE_4_TAG_M)); srq_db.parameter = - cpu_to_le32(srq->head & V2_DB_PARAMETER_IDX_M); + cpu_to_le32(srq->idx_que.head & V2_DB_PARAMETER_IDX_M); hns_roce_write64(hr_dev, (__le32 *)&srq_db, srq->db_reg_l); } diff --git a/drivers/infiniband/hw/hns/hns_roce_srq.c b/drivers/infiniband/hw/hns/hns_roce_srq.c index 1be68129b863..e622fd1d4c4b 100644 --- a/drivers/infiniband/hw/hns/hns_roce_srq.c +++ b/drivers/infiniband/hw/hns/hns_roce_srq.c @@ -245,6 +245,9 @@ static int alloc_srq_idx(struct hns_roce_dev *hr_dev, struct hns_roce_srq *srq, } } + idx_que->head = 0; + idx_que->tail = 0; + return 0; err_idx_mtr: hns_roce_mtr_destroy(hr_dev, &idx_que->mtr); @@ -263,8 +266,6 @@ static void free_srq_idx(struct hns_roce_dev *hr_dev, struct hns_roce_srq *srq) static int alloc_srq_wrid(struct hns_roce_dev *hr_dev, struct hns_roce_srq *srq) { - srq->head = 0; - srq->tail = srq->wqe_cnt - 1; srq->wrid = kvmalloc_array(srq->wqe_cnt, sizeof(u64), GFP_KERNEL); if (!srq->wrid) return -ENOMEM; From b5df9b7a2f965b7903850d8f89846ffe0080b84b Mon Sep 17 00:00:00 2001 From: Wenpeng Liang Date: Sat, 30 Jan 2021 16:58:01 +0800 Subject: [PATCH 350/414] RDMA/hns: Force srq_limit to 0 when creating SRQ According to the IB Specification, srq_limit shouldn't be configured during SRQ creation. If a user set srq_limit at this time, the driver should forced it to zero, or the result of creating SRQ will conflict with the result of querying SRQ. Fixes: c7bcb13442e1 ("RDMA/hns: Add SRQ support for hip08 kernel mode") Link: https://lore.kernel.org/r/1611997090-48820-4-git-send-email-liweihang@huawei.com Signed-off-by: Wenpeng Liang Signed-off-by: Weihang Li Signed-off-by: Jason Gunthorpe --- drivers/infiniband/hw/hns/hns_roce_srq.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/infiniband/hw/hns/hns_roce_srq.c b/drivers/infiniband/hw/hns/hns_roce_srq.c index e622fd1d4c4b..47e66fe74d19 100644 --- a/drivers/infiniband/hw/hns/hns_roce_srq.c +++ b/drivers/infiniband/hw/hns/hns_roce_srq.c @@ -336,6 +336,7 @@ int hns_roce_create_srq(struct ib_srq *ib_srq, roundup_pow_of_two(init_attr->attr.max_sge + srq->rsv_sge); init_attr->attr.max_wr = srq->wqe_cnt; init_attr->attr.max_sge = srq->max_gs; + init_attr->attr.srq_limit = 0; if (udata) { ret = ib_copy_from_udata(&ucmd, udata, From bb74fe7e81c8b2b65c6a351a247fdb9a969cbaec Mon Sep 17 00:00:00 2001 From: Wenpeng Liang Date: Sat, 30 Jan 2021 16:58:02 +0800 Subject: [PATCH 351/414] RDMA/hns: Fixed wrong judgments in the goto branch When an error occurs, the qp_table must be cleared, regardless of whether the SRQ feature is enabled. Fixes: 5c1f167af112 ("RDMA/hns: Init SRQ table for hip08") Link: https://lore.kernel.org/r/1611997090-48820-5-git-send-email-liweihang@huawei.com Signed-off-by: Wenpeng Liang Signed-off-by: Weihang Li Signed-off-by: Jason Gunthorpe --- drivers/infiniband/hw/hns/hns_roce_main.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/drivers/infiniband/hw/hns/hns_roce_main.c b/drivers/infiniband/hw/hns/hns_roce_main.c index 2b78b1ff63d3..79782209dbb5 100644 --- a/drivers/infiniband/hw/hns/hns_roce_main.c +++ b/drivers/infiniband/hw/hns/hns_roce_main.c @@ -768,8 +768,7 @@ static int hns_roce_setup_hca(struct hns_roce_dev *hr_dev) return 0; err_qp_table_free: - if (hr_dev->caps.flags & HNS_ROCE_CAP_FLAG_SRQ) - hns_roce_cleanup_qp_table(hr_dev); + hns_roce_cleanup_qp_table(hr_dev); err_cq_table_free: hns_roce_cleanup_cq_table(hr_dev); From 6ee00fbf733d7e17ca935e5636adfce605b10659 Mon Sep 17 00:00:00 2001 From: Wenpeng Liang Date: Sat, 30 Jan 2021 16:58:03 +0800 Subject: [PATCH 352/414] RDMA/hns: Remove the reserved WQE of SRQ Each SRQs contain an reserved WQE, it is inappropriate and should be removed. Fixes: c7bcb13442e1 ("RDMA/hns: Add SRQ support for hip08 kernel mode") Link: https://lore.kernel.org/r/1611997090-48820-6-git-send-email-liweihang@huawei.com Signed-off-by: Wenpeng Liang Signed-off-by: Weihang Li Signed-off-by: Jason Gunthorpe --- drivers/infiniband/hw/hns/hns_roce_device.h | 1 + drivers/infiniband/hw/hns/hns_roce_hw_v2.c | 6 +++--- drivers/infiniband/hw/hns/hns_roce_srq.c | 6 ++++-- 3 files changed, 8 insertions(+), 5 deletions(-) diff --git a/drivers/infiniband/hw/hns/hns_roce_device.h b/drivers/infiniband/hw/hns/hns_roce_device.h index 916e031f1e75..d6a846b47820 100644 --- a/drivers/infiniband/hw/hns/hns_roce_device.h +++ b/drivers/infiniband/hw/hns/hns_roce_device.h @@ -54,6 +54,7 @@ /* Hardware specification only for v1 engine */ #define HNS_ROCE_MIN_CQE_NUM 0x40 #define HNS_ROCE_MIN_WQE_NUM 0x20 +#define HNS_ROCE_MIN_SRQ_WQE_NUM 1 /* Hardware specification only for v1 engine */ #define HNS_ROCE_MAX_INNER_MTPT_NUM 0x7 diff --git a/drivers/infiniband/hw/hns/hns_roce_hw_v2.c b/drivers/infiniband/hw/hns/hns_roce_hw_v2.c index b13775a4c512..49a845603b02 100644 --- a/drivers/infiniband/hw/hns/hns_roce_hw_v2.c +++ b/drivers/infiniband/hw/hns/hns_roce_hw_v2.c @@ -860,7 +860,7 @@ int hns_roce_srqwq_overflow(struct hns_roce_srq *srq, int nreq) unsigned int cur; cur = idx_que->head - idx_que->tail; - return cur + nreq >= srq->wqe_cnt - 1; + return cur + nreq >= srq->wqe_cnt; } static int find_empty_entry(struct hns_roce_idx_que *idx_que, @@ -5338,7 +5338,7 @@ static int hns_roce_v2_modify_srq(struct ib_srq *ibsrq, return -EINVAL; if (srq_attr_mask & IB_SRQ_LIMIT) { - if (srq_attr->srq_limit >= srq->wqe_cnt) + if (srq_attr->srq_limit > srq->wqe_cnt) return -EINVAL; mailbox = hns_roce_alloc_cmd_mailbox(hr_dev); @@ -5401,7 +5401,7 @@ static int hns_roce_v2_query_srq(struct ib_srq *ibsrq, struct ib_srq_attr *attr) SRQC_BYTE_8_SRQ_LIMIT_WL_S); attr->srq_limit = limit_wl; - attr->max_wr = srq->wqe_cnt - 1; + attr->max_wr = srq->wqe_cnt; attr->max_sge = srq->max_gs - srq->rsv_sge; out: diff --git a/drivers/infiniband/hw/hns/hns_roce_srq.c b/drivers/infiniband/hw/hns/hns_roce_srq.c index 47e66fe74d19..5d20b30f8004 100644 --- a/drivers/infiniband/hw/hns/hns_roce_srq.c +++ b/drivers/infiniband/hw/hns/hns_roce_srq.c @@ -320,7 +320,7 @@ int hns_roce_create_srq(struct ib_srq *ib_srq, max_sge = proc_srq_sge(hr_dev, srq, !!udata); - if (init_attr->attr.max_wr >= hr_dev->caps.max_srq_wrs || + if (init_attr->attr.max_wr > hr_dev->caps.max_srq_wrs || init_attr->attr.max_sge > max_sge) { ibdev_err(&hr_dev->ib_dev, "SRQ config error, depth = %u, sge = %d\n", @@ -331,7 +331,9 @@ int hns_roce_create_srq(struct ib_srq *ib_srq, mutex_init(&srq->mutex); spin_lock_init(&srq->lock); - srq->wqe_cnt = roundup_pow_of_two(init_attr->attr.max_wr + 1); + init_attr->attr.max_wr = max_t(u32, init_attr->attr.max_wr, + HNS_ROCE_MIN_SRQ_WQE_NUM); + srq->wqe_cnt = roundup_pow_of_two(init_attr->attr.max_wr); srq->max_gs = roundup_pow_of_two(init_attr->attr.max_sge + srq->rsv_sge); init_attr->attr.max_wr = srq->wqe_cnt; From 0fee451634969cd132a0101824ef2558b2b389be Mon Sep 17 00:00:00 2001 From: Wenpeng Liang Date: Sat, 30 Jan 2021 16:58:04 +0800 Subject: [PATCH 353/414] RDMA/hns: Refactor hns_roce_create_srq() Split the SRQ creation process into multiple steps and encapsulate them into functions. Link: https://lore.kernel.org/r/1611997090-48820-7-git-send-email-liweihang@huawei.com Signed-off-by: Wenpeng Liang Signed-off-by: Weihang Li Signed-off-by: Jason Gunthorpe --- drivers/infiniband/hw/hns/hns_roce_device.h | 5 +- drivers/infiniband/hw/hns/hns_roce_hw_v2.c | 12 +- drivers/infiniband/hw/hns/hns_roce_srq.c | 291 +++++++++++--------- 3 files changed, 172 insertions(+), 136 deletions(-) diff --git a/drivers/infiniband/hw/hns/hns_roce_device.h b/drivers/infiniband/hw/hns/hns_roce_device.h index d6a846b47820..b325b9c4b485 100644 --- a/drivers/infiniband/hw/hns/hns_roce_device.h +++ b/drivers/infiniband/hw/hns/hns_roce_device.h @@ -506,6 +506,7 @@ struct hns_roce_srq { int max_gs; u32 rsv_sge; int wqe_shift; + u32 cqn; void __iomem *db_reg_l; atomic_t refcount; @@ -953,8 +954,8 @@ struct hns_roce_hw { int (*init_eq)(struct hns_roce_dev *hr_dev); void (*cleanup_eq)(struct hns_roce_dev *hr_dev); void (*write_srqc)(struct hns_roce_dev *hr_dev, - struct hns_roce_srq *srq, u32 pdn, u16 xrcd, u32 cqn, - void *mb_buf, u64 *mtts_wqe, u64 *mtts_idx, + struct hns_roce_srq *srq, void *mb_buf, + u64 *mtts_wqe, u64 *mtts_idx, dma_addr_t dma_handle_wqe, dma_addr_t dma_handle_idx); int (*modify_srq)(struct ib_srq *ibsrq, struct ib_srq_attr *srq_attr, diff --git a/drivers/infiniband/hw/hns/hns_roce_hw_v2.c b/drivers/infiniband/hw/hns/hns_roce_hw_v2.c index 49a845603b02..ec2d64c91114 100644 --- a/drivers/infiniband/hw/hns/hns_roce_hw_v2.c +++ b/drivers/infiniband/hw/hns/hns_roce_hw_v2.c @@ -5228,9 +5228,9 @@ static int hns_roce_v2_qp_flow_control_init(struct hns_roce_dev *hr_dev, } static void hns_roce_v2_write_srqc(struct hns_roce_dev *hr_dev, - struct hns_roce_srq *srq, u32 pdn, u16 xrcd, - u32 cqn, void *mb_buf, u64 *mtts_wqe, - u64 *mtts_idx, dma_addr_t dma_handle_wqe, + struct hns_roce_srq *srq, void *mb_buf, + u64 *mtts_wqe, u64 *mtts_idx, + dma_addr_t dma_handle_wqe, dma_addr_t dma_handle_idx) { struct hns_roce_srq_context *srq_context; @@ -5257,7 +5257,7 @@ static void hns_roce_v2_write_srqc(struct hns_roce_dev *hr_dev, SRQC_BYTE_8_SRQ_LIMIT_WL_S, 0); roce_set_field(srq_context->byte_12_xrcd, SRQC_BYTE_12_SRQ_XRCD_M, - SRQC_BYTE_12_SRQ_XRCD_S, xrcd); + SRQC_BYTE_12_SRQ_XRCD_S, 0); srq_context->wqe_bt_ba = cpu_to_le32((u32)(dma_handle_wqe >> 3)); @@ -5267,7 +5267,7 @@ static void hns_roce_v2_write_srqc(struct hns_roce_dev *hr_dev, dma_handle_wqe >> 35); roce_set_field(srq_context->byte_28_rqws_pd, SRQC_BYTE_28_PD_M, - SRQC_BYTE_28_PD_S, pdn); + SRQC_BYTE_28_PD_S, to_hr_pd(srq->ibsrq.pd)->pdn); roce_set_field(srq_context->byte_28_rqws_pd, SRQC_BYTE_28_RQWS_M, SRQC_BYTE_28_RQWS_S, srq->max_gs <= 0 ? 0 : fls(srq->max_gs - 1)); @@ -5307,7 +5307,7 @@ static void hns_roce_v2_write_srqc(struct hns_roce_dev *hr_dev, upper_32_bits(to_hr_hw_page_addr(mtts_idx[1]))); roce_set_field(srq_context->byte_56_xrc_cqn, SRQC_BYTE_56_SRQ_XRC_CQN_M, SRQC_BYTE_56_SRQ_XRC_CQN_S, - cqn); + srq->cqn); roce_set_field(srq_context->byte_56_xrc_cqn, SRQC_BYTE_56_SRQ_WQE_BA_PG_SZ_M, SRQC_BYTE_56_SRQ_WQE_BA_PG_SZ_S, diff --git a/drivers/infiniband/hw/hns/hns_roce_srq.c b/drivers/infiniband/hw/hns/hns_roce_srq.c index 5d20b30f8004..5069b8103b98 100644 --- a/drivers/infiniband/hw/hns/hns_roce_srq.c +++ b/drivers/infiniband/hw/hns/hns_roce_srq.c @@ -77,8 +77,7 @@ static int hns_roce_hw_destroy_srq(struct hns_roce_dev *dev, HNS_ROCE_CMD_TIMEOUT_MSECS); } -static int alloc_srqc(struct hns_roce_dev *hr_dev, struct hns_roce_srq *srq, - u32 pdn, u32 cqn, u16 xrcd, u64 db_rec_addr) +static int alloc_srqc(struct hns_roce_dev *hr_dev, struct hns_roce_srq *srq) { struct hns_roce_srq_table *srq_table = &hr_dev->srq_table; struct ib_device *ibdev = &hr_dev->ib_dev; @@ -133,9 +132,8 @@ static int alloc_srqc(struct hns_roce_dev *hr_dev, struct hns_roce_srq *srq, goto err_xa; } - hr_dev->hw->write_srqc(hr_dev, srq, pdn, xrcd, cqn, mailbox->buf, - mtts_wqe, mtts_idx, dma_handle_wqe, - dma_handle_idx); + hr_dev->hw->write_srqc(hr_dev, srq, mailbox->buf, mtts_wqe, mtts_idx, + dma_handle_wqe, dma_handle_idx); ret = hns_roce_hw_create_srq(hr_dev, mailbox, srq->srqn); hns_roce_free_cmd_mailbox(hr_dev, mailbox); @@ -144,9 +142,7 @@ static int alloc_srqc(struct hns_roce_dev *hr_dev, struct hns_roce_srq *srq, goto err_xa; } - atomic_set(&srq->refcount, 1); - init_completion(&srq->free); - return ret; + return 0; err_xa: xa_erase(&srq_table->xa, srq->srqn); @@ -179,45 +175,13 @@ static void free_srqc(struct hns_roce_dev *hr_dev, struct hns_roce_srq *srq) hns_roce_bitmap_free(&srq_table->bitmap, srq->srqn, BITMAP_NO_RR); } -static int alloc_srq_buf(struct hns_roce_dev *hr_dev, struct hns_roce_srq *srq, - struct ib_udata *udata, unsigned long addr) -{ - struct ib_device *ibdev = &hr_dev->ib_dev; - struct hns_roce_buf_attr buf_attr = {}; - int err; - - srq->wqe_shift = ilog2(roundup_pow_of_two(max(HNS_ROCE_SGE_SIZE, - HNS_ROCE_SGE_SIZE * - srq->max_gs))); - - buf_attr.page_shift = hr_dev->caps.srqwqe_buf_pg_sz + HNS_HW_PAGE_SHIFT; - buf_attr.region[0].size = to_hr_hem_entries_size(srq->wqe_cnt, - srq->wqe_shift); - buf_attr.region[0].hopnum = hr_dev->caps.srqwqe_hop_num; - buf_attr.region_count = 1; - - err = hns_roce_mtr_create(hr_dev, &srq->buf_mtr, &buf_attr, - hr_dev->caps.srqwqe_ba_pg_sz + - HNS_HW_PAGE_SHIFT, udata, addr); - if (err) - ibdev_err(ibdev, - "failed to alloc SRQ buf mtr, ret = %d.\n", err); - - return err; -} - -static void free_srq_buf(struct hns_roce_dev *hr_dev, struct hns_roce_srq *srq) -{ - hns_roce_mtr_destroy(hr_dev, &srq->buf_mtr); -} - static int alloc_srq_idx(struct hns_roce_dev *hr_dev, struct hns_roce_srq *srq, struct ib_udata *udata, unsigned long addr) { struct hns_roce_idx_que *idx_que = &srq->idx_que; struct ib_device *ibdev = &hr_dev->ib_dev; struct hns_roce_buf_attr buf_attr = {}; - int err; + int ret; srq->idx_que.entry_shift = ilog2(HNS_ROCE_IDX_QUE_ENTRY_SZ); @@ -227,20 +191,20 @@ static int alloc_srq_idx(struct hns_roce_dev *hr_dev, struct hns_roce_srq *srq, buf_attr.region[0].hopnum = hr_dev->caps.idx_hop_num; buf_attr.region_count = 1; - err = hns_roce_mtr_create(hr_dev, &idx_que->mtr, &buf_attr, + ret = hns_roce_mtr_create(hr_dev, &idx_que->mtr, &buf_attr, hr_dev->caps.idx_ba_pg_sz + HNS_HW_PAGE_SHIFT, udata, addr); - if (err) { + if (ret) { ibdev_err(ibdev, - "failed to alloc SRQ idx mtr, ret = %d.\n", err); - return err; + "failed to alloc SRQ idx mtr, ret = %d.\n", ret); + return ret; } if (!udata) { idx_que->bitmap = bitmap_zalloc(srq->wqe_cnt, GFP_KERNEL); if (!idx_que->bitmap) { ibdev_err(ibdev, "failed to alloc SRQ idx bitmap.\n"); - err = -ENOMEM; + ret = -ENOMEM; goto err_idx_mtr; } } @@ -252,7 +216,7 @@ static int alloc_srq_idx(struct hns_roce_dev *hr_dev, struct hns_roce_srq *srq, err_idx_mtr: hns_roce_mtr_destroy(hr_dev, &idx_que->mtr); - return err; + return ret; } static void free_srq_idx(struct hns_roce_dev *hr_dev, struct hns_roce_srq *srq) @@ -264,6 +228,40 @@ static void free_srq_idx(struct hns_roce_dev *hr_dev, struct hns_roce_srq *srq) hns_roce_mtr_destroy(hr_dev, &idx_que->mtr); } +static int alloc_srq_wqe_buf(struct hns_roce_dev *hr_dev, + struct hns_roce_srq *srq, + struct ib_udata *udata, unsigned long addr) +{ + struct ib_device *ibdev = &hr_dev->ib_dev; + struct hns_roce_buf_attr buf_attr = {}; + int ret; + + srq->wqe_shift = ilog2(roundup_pow_of_two(max(HNS_ROCE_SGE_SIZE, + HNS_ROCE_SGE_SIZE * + srq->max_gs))); + + buf_attr.page_shift = hr_dev->caps.srqwqe_buf_pg_sz + HNS_HW_PAGE_SHIFT; + buf_attr.region[0].size = to_hr_hem_entries_size(srq->wqe_cnt, + srq->wqe_shift); + buf_attr.region[0].hopnum = hr_dev->caps.srqwqe_hop_num; + buf_attr.region_count = 1; + + ret = hns_roce_mtr_create(hr_dev, &srq->buf_mtr, &buf_attr, + hr_dev->caps.srqwqe_ba_pg_sz + + HNS_HW_PAGE_SHIFT, udata, addr); + if (ret) + ibdev_err(ibdev, + "failed to alloc SRQ buf mtr, ret = %d.\n", ret); + + return ret; +} + +static void free_srq_wqe_buf(struct hns_roce_dev *hr_dev, + struct hns_roce_srq *srq) +{ + hns_roce_mtr_destroy(hr_dev, &srq->buf_mtr); +} + static int alloc_srq_wrid(struct hns_roce_dev *hr_dev, struct hns_roce_srq *srq) { srq->wrid = kvmalloc_array(srq->wqe_cnt, sizeof(u64), GFP_KERNEL); @@ -301,6 +299,104 @@ static u32 proc_srq_sge(struct hns_roce_dev *dev, struct hns_roce_srq *hr_srq, return max_sge; } +static int set_srq_basic_param(struct hns_roce_srq *srq, + struct ib_srq_init_attr *init_attr, + struct ib_udata *udata) +{ + struct hns_roce_dev *hr_dev = to_hr_dev(srq->ibsrq.device); + struct ib_srq_attr *attr = &init_attr->attr; + u32 max_sge; + + max_sge = proc_srq_sge(hr_dev, srq, !!udata); + if (attr->max_wr > hr_dev->caps.max_srq_wrs || + attr->max_sge > max_sge) { + ibdev_err(&hr_dev->ib_dev, + "invalid SRQ attr, depth = %u, sge = %u.\n", + attr->max_wr, attr->max_sge); + return -EINVAL; + } + + attr->max_wr = max_t(u32, attr->max_wr, HNS_ROCE_MIN_SRQ_WQE_NUM); + srq->wqe_cnt = roundup_pow_of_two(attr->max_wr); + srq->max_gs = roundup_pow_of_two(attr->max_sge + srq->rsv_sge); + + attr->max_wr = srq->wqe_cnt; + attr->max_sge = srq->max_gs - srq->rsv_sge; + attr->srq_limit = 0; + + return 0; +} + +static void set_srq_ext_param(struct hns_roce_srq *srq, + struct ib_srq_init_attr *init_attr) +{ + srq->cqn = ib_srq_has_cq(init_attr->srq_type) ? + to_hr_cq(init_attr->ext.cq)->cqn : 0; +} + +static int set_srq_param(struct hns_roce_srq *srq, + struct ib_srq_init_attr *init_attr, + struct ib_udata *udata) +{ + int ret; + + ret = set_srq_basic_param(srq, init_attr, udata); + if (ret) + return ret; + + set_srq_ext_param(srq, init_attr); + + return 0; +} + +static int alloc_srq_buf(struct hns_roce_dev *hr_dev, struct hns_roce_srq *srq, + struct ib_udata *udata) +{ + struct hns_roce_ib_create_srq ucmd = {}; + int ret; + + if (udata) { + ret = ib_copy_from_udata(&ucmd, udata, + min(udata->inlen, sizeof(ucmd))); + if (ret) { + ibdev_err(&hr_dev->ib_dev, + "failed to copy SRQ udata, ret = %d.\n", + ret); + return ret; + } + } + + ret = alloc_srq_idx(hr_dev, srq, udata, ucmd.que_addr); + if (ret) + return ret; + + ret = alloc_srq_wqe_buf(hr_dev, srq, udata, ucmd.buf_addr); + if (ret) + goto err_idx; + + if (!udata) { + ret = alloc_srq_wrid(hr_dev, srq); + if (ret) + goto err_wqe_buf; + } + + return 0; + +err_wqe_buf: + free_srq_wqe_buf(hr_dev, srq); +err_idx: + free_srq_idx(hr_dev, srq); + + return ret; +} + +static void free_srq_buf(struct hns_roce_dev *hr_dev, struct hns_roce_srq *srq) +{ + free_srq_wrid(srq); + free_srq_wqe_buf(hr_dev, srq); + free_srq_idx(hr_dev, srq); +} + int hns_roce_create_srq(struct ib_srq *ib_srq, struct ib_srq_init_attr *init_attr, struct ib_udata *udata) @@ -308,103 +404,44 @@ int hns_roce_create_srq(struct ib_srq *ib_srq, struct hns_roce_dev *hr_dev = to_hr_dev(ib_srq->device); struct hns_roce_ib_create_srq_resp resp = {}; struct hns_roce_srq *srq = to_hr_srq(ib_srq); - struct ib_device *ibdev = &hr_dev->ib_dev; - struct hns_roce_ib_create_srq ucmd = {}; - u32 max_sge; int ret; - u32 cqn; - - if (init_attr->srq_type != IB_SRQT_BASIC && - init_attr->srq_type != IB_SRQT_XRC) - return -EOPNOTSUPP; - - max_sge = proc_srq_sge(hr_dev, srq, !!udata); - - if (init_attr->attr.max_wr > hr_dev->caps.max_srq_wrs || - init_attr->attr.max_sge > max_sge) { - ibdev_err(&hr_dev->ib_dev, - "SRQ config error, depth = %u, sge = %d\n", - init_attr->attr.max_wr, init_attr->attr.max_sge); - return -EINVAL; - } mutex_init(&srq->mutex); spin_lock_init(&srq->lock); - init_attr->attr.max_wr = max_t(u32, init_attr->attr.max_wr, - HNS_ROCE_MIN_SRQ_WQE_NUM); - srq->wqe_cnt = roundup_pow_of_two(init_attr->attr.max_wr); - srq->max_gs = - roundup_pow_of_two(init_attr->attr.max_sge + srq->rsv_sge); - init_attr->attr.max_wr = srq->wqe_cnt; - init_attr->attr.max_sge = srq->max_gs; - init_attr->attr.srq_limit = 0; - - if (udata) { - ret = ib_copy_from_udata(&ucmd, udata, - min(udata->inlen, sizeof(ucmd))); - if (ret) { - ibdev_err(ibdev, "failed to copy SRQ udata, ret = %d.\n", - ret); - return ret; - } - } - - ret = alloc_srq_buf(hr_dev, srq, udata, ucmd.buf_addr); - if (ret) { - ibdev_err(ibdev, - "failed to alloc SRQ buffer, ret = %d.\n", ret); + ret = set_srq_param(srq, init_attr, udata); + if (ret) return ret; - } - ret = alloc_srq_idx(hr_dev, srq, udata, ucmd.que_addr); - if (ret) { - ibdev_err(ibdev, "failed to alloc SRQ idx, ret = %d.\n", ret); - goto err_buf_alloc; - } + ret = alloc_srq_buf(hr_dev, srq, udata); + if (ret) + return ret; - if (!udata) { - ret = alloc_srq_wrid(hr_dev, srq); - if (ret) { - ibdev_err(ibdev, "failed to alloc SRQ wrid, ret = %d.\n", - ret); - goto err_idx_alloc; + ret = alloc_srqc(hr_dev, srq); + if (ret) + goto err_srq_buf; + + if (udata) { + resp.srqn = srq->srqn; + if (ib_copy_to_udata(udata, &resp, + min(udata->outlen, sizeof(resp)))) { + ret = -EFAULT; + goto err_srqc; } } - cqn = ib_srq_has_cq(init_attr->srq_type) ? - to_hr_cq(init_attr->ext.cq)->cqn : 0; srq->db_reg_l = hr_dev->reg_base + SRQ_DB_REG; - - ret = alloc_srqc(hr_dev, srq, to_hr_pd(ib_srq->pd)->pdn, cqn, 0, 0); - if (ret) { - ibdev_err(ibdev, - "failed to alloc SRQ context, ret = %d.\n", ret); - goto err_wrid_alloc; - } - srq->event = hns_roce_ib_srq_event; - resp.srqn = srq->srqn; - srq->max_gs = init_attr->attr.max_sge; - init_attr->attr.max_sge = srq->max_gs - srq->rsv_sge; - - if (udata) { - ret = ib_copy_to_udata(udata, &resp, - min(udata->outlen, sizeof(resp))); - if (ret) - goto err_srqc_alloc; - } + atomic_set(&srq->refcount, 1); + init_completion(&srq->free); return 0; -err_srqc_alloc: +err_srqc: free_srqc(hr_dev, srq); -err_wrid_alloc: - free_srq_wrid(srq); -err_idx_alloc: - free_srq_idx(hr_dev, srq); -err_buf_alloc: +err_srq_buf: free_srq_buf(hr_dev, srq); + return ret; } @@ -414,8 +451,6 @@ int hns_roce_destroy_srq(struct ib_srq *ibsrq, struct ib_udata *udata) struct hns_roce_srq *srq = to_hr_srq(ibsrq); free_srqc(hr_dev, srq); - free_srq_idx(hr_dev, srq); - free_srq_wrid(srq); free_srq_buf(hr_dev, srq); return 0; } From eacb45ca8f4bb722ab5a9734379b37e4bd99f3c0 Mon Sep 17 00:00:00 2001 From: Wenpeng Liang Date: Sat, 30 Jan 2021 16:58:05 +0800 Subject: [PATCH 354/414] RDMA/hns: Refactor code about SRQ Context Reduce parameter numbers of write_srqc() and move some related code into it from alloc_srqc(). Link: https://lore.kernel.org/r/1611997090-48820-8-git-send-email-liweihang@huawei.com Signed-off-by: Wenpeng Liang Signed-off-by: Weihang Li Signed-off-by: Jason Gunthorpe --- drivers/infiniband/hw/hns/hns_roce_device.h | 6 +-- drivers/infiniband/hw/hns/hns_roce_hw_v2.c | 33 +++++++++++++--- drivers/infiniband/hw/hns/hns_roce_srq.c | 44 ++++++--------------- 3 files changed, 42 insertions(+), 41 deletions(-) diff --git a/drivers/infiniband/hw/hns/hns_roce_device.h b/drivers/infiniband/hw/hns/hns_roce_device.h index b325b9c4b485..d51641af9342 100644 --- a/drivers/infiniband/hw/hns/hns_roce_device.h +++ b/drivers/infiniband/hw/hns/hns_roce_device.h @@ -953,11 +953,7 @@ struct hns_roce_hw { int (*modify_cq)(struct ib_cq *cq, u16 cq_count, u16 cq_period); int (*init_eq)(struct hns_roce_dev *hr_dev); void (*cleanup_eq)(struct hns_roce_dev *hr_dev); - void (*write_srqc)(struct hns_roce_dev *hr_dev, - struct hns_roce_srq *srq, void *mb_buf, - u64 *mtts_wqe, u64 *mtts_idx, - dma_addr_t dma_handle_wqe, - dma_addr_t dma_handle_idx); + int (*write_srqc)(struct hns_roce_srq *srq, void *mb_buf); int (*modify_srq)(struct ib_srq *ibsrq, struct ib_srq_attr *srq_attr, enum ib_srq_attr_mask srq_attr_mask, struct ib_udata *udata); diff --git a/drivers/infiniband/hw/hns/hns_roce_hw_v2.c b/drivers/infiniband/hw/hns/hns_roce_hw_v2.c index ec2d64c91114..dd5f7b57f007 100644 --- a/drivers/infiniband/hw/hns/hns_roce_hw_v2.c +++ b/drivers/infiniband/hw/hns/hns_roce_hw_v2.c @@ -5227,17 +5227,38 @@ static int hns_roce_v2_qp_flow_control_init(struct hns_roce_dev *hr_dev, return ret; } -static void hns_roce_v2_write_srqc(struct hns_roce_dev *hr_dev, - struct hns_roce_srq *srq, void *mb_buf, - u64 *mtts_wqe, u64 *mtts_idx, - dma_addr_t dma_handle_wqe, - dma_addr_t dma_handle_idx) +static int hns_roce_v2_write_srqc(struct hns_roce_srq *srq, void *mb_buf) { + struct ib_device *ibdev = srq->ibsrq.device; + struct hns_roce_dev *hr_dev = to_hr_dev(ibdev); struct hns_roce_srq_context *srq_context; + u64 mtts_wqe[MTT_MIN_COUNT] = {}; + u64 mtts_idx[MTT_MIN_COUNT] = {}; + dma_addr_t dma_handle_wqe = 0; + dma_addr_t dma_handle_idx = 0; + int ret; srq_context = mb_buf; memset(srq_context, 0, sizeof(*srq_context)); + /* Get the physical address of srq buf */ + ret = hns_roce_mtr_find(hr_dev, &srq->buf_mtr, 0, mtts_wqe, + ARRAY_SIZE(mtts_wqe), &dma_handle_wqe); + if (ret < 1) { + ibdev_err(ibdev, "failed to find mtr for SRQ WQE, ret = %d.\n", + ret); + return -ENOBUFS; + } + + /* Get physical address of idx que buf */ + ret = hns_roce_mtr_find(hr_dev, &srq->idx_que.mtr, 0, mtts_idx, + ARRAY_SIZE(mtts_idx), &dma_handle_idx); + if (ret < 1) { + ibdev_err(ibdev, "failed to find mtr for SRQ idx, ret = %d.\n", + ret); + return -ENOBUFS; + } + roce_set_field(srq_context->byte_4_srqn_srqst, SRQC_BYTE_4_SRQ_ST_M, SRQC_BYTE_4_SRQ_ST_S, 1); @@ -5319,6 +5340,8 @@ static void hns_roce_v2_write_srqc(struct hns_roce_dev *hr_dev, roce_set_bit(srq_context->db_record_addr_record_en, SRQC_BYTE_60_SRQ_RECORD_EN_S, 0); + + return 0; } static int hns_roce_v2_modify_srq(struct ib_srq *ibsrq, diff --git a/drivers/infiniband/hw/hns/hns_roce_srq.c b/drivers/infiniband/hw/hns/hns_roce_srq.c index 5069b8103b98..d5a6de0e7095 100644 --- a/drivers/infiniband/hw/hns/hns_roce_srq.c +++ b/drivers/infiniband/hw/hns/hns_roce_srq.c @@ -82,34 +82,11 @@ static int alloc_srqc(struct hns_roce_dev *hr_dev, struct hns_roce_srq *srq) struct hns_roce_srq_table *srq_table = &hr_dev->srq_table; struct ib_device *ibdev = &hr_dev->ib_dev; struct hns_roce_cmd_mailbox *mailbox; - u64 mtts_wqe[MTT_MIN_COUNT] = { 0 }; - u64 mtts_idx[MTT_MIN_COUNT] = { 0 }; - dma_addr_t dma_handle_wqe = 0; - dma_addr_t dma_handle_idx = 0; int ret; - /* Get the physical address of srq buf */ - ret = hns_roce_mtr_find(hr_dev, &srq->buf_mtr, 0, mtts_wqe, - ARRAY_SIZE(mtts_wqe), &dma_handle_wqe); - if (ret < 1) { - ibdev_err(ibdev, "failed to find mtr for SRQ WQE, ret = %d.\n", - ret); - return -ENOBUFS; - } - - /* Get physical address of idx que buf */ - ret = hns_roce_mtr_find(hr_dev, &srq->idx_que.mtr, 0, mtts_idx, - ARRAY_SIZE(mtts_idx), &dma_handle_idx); - if (ret < 1) { - ibdev_err(ibdev, "failed to find mtr for SRQ idx, ret = %d.\n", - ret); - return -ENOBUFS; - } - ret = hns_roce_bitmap_alloc(&srq_table->bitmap, &srq->srqn); if (ret) { - ibdev_err(ibdev, - "failed to alloc SRQ number, ret = %d.\n", ret); + ibdev_err(ibdev, "failed to alloc SRQ number.\n"); return -ENOMEM; } @@ -127,31 +104,36 @@ static int alloc_srqc(struct hns_roce_dev *hr_dev, struct hns_roce_srq *srq) mailbox = hns_roce_alloc_cmd_mailbox(hr_dev); if (IS_ERR_OR_NULL(mailbox)) { - ret = -ENOMEM; ibdev_err(ibdev, "failed to alloc mailbox for SRQC.\n"); + ret = -ENOMEM; goto err_xa; } - hr_dev->hw->write_srqc(hr_dev, srq, mailbox->buf, mtts_wqe, mtts_idx, - dma_handle_wqe, dma_handle_idx); + ret = hr_dev->hw->write_srqc(srq, mailbox->buf); + if (ret) { + ibdev_err(ibdev, "failed to write SRQC.\n"); + goto err_mbox; + } ret = hns_roce_hw_create_srq(hr_dev, mailbox, srq->srqn); - hns_roce_free_cmd_mailbox(hr_dev, mailbox); if (ret) { ibdev_err(ibdev, "failed to config SRQC, ret = %d.\n", ret); - goto err_xa; + goto err_mbox; } + hns_roce_free_cmd_mailbox(hr_dev, mailbox); + return 0; +err_mbox: + hns_roce_free_cmd_mailbox(hr_dev, mailbox); err_xa: xa_erase(&srq_table->xa, srq->srqn); - err_put: hns_roce_table_put(hr_dev, &srq_table->table, srq->srqn); - err_out: hns_roce_bitmap_free(&srq_table->bitmap, srq->srqn, BITMAP_NO_RR); + return ret; } From 3f31c4126573083f31e098c106127ef315d5f761 Mon Sep 17 00:00:00 2001 From: Lang Cheng Date: Sat, 30 Jan 2021 16:58:06 +0800 Subject: [PATCH 355/414] RDMA/hns: Use new interfaces to write SRQC Use new register operation interfaces to simplify the process of write SRQ Context. Link: https://lore.kernel.org/r/1611997090-48820-9-git-send-email-liweihang@huawei.com Signed-off-by: Lang Cheng Signed-off-by: Weihang Li Signed-off-by: Jason Gunthorpe --- drivers/infiniband/hw/hns/hns_roce_hw_v2.c | 156 +++++++++------------ drivers/infiniband/hw/hns/hns_roce_hw_v2.h | 69 ++++++--- 2 files changed, 116 insertions(+), 109 deletions(-) diff --git a/drivers/infiniband/hw/hns/hns_roce_hw_v2.c b/drivers/infiniband/hw/hns/hns_roce_hw_v2.c index dd5f7b57f007..105019c5270e 100644 --- a/drivers/infiniband/hw/hns/hns_roce_hw_v2.c +++ b/drivers/infiniband/hw/hns/hns_roce_hw_v2.c @@ -5227,19 +5227,59 @@ static int hns_roce_v2_qp_flow_control_init(struct hns_roce_dev *hr_dev, return ret; } +static int hns_roce_v2_write_srqc_index_queue(struct hns_roce_srq *srq, + struct hns_roce_srq_context *ctx) +{ + struct hns_roce_idx_que *idx_que = &srq->idx_que; + struct ib_device *ibdev = srq->ibsrq.device; + struct hns_roce_dev *hr_dev = to_hr_dev(ibdev); + u64 mtts_idx[MTT_MIN_COUNT] = {}; + dma_addr_t dma_handle_idx = 0; + int ret; + + /* Get physical address of idx que buf */ + ret = hns_roce_mtr_find(hr_dev, &idx_que->mtr, 0, mtts_idx, + ARRAY_SIZE(mtts_idx), &dma_handle_idx); + if (ret < 1) { + ibdev_err(ibdev, "failed to find mtr for SRQ idx, ret = %d.\n", + ret); + return -ENOBUFS; + } + + hr_reg_write(ctx, SRQC_IDX_HOP_NUM, + to_hr_hem_hopnum(hr_dev->caps.idx_hop_num, srq->wqe_cnt)); + + hr_reg_write(ctx, SRQC_IDX_BT_BA_L, dma_handle_idx >> 3); + hr_reg_write(ctx, SRQC_IDX_BT_BA_H, upper_32_bits(dma_handle_idx >> 3)); + + hr_reg_write(ctx, SRQC_IDX_BA_PG_SZ, + to_hr_hw_page_shift(idx_que->mtr.hem_cfg.ba_pg_shift)); + hr_reg_write(ctx, SRQC_IDX_BUF_PG_SZ, + to_hr_hw_page_shift(idx_que->mtr.hem_cfg.buf_pg_shift)); + + hr_reg_write(ctx, SRQC_IDX_CUR_BLK_ADDR_L, + to_hr_hw_page_addr(mtts_idx[0])); + hr_reg_write(ctx, SRQC_IDX_CUR_BLK_ADDR_H, + upper_32_bits(to_hr_hw_page_addr(mtts_idx[0]))); + + hr_reg_write(ctx, SRQC_IDX_NXT_BLK_ADDR_L, + to_hr_hw_page_addr(mtts_idx[1])); + hr_reg_write(ctx, SRQC_IDX_NXT_BLK_ADDR_H, + upper_32_bits(to_hr_hw_page_addr(mtts_idx[1]))); + + return 0; +} + static int hns_roce_v2_write_srqc(struct hns_roce_srq *srq, void *mb_buf) { struct ib_device *ibdev = srq->ibsrq.device; struct hns_roce_dev *hr_dev = to_hr_dev(ibdev); - struct hns_roce_srq_context *srq_context; + struct hns_roce_srq_context *ctx = mb_buf; u64 mtts_wqe[MTT_MIN_COUNT] = {}; - u64 mtts_idx[MTT_MIN_COUNT] = {}; dma_addr_t dma_handle_wqe = 0; - dma_addr_t dma_handle_idx = 0; int ret; - srq_context = mb_buf; - memset(srq_context, 0, sizeof(*srq_context)); + memset(ctx, 0, sizeof(*ctx)); /* Get the physical address of srq buf */ ret = hns_roce_mtr_find(hr_dev, &srq->buf_mtr, 0, mtts_wqe, @@ -5250,98 +5290,28 @@ static int hns_roce_v2_write_srqc(struct hns_roce_srq *srq, void *mb_buf) return -ENOBUFS; } - /* Get physical address of idx que buf */ - ret = hns_roce_mtr_find(hr_dev, &srq->idx_que.mtr, 0, mtts_idx, - ARRAY_SIZE(mtts_idx), &dma_handle_idx); - if (ret < 1) { - ibdev_err(ibdev, "failed to find mtr for SRQ idx, ret = %d.\n", - ret); - return -ENOBUFS; - } + hr_reg_write(ctx, SRQC_SRQ_ST, 1); + hr_reg_write(ctx, SRQC_PD, to_hr_pd(srq->ibsrq.pd)->pdn); + hr_reg_write(ctx, SRQC_SRQN, srq->srqn); + hr_reg_write(ctx, SRQC_XRCD, 0); + hr_reg_write(ctx, SRQC_XRC_CQN, srq->cqn); + hr_reg_write(ctx, SRQC_SHIFT, ilog2(srq->wqe_cnt)); + hr_reg_write(ctx, SRQC_RQWS, + srq->max_gs <= 0 ? 0 : fls(srq->max_gs - 1)); - roce_set_field(srq_context->byte_4_srqn_srqst, SRQC_BYTE_4_SRQ_ST_M, - SRQC_BYTE_4_SRQ_ST_S, 1); + hr_reg_write(ctx, SRQC_WQE_HOP_NUM, + to_hr_hem_hopnum(hr_dev->caps.srqwqe_hop_num, + srq->wqe_cnt)); - roce_set_field(srq_context->byte_4_srqn_srqst, - SRQC_BYTE_4_SRQ_WQE_HOP_NUM_M, - SRQC_BYTE_4_SRQ_WQE_HOP_NUM_S, - to_hr_hem_hopnum(hr_dev->caps.srqwqe_hop_num, - srq->wqe_cnt)); - roce_set_field(srq_context->byte_4_srqn_srqst, - SRQC_BYTE_4_SRQ_SHIFT_M, SRQC_BYTE_4_SRQ_SHIFT_S, - ilog2(srq->wqe_cnt)); + hr_reg_write(ctx, SRQC_WQE_BT_BA_L, dma_handle_wqe >> 3); + hr_reg_write(ctx, SRQC_WQE_BT_BA_H, upper_32_bits(dma_handle_wqe >> 3)); - roce_set_field(srq_context->byte_4_srqn_srqst, SRQC_BYTE_4_SRQN_M, - SRQC_BYTE_4_SRQN_S, srq->srqn); + hr_reg_write(ctx, SRQC_WQE_BA_PG_SZ, + to_hr_hw_page_shift(srq->buf_mtr.hem_cfg.ba_pg_shift)); + hr_reg_write(ctx, SRQC_WQE_BUF_PG_SZ, + to_hr_hw_page_shift(srq->buf_mtr.hem_cfg.buf_pg_shift)); - roce_set_field(srq_context->byte_8_limit_wl, SRQC_BYTE_8_SRQ_LIMIT_WL_M, - SRQC_BYTE_8_SRQ_LIMIT_WL_S, 0); - - roce_set_field(srq_context->byte_12_xrcd, SRQC_BYTE_12_SRQ_XRCD_M, - SRQC_BYTE_12_SRQ_XRCD_S, 0); - - srq_context->wqe_bt_ba = cpu_to_le32((u32)(dma_handle_wqe >> 3)); - - roce_set_field(srq_context->byte_24_wqe_bt_ba, - SRQC_BYTE_24_SRQ_WQE_BT_BA_M, - SRQC_BYTE_24_SRQ_WQE_BT_BA_S, - dma_handle_wqe >> 35); - - roce_set_field(srq_context->byte_28_rqws_pd, SRQC_BYTE_28_PD_M, - SRQC_BYTE_28_PD_S, to_hr_pd(srq->ibsrq.pd)->pdn); - roce_set_field(srq_context->byte_28_rqws_pd, SRQC_BYTE_28_RQWS_M, - SRQC_BYTE_28_RQWS_S, srq->max_gs <= 0 ? 0 : - fls(srq->max_gs - 1)); - - srq_context->idx_bt_ba = cpu_to_le32(dma_handle_idx >> 3); - roce_set_field(srq_context->rsv_idx_bt_ba, - SRQC_BYTE_36_SRQ_IDX_BT_BA_M, - SRQC_BYTE_36_SRQ_IDX_BT_BA_S, - dma_handle_idx >> 35); - - srq_context->idx_cur_blk_addr = - cpu_to_le32(to_hr_hw_page_addr(mtts_idx[0])); - roce_set_field(srq_context->byte_44_idxbufpgsz_addr, - SRQC_BYTE_44_SRQ_IDX_CUR_BLK_ADDR_M, - SRQC_BYTE_44_SRQ_IDX_CUR_BLK_ADDR_S, - upper_32_bits(to_hr_hw_page_addr(mtts_idx[0]))); - roce_set_field(srq_context->byte_44_idxbufpgsz_addr, - SRQC_BYTE_44_SRQ_IDX_HOP_NUM_M, - SRQC_BYTE_44_SRQ_IDX_HOP_NUM_S, - to_hr_hem_hopnum(hr_dev->caps.idx_hop_num, - srq->wqe_cnt)); - - roce_set_field(srq_context->byte_44_idxbufpgsz_addr, - SRQC_BYTE_44_SRQ_IDX_BA_PG_SZ_M, - SRQC_BYTE_44_SRQ_IDX_BA_PG_SZ_S, - to_hr_hw_page_shift(srq->idx_que.mtr.hem_cfg.ba_pg_shift)); - roce_set_field(srq_context->byte_44_idxbufpgsz_addr, - SRQC_BYTE_44_SRQ_IDX_BUF_PG_SZ_M, - SRQC_BYTE_44_SRQ_IDX_BUF_PG_SZ_S, - to_hr_hw_page_shift(srq->idx_que.mtr.hem_cfg.buf_pg_shift)); - - srq_context->idx_nxt_blk_addr = - cpu_to_le32(to_hr_hw_page_addr(mtts_idx[1])); - roce_set_field(srq_context->rsv_idxnxtblkaddr, - SRQC_BYTE_52_SRQ_IDX_NXT_BLK_ADDR_M, - SRQC_BYTE_52_SRQ_IDX_NXT_BLK_ADDR_S, - upper_32_bits(to_hr_hw_page_addr(mtts_idx[1]))); - roce_set_field(srq_context->byte_56_xrc_cqn, - SRQC_BYTE_56_SRQ_XRC_CQN_M, SRQC_BYTE_56_SRQ_XRC_CQN_S, - srq->cqn); - roce_set_field(srq_context->byte_56_xrc_cqn, - SRQC_BYTE_56_SRQ_WQE_BA_PG_SZ_M, - SRQC_BYTE_56_SRQ_WQE_BA_PG_SZ_S, - to_hr_hw_page_shift(srq->buf_mtr.hem_cfg.ba_pg_shift)); - roce_set_field(srq_context->byte_56_xrc_cqn, - SRQC_BYTE_56_SRQ_WQE_BUF_PG_SZ_M, - SRQC_BYTE_56_SRQ_WQE_BUF_PG_SZ_S, - to_hr_hw_page_shift(srq->buf_mtr.hem_cfg.buf_pg_shift)); - - roce_set_bit(srq_context->db_record_addr_record_en, - SRQC_BYTE_60_SRQ_RECORD_EN_S, 0); - - return 0; + return hns_roce_v2_write_srqc_index_queue(srq, ctx); } static int hns_roce_v2_modify_srq(struct ib_srq *ibsrq, diff --git a/drivers/infiniband/hw/hns/hns_roce_hw_v2.h b/drivers/infiniband/hw/hns/hns_roce_hw_v2.h index cd9abdd12ffc..e46c9354609e 100644 --- a/drivers/infiniband/hw/hns/hns_roce_hw_v2.h +++ b/drivers/infiniband/hw/hns/hns_roce_hw_v2.h @@ -367,24 +367,61 @@ struct hns_roce_v2_cq_context { #define CQC_STASH CQC_FIELD_LOC(63, 63) struct hns_roce_srq_context { - __le32 byte_4_srqn_srqst; - __le32 byte_8_limit_wl; - __le32 byte_12_xrcd; - __le32 byte_16_pi_ci; - __le32 wqe_bt_ba; - __le32 byte_24_wqe_bt_ba; - __le32 byte_28_rqws_pd; - __le32 idx_bt_ba; - __le32 rsv_idx_bt_ba; - __le32 idx_cur_blk_addr; - __le32 byte_44_idxbufpgsz_addr; - __le32 idx_nxt_blk_addr; - __le32 rsv_idxnxtblkaddr; - __le32 byte_56_xrc_cqn; - __le32 db_record_addr_record_en; - __le32 db_record_addr; + __le32 byte_4_srqn_srqst; + __le32 byte_8_limit_wl; + __le32 byte_12_xrcd; + __le32 byte_16_pi_ci; + __le32 wqe_bt_ba; + __le32 byte_24_wqe_bt_ba; + __le32 byte_28_rqws_pd; + __le32 idx_bt_ba; + __le32 rsv_idx_bt_ba; + __le32 idx_cur_blk_addr; + __le32 byte_44_idxbufpgsz_addr; + __le32 idx_nxt_blk_addr; + __le32 rsv_idxnxtblkaddr; + __le32 byte_56_xrc_cqn; + __le32 db_record_addr_record_en; + __le32 db_record_addr; }; +#define SRQC_FIELD_LOC(h, l) FIELD_LOC(struct hns_roce_srq_context, h, l) + +#define SRQC_SRQ_ST SRQC_FIELD_LOC(1, 0) +#define SRQC_WQE_HOP_NUM SRQC_FIELD_LOC(3, 2) +#define SRQC_SHIFT SRQC_FIELD_LOC(7, 4) +#define SRQC_SRQN SRQC_FIELD_LOC(31, 8) +#define SRQC_LIMIT_WL SRQC_FIELD_LOC(47, 32) +#define SRQC_RSV0 SRQC_FIELD_LOC(63, 48) +#define SRQC_XRCD SRQC_FIELD_LOC(87, 64) +#define SRQC_RSV1 SRQC_FIELD_LOC(95, 88) +#define SRQC_PRODUCER_IDX SRQC_FIELD_LOC(111, 96) +#define SRQC_CONSUMER_IDX SRQC_FIELD_LOC(127, 112) +#define SRQC_WQE_BT_BA_L SRQC_FIELD_LOC(159, 128) +#define SRQC_WQE_BT_BA_H SRQC_FIELD_LOC(188, 160) +#define SRQC_RSV2 SRQC_FIELD_LOC(191, 189) +#define SRQC_PD SRQC_FIELD_LOC(215, 192) +#define SRQC_RQWS SRQC_FIELD_LOC(219, 216) +#define SRQC_RSV3 SRQC_FIELD_LOC(223, 220) +#define SRQC_IDX_BT_BA_L SRQC_FIELD_LOC(255, 224) +#define SRQC_IDX_BT_BA_H SRQC_FIELD_LOC(284, 256) +#define SRQC_RSV4 SRQC_FIELD_LOC(287, 285) +#define SRQC_IDX_CUR_BLK_ADDR_L SRQC_FIELD_LOC(319, 288) +#define SRQC_IDX_CUR_BLK_ADDR_H SRQC_FIELD_LOC(339, 320) +#define SRQC_RSV5 SRQC_FIELD_LOC(341, 340) +#define SRQC_IDX_HOP_NUM SRQC_FIELD_LOC(343, 342) +#define SRQC_IDX_BA_PG_SZ SRQC_FIELD_LOC(347, 344) +#define SRQC_IDX_BUF_PG_SZ SRQC_FIELD_LOC(351, 348) +#define SRQC_IDX_NXT_BLK_ADDR_L SRQC_FIELD_LOC(383, 352) +#define SRQC_IDX_NXT_BLK_ADDR_H SRQC_FIELD_LOC(403, 384) +#define SRQC_RSV6 SRQC_FIELD_LOC(415, 404) +#define SRQC_XRC_CQN SRQC_FIELD_LOC(439, 416) +#define SRQC_WQE_BA_PG_SZ SRQC_FIELD_LOC(443, 440) +#define SRQC_WQE_BUF_PG_SZ SRQC_FIELD_LOC(447, 444) +#define SRQC_DB_RECORD_EN SRQC_FIELD_LOC(448, 448) +#define SRQC_DB_RECORD_ADDR_L SRQC_FIELD_LOC(479, 449) +#define SRQC_DB_RECORD_ADDR_H SRQC_FIELD_LOC(511, 480) + #define SRQC_BYTE_4_SRQ_ST_S 0 #define SRQC_BYTE_4_SRQ_ST_M GENMASK(1, 0) From 9ae2a37e6ae650d7d81f4afede9d879937cab5db Mon Sep 17 00:00:00 2001 From: Xi Wang Date: Sat, 30 Jan 2021 16:58:07 +0800 Subject: [PATCH 356/414] RDMA/hns: Refactor post recv flow Refactor post recv flow by removing unnecessary checking and removing duplicated code. Link: https://lore.kernel.org/r/1611997090-48820-10-git-send-email-liweihang@huawei.com Signed-off-by: Xi Wang Signed-off-by: Weihang Li Signed-off-by: Jason Gunthorpe --- drivers/infiniband/hw/hns/hns_roce_hw_v2.c | 81 +++++++++++----------- 1 file changed, 41 insertions(+), 40 deletions(-) diff --git a/drivers/infiniband/hw/hns/hns_roce_hw_v2.c b/drivers/infiniband/hw/hns/hns_roce_hw_v2.c index 105019c5270e..1f7042285ed4 100644 --- a/drivers/infiniband/hw/hns/hns_roce_hw_v2.c +++ b/drivers/infiniband/hw/hns/hns_roce_hw_v2.c @@ -48,8 +48,8 @@ #include "hns_roce_hem.h" #include "hns_roce_hw_v2.h" -static void set_data_seg_v2(struct hns_roce_v2_wqe_data_seg *dseg, - struct ib_sge *sg) +static inline void set_data_seg_v2(struct hns_roce_v2_wqe_data_seg *dseg, + struct ib_sge *sg) { dseg->lkey = cpu_to_le32(sg->lkey); dseg->addr = cpu_to_le64(sg->addr); @@ -729,6 +729,40 @@ static int check_recv_valid(struct hns_roce_dev *hr_dev, return 0; } +static void fill_rq_wqe(struct hns_roce_qp *hr_qp, const struct ib_recv_wr *wr, + u32 wqe_idx) +{ + struct hns_roce_v2_wqe_data_seg *dseg; + struct hns_roce_rinl_sge *sge_list; + void *wqe = NULL; + int i; + + wqe = hns_roce_get_recv_wqe(hr_qp, wqe_idx); + dseg = (struct hns_roce_v2_wqe_data_seg *)wqe; + for (i = 0; i < wr->num_sge; i++) { + if (!wr->sg_list[i].length) + continue; + set_data_seg_v2(dseg, wr->sg_list + i); + dseg++; + } + + if (hr_qp->rq.rsv_sge) { + dseg->lkey = cpu_to_le32(HNS_ROCE_INVALID_LKEY); + dseg->addr = 0; + dseg->len = cpu_to_le32(HNS_ROCE_INVALID_SGE_LENGTH); + } + + /* rq support inline data */ + if (hr_qp->rq_inl_buf.wqe_cnt) { + sge_list = hr_qp->rq_inl_buf.wqe_list[wqe_idx].sg_list; + hr_qp->rq_inl_buf.wqe_list[wqe_idx].sge_cnt = (u32)wr->num_sge; + for (i = 0; i < wr->num_sge; i++) { + sge_list[i].addr = (void *)(u64)wr->sg_list[i].addr; + sge_list[i].len = wr->sg_list[i].length; + } + } +} + static int hns_roce_v2_post_recv(struct ib_qp *ibqp, const struct ib_recv_wr *wr, const struct ib_recv_wr **bad_wr) @@ -736,15 +770,9 @@ static int hns_roce_v2_post_recv(struct ib_qp *ibqp, struct hns_roce_dev *hr_dev = to_hr_dev(ibqp->device); struct hns_roce_qp *hr_qp = to_hr_qp(ibqp); struct ib_device *ibdev = &hr_dev->ib_dev; - struct hns_roce_v2_wqe_data_seg *dseg; - struct hns_roce_rinl_sge *sge_list; + u32 wqe_idx, nreq, max_sge; unsigned long flags; - void *wqe = NULL; - u32 wqe_idx; - u32 max_sge; - int nreq; int ret; - int i; spin_lock_irqsave(&hr_qp->rq.lock, flags); @@ -764,8 +792,6 @@ static int hns_roce_v2_post_recv(struct ib_qp *ibqp, goto out; } - wqe_idx = (hr_qp->rq.head + nreq) & (hr_qp->rq.wqe_cnt - 1); - if (unlikely(wr->num_sge > max_sge)) { ibdev_err(ibdev, "num_sge = %d >= max_sge = %u.\n", wr->num_sge, max_sge); @@ -774,32 +800,8 @@ static int hns_roce_v2_post_recv(struct ib_qp *ibqp, goto out; } - wqe = hns_roce_get_recv_wqe(hr_qp, wqe_idx); - dseg = (struct hns_roce_v2_wqe_data_seg *)wqe; - for (i = 0; i < wr->num_sge; i++) { - if (!wr->sg_list[i].length) - continue; - set_data_seg_v2(dseg, wr->sg_list + i); - dseg++; - } - - if (hr_qp->rq.rsv_sge) { - dseg->lkey = cpu_to_le32(HNS_ROCE_INVALID_LKEY); - dseg->addr = 0; - dseg->len = cpu_to_le32(HNS_ROCE_INVALID_SGE_LENGTH); - } - - /* rq support inline data */ - if (hr_qp->rq_inl_buf.wqe_cnt) { - sge_list = hr_qp->rq_inl_buf.wqe_list[wqe_idx].sg_list; - hr_qp->rq_inl_buf.wqe_list[wqe_idx].sge_cnt = - (u32)wr->num_sge; - for (i = 0; i < wr->num_sge; i++) { - sge_list[i].addr = - (void *)(u64)wr->sg_list[i].addr; - sge_list[i].len = wr->sg_list[i].length; - } - } + wqe_idx = (hr_qp->rq.head + nreq) & (hr_qp->rq.wqe_cnt - 1); + fill_rq_wqe(hr_qp, wr, wqe_idx); hr_qp->rq.wrid[wqe_idx] = wr->wr_id; } @@ -928,9 +930,8 @@ static int hns_roce_v2_post_srq_recv(struct ib_srq *ibsrq, dseg = (struct hns_roce_v2_wqe_data_seg *)wqe; for (i = 0; i < wr->num_sge; ++i) { - dseg[i].len = cpu_to_le32(wr->sg_list[i].length); - dseg[i].lkey = cpu_to_le32(wr->sg_list[i].lkey); - dseg[i].addr = cpu_to_le64(wr->sg_list[i].addr); + set_data_seg_v2(dseg, wr->sg_list + i); + dseg++; } if (srq->rsv_sge) { From 6b981e2bd9251f4f4d0fe32b1eeb29b0d88813a5 Mon Sep 17 00:00:00 2001 From: Xi Wang Date: Sat, 30 Jan 2021 16:58:08 +0800 Subject: [PATCH 357/414] RDMA/hns: Clear remaining unused sges when post_recv The HIP09 requires the driver to clear the unused data segments in wqe buffer to make the hns ROCEE stop reading the remaining invalid sges for RQ. Link: https://lore.kernel.org/r/1611997090-48820-11-git-send-email-liweihang@huawei.com Signed-off-by: Xi Wang Signed-off-by: Weihang Li Signed-off-by: Jason Gunthorpe --- drivers/infiniband/hw/hns/hns_roce_hw_v2.c | 99 ++++++++++------------ 1 file changed, 47 insertions(+), 52 deletions(-) diff --git a/drivers/infiniband/hw/hns/hns_roce_hw_v2.c b/drivers/infiniband/hw/hns/hns_roce_hw_v2.c index 1f7042285ed4..7eba9b5bfcb8 100644 --- a/drivers/infiniband/hw/hns/hns_roce_hw_v2.c +++ b/drivers/infiniband/hw/hns/hns_roce_hw_v2.c @@ -729,28 +729,42 @@ static int check_recv_valid(struct hns_roce_dev *hr_dev, return 0; } -static void fill_rq_wqe(struct hns_roce_qp *hr_qp, const struct ib_recv_wr *wr, - u32 wqe_idx) +static void fill_recv_sge_to_wqe(const struct ib_recv_wr *wr, void *wqe, + u32 max_sge, bool rsv) { - struct hns_roce_v2_wqe_data_seg *dseg; - struct hns_roce_rinl_sge *sge_list; - void *wqe = NULL; - int i; + struct hns_roce_v2_wqe_data_seg *dseg = wqe; + u32 i, cnt; - wqe = hns_roce_get_recv_wqe(hr_qp, wqe_idx); - dseg = (struct hns_roce_v2_wqe_data_seg *)wqe; - for (i = 0; i < wr->num_sge; i++) { + for (i = 0, cnt = 0; i < wr->num_sge; i++) { + /* Skip zero-length sge */ if (!wr->sg_list[i].length) continue; - set_data_seg_v2(dseg, wr->sg_list + i); - dseg++; + set_data_seg_v2(dseg + cnt, wr->sg_list + i); + cnt++; } - if (hr_qp->rq.rsv_sge) { - dseg->lkey = cpu_to_le32(HNS_ROCE_INVALID_LKEY); - dseg->addr = 0; - dseg->len = cpu_to_le32(HNS_ROCE_INVALID_SGE_LENGTH); + /* Fill a reserved sge to make hw stop reading remaining segments */ + if (rsv) { + dseg[cnt].lkey = cpu_to_le32(HNS_ROCE_INVALID_LKEY); + dseg[cnt].addr = 0; + dseg[cnt].len = cpu_to_le32(HNS_ROCE_INVALID_SGE_LENGTH); + } else { + /* Clear remaining segments to make ROCEE ignore sges */ + if (cnt < max_sge) + memset(dseg + cnt, 0, + (max_sge - cnt) * HNS_ROCE_SGE_SIZE); } +} + +static void fill_rq_wqe(struct hns_roce_qp *hr_qp, const struct ib_recv_wr *wr, + u32 wqe_idx, u32 max_sge) +{ + struct hns_roce_rinl_sge *sge_list; + void *wqe = NULL; + u32 i; + + wqe = hns_roce_get_recv_wqe(hr_qp, wqe_idx); + fill_recv_sge_to_wqe(wr, wqe, max_sge, hr_qp->rq.rsv_sge); /* rq support inline data */ if (hr_qp->rq_inl_buf.wqe_cnt) { @@ -801,8 +815,7 @@ static int hns_roce_v2_post_recv(struct ib_qp *ibqp, } wqe_idx = (hr_qp->rq.head + nreq) & (hr_qp->rq.wqe_cnt - 1); - fill_rq_wqe(hr_qp, wr, wqe_idx); - + fill_rq_wqe(hr_qp, wr, wqe_idx, max_sge); hr_qp->rq.wrid[wqe_idx] = wr->wr_id; } @@ -834,18 +847,18 @@ static int hns_roce_v2_post_recv(struct ib_qp *ibqp, return ret; } -static void *get_srq_wqe(struct hns_roce_srq *srq, int n) +static void *get_srq_wqe_buf(struct hns_roce_srq *srq, u32 n) { return hns_roce_buf_offset(srq->buf_mtr.kmem, n << srq->wqe_shift); } -static void *get_idx_buf(struct hns_roce_idx_que *idx_que, unsigned int n) +static void *get_idx_buf(struct hns_roce_idx_que *idx_que, u32 n) { return hns_roce_buf_offset(idx_que->mtr.kmem, n << idx_que->entry_shift); } -static void hns_roce_free_srq_wqe(struct hns_roce_srq *srq, int wqe_index) +static void hns_roce_free_srq_wqe(struct hns_roce_srq *srq, u32 wqe_index) { /* always called with interrupts disabled. */ spin_lock(&srq->lock); @@ -856,7 +869,7 @@ static void hns_roce_free_srq_wqe(struct hns_roce_srq *srq, int wqe_index) spin_unlock(&srq->lock); } -int hns_roce_srqwq_overflow(struct hns_roce_srq *srq, int nreq) +int hns_roce_srqwq_overflow(struct hns_roce_srq *srq, u32 nreq) { struct hns_roce_idx_que *idx_que = &srq->idx_que; unsigned int cur; @@ -865,19 +878,18 @@ int hns_roce_srqwq_overflow(struct hns_roce_srq *srq, int nreq) return cur + nreq >= srq->wqe_cnt; } -static int find_empty_entry(struct hns_roce_idx_que *idx_que, - unsigned long size) +static int get_srq_wqe_idx(struct hns_roce_srq *srq, u32 *wqe_idx) { - int wqe_idx; + struct hns_roce_idx_que *idx_que = &srq->idx_que; + u32 pos; - if (unlikely(bitmap_full(idx_que->bitmap, size))) + pos = find_first_zero_bit(idx_que->bitmap, srq->wqe_cnt); + if (unlikely(pos == srq->wqe_cnt)) return -ENOSPC; - wqe_idx = find_first_zero_bit(idx_que->bitmap, size); - - bitmap_set(idx_que->bitmap, wqe_idx, 1); - - return wqe_idx; + bitmap_set(idx_que->bitmap, pos, 1); + *wqe_idx = pos; + return 0; } static int hns_roce_v2_post_srq_recv(struct ib_srq *ibsrq, @@ -886,17 +898,12 @@ static int hns_roce_v2_post_srq_recv(struct ib_srq *ibsrq, { struct hns_roce_dev *hr_dev = to_hr_dev(ibsrq->device); struct hns_roce_srq *srq = to_hr_srq(ibsrq); - struct hns_roce_v2_wqe_data_seg *dseg; + u32 wqe_idx, ind, nreq, max_sge; struct hns_roce_v2_db srq_db; unsigned long flags; - unsigned int ind; __le32 *srq_idx; int ret = 0; - int wqe_idx; - u32 max_sge; void *wqe; - int nreq; - int i; spin_lock_irqsave(&srq->lock, flags); @@ -919,26 +926,14 @@ static int hns_roce_v2_post_srq_recv(struct ib_srq *ibsrq, break; } - wqe_idx = find_empty_entry(&srq->idx_que, srq->wqe_cnt); - if (unlikely(wqe_idx < 0)) { - ret = -ENOMEM; + ret = get_srq_wqe_idx(srq, &wqe_idx); + if (unlikely(ret)) { *bad_wr = wr; break; } - wqe = get_srq_wqe(srq, wqe_idx); - dseg = (struct hns_roce_v2_wqe_data_seg *)wqe; - - for (i = 0; i < wr->num_sge; ++i) { - set_data_seg_v2(dseg, wr->sg_list + i); - dseg++; - } - - if (srq->rsv_sge) { - dseg[i].len = cpu_to_le32(HNS_ROCE_INVALID_SGE_LENGTH); - dseg[i].lkey = cpu_to_le32(HNS_ROCE_INVALID_LKEY); - dseg[i].addr = 0; - } + wqe = get_srq_wqe_buf(srq, wqe_idx); + fill_recv_sge_to_wqe(wr, wqe, max_sge, srq->rsv_sge); srq_idx = get_idx_buf(&srq->idx_que, ind); *srq_idx = cpu_to_le32(wqe_idx); From 2e07a3d945851f0edc192336b3ac411e806c4da2 Mon Sep 17 00:00:00 2001 From: Wenpeng Liang Date: Sat, 30 Jan 2021 16:58:09 +0800 Subject: [PATCH 358/414] RDMA/hns: Refactor hns_roce_v2_post_srq_recv() The SRQ in the hns driver consists of the following four parts: * wqe buf: the buffer to store WQE. * wqe_idx buf: the cqe of SRQ may be not generated in the order of wqe, so the wqe_idx corresponding to the idle WQE needs to be pushed into the index queue which is a FIFO, then it instructs the hardware to obtain the corresponding WQE. * bitmap: bitmap is used to generate and release wqe_idx. When the user has a new WR, the driver finds the idx of the idle wqe in bitmap. When the CQE of wqe is generated, the driver will release the idx. * wr_id buf: wr_id buf is used to store the user's wr_id, then return it to the user when poll_cq verb is invoked. The process of post SRQ recv is refactored to make preceding code clearer. Link: https://lore.kernel.org/r/1611997090-48820-12-git-send-email-liweihang@huawei.com Signed-off-by: Wenpeng Liang Signed-off-by: Weihang Li Signed-off-by: Jason Gunthorpe --- drivers/infiniband/hw/hns/hns_roce_hw_v2.c | 69 ++++++++++++++-------- 1 file changed, 43 insertions(+), 26 deletions(-) diff --git a/drivers/infiniband/hw/hns/hns_roce_hw_v2.c b/drivers/infiniband/hw/hns/hns_roce_hw_v2.c index 7eba9b5bfcb8..d5a63e4c3adf 100644 --- a/drivers/infiniband/hw/hns/hns_roce_hw_v2.c +++ b/drivers/infiniband/hw/hns/hns_roce_hw_v2.c @@ -869,13 +869,32 @@ static void hns_roce_free_srq_wqe(struct hns_roce_srq *srq, u32 wqe_index) spin_unlock(&srq->lock); } -int hns_roce_srqwq_overflow(struct hns_roce_srq *srq, u32 nreq) +static int hns_roce_srqwq_overflow(struct hns_roce_srq *srq) { struct hns_roce_idx_que *idx_que = &srq->idx_que; - unsigned int cur; - cur = idx_que->head - idx_que->tail; - return cur + nreq >= srq->wqe_cnt; + return idx_que->head - idx_que->tail >= srq->wqe_cnt; +} + +static int check_post_srq_valid(struct hns_roce_srq *srq, u32 max_sge, + const struct ib_recv_wr *wr) +{ + struct ib_device *ib_dev = srq->ibsrq.device; + + if (unlikely(wr->num_sge > max_sge)) { + ibdev_err(ib_dev, + "failed to check sge, wr->num_sge = %d, max_sge = %u.\n", + wr->num_sge, max_sge); + return -EINVAL; + } + + if (unlikely(hns_roce_srqwq_overflow(srq))) { + ibdev_err(ib_dev, + "failed to check srqwq status, srqwq is full.\n"); + return -ENOMEM; + } + + return 0; } static int get_srq_wqe_idx(struct hns_roce_srq *srq, u32 *wqe_idx) @@ -892,36 +911,40 @@ static int get_srq_wqe_idx(struct hns_roce_srq *srq, u32 *wqe_idx) return 0; } +static void fill_wqe_idx(struct hns_roce_srq *srq, unsigned int wqe_idx) +{ + struct hns_roce_idx_que *idx_que = &srq->idx_que; + unsigned int head; + __le32 *buf; + + head = idx_que->head & (srq->wqe_cnt - 1); + + buf = get_idx_buf(idx_que, head); + *buf = cpu_to_le32(wqe_idx); + + idx_que->head++; +} + static int hns_roce_v2_post_srq_recv(struct ib_srq *ibsrq, const struct ib_recv_wr *wr, const struct ib_recv_wr **bad_wr) { struct hns_roce_dev *hr_dev = to_hr_dev(ibsrq->device); struct hns_roce_srq *srq = to_hr_srq(ibsrq); - u32 wqe_idx, ind, nreq, max_sge; struct hns_roce_v2_db srq_db; unsigned long flags; - __le32 *srq_idx; int ret = 0; + u32 max_sge; + u32 wqe_idx; void *wqe; + u32 nreq; spin_lock_irqsave(&srq->lock, flags); - ind = srq->idx_que.head & (srq->wqe_cnt - 1); max_sge = srq->max_gs - srq->rsv_sge; - for (nreq = 0; wr; ++nreq, wr = wr->next) { - if (unlikely(wr->num_sge > max_sge)) { - ibdev_err(&hr_dev->ib_dev, - "srq: num_sge = %d, max_sge = %u.\n", - wr->num_sge, max_sge); - ret = -EINVAL; - *bad_wr = wr; - break; - } - - if (unlikely(hns_roce_srqwq_overflow(srq, nreq))) { - ret = -ENOMEM; + ret = check_post_srq_valid(srq, max_sge, wr); + if (ret) { *bad_wr = wr; break; } @@ -934,17 +957,11 @@ static int hns_roce_v2_post_srq_recv(struct ib_srq *ibsrq, wqe = get_srq_wqe_buf(srq, wqe_idx); fill_recv_sge_to_wqe(wr, wqe, max_sge, srq->rsv_sge); - - srq_idx = get_idx_buf(&srq->idx_que, ind); - *srq_idx = cpu_to_le32(wqe_idx); - + fill_wqe_idx(srq, wqe_idx); srq->wrid[wqe_idx] = wr->wr_id; - ind = (ind + 1) & (srq->wqe_cnt - 1); } if (likely(nreq)) { - srq->idx_que.head += nreq; - /* * Make sure that descriptors are written before * doorbell record. From 204cbe423b6ea8368eaa17b0c42542dc4f719c1f Mon Sep 17 00:00:00 2001 From: Wenpeng Liang Date: Sat, 30 Jan 2021 16:58:10 +0800 Subject: [PATCH 359/414] RDMA/hns: Add verification of QP type when post_recv The post_recv only supports QP types of RC, GSI and UD. Link: https://lore.kernel.org/r/1611997090-48820-13-git-send-email-liweihang@huawei.com Signed-off-by: Wenpeng Liang Signed-off-by: Weihang Li Signed-off-by: Jason Gunthorpe --- drivers/infiniband/hw/hns/hns_roce_hw_v2.c | 14 +++++++++++++- 1 file changed, 13 insertions(+), 1 deletion(-) diff --git a/drivers/infiniband/hw/hns/hns_roce_hw_v2.c b/drivers/infiniband/hw/hns/hns_roce_hw_v2.c index d5a63e4c3adf..3adb77d736a7 100644 --- a/drivers/infiniband/hw/hns/hns_roce_hw_v2.c +++ b/drivers/infiniband/hw/hns/hns_roce_hw_v2.c @@ -721,9 +721,21 @@ static int hns_roce_v2_post_send(struct ib_qp *ibqp, static int check_recv_valid(struct hns_roce_dev *hr_dev, struct hns_roce_qp *hr_qp) { + struct ib_device *ibdev = &hr_dev->ib_dev; + struct ib_qp *ibqp = &hr_qp->ibqp; + + if (unlikely(ibqp->qp_type != IB_QPT_RC && + ibqp->qp_type != IB_QPT_GSI && + ibqp->qp_type != IB_QPT_UD)) { + ibdev_err(ibdev, "unsupported qp type, qp_type = %d.\n", + ibqp->qp_type); + return -EOPNOTSUPP; + } + if (unlikely(hr_dev->state >= HNS_ROCE_DEVICE_STATE_RST_DOWN)) return -EIO; - else if (hr_qp->state == IB_QPS_RESET) + + if (hr_qp->state == IB_QPS_RESET) return -EINVAL; return 0; From f405ac83fa252dd0e346f2715b66e7d2adba9027 Mon Sep 17 00:00:00 2001 From: Tobias Klauser Date: Thu, 4 Feb 2021 15:50:42 +0100 Subject: [PATCH 360/414] selftests/vDSO: fix ABI selftest on riscv Only older versions of the RISC-V GCC toolchain define __riscv__. Check for __riscv as well, which is used by newer GCC toolchains. Also set VDSO_32BIT based on __riscv_xlen. Before (on riscv64): $ ./vdso_test_abi [vDSO kselftest] VDSO_VERSION: LINUX_4 Could not find __vdso_gettimeofday Could not find __vdso_clock_gettime Could not find __vdso_clock_getres clock_id: CLOCK_REALTIME [PASS] Could not find __vdso_clock_gettime Could not find __vdso_clock_getres clock_id: CLOCK_BOOTTIME [PASS] Could not find __vdso_clock_gettime Could not find __vdso_clock_getres clock_id: CLOCK_TAI [PASS] Could not find __vdso_clock_gettime Could not find __vdso_clock_getres clock_id: CLOCK_REALTIME_COARSE [PASS] Could not find __vdso_clock_gettime Could not find __vdso_clock_getres clock_id: CLOCK_MONOTONIC [PASS] Could not find __vdso_clock_gettime Could not find __vdso_clock_getres clock_id: CLOCK_MONOTONIC_RAW [PASS] Could not find __vdso_clock_gettime Could not find __vdso_clock_getres clock_id: CLOCK_MONOTONIC_COARSE [PASS] Could not find __vdso_time After (on riscv32): $ ./vdso_test_abi [vDSO kselftest] VDSO_VERSION: LINUX_4.15 The time is 1612449376.015086 The time is 1612449376.18340784 The resolution is 0 1 clock_id: CLOCK_REALTIME [PASS] The time is 774.842586182 The resolution is 0 1 clock_id: CLOCK_BOOTTIME [PASS] The time is 1612449376.22536565 The resolution is 0 1 clock_id: CLOCK_TAI [PASS] The time is 1612449376.20885172 The resolution is 0 4000000 clock_id: CLOCK_REALTIME_COARSE [PASS] The time is 774.845491269 The resolution is 0 1 clock_id: CLOCK_MONOTONIC [PASS] The time is 774.849534200 The resolution is 0 1 clock_id: CLOCK_MONOTONIC_RAW [PASS] The time is 774.842139684 The resolution is 0 4000000 clock_id: CLOCK_MONOTONIC_COARSE [PASS] Could not find __vdso_time Signed-off-by: Tobias Klauser Reviewed-by: Palmer Dabbelt Acked-by: Palmer Dabbelt Acked-by: Vincenzo Frascino Signed-off-by: Shuah Khan --- tools/testing/selftests/vDSO/vdso_config.h | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/tools/testing/selftests/vDSO/vdso_config.h b/tools/testing/selftests/vDSO/vdso_config.h index 6a6fe8d4ff55..6188b16827d1 100644 --- a/tools/testing/selftests/vDSO/vdso_config.h +++ b/tools/testing/selftests/vDSO/vdso_config.h @@ -47,10 +47,12 @@ #elif defined(__x86_64__) #define VDSO_VERSION 0 #define VDSO_NAMES 1 -#elif defined(__riscv__) +#elif defined(__riscv__) || defined(__riscv) #define VDSO_VERSION 5 #define VDSO_NAMES 1 +#if __riscv_xlen == 32 #define VDSO_32BIT 1 +#endif #else /* nds32 */ #define VDSO_VERSION 4 #define VDSO_NAMES 1 From b1cd3d82a964921a6828588fb52a3502acc7d0ea Mon Sep 17 00:00:00 2001 From: Tiezhu Yang Date: Fri, 5 Feb 2021 14:33:59 +0800 Subject: [PATCH 361/414] selftests: breakpoints: Use correct error messages in breakpoint_test_arm64.c When call ptrace(PTRACE_CONT, ...) failed, use correct error messages. Signed-off-by: Tiezhu Yang Signed-off-by: Shuah Khan --- tools/testing/selftests/breakpoints/breakpoint_test_arm64.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tools/testing/selftests/breakpoints/breakpoint_test_arm64.c b/tools/testing/selftests/breakpoints/breakpoint_test_arm64.c index ad41ea69001b..e7041816085a 100644 --- a/tools/testing/selftests/breakpoints/breakpoint_test_arm64.c +++ b/tools/testing/selftests/breakpoints/breakpoint_test_arm64.c @@ -145,7 +145,7 @@ static bool run_test(int wr_size, int wp_size, int wr, int wp) if (ptrace(PTRACE_CONT, pid, NULL, NULL) < 0) { ksft_print_msg( - "ptrace(PTRACE_SINGLESTEP) failed: %s\n", + "ptrace(PTRACE_CONT) failed: %s\n", strerror(errno)); return false; } @@ -159,7 +159,7 @@ static bool run_test(int wr_size, int wp_size, int wr, int wp) } alarm(0); if (WIFEXITED(status)) { - ksft_print_msg("child did not single-step\n"); + ksft_print_msg("child exited prematurely\n"); return false; } if (!WIFSTOPPED(status)) { From 429fa9698957d1a910535ce5e33aedf5adfdabc1 Mon Sep 17 00:00:00 2001 From: Kamal Heib Date: Mon, 1 Feb 2021 13:29:22 +0200 Subject: [PATCH 362/414] RDMA/siw: Fix calculation of tx_valid_cpus size The size of tx_valid_cpus was calculated under the assumption that the numa nodes identifiers are continuous, which is not the case in all archs as this could lead to the following panic when trying to access an invalid tx_valid_cpus index, avoid the following panic by using nr_node_ids instead of num_online_nodes() to allocate the tx_valid_cpus size. Kernel attempted to read user page (8) - exploit attempt? (uid: 0) BUG: Kernel NULL pointer dereference on read at 0x00000008 Faulting instruction address: 0xc0080000081b4a90 Oops: Kernel access of bad area, sig: 11 [#1] LE PAGE_SIZE=64K MMU=Radix SMP NR_CPUS=2048 NUMA PowerNV Modules linked in: siw(+) rfkill rpcrdma ib_isert iscsi_target_mod ib_iser libiscsi scsi_transport_iscsi ib_srpt target_core_mod ib_srp scsi_transport_srp ib_ipoib rdma_ucm sunrpc ib_umad rdma_cm ib_cm iw_cm i40iw ib_uverbs ib_core i40e ses enclosure scsi_transport_sas ipmi_powernv ibmpowernv at24 ofpart ipmi_devintf regmap_i2c ipmi_msghandler powernv_flash uio_pdrv_genirq uio mtd opal_prd zram ip_tables xfs libcrc32c sd_mod t10_pi ast i2c_algo_bit drm_vram_helper drm_kms_helper syscopyarea sysfillrect sysimgblt fb_sys_fops cec drm_ttm_helper ttm drm vmx_crypto aacraid drm_panel_orientation_quirks dm_mod CPU: 40 PID: 3279 Comm: modprobe Tainted: G W X --------- --- 5.11.0-0.rc4.129.eln108.ppc64le #2 NIP: c0080000081b4a90 LR: c0080000081b4a2c CTR: c0000000007ce1c0 REGS: c000000027fa77b0 TRAP: 0300 Tainted: G W X --------- --- (5.11.0-0.rc4.129.eln108.ppc64le) MSR: 9000000002009033 CR: 44224882 XER: 00000000 CFAR: c0000000007ce200 DAR: 0000000000000008 DSISR: 40000000 IRQMASK: 0 GPR00: c0080000081b4a2c c000000027fa7a50 c0080000081c3900 0000000000000040 GPR04: c000000002023080 c000000012e1c300 000020072ad70000 0000000000000001 GPR08: c000000001726068 0000000000000008 0000000000000008 c0080000081b5758 GPR12: c0000000007ce1c0 c0000007fffc3000 00000001590b1e40 0000000000000000 GPR16: 0000000000000000 0000000000000001 000000011ad68fc8 00007fffcc09c5c8 GPR20: 0000000000000008 0000000000000000 00000001590b2850 00000001590b1d30 GPR24: 0000000000043d68 000000011ad67a80 000000011ad67a80 0000000000100000 GPR28: c000000012e1c300 c0000000020271c8 0000000000000001 c0080000081bf608 NIP [c0080000081b4a90] siw_init_cpulist+0x194/0x214 [siw] LR [c0080000081b4a2c] siw_init_cpulist+0x130/0x214 [siw] Call Trace: [c000000027fa7a50] [c0080000081b4a2c] siw_init_cpulist+0x130/0x214 [siw] (unreliable) [c000000027fa7a90] [c0080000081b4e68] siw_init_module+0x40/0x2a0 [siw] [c000000027fa7b30] [c0000000000124f4] do_one_initcall+0x84/0x2e0 [c000000027fa7c00] [c000000000267ffc] do_init_module+0x7c/0x350 [c000000027fa7c90] [c00000000026a180] __do_sys_init_module+0x210/0x250 [c000000027fa7db0] [c0000000000387e4] system_call_exception+0x134/0x230 [c000000027fa7e10] [c00000000000d660] system_call_common+0xf0/0x27c Instruction dump: 40810044 3d420000 e8bf0000 e88a82d0 3d420000 e90a82c8 792a1f24 7cc4302a 7d2642aa 79291f24 7d25482a 7d295214 <7d4048a8> 7d4a3b78 7d4049ad 40c2fff4 Fixes: bdcf26bf9b3a ("rdma/siw: network and RDMA core interface") Link: https://lore.kernel.org/r/20210201112922.141085-1-kamalheib1@gmail.com Signed-off-by: Kamal Heib Reviewed-by: Bernard Metzler Tested-by: Yi Zhang Signed-off-by: Jason Gunthorpe --- drivers/infiniband/sw/siw/siw_main.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/infiniband/sw/siw/siw_main.c b/drivers/infiniband/sw/siw/siw_main.c index 81a294269592..cf55326f2ab4 100644 --- a/drivers/infiniband/sw/siw/siw_main.c +++ b/drivers/infiniband/sw/siw/siw_main.c @@ -135,7 +135,7 @@ static struct { static int siw_init_cpulist(void) { - int i, num_nodes = num_possible_nodes(); + int i, num_nodes = nr_node_ids; memset(siw_tx_thread, 0, sizeof(siw_tx_thread)); From 01584a5edcc4a04ed4b993f75b6cc4bcf3c21818 Mon Sep 17 00:00:00 2001 From: Yixing Liu Date: Sat, 30 Jan 2021 17:05:13 +0800 Subject: [PATCH 363/414] RDMA/hns: Add support of direct wqe Direct wqe is a mechanism to fill wqe directly into the hardware. In the case of light load, the wqe will be filled into pcie bar space of the hardware, this will reduce one memory access operation and therefore reduce the latency. Link: https://lore.kernel.org/r/1611997513-27107-1-git-send-email-liweihang@huawei.com Signed-off-by: Yixing Liu Signed-off-by: Lang Cheng Signed-off-by: Weihang Li Signed-off-by: Jason Gunthorpe --- drivers/infiniband/hw/hns/hns_roce_device.h | 6 +++ drivers/infiniband/hw/hns/hns_roce_hw_v2.c | 44 ++++++++++++++++++++- drivers/infiniband/hw/hns/hns_roce_hw_v2.h | 13 ++++++ 3 files changed, 62 insertions(+), 1 deletion(-) diff --git a/drivers/infiniband/hw/hns/hns_roce_device.h b/drivers/infiniband/hw/hns/hns_roce_device.h index d51641af9342..c9a9e80d91d2 100644 --- a/drivers/infiniband/hw/hns/hns_roce_device.h +++ b/drivers/infiniband/hw/hns/hns_roce_device.h @@ -93,6 +93,7 @@ #define HNS_ROCE_MAX_PORTS 6 #define HNS_ROCE_GID_SIZE 16 #define HNS_ROCE_SGE_SIZE 16 +#define HNS_ROCE_DWQE_SIZE 65536 #define HNS_ROCE_HOP_NUM_0 0xff @@ -649,6 +650,10 @@ struct hns_roce_work { u32 queue_num; }; +enum { + HNS_ROCE_QP_CAP_DIRECT_WQE = BIT(5), +}; + struct hns_roce_qp { struct ib_qp ibqp; struct hns_roce_wq rq; @@ -986,6 +991,7 @@ struct hns_roce_dev { struct mutex pgdir_mutex; int irq[HNS_ROCE_MAX_IRQ_NUM]; u8 __iomem *reg_base; + void __iomem *mem_base; struct hns_roce_caps caps; struct xarray qp_table_xa; diff --git a/drivers/infiniband/hw/hns/hns_roce_hw_v2.c b/drivers/infiniband/hw/hns/hns_roce_hw_v2.c index 3adb77d736a7..c962f26f63e7 100644 --- a/drivers/infiniband/hw/hns/hns_roce_hw_v2.c +++ b/drivers/infiniband/hw/hns/hns_roce_hw_v2.c @@ -503,6 +503,8 @@ static inline int set_ud_wqe(struct hns_roce_qp *qp, if (ret) return ret; + qp->sl = to_hr_ah(ud_wr(wr)->ah)->av.sl; + set_extend_sge(qp, wr->sg_list, &curr_idx, valid_num_sge); /* @@ -635,6 +637,8 @@ static inline void update_sq_db(struct hns_roce_dev *hr_dev, V2_DB_BYTE_4_TAG_S, qp->doorbell_qpn); roce_set_field(sq_db.byte_4, V2_DB_BYTE_4_CMD_M, V2_DB_BYTE_4_CMD_S, HNS_ROCE_V2_SQ_DB); + /* indicates data on new BAR, 0 : SQ doorbell, 1 : DWQE */ + roce_set_bit(sq_db.byte_4, V2_DB_FLAG_S, 0); roce_set_field(sq_db.parameter, V2_DB_PARAMETER_IDX_M, V2_DB_PARAMETER_IDX_S, qp->sq.head); roce_set_field(sq_db.parameter, V2_DB_PARAMETER_SL_M, @@ -644,6 +648,38 @@ static inline void update_sq_db(struct hns_roce_dev *hr_dev, } } +static void hns_roce_write512(struct hns_roce_dev *hr_dev, u64 *val, + u64 __iomem *dest) +{ +#define HNS_ROCE_WRITE_TIMES 8 + struct hns_roce_v2_priv *priv = (struct hns_roce_v2_priv *)hr_dev->priv; + struct hnae3_handle *handle = priv->handle; + const struct hnae3_ae_ops *ops = handle->ae_algo->ops; + int i; + + if (!hr_dev->dis_db && !ops->get_hw_reset_stat(handle)) + for (i = 0; i < HNS_ROCE_WRITE_TIMES; i++) + writeq_relaxed(*(val + i), dest + i); +} + +static void write_dwqe(struct hns_roce_dev *hr_dev, struct hns_roce_qp *qp, + void *wqe) +{ + struct hns_roce_v2_rc_send_wqe *rc_sq_wqe = wqe; + + /* All kinds of DirectWQE have the same header field layout */ + roce_set_bit(rc_sq_wqe->byte_4, V2_RC_SEND_WQE_BYTE_4_FLAG_S, 1); + roce_set_field(rc_sq_wqe->byte_4, V2_RC_SEND_WQE_BYTE_4_DB_SL_L_M, + V2_RC_SEND_WQE_BYTE_4_DB_SL_L_S, qp->sl); + roce_set_field(rc_sq_wqe->byte_4, V2_RC_SEND_WQE_BYTE_4_DB_SL_H_M, + V2_RC_SEND_WQE_BYTE_4_DB_SL_H_S, qp->sl >> 2); + roce_set_field(rc_sq_wqe->byte_4, V2_RC_SEND_WQE_BYTE_4_WQE_INDEX_M, + V2_RC_SEND_WQE_BYTE_4_WQE_INDEX_S, qp->sq.head); + + hns_roce_write512(hr_dev, wqe, hr_dev->mem_base + + HNS_ROCE_DWQE_SIZE * qp->ibqp.qp_num); +} + static int hns_roce_v2_post_send(struct ib_qp *ibqp, const struct ib_send_wr *wr, const struct ib_send_wr **bad_wr) @@ -710,7 +746,12 @@ static int hns_roce_v2_post_send(struct ib_qp *ibqp, qp->next_sge = sge_idx; /* Memory barrier */ wmb(); - update_sq_db(hr_dev, qp); + + if (nreq == 1 && qp->sq.head == qp->sq.tail + 1 && + (qp->en_flags & HNS_ROCE_QP_CAP_DIRECT_WQE)) + write_dwqe(hr_dev, qp, wqe); + else + update_sq_db(hr_dev, qp); } spin_unlock_irqrestore(&qp->sq.lock, flags); @@ -6310,6 +6351,7 @@ static void hns_roce_hw_v2_get_cfg(struct hns_roce_dev *hr_dev, /* Get info from NIC driver. */ hr_dev->reg_base = handle->rinfo.roce_io_base; + hr_dev->mem_base = handle->rinfo.roce_mem_base; hr_dev->caps.num_ports = 1; hr_dev->iboe.netdevs[0] = handle->rinfo.netdev; hr_dev->iboe.phy_port[0] = 0; diff --git a/drivers/infiniband/hw/hns/hns_roce_hw_v2.h b/drivers/infiniband/hw/hns/hns_roce_hw_v2.h index e46c9354609e..f29438cff456 100644 --- a/drivers/infiniband/hw/hns/hns_roce_hw_v2.h +++ b/drivers/infiniband/hw/hns/hns_roce_hw_v2.h @@ -1136,6 +1136,8 @@ struct hns_roce_v2_mpt_entry { #define V2_DB_BYTE_4_CMD_S 24 #define V2_DB_BYTE_4_CMD_M GENMASK(27, 24) +#define V2_DB_FLAG_S 31 + #define V2_DB_PARAMETER_IDX_S 0 #define V2_DB_PARAMETER_IDX_M GENMASK(15, 0) @@ -1232,6 +1234,15 @@ struct hns_roce_v2_rc_send_wqe { #define V2_RC_SEND_WQE_BYTE_4_OPCODE_S 0 #define V2_RC_SEND_WQE_BYTE_4_OPCODE_M GENMASK(4, 0) +#define V2_RC_SEND_WQE_BYTE_4_DB_SL_L_S 5 +#define V2_RC_SEND_WQE_BYTE_4_DB_SL_L_M GENMASK(6, 5) + +#define V2_RC_SEND_WQE_BYTE_4_DB_SL_H_S 13 +#define V2_RC_SEND_WQE_BYTE_4_DB_SL_H_M GENMASK(14, 13) + +#define V2_RC_SEND_WQE_BYTE_4_WQE_INDEX_S 15 +#define V2_RC_SEND_WQE_BYTE_4_WQE_INDEX_M GENMASK(30, 15) + #define V2_RC_SEND_WQE_BYTE_4_OWNER_S 7 #define V2_RC_SEND_WQE_BYTE_4_CQE_S 8 @@ -1254,6 +1265,8 @@ struct hns_roce_v2_rc_send_wqe { #define V2_RC_FRMR_WQE_BYTE_4_LW_S 23 +#define V2_RC_SEND_WQE_BYTE_4_FLAG_S 31 + #define V2_RC_SEND_WQE_BYTE_16_XRC_SRQN_S 0 #define V2_RC_SEND_WQE_BYTE_16_XRC_SRQN_M GENMASK(23, 0) From 773f841ab1aeb0134e3872eb3545592732db8218 Mon Sep 17 00:00:00 2001 From: Weihang Li Date: Fri, 5 Feb 2021 17:39:23 +0800 Subject: [PATCH 364/414] RDMA/hns: Avoid filling sgid index when modifying QP to RTR ULP usually set IB(V)_QP_AV when trying to modify QP to RTR if they want to record sgid index into QPC. For UD QPs, it is useless because it will be included in WQE. For RC QPs, it will be filled in hns_roce_set_path(). So sgid index shouldn't be filled by default. Then hns_get_gid_index() is moved to hns_roce_hw_v1.c because it is only called in it. Fixes: 926a01dc000d ("RDMA/hns: Add QP operations support for hip08 SoC") Link: https://lore.kernel.org/r/1612517974-31867-2-git-send-email-liweihang@huawei.com Signed-off-by: Weihang Li Signed-off-by: Jason Gunthorpe --- drivers/infiniband/hw/hns/hns_roce_hw_v1.c | 16 ++++++++++++++++ drivers/infiniband/hw/hns/hns_roce_hw_v2.c | 11 ----------- drivers/infiniband/hw/hns/hns_roce_main.c | 16 ---------------- 3 files changed, 16 insertions(+), 27 deletions(-) diff --git a/drivers/infiniband/hw/hns/hns_roce_hw_v1.c b/drivers/infiniband/hw/hns/hns_roce_hw_v1.c index 23fe8e9f61da..262ad58f9606 100644 --- a/drivers/infiniband/hw/hns/hns_roce_hw_v1.c +++ b/drivers/infiniband/hw/hns/hns_roce_hw_v1.c @@ -43,6 +43,22 @@ #include "hns_roce_hem.h" #include "hns_roce_hw_v1.h" +/** + * hns_get_gid_index - Get gid index. + * @hr_dev: pointer to structure hns_roce_dev. + * @port: port, value range: 0 ~ MAX + * @gid_index: gid_index, value range: 0 ~ MAX + * Description: + * N ports shared gids, allocation method as follow: + * GID[0][0], GID[1][0],.....GID[N - 1][0], + * GID[0][0], GID[1][0],.....GID[N - 1][0], + * And so on + */ +u8 hns_get_gid_index(struct hns_roce_dev *hr_dev, u8 port, int gid_index) +{ + return gid_index * hr_dev->caps.num_ports + port; +} + static void set_data_seg(struct hns_roce_wqe_data_seg *dseg, struct ib_sge *sg) { dseg->lkey = cpu_to_le32(sg->lkey); diff --git a/drivers/infiniband/hw/hns/hns_roce_hw_v2.c b/drivers/infiniband/hw/hns/hns_roce_hw_v2.c index c962f26f63e7..f0691f4ea688 100644 --- a/drivers/infiniband/hw/hns/hns_roce_hw_v2.c +++ b/drivers/infiniband/hw/hns/hns_roce_hw_v2.c @@ -4308,7 +4308,6 @@ static int modify_qp_init_to_rtr(struct ib_qp *ibqp, struct hns_roce_v2_qp_context *context, struct hns_roce_v2_qp_context *qpc_mask) { - const struct ib_global_route *grh = rdma_ah_read_grh(&attr->ah_attr); struct hns_roce_dev *hr_dev = to_hr_dev(ibqp->device); struct hns_roce_qp *hr_qp = to_hr_qp(ibqp); struct ib_device *ibdev = &hr_dev->ib_dev; @@ -4316,7 +4315,6 @@ static int modify_qp_init_to_rtr(struct ib_qp *ibqp, dma_addr_t irrl_ba; enum ib_mtu mtu; u8 lp_pktn_ini; - u8 port_num; u64 *mtts; u8 *dmac; u8 *smac; @@ -4397,15 +4395,6 @@ static int modify_qp_init_to_rtr(struct ib_qp *ibqp, V2_QPC_BYTE_56_DQPN_M, V2_QPC_BYTE_56_DQPN_S, 0); } - /* Configure GID index */ - port_num = rdma_ah_get_port_num(&attr->ah_attr); - roce_set_field(context->byte_20_smac_sgid_idx, - V2_QPC_BYTE_20_SGID_IDX_M, V2_QPC_BYTE_20_SGID_IDX_S, - hns_get_gid_index(hr_dev, port_num - 1, - grh->sgid_index)); - roce_set_field(qpc_mask->byte_20_smac_sgid_idx, - V2_QPC_BYTE_20_SGID_IDX_M, V2_QPC_BYTE_20_SGID_IDX_S, 0); - memcpy(&(context->dmac), dmac, sizeof(u32)); roce_set_field(context->byte_52_udpspn_dmac, V2_QPC_BYTE_52_DMAC_M, V2_QPC_BYTE_52_DMAC_S, *((u16 *)(&dmac[4]))); diff --git a/drivers/infiniband/hw/hns/hns_roce_main.c b/drivers/infiniband/hw/hns/hns_roce_main.c index 79782209dbb5..c29215a8de3c 100644 --- a/drivers/infiniband/hw/hns/hns_roce_main.c +++ b/drivers/infiniband/hw/hns/hns_roce_main.c @@ -42,22 +42,6 @@ #include "hns_roce_device.h" #include "hns_roce_hem.h" -/** - * hns_get_gid_index - Get gid index. - * @hr_dev: pointer to structure hns_roce_dev. - * @port: port, value range: 0 ~ MAX - * @gid_index: gid_index, value range: 0 ~ MAX - * Description: - * N ports shared gids, allocation method as follow: - * GID[0][0], GID[1][0],.....GID[N - 1][0], - * GID[0][0], GID[1][0],.....GID[N - 1][0], - * And so on - */ -u8 hns_get_gid_index(struct hns_roce_dev *hr_dev, u8 port, int gid_index) -{ - return gid_index * hr_dev->caps.num_ports + port; -} - static int hns_roce_set_mac(struct hns_roce_dev *hr_dev, u8 port, u8 *addr) { u8 phy_port; From ea4092f3b56b236d08890ea589506ebd76248c53 Mon Sep 17 00:00:00 2001 From: Weihang Li Date: Fri, 5 Feb 2021 17:39:24 +0800 Subject: [PATCH 365/414] RDMA/hns: Fix type of sq_signal_bits This bit should be in type of enum ib_sig_type, or there will be a sparse warning. Fixes: bfe860351e31 ("RDMA/hns: Fix cast from or to restricted __le32 for driver") Link: https://lore.kernel.org/r/1612517974-31867-3-git-send-email-liweihang@huawei.com Reported-by: kernel test robot Signed-off-by: Weihang Li Signed-off-by: Jason Gunthorpe --- drivers/infiniband/hw/hns/hns_roce_device.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/infiniband/hw/hns/hns_roce_device.h b/drivers/infiniband/hw/hns/hns_roce_device.h index c9a9e80d91d2..1f94154323c3 100644 --- a/drivers/infiniband/hw/hns/hns_roce_device.h +++ b/drivers/infiniband/hw/hns/hns_roce_device.h @@ -661,7 +661,7 @@ struct hns_roce_qp { struct hns_roce_db sdb; unsigned long en_flags; u32 doorbell_qpn; - u32 sq_signal_bits; + enum ib_sig_type sq_signal_bits; struct hns_roce_wq sq; struct hns_roce_mtr mtr; From 9ea9a53ea93be1cc66729ceb920f0d07285d6bfd Mon Sep 17 00:00:00 2001 From: Xi Wang Date: Fri, 5 Feb 2021 17:39:25 +0800 Subject: [PATCH 366/414] RDMA/hns: Add mapped page count checking for MTR Add the mapped page count checking flow to avoid invalid page size when creating MTR. Fixes: 38389eaa4db1 ("RDMA/hns: Add mtr support for mixed multihop addressing") Link: https://lore.kernel.org/r/1612517974-31867-4-git-send-email-liweihang@huawei.com Signed-off-by: Xi Wang Signed-off-by: Weihang Li Signed-off-by: Jason Gunthorpe --- drivers/infiniband/hw/hns/hns_roce_hem.c | 9 ++-- drivers/infiniband/hw/hns/hns_roce_mr.c | 56 ++++++++++++++---------- 2 files changed, 40 insertions(+), 25 deletions(-) diff --git a/drivers/infiniband/hw/hns/hns_roce_hem.c b/drivers/infiniband/hw/hns/hns_roce_hem.c index edc9d6b98d95..cfd2e1b60c7f 100644 --- a/drivers/infiniband/hw/hns/hns_roce_hem.c +++ b/drivers/infiniband/hw/hns/hns_roce_hem.c @@ -1075,9 +1075,8 @@ static struct roce_hem_item *hem_list_alloc_item(struct hns_roce_dev *hr_dev, return NULL; if (exist_bt) { - hem->addr = dma_alloc_coherent(hr_dev->dev, - count * BA_BYTE_LEN, - &hem->dma_addr, GFP_KERNEL); + hem->addr = dma_alloc_coherent(hr_dev->dev, count * BA_BYTE_LEN, + &hem->dma_addr, GFP_KERNEL); if (!hem->addr) { kfree(hem); return NULL; @@ -1336,6 +1335,10 @@ static int hem_list_alloc_root_bt(struct hns_roce_dev *hr_dev, if (ba_num < 1) return -ENOMEM; + if (ba_num > unit) + return -ENOBUFS; + + ba_num = min_t(int, ba_num, unit); INIT_LIST_HEAD(&temp_root); offset = r->offset; /* indicate to last region */ diff --git a/drivers/infiniband/hw/hns/hns_roce_mr.c b/drivers/infiniband/hw/hns/hns_roce_mr.c index 5a2a557c37b8..79b3c3023fe7 100644 --- a/drivers/infiniband/hw/hns/hns_roce_mr.c +++ b/drivers/infiniband/hw/hns/hns_roce_mr.c @@ -596,30 +596,26 @@ int hns_roce_dealloc_mw(struct ib_mw *ibmw) } static int mtr_map_region(struct hns_roce_dev *hr_dev, struct hns_roce_mtr *mtr, - dma_addr_t *pages, struct hns_roce_buf_region *region) + struct hns_roce_buf_region *region, dma_addr_t *pages, + int max_count) { + int count, npage; + int offset, end; __le64 *mtts; - int offset; - int count; - int npage; u64 addr; - int end; int i; - /* if hopnum is 0, buffer cannot store BAs, so skip write mtt */ - if (!region->hopnum) - return 0; - offset = region->offset; end = offset + region->count; npage = 0; - while (offset < end) { + while (offset < end && npage < max_count) { + count = 0; mtts = hns_roce_hem_list_find_mtt(hr_dev, &mtr->hem_list, offset, &count, NULL); if (!mtts) return -ENOBUFS; - for (i = 0; i < count; i++) { + for (i = 0; i < count && npage < max_count; i++) { if (hr_dev->hw_rev == HNS_ROCE_HW_VER1) addr = to_hr_hw_page_addr(pages[npage]); else @@ -631,7 +627,7 @@ static int mtr_map_region(struct hns_roce_dev *hr_dev, struct hns_roce_mtr *mtr, offset += count; } - return 0; + return npage; } static inline bool mtr_has_mtt(struct hns_roce_buf_attr *attr) @@ -779,8 +775,8 @@ int hns_roce_mtr_map(struct hns_roce_dev *hr_dev, struct hns_roce_mtr *mtr, { struct ib_device *ibdev = &hr_dev->ib_dev; struct hns_roce_buf_region *r; - unsigned int i; - int err; + unsigned int i, mapped_cnt; + int ret; /* * Only use the first page address as root ba when hopnum is 0, this @@ -791,26 +787,42 @@ int hns_roce_mtr_map(struct hns_roce_dev *hr_dev, struct hns_roce_mtr *mtr, return 0; } - for (i = 0; i < mtr->hem_cfg.region_count; i++) { + for (i = 0, mapped_cnt = 0; i < mtr->hem_cfg.region_count && + mapped_cnt < page_cnt; i++) { r = &mtr->hem_cfg.region[i]; + /* if hopnum is 0, no need to map pages in this region */ + if (!r->hopnum) { + mapped_cnt += r->count; + continue; + } + if (r->offset + r->count > page_cnt) { - err = -EINVAL; + ret = -EINVAL; ibdev_err(ibdev, "failed to check mtr%u end %u + %u, max %u.\n", i, r->offset, r->count, page_cnt); - return err; + return ret; } - err = mtr_map_region(hr_dev, mtr, &pages[r->offset], r); - if (err) { + ret = mtr_map_region(hr_dev, mtr, r, &pages[r->offset], + page_cnt - mapped_cnt); + if (ret < 0) { ibdev_err(ibdev, "failed to map mtr%u offset %u, ret = %d.\n", - i, r->offset, err); - return err; + i, r->offset, ret); + return ret; } + mapped_cnt += ret; + ret = 0; } - return 0; + if (mapped_cnt < page_cnt) { + ret = -ENOBUFS; + ibdev_err(ibdev, "failed to map mtr pages count: %u < %u.\n", + mapped_cnt, page_cnt); + } + + return ret; } int hns_roce_mtr_find(struct hns_roce_dev *hr_dev, struct hns_roce_mtr *mtr, From 7373de9adb19aebed2781d3fdde576533d626d7a Mon Sep 17 00:00:00 2001 From: Lijun Ou Date: Fri, 5 Feb 2021 17:39:26 +0800 Subject: [PATCH 367/414] RDMA/hns: Disable RQ inline by default This feature should only be enabled by querying capability from firmware. Fixes: ba6bb7e97421 ("RDMA/hns: Add interfaces to get pf capabilities from firmware") Link: https://lore.kernel.org/r/1612517974-31867-5-git-send-email-liweihang@huawei.com Signed-off-by: Lijun Ou Signed-off-by: Weihang Li Signed-off-by: Jason Gunthorpe --- drivers/infiniband/hw/hns/hns_roce_hw_v2.c | 1 - 1 file changed, 1 deletion(-) diff --git a/drivers/infiniband/hw/hns/hns_roce_hw_v2.c b/drivers/infiniband/hw/hns/hns_roce_hw_v2.c index f0691f4ea688..3ba678352a86 100644 --- a/drivers/infiniband/hw/hns/hns_roce_hw_v2.c +++ b/drivers/infiniband/hw/hns/hns_roce_hw_v2.c @@ -1949,7 +1949,6 @@ static void set_default_caps(struct hns_roce_dev *hr_dev) caps->flags = HNS_ROCE_CAP_FLAG_REREG_MR | HNS_ROCE_CAP_FLAG_ROCE_V1_V2 | - HNS_ROCE_CAP_FLAG_RQ_INLINE | HNS_ROCE_CAP_FLAG_RECORD_DB | HNS_ROCE_CAP_FLAG_SQ_RECORD_DB; From 3fe07a008e0b4f88280e0c66241fdfa02f1604a2 Mon Sep 17 00:00:00 2001 From: Yixing Liu Date: Fri, 5 Feb 2021 17:39:28 +0800 Subject: [PATCH 368/414] RDMA/hns: Skip qp_flow_control_init() for HIP09 Since HIP09 does not require this function, it should be masked. Link: https://lore.kernel.org/r/1612517974-31867-7-git-send-email-liweihang@huawei.com Signed-off-by: Yixing Liu Signed-off-by: Weihang Li Signed-off-by: Jason Gunthorpe --- drivers/infiniband/hw/hns/hns_roce_hw_v2.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/infiniband/hw/hns/hns_roce_hw_v2.c b/drivers/infiniband/hw/hns/hns_roce_hw_v2.c index 3ba678352a86..75e331acf8c6 100644 --- a/drivers/infiniband/hw/hns/hns_roce_hw_v2.c +++ b/drivers/infiniband/hw/hns/hns_roce_hw_v2.c @@ -5235,6 +5235,9 @@ static int hns_roce_v2_qp_flow_control_init(struct hns_roce_dev *hr_dev, struct hns_roce_cmq_desc desc; int ret, i; + if (hr_dev->pci_dev->revision >= PCI_REVISION_ID_HIP09) + return 0; + mutex_lock(&hr_dev->qp_table.scc_mutex); /* set scc ctx clear done flag */ From 86f767e6fc1e719215ccf2b2ec65466f505f731b Mon Sep 17 00:00:00 2001 From: Lang Cheng Date: Fri, 5 Feb 2021 17:39:29 +0800 Subject: [PATCH 369/414] RDMA/hns: Replace wmb&__raw_writeq with writeq Currently, the driver updates doorbell looks like this: post() { wqe.field = 0x111; wmb(); update_wq_db(); } update_wq_db() { db.field = 0x222; __raw_writeq(db, db_reg); } writeq() is a better choice than __raw_writeq() because it calls dma_wmb() to barrier in ARM64, and dma_wmb() is better than wmb() for ROCEE device. This patch removes all wmb() before updating doorbell of SQ/RQ/CQ/SRQ by replacing __raw_writeq() with writeq() to improve performence. The new process looks like this: post() { wqe.field = 0x111; update_wq_db(); } update_wq_db() { db.field = 0x222; writeq(db, db_reg); } Link: https://lore.kernel.org/r/1612517974-31867-8-git-send-email-liweihang@huawei.com Signed-off-by: Lang Cheng Signed-off-by: Weihang Li Signed-off-by: Jason Gunthorpe --- drivers/infiniband/hw/hns/hns_roce_device.h | 2 +- drivers/infiniband/hw/hns/hns_roce_hw_v1.c | 15 --------------- drivers/infiniband/hw/hns/hns_roce_hw_v2.c | 20 +------------------- 3 files changed, 2 insertions(+), 35 deletions(-) diff --git a/drivers/infiniband/hw/hns/hns_roce_device.h b/drivers/infiniband/hw/hns/hns_roce_device.h index 1f94154323c3..74eb08f42ac2 100644 --- a/drivers/infiniband/hw/hns/hns_roce_device.h +++ b/drivers/infiniband/hw/hns/hns_roce_device.h @@ -1077,7 +1077,7 @@ static inline struct hns_roce_srq *to_hr_srq(struct ib_srq *ibsrq) static inline void hns_roce_write64_k(__le32 val[2], void __iomem *dest) { - __raw_writeq(*(u64 *) val, dest); + writeq(*(u64 *)val, dest); } static inline struct hns_roce_qp diff --git a/drivers/infiniband/hw/hns/hns_roce_hw_v1.c b/drivers/infiniband/hw/hns/hns_roce_hw_v1.c index 262ad58f9606..5346fdca9473 100644 --- a/drivers/infiniband/hw/hns/hns_roce_hw_v1.c +++ b/drivers/infiniband/hw/hns/hns_roce_hw_v1.c @@ -330,8 +330,6 @@ static int hns_roce_v1_post_send(struct ib_qp *ibqp, /* Set DB return */ if (likely(nreq)) { qp->sq.head += nreq; - /* Memory barrier */ - wmb(); roce_set_field(sq_db.u32_4, SQ_DOORBELL_U32_4_SQ_HEAD_M, SQ_DOORBELL_U32_4_SQ_HEAD_S, @@ -411,8 +409,6 @@ static int hns_roce_v1_post_recv(struct ib_qp *ibqp, out: if (likely(nreq)) { hr_qp->rq.head += nreq; - /* Memory barrier */ - wmb(); if (ibqp->qp_type == IB_QPT_GSI) { __le32 tmp; @@ -1984,12 +1980,6 @@ static void __hns_roce_v1_cq_clean(struct hns_roce_cq *hr_cq, u32 qpn, if (nfreed) { hr_cq->cons_index += nfreed; - /* - * Make sure update of buffer contents is done before - * updating consumer index. - */ - wmb(); - hns_roce_v1_cq_set_ci(hr_cq, hr_cq->cons_index); } } @@ -2330,8 +2320,6 @@ int hns_roce_v1_poll_cq(struct ib_cq *ibcq, int num_entries, struct ib_wc *wc) *hr_cq->tptr_addr = hr_cq->cons_index & ((hr_cq->cq_depth << 1) - 1); - /* Memroy barrier */ - wmb(); hns_roce_v1_cq_set_ci(hr_cq, hr_cq->cons_index); } @@ -3220,9 +3208,6 @@ static int hns_roce_v1_m_qp(struct ib_qp *ibqp, const struct ib_qp_attr *attr, * need to hw to flash RQ HEAD by DB again */ if (cur_state == IB_QPS_INIT && new_state == IB_QPS_INIT) { - /* Memory barrier */ - wmb(); - roce_set_field(doorbell[0], RQ_DOORBELL_U32_4_RQ_HEAD_M, RQ_DOORBELL_U32_4_RQ_HEAD_S, hr_qp->rq.head); roce_set_field(doorbell[1], RQ_DOORBELL_U32_8_QPN_M, diff --git a/drivers/infiniband/hw/hns/hns_roce_hw_v2.c b/drivers/infiniband/hw/hns/hns_roce_hw_v2.c index 75e331acf8c6..175a5eed3f4d 100644 --- a/drivers/infiniband/hw/hns/hns_roce_hw_v2.c +++ b/drivers/infiniband/hw/hns/hns_roce_hw_v2.c @@ -744,8 +744,6 @@ static int hns_roce_v2_post_send(struct ib_qp *ibqp, if (likely(nreq)) { qp->sq.head += nreq; qp->next_sge = sge_idx; - /* Memory barrier */ - wmb(); if (nreq == 1 && qp->sq.head == qp->sq.tail + 1 && (qp->en_flags & HNS_ROCE_QP_CAP_DIRECT_WQE)) @@ -875,8 +873,6 @@ static int hns_roce_v2_post_recv(struct ib_qp *ibqp, out: if (likely(nreq)) { hr_qp->rq.head += nreq; - /* Memory barrier */ - wmb(); /* * Hip08 hardware cannot flush the WQEs in RQ if the QP state @@ -1015,12 +1011,6 @@ static int hns_roce_v2_post_srq_recv(struct ib_srq *ibsrq, } if (likely(nreq)) { - /* - * Make sure that descriptors are written before - * doorbell record. - */ - wmb(); - srq_db.byte_4 = cpu_to_le32(HNS_ROCE_V2_SRQ_DB << V2_DB_BYTE_4_CMD_S | (srq->srqn & V2_DB_BYTE_4_TAG_M)); @@ -3198,11 +3188,6 @@ static void __hns_roce_v2_cq_clean(struct hns_roce_cq *hr_cq, u32 qpn, if (nfreed) { hr_cq->cons_index += nfreed; - /* - * Make sure update of buffer contents is done before - * updating consumer index. - */ - wmb(); hns_roce_v2_cq_set_ci(hr_cq, hr_cq->cons_index); } } @@ -3711,11 +3696,8 @@ static int hns_roce_v2_poll_cq(struct ib_cq *ibcq, int num_entries, break; } - if (npolled) { - /* Memory barrier */ - wmb(); + if (npolled) hns_roce_v2_cq_set_ci(hr_cq, hr_cq->cons_index); - } out: spin_unlock_irqrestore(&hr_cq->lock, flags); From c05ffb1f7db2d2060530a1e0c41a496ccb44328f Mon Sep 17 00:00:00 2001 From: Lang Cheng Date: Fri, 5 Feb 2021 17:39:30 +0800 Subject: [PATCH 370/414] RDMA/hns: Move HIP06 related definitions into hns_roce_hw_v1.h hns_roce_device.h is not specific to hardware, some definitions are only used for HIP06, they should be moved into hns_roce_hw_v1.h. Link: https://lore.kernel.org/r/1612517974-31867-9-git-send-email-liweihang@huawei.com Signed-off-by: Lang Cheng Signed-off-by: Weihang Li Signed-off-by: Jason Gunthorpe --- drivers/infiniband/hw/hns/hns_roce_device.h | 41 -------------------- drivers/infiniband/hw/hns/hns_roce_hw_v1.h | 43 +++++++++++++++++++++ 2 files changed, 43 insertions(+), 41 deletions(-) diff --git a/drivers/infiniband/hw/hns/hns_roce_device.h b/drivers/infiniband/hw/hns/hns_roce_device.h index 74eb08f42ac2..315c0137f584 100644 --- a/drivers/infiniband/hw/hns/hns_roce_device.h +++ b/drivers/infiniband/hw/hns/hns_roce_device.h @@ -170,44 +170,6 @@ enum hns_roce_event { HNS_ROCE_EVENT_TYPE_FLR = 0x15, }; -/* Local Work Queue Catastrophic Error,SUBTYPE 0x5 */ -enum { - HNS_ROCE_LWQCE_QPC_ERROR = 1, - HNS_ROCE_LWQCE_MTU_ERROR = 2, - HNS_ROCE_LWQCE_WQE_BA_ADDR_ERROR = 3, - HNS_ROCE_LWQCE_WQE_ADDR_ERROR = 4, - HNS_ROCE_LWQCE_SQ_WQE_SHIFT_ERROR = 5, - HNS_ROCE_LWQCE_SL_ERROR = 6, - HNS_ROCE_LWQCE_PORT_ERROR = 7, -}; - -/* Local Access Violation Work Queue Error,SUBTYPE 0x7 */ -enum { - HNS_ROCE_LAVWQE_R_KEY_VIOLATION = 1, - HNS_ROCE_LAVWQE_LENGTH_ERROR = 2, - HNS_ROCE_LAVWQE_VA_ERROR = 3, - HNS_ROCE_LAVWQE_PD_ERROR = 4, - HNS_ROCE_LAVWQE_RW_ACC_ERROR = 5, - HNS_ROCE_LAVWQE_KEY_STATE_ERROR = 6, - HNS_ROCE_LAVWQE_MR_OPERATION_ERROR = 7, -}; - -/* DOORBELL overflow subtype */ -enum { - HNS_ROCE_DB_SUBTYPE_SDB_OVF = 1, - HNS_ROCE_DB_SUBTYPE_SDB_ALM_OVF = 2, - HNS_ROCE_DB_SUBTYPE_ODB_OVF = 3, - HNS_ROCE_DB_SUBTYPE_ODB_ALM_OVF = 4, - HNS_ROCE_DB_SUBTYPE_SDB_ALM_EMP = 5, - HNS_ROCE_DB_SUBTYPE_ODB_ALM_EMP = 6, -}; - -enum { - /* RQ&SRQ related operations */ - HNS_ROCE_OPCODE_SEND_DATA_RECEIVE = 0x06, - HNS_ROCE_OPCODE_RDMA_WITH_IMM_RECEIVE = 0x07, -}; - #define HNS_ROCE_CAP_FLAGS_EX_SHIFT 12 enum { @@ -260,9 +222,6 @@ enum { #define HNS_ROCE_CMD_SUCCESS 1 -#define HNS_ROCE_PORT_DOWN 0 -#define HNS_ROCE_PORT_UP 1 - /* The minimum page size is 4K for hardware */ #define HNS_HW_PAGE_SHIFT 12 #define HNS_HW_PAGE_SIZE (1 << HNS_HW_PAGE_SHIFT) diff --git a/drivers/infiniband/hw/hns/hns_roce_hw_v1.h b/drivers/infiniband/hw/hns/hns_roce_hw_v1.h index 46ab0a321d21..84383236e47d 100644 --- a/drivers/infiniband/hw/hns/hns_roce_hw_v1.h +++ b/drivers/infiniband/hw/hns/hns_roce_hw_v1.h @@ -193,6 +193,49 @@ #define HNS_ROCE_AEQE_EVENT_CE_EVENT_CEQE_CEQN_S 0 #define HNS_ROCE_AEQE_EVENT_CE_EVENT_CEQE_CEQN_M GENMASK(4, 0) +/* Local Work Queue Catastrophic Error,SUBTYPE 0x5 */ +enum { + HNS_ROCE_LWQCE_QPC_ERROR = 1, + HNS_ROCE_LWQCE_MTU_ERROR, + HNS_ROCE_LWQCE_WQE_BA_ADDR_ERROR, + HNS_ROCE_LWQCE_WQE_ADDR_ERROR, + HNS_ROCE_LWQCE_SQ_WQE_SHIFT_ERROR, + HNS_ROCE_LWQCE_SL_ERROR, + HNS_ROCE_LWQCE_PORT_ERROR, +}; + +/* Local Access Violation Work Queue Error,SUBTYPE 0x7 */ +enum { + HNS_ROCE_LAVWQE_R_KEY_VIOLATION = 1, + HNS_ROCE_LAVWQE_LENGTH_ERROR, + HNS_ROCE_LAVWQE_VA_ERROR, + HNS_ROCE_LAVWQE_PD_ERROR, + HNS_ROCE_LAVWQE_RW_ACC_ERROR, + HNS_ROCE_LAVWQE_KEY_STATE_ERROR, + HNS_ROCE_LAVWQE_MR_OPERATION_ERROR, +}; + +/* DOORBELL overflow subtype */ +enum { + HNS_ROCE_DB_SUBTYPE_SDB_OVF = 1, + HNS_ROCE_DB_SUBTYPE_SDB_ALM_OVF, + HNS_ROCE_DB_SUBTYPE_ODB_OVF, + HNS_ROCE_DB_SUBTYPE_ODB_ALM_OVF, + HNS_ROCE_DB_SUBTYPE_SDB_ALM_EMP, + HNS_ROCE_DB_SUBTYPE_ODB_ALM_EMP, +}; + +enum { + /* RQ&SRQ related operations */ + HNS_ROCE_OPCODE_SEND_DATA_RECEIVE = 0x06, + HNS_ROCE_OPCODE_RDMA_WITH_IMM_RECEIVE, +}; + +enum { + HNS_ROCE_PORT_DOWN = 0, + HNS_ROCE_PORT_UP, +}; + struct hns_roce_cq_context { __le32 cqc_byte_4; __le32 cq_bt_l; From 993703370a6f929fcef39a14d1be17cbc1d26a30 Mon Sep 17 00:00:00 2001 From: Xinhao Liu Date: Fri, 5 Feb 2021 17:39:31 +0800 Subject: [PATCH 371/414] RDMA/hns: Remove some magic numbers Use macros instead of magic numbers to represent shift of dma_handle_wqe, dma_handle_idx and UDP destination port number of RoCEv2. Link: https://lore.kernel.org/r/1612517974-31867-10-git-send-email-liweihang@huawei.com Signed-off-by: Xinhao Liu Signed-off-by: Weihang Li Signed-off-by: Jason Gunthorpe --- drivers/infiniband/hw/hns/hns_roce_hw_v2.c | 16 +++++++++++----- 1 file changed, 11 insertions(+), 5 deletions(-) diff --git a/drivers/infiniband/hw/hns/hns_roce_hw_v2.c b/drivers/infiniband/hw/hns/hns_roce_hw_v2.c index 175a5eed3f4d..7d1bfcfd2db6 100644 --- a/drivers/infiniband/hw/hns/hns_roce_hw_v2.c +++ b/drivers/infiniband/hw/hns/hns_roce_hw_v2.c @@ -1603,7 +1603,8 @@ static int hns_roce_config_global_param(struct hns_roce_dev *hr_dev) CFG_GLOBAL_PARAM_DATA_0_ROCEE_TIME_1US_CFG_S, 0x3e8); roce_set_field(req->time_cfg_udp_port, CFG_GLOBAL_PARAM_DATA_0_ROCEE_UDP_PORT_M, - CFG_GLOBAL_PARAM_DATA_0_ROCEE_UDP_PORT_S, 0x12b7); + CFG_GLOBAL_PARAM_DATA_0_ROCEE_UDP_PORT_S, + ROCE_V2_UDP_DPORT); return hns_roce_cmq_send(hr_dev, &desc, 1); } @@ -5266,6 +5267,9 @@ static int hns_roce_v2_qp_flow_control_init(struct hns_roce_dev *hr_dev, return ret; } +#define DMA_IDX_SHIFT 3 +#define DMA_WQE_SHIFT 3 + static int hns_roce_v2_write_srqc_index_queue(struct hns_roce_srq *srq, struct hns_roce_srq_context *ctx) { @@ -5288,8 +5292,9 @@ static int hns_roce_v2_write_srqc_index_queue(struct hns_roce_srq *srq, hr_reg_write(ctx, SRQC_IDX_HOP_NUM, to_hr_hem_hopnum(hr_dev->caps.idx_hop_num, srq->wqe_cnt)); - hr_reg_write(ctx, SRQC_IDX_BT_BA_L, dma_handle_idx >> 3); - hr_reg_write(ctx, SRQC_IDX_BT_BA_H, upper_32_bits(dma_handle_idx >> 3)); + hr_reg_write(ctx, SRQC_IDX_BT_BA_L, dma_handle_idx >> DMA_IDX_SHIFT); + hr_reg_write(ctx, SRQC_IDX_BT_BA_H, + upper_32_bits(dma_handle_idx >> DMA_IDX_SHIFT)); hr_reg_write(ctx, SRQC_IDX_BA_PG_SZ, to_hr_hw_page_shift(idx_que->mtr.hem_cfg.ba_pg_shift)); @@ -5342,8 +5347,9 @@ static int hns_roce_v2_write_srqc(struct hns_roce_srq *srq, void *mb_buf) to_hr_hem_hopnum(hr_dev->caps.srqwqe_hop_num, srq->wqe_cnt)); - hr_reg_write(ctx, SRQC_WQE_BT_BA_L, dma_handle_wqe >> 3); - hr_reg_write(ctx, SRQC_WQE_BT_BA_H, upper_32_bits(dma_handle_wqe >> 3)); + hr_reg_write(ctx, SRQC_WQE_BT_BA_L, dma_handle_wqe >> DMA_WQE_SHIFT); + hr_reg_write(ctx, SRQC_WQE_BT_BA_H, + upper_32_bits(dma_handle_wqe >> DMA_WQE_SHIFT)); hr_reg_write(ctx, SRQC_WQE_BA_PG_SZ, to_hr_hw_page_shift(srq->buf_mtr.hem_cfg.ba_pg_shift)); From 62490fd5a8654f9639a6766921ee72f5dbc6a479 Mon Sep 17 00:00:00 2001 From: Lang Cheng Date: Fri, 5 Feb 2021 17:39:32 +0800 Subject: [PATCH 372/414] RDMA/hns: Avoid unnecessary memset on WQEs in post_send All fields of WQE will be rewrote, so the memset is unnecessary. And when SQ is working in OWNER mode, the pipeline may prefetch the WQEs beyond PI, the memset operation may flip the owner bit too early, then the pipeline may get a wrong WQ. Link: https://lore.kernel.org/r/1612517974-31867-11-git-send-email-liweihang@huawei.com Signed-off-by: Lang Cheng Signed-off-by: Weihang Li Signed-off-by: Jason Gunthorpe --- drivers/infiniband/hw/hns/hns_roce_hw_v2.c | 2 -- 1 file changed, 2 deletions(-) diff --git a/drivers/infiniband/hw/hns/hns_roce_hw_v2.c b/drivers/infiniband/hw/hns/hns_roce_hw_v2.c index 7d1bfcfd2db6..45a11fdaaa50 100644 --- a/drivers/infiniband/hw/hns/hns_roce_hw_v2.c +++ b/drivers/infiniband/hw/hns/hns_roce_hw_v2.c @@ -469,7 +469,6 @@ static inline int set_ud_wqe(struct hns_roce_qp *qp, int ret; valid_num_sge = calc_wr_sge_num(wr, &msg_len); - memset(ud_sq_wqe, 0, sizeof(*ud_sq_wqe)); ret = set_ud_opcode(ud_sq_wqe, wr); if (WARN_ON(ret)) @@ -575,7 +574,6 @@ static inline int set_rc_wqe(struct hns_roce_qp *qp, int ret; valid_num_sge = calc_wr_sge_num(wr, &msg_len); - memset(rc_sq_wqe, 0, sizeof(*rc_sq_wqe)); rc_sq_wqe->msg_len = cpu_to_le32(msg_len); From cd0a4baf36dafb0a6d1a57be712ab6263ef169bc Mon Sep 17 00:00:00 2001 From: Yixian Liu Date: Fri, 5 Feb 2021 17:39:33 +0800 Subject: [PATCH 373/414] RDMA/hns: Remove unnecessary wrap around for EQ's consumer index The hns driver wrap around the consumer index of AEQ and CEQ when they reach to two times of queue entries number for owner mechanism, actually, it is unnecessary to wrap around since the hardware itself will mask it before use. Link: https://lore.kernel.org/r/1612517974-31867-12-git-send-email-liweihang@huawei.com Signed-off-by: Yixian Liu Signed-off-by: Weihang Li Signed-off-by: Jason Gunthorpe --- drivers/infiniband/hw/hns/hns_roce_hw_v2.c | 6 ------ 1 file changed, 6 deletions(-) diff --git a/drivers/infiniband/hw/hns/hns_roce_hw_v2.c b/drivers/infiniband/hw/hns/hns_roce_hw_v2.c index 45a11fdaaa50..28c2deac58ff 100644 --- a/drivers/infiniband/hw/hns/hns_roce_hw_v2.c +++ b/drivers/infiniband/hw/hns/hns_roce_hw_v2.c @@ -5669,9 +5669,6 @@ static int hns_roce_v2_aeq_int(struct hns_roce_dev *hr_dev, ++eq->cons_index; aeqe_found = 1; - if (eq->cons_index > (2 * eq->entries - 1)) - eq->cons_index = 0; - hns_roce_v2_init_irq_work(hr_dev, eq, queue_num); aeqe = next_aeqe_sw_v2(eq); @@ -5714,9 +5711,6 @@ static int hns_roce_v2_ceq_int(struct hns_roce_dev *hr_dev, ++eq->cons_index; ceqe_found = 1; - if (eq->cons_index > (EQ_DEPTH_COEFF * eq->entries - 1)) - eq->cons_index = 0; - ceqe = next_ceqe_sw_v2(eq); } From a5887d62072e09a9d3b01549ac5d555bbeb9be3d Mon Sep 17 00:00:00 2001 From: Xinhao Liu Date: Fri, 5 Feb 2021 17:39:34 +0800 Subject: [PATCH 374/414] RDMA/hns: Delete redundant judgment when preparing descriptors There is no need to use a for loop to assign values for an array of cmd descriptors which has only two elements. Link: https://lore.kernel.org/r/1612517974-31867-13-git-send-email-liweihang@huawei.com Signed-off-by: Xinhao Liu Signed-off-by: Weihang Li Signed-off-by: Jason Gunthorpe --- drivers/infiniband/hw/hns/hns_roce_hw_v2.c | 40 ++++++++-------------- 1 file changed, 14 insertions(+), 26 deletions(-) diff --git a/drivers/infiniband/hw/hns/hns_roce_hw_v2.c b/drivers/infiniband/hw/hns/hns_roce_hw_v2.c index 28c2deac58ff..d9f94b454085 100644 --- a/drivers/infiniband/hw/hns/hns_roce_hw_v2.c +++ b/drivers/infiniband/hw/hns/hns_roce_hw_v2.c @@ -1613,17 +1613,13 @@ static int hns_roce_query_pf_resource(struct hns_roce_dev *hr_dev) struct hns_roce_pf_res_a *req_a; struct hns_roce_pf_res_b *req_b; int ret; - int i; - for (i = 0; i < 2; i++) { - hns_roce_cmq_setup_basic_desc(&desc[i], - HNS_ROCE_OPC_QUERY_PF_RES, true); + hns_roce_cmq_setup_basic_desc(&desc[0], HNS_ROCE_OPC_QUERY_PF_RES, + true); + desc[0].flag |= cpu_to_le16(HNS_ROCE_CMD_FLAG_NEXT); - if (i == 0) - desc[i].flag |= cpu_to_le16(HNS_ROCE_CMD_FLAG_NEXT); - else - desc[i].flag &= ~cpu_to_le16(HNS_ROCE_CMD_FLAG_NEXT); - } + hns_roce_cmq_setup_basic_desc(&desc[1], HNS_ROCE_OPC_QUERY_PF_RES, + true); ret = hns_roce_cmq_send(hr_dev, desc, 2); if (ret) @@ -1716,19 +1712,16 @@ static int hns_roce_alloc_vf_resource(struct hns_roce_dev *hr_dev) struct hns_roce_cmq_desc desc[2]; struct hns_roce_vf_res_a *req_a; struct hns_roce_vf_res_b *req_b; - int i; req_a = (struct hns_roce_vf_res_a *)desc[0].data; req_b = (struct hns_roce_vf_res_b *)desc[1].data; - for (i = 0; i < 2; i++) { - hns_roce_cmq_setup_basic_desc(&desc[i], - HNS_ROCE_OPC_ALLOC_VF_RES, false); - if (i == 0) - desc[i].flag |= cpu_to_le16(HNS_ROCE_CMD_FLAG_NEXT); - else - desc[i].flag &= ~cpu_to_le16(HNS_ROCE_CMD_FLAG_NEXT); - } + hns_roce_cmq_setup_basic_desc(&desc[0], HNS_ROCE_OPC_ALLOC_VF_RES, + false); + desc[0].flag |= cpu_to_le16(HNS_ROCE_CMD_FLAG_NEXT); + + hns_roce_cmq_setup_basic_desc(&desc[1], HNS_ROCE_OPC_ALLOC_VF_RES, + false); roce_set_field(req_a->vf_qpc_bt_idx_num, VF_RES_A_DATA_1_VF_QPC_BT_IDX_M, @@ -2409,7 +2402,6 @@ static int hns_roce_config_link_table(struct hns_roce_dev *hr_dev, struct hns_roce_link_table_entry *entry; enum hns_roce_opcode_type opcode; u32 page_num; - int i; switch (type) { case TSQ_LINK_TABLE: @@ -2427,14 +2419,10 @@ static int hns_roce_config_link_table(struct hns_roce_dev *hr_dev, page_num = link_tbl->npages; entry = link_tbl->table.buf; - for (i = 0; i < 2; i++) { - hns_roce_cmq_setup_basic_desc(&desc[i], opcode, false); + hns_roce_cmq_setup_basic_desc(&desc[0], opcode, false); + desc[0].flag |= cpu_to_le16(HNS_ROCE_CMD_FLAG_NEXT); - if (i == 0) - desc[i].flag |= cpu_to_le16(HNS_ROCE_CMD_FLAG_NEXT); - else - desc[i].flag &= ~cpu_to_le16(HNS_ROCE_CMD_FLAG_NEXT); - } + hns_roce_cmq_setup_basic_desc(&desc[1], opcode, false); req_a->base_addr_l = cpu_to_le32(link_tbl->table.map & 0xffffffff); req_a->base_addr_h = cpu_to_le32(link_tbl->table.map >> 32); From 1602a31d71d3607f3f1dc08692aad7105ca80e5b Mon Sep 17 00:00:00 2001 From: Tobias Klauser Date: Fri, 18 Dec 2020 17:56:51 +0100 Subject: [PATCH 375/414] selftests/timens: add futex binary to .gitignore Add the futex test binary introduced by commit a4fd8414659b ("selftests/timens: Add a test for futex()") to .gitignore. Signed-off-by: Tobias Klauser Signed-off-by: Shuah Khan --- tools/testing/selftests/timens/.gitignore | 1 + 1 file changed, 1 insertion(+) diff --git a/tools/testing/selftests/timens/.gitignore b/tools/testing/selftests/timens/.gitignore index 2e43851b47c1..fe1eb8271b35 100644 --- a/tools/testing/selftests/timens/.gitignore +++ b/tools/testing/selftests/timens/.gitignore @@ -1,6 +1,7 @@ # SPDX-License-Identifier: GPL-2.0-only clock_nanosleep exec +futex gettime_perf gettime_perf_cold procfs From db72438c9319cfd37e3c237a7754ca862ae12d63 Mon Sep 17 00:00:00 2001 From: Yishai Hadas Date: Tue, 2 Feb 2021 09:13:09 +0200 Subject: [PATCH 376/414] RDMA/mlx5: Cleanup the synchronize_srcu() from the ODP flow Cleanup the synchronize_srcu() from the ODP flow as it was found to be a very heavy time consumer as part of dereg_mr. For example de-registration of 10000 ODP MRs each with size of 2M hugepage took 19.6 sec comparing de-registration of same number of non ODP MRs that took 172 ms. The new locking scheme uses the wait_event() mechanism which follows the use count of the MR instead of using synchronize_srcu(). By that change, the time required for the above test took 95 ms which is even better than the non ODP flow. Once fully dropped the srcu usage, had to come with a lock to protect the XA access. As part of using the above mechanism we could also clean the num_deferred_work stuff and follow the use count instead. Link: https://lore.kernel.org/r/20210202071309.2057998-1-leon@kernel.org Signed-off-by: Yishai Hadas Signed-off-by: Leon Romanovsky Signed-off-by: Jason Gunthorpe --- drivers/infiniband/hw/mlx5/devx.c | 13 +- drivers/infiniband/hw/mlx5/main.c | 5 - drivers/infiniband/hw/mlx5/mlx5_ib.h | 31 ++- drivers/infiniband/hw/mlx5/mr.c | 26 +-- drivers/infiniband/hw/mlx5/odp.c | 224 +++++++------------ drivers/net/ethernet/mellanox/mlx5/core/mr.c | 1 + include/linux/mlx5/driver.h | 2 + 7 files changed, 127 insertions(+), 175 deletions(-) diff --git a/drivers/infiniband/hw/mlx5/devx.c b/drivers/infiniband/hw/mlx5/devx.c index e6d2709eb09e..e39661db0d2f 100644 --- a/drivers/infiniband/hw/mlx5/devx.c +++ b/drivers/infiniband/hw/mlx5/devx.c @@ -1310,9 +1310,9 @@ static int devx_handle_mkey_indirect(struct devx_obj *obj, mkey->size = MLX5_GET64(mkc, mkc, len); mkey->pd = MLX5_GET(mkc, mkc, pd); devx_mr->ndescs = MLX5_GET(mkc, mkc, translations_octword_size); + init_waitqueue_head(&mkey->wait); - return xa_err(xa_store(&dev->odp_mkeys, mlx5_base_mkey(mkey->key), mkey, - GFP_KERNEL)); + return mlx5r_store_odp_mkey(dev, mkey); } static int devx_handle_mkey_create(struct mlx5_ib_dev *dev, @@ -1385,16 +1385,15 @@ static int devx_obj_cleanup(struct ib_uobject *uobject, int ret; dev = mlx5_udata_to_mdev(&attrs->driver_udata); - if (obj->flags & DEVX_OBJ_FLAGS_INDIRECT_MKEY) { + if (obj->flags & DEVX_OBJ_FLAGS_INDIRECT_MKEY && + xa_erase(&obj->ib_dev->odp_mkeys, + mlx5_base_mkey(obj->devx_mr.mmkey.key))) /* * The pagefault_single_data_segment() does commands against * the mmkey, we must wait for that to stop before freeing the * mkey, as another allocation could get the same mkey #. */ - xa_erase(&obj->ib_dev->odp_mkeys, - mlx5_base_mkey(obj->devx_mr.mmkey.key)); - synchronize_srcu(&dev->odp_srcu); - } + mlx5r_deref_wait_odp_mkey(&obj->devx_mr.mmkey); if (obj->flags & DEVX_OBJ_FLAGS_DCT) ret = mlx5_core_destroy_dct(obj->ib_dev, &obj->core_dct); diff --git a/drivers/infiniband/hw/mlx5/main.c b/drivers/infiniband/hw/mlx5/main.c index ce4a9eba53f9..93dd568a5397 100644 --- a/drivers/infiniband/hw/mlx5/main.c +++ b/drivers/infiniband/hw/mlx5/main.c @@ -3869,7 +3869,6 @@ static void mlx5_ib_stage_init_cleanup(struct mlx5_ib_dev *dev) { mlx5_ib_cleanup_multiport_master(dev); WARN_ON(!xa_empty(&dev->odp_mkeys)); - cleanup_srcu_struct(&dev->odp_srcu); mutex_destroy(&dev->cap_mask_mutex); WARN_ON(!xa_empty(&dev->sig_mrs)); WARN_ON(!bitmap_empty(dev->dm.memic_alloc_pages, MLX5_MAX_MEMIC_PAGES)); @@ -3914,10 +3913,6 @@ static int mlx5_ib_stage_init_init(struct mlx5_ib_dev *dev) dev->ib_dev.num_comp_vectors = mlx5_comp_vectors_count(mdev); - err = init_srcu_struct(&dev->odp_srcu); - if (err) - goto err_mp; - mutex_init(&dev->cap_mask_mutex); INIT_LIST_HEAD(&dev->qp_list); spin_lock_init(&dev->reset_flow_resource_lock); diff --git a/drivers/infiniband/hw/mlx5/mlx5_ib.h b/drivers/infiniband/hw/mlx5/mlx5_ib.h index ab52083634e6..88cc26e008fc 100644 --- a/drivers/infiniband/hw/mlx5/mlx5_ib.h +++ b/drivers/infiniband/hw/mlx5/mlx5_ib.h @@ -684,11 +684,8 @@ struct mlx5_ib_mr { u64 pi_iova; /* For ODP and implicit */ - atomic_t num_deferred_work; - wait_queue_head_t q_deferred_work; struct xarray implicit_children; union { - struct rcu_head rcu; struct list_head elm; struct work_struct work; } odp_destroy; @@ -1068,11 +1065,6 @@ struct mlx5_ib_dev { u64 odp_max_size; struct mlx5_ib_pf_eq odp_pf_eq; - /* - * Sleepable RCU that prevents destruction of MRs while they are still - * being used by a page fault handler. - */ - struct srcu_struct odp_srcu; struct xarray odp_mkeys; u32 null_mkey; @@ -1599,6 +1591,29 @@ static inline bool mlx5_ib_can_reconfig_with_umr(struct mlx5_ib_dev *dev, return true; } +static inline int mlx5r_store_odp_mkey(struct mlx5_ib_dev *dev, + struct mlx5_core_mkey *mmkey) +{ + refcount_set(&mmkey->usecount, 1); + + return xa_err(xa_store(&dev->odp_mkeys, mlx5_base_mkey(mmkey->key), + mmkey, GFP_KERNEL)); +} + +/* deref an mkey that can participate in ODP flow */ +static inline void mlx5r_deref_odp_mkey(struct mlx5_core_mkey *mmkey) +{ + if (refcount_dec_and_test(&mmkey->usecount)) + wake_up(&mmkey->wait); +} + +/* deref an mkey that can participate in ODP flow and wait for relese */ +static inline void mlx5r_deref_wait_odp_mkey(struct mlx5_core_mkey *mmkey) +{ + mlx5r_deref_odp_mkey(mmkey); + wait_event(mmkey->wait, refcount_read(&mmkey->usecount) == 0); +} + int mlx5_ib_test_wc(struct mlx5_ib_dev *dev); static inline bool mlx5_ib_lag_should_assign_affinity(struct mlx5_ib_dev *dev) diff --git a/drivers/infiniband/hw/mlx5/mr.c b/drivers/infiniband/hw/mlx5/mr.c index a63ef7c66e38..db05b0e0a8d7 100644 --- a/drivers/infiniband/hw/mlx5/mr.c +++ b/drivers/infiniband/hw/mlx5/mr.c @@ -158,6 +158,7 @@ static void create_mkey_callback(int status, struct mlx5_async_work *context) mr->mmkey.type = MLX5_MKEY_MR; mr->mmkey.key |= mlx5_idx_to_mkey( MLX5_GET(create_mkey_out, mr->out, mkey_index)); + init_waitqueue_head(&mr->mmkey.wait); WRITE_ONCE(dev->cache.last_add, jiffies); @@ -1551,10 +1552,7 @@ static struct ib_mr *create_user_odp_mr(struct ib_pd *pd, u64 start, u64 length, } odp->private = mr; - init_waitqueue_head(&mr->q_deferred_work); - atomic_set(&mr->num_deferred_work, 0); - err = xa_err(xa_store(&dev->odp_mkeys, mlx5_base_mkey(mr->mmkey.key), - &mr->mmkey, GFP_KERNEL)); + err = mlx5r_store_odp_mkey(dev, &mr->mmkey); if (err) goto err_dereg_mr; @@ -1651,10 +1649,7 @@ struct ib_mr *mlx5_ib_reg_user_mr_dmabuf(struct ib_pd *pd, u64 offset, atomic_add(ib_umem_num_pages(mr->umem), &dev->mdev->priv.reg_pages); umem_dmabuf->private = mr; - init_waitqueue_head(&mr->q_deferred_work); - atomic_set(&mr->num_deferred_work, 0); - err = xa_err(xa_store(&dev->odp_mkeys, mlx5_base_mkey(mr->mmkey.key), - &mr->mmkey, GFP_KERNEL)); + err = mlx5r_store_odp_mkey(dev, &mr->mmkey); if (err) goto err_dereg_mr; @@ -2330,9 +2325,7 @@ int mlx5_ib_alloc_mw(struct ib_mw *ibmw, struct ib_udata *udata) } if (IS_ENABLED(CONFIG_INFINIBAND_ON_DEMAND_PAGING)) { - err = xa_err(xa_store(&dev->odp_mkeys, - mlx5_base_mkey(mw->mmkey.key), &mw->mmkey, - GFP_KERNEL)); + err = mlx5r_store_odp_mkey(dev, &mw->mmkey); if (err) goto free_mkey; } @@ -2352,14 +2345,13 @@ int mlx5_ib_dealloc_mw(struct ib_mw *mw) struct mlx5_ib_dev *dev = to_mdev(mw->device); struct mlx5_ib_mw *mmw = to_mmw(mw); - if (IS_ENABLED(CONFIG_INFINIBAND_ON_DEMAND_PAGING)) { - xa_erase(&dev->odp_mkeys, mlx5_base_mkey(mmw->mmkey.key)); + if (IS_ENABLED(CONFIG_INFINIBAND_ON_DEMAND_PAGING) && + xa_erase(&dev->odp_mkeys, mlx5_base_mkey(mmw->mmkey.key))) /* - * pagefault_single_data_segment() may be accessing mmw under - * SRCU if the user bound an ODP MR to this MW. + * pagefault_single_data_segment() may be accessing mmw + * if the user bound an ODP MR to this MW. */ - synchronize_srcu(&dev->odp_srcu); - } + mlx5r_deref_wait_odp_mkey(&mmw->mmkey); return mlx5_core_destroy_mkey(dev->mdev, &mmw->mmkey); } diff --git a/drivers/infiniband/hw/mlx5/odp.c b/drivers/infiniband/hw/mlx5/odp.c index e77b1db73893..374698186662 100644 --- a/drivers/infiniband/hw/mlx5/odp.c +++ b/drivers/infiniband/hw/mlx5/odp.c @@ -115,7 +115,6 @@ static void populate_klm(struct mlx5_klm *pklm, size_t idx, size_t nentries, * xarray would be protected by the umem_mutex, however that is not * possible. Instead this uses a weaker update-then-lock pattern: * - * srcu_read_lock() * xa_store() * mutex_lock(umem_mutex) * mlx5_ib_update_xlt() @@ -126,12 +125,9 @@ static void populate_klm(struct mlx5_klm *pklm, size_t idx, size_t nentries, * before destroying. * * The umem_mutex provides the acquire/release semantic needed to make - * the xa_store() visible to a racing thread. While SRCU is not - * technically required, using it gives consistent use of the SRCU - * locking around the xarray. + * the xa_store() visible to a racing thread. */ lockdep_assert_held(&to_ib_umem_odp(imr->umem)->umem_mutex); - lockdep_assert_held(&mr_to_mdev(imr)->odp_srcu); for (; pklm != end; pklm++, idx++) { struct mlx5_ib_mr *mtt = xa_load(&imr->implicit_children, idx); @@ -207,8 +203,8 @@ static void dma_fence_odp_mr(struct mlx5_ib_mr *mr) } /* - * This must be called after the mr has been removed from implicit_children - * and the SRCU synchronized. NOTE: The MR does not necessarily have to be + * This must be called after the mr has been removed from implicit_children. + * NOTE: The MR does not necessarily have to be * empty here, parallel page faults could have raced with the free process and * added pages to it. */ @@ -218,19 +214,15 @@ static void free_implicit_child_mr(struct mlx5_ib_mr *mr, bool need_imr_xlt) struct ib_umem_odp *odp_imr = to_ib_umem_odp(imr->umem); struct ib_umem_odp *odp = to_ib_umem_odp(mr->umem); unsigned long idx = ib_umem_start(odp) >> MLX5_IMR_MTT_SHIFT; - int srcu_key; - /* implicit_child_mr's are not allowed to have deferred work */ - WARN_ON(atomic_read(&mr->num_deferred_work)); + mlx5r_deref_wait_odp_mkey(&mr->mmkey); if (need_imr_xlt) { - srcu_key = srcu_read_lock(&mr_to_mdev(mr)->odp_srcu); mutex_lock(&odp_imr->umem_mutex); mlx5_ib_update_xlt(mr->parent, idx, 1, 0, MLX5_IB_UPD_XLT_INDIRECT | MLX5_IB_UPD_XLT_ATOMIC); mutex_unlock(&odp_imr->umem_mutex); - srcu_read_unlock(&mr_to_mdev(mr)->odp_srcu, srcu_key); } dma_fence_odp_mr(mr); @@ -238,26 +230,16 @@ static void free_implicit_child_mr(struct mlx5_ib_mr *mr, bool need_imr_xlt) mr->parent = NULL; mlx5_mr_cache_free(mr_to_mdev(mr), mr); ib_umem_odp_release(odp); - if (atomic_dec_and_test(&imr->num_deferred_work)) - wake_up(&imr->q_deferred_work); } static void free_implicit_child_mr_work(struct work_struct *work) { struct mlx5_ib_mr *mr = container_of(work, struct mlx5_ib_mr, odp_destroy.work); + struct mlx5_ib_mr *imr = mr->parent; free_implicit_child_mr(mr, true); -} - -static void free_implicit_child_mr_rcu(struct rcu_head *head) -{ - struct mlx5_ib_mr *mr = - container_of(head, struct mlx5_ib_mr, odp_destroy.rcu); - - /* Freeing a MR is a sleeping operation, so bounce to a work queue */ - INIT_WORK(&mr->odp_destroy.work, free_implicit_child_mr_work); - queue_work(system_unbound_wq, &mr->odp_destroy.work); + mlx5r_deref_odp_mkey(&imr->mmkey); } static void destroy_unused_implicit_child_mr(struct mlx5_ib_mr *mr) @@ -266,21 +248,14 @@ static void destroy_unused_implicit_child_mr(struct mlx5_ib_mr *mr) unsigned long idx = ib_umem_start(odp) >> MLX5_IMR_MTT_SHIFT; struct mlx5_ib_mr *imr = mr->parent; - xa_lock(&imr->implicit_children); - /* - * This can race with mlx5_ib_free_implicit_mr(), the first one to - * reach the xa lock wins the race and destroys the MR. - */ - if (__xa_cmpxchg(&imr->implicit_children, idx, mr, NULL, GFP_ATOMIC) != - mr) - goto out_unlock; + if (!refcount_inc_not_zero(&imr->mmkey.usecount)) + return; - atomic_inc(&imr->num_deferred_work); - call_srcu(&mr_to_mdev(mr)->odp_srcu, &mr->odp_destroy.rcu, - free_implicit_child_mr_rcu); + xa_erase(&imr->implicit_children, idx); -out_unlock: - xa_unlock(&imr->implicit_children); + /* Freeing a MR is a sleeping operation, so bounce to a work queue */ + INIT_WORK(&mr->odp_destroy.work, free_implicit_child_mr_work); + queue_work(system_unbound_wq, &mr->odp_destroy.work); } static bool mlx5_ib_invalidate_range(struct mmu_interval_notifier *mni, @@ -492,6 +467,12 @@ static struct mlx5_ib_mr *implicit_get_child_mr(struct mlx5_ib_mr *imr, mr->parent = imr; odp->private = mr; + /* + * First refcount is owned by the xarray and second refconut + * is returned to the caller. + */ + refcount_set(&mr->mmkey.usecount, 2); + err = mlx5_ib_update_xlt(mr, 0, MLX5_IMR_MTT_ENTRIES, PAGE_SHIFT, @@ -502,27 +483,28 @@ static struct mlx5_ib_mr *implicit_get_child_mr(struct mlx5_ib_mr *imr, goto out_mr; } - /* - * Once the store to either xarray completes any error unwind has to - * use synchronize_srcu(). Avoid this with xa_reserve() - */ - ret = xa_cmpxchg(&imr->implicit_children, idx, NULL, mr, - GFP_KERNEL); + xa_lock(&imr->implicit_children); + ret = __xa_cmpxchg(&imr->implicit_children, idx, NULL, mr, + GFP_KERNEL); if (unlikely(ret)) { if (xa_is_err(ret)) { ret = ERR_PTR(xa_err(ret)); - goto out_mr; + goto out_lock; } /* * Another thread beat us to creating the child mr, use * theirs. */ - goto out_mr; + refcount_inc(&ret->mmkey.usecount); + goto out_lock; } + xa_unlock(&imr->implicit_children); mlx5_ib_dbg(mr_to_mdev(imr), "key %x mr %p\n", mr->mmkey.key, mr); return mr; +out_lock: + xa_unlock(&imr->implicit_children); out_mr: mlx5_mr_cache_free(mr_to_mdev(imr), mr); out_umem: @@ -561,8 +543,6 @@ struct mlx5_ib_mr *mlx5_ib_alloc_implicit_mr(struct mlx5_ib_pd *pd, imr->ibmr.device = &dev->ib_dev; imr->umem = &umem_odp->umem; imr->is_odp_implicit = true; - atomic_set(&imr->num_deferred_work, 0); - init_waitqueue_head(&imr->q_deferred_work); xa_init(&imr->implicit_children); err = mlx5_ib_update_xlt(imr, 0, @@ -574,8 +554,7 @@ struct mlx5_ib_mr *mlx5_ib_alloc_implicit_mr(struct mlx5_ib_pd *pd, if (err) goto out_mr; - err = xa_err(xa_store(&dev->odp_mkeys, mlx5_base_mkey(imr->mmkey.key), - &imr->mmkey, GFP_KERNEL)); + err = mlx5r_store_odp_mkey(dev, &imr->mmkey); if (err) goto out_mr; @@ -593,51 +572,24 @@ void mlx5_ib_free_implicit_mr(struct mlx5_ib_mr *imr) { struct ib_umem_odp *odp_imr = to_ib_umem_odp(imr->umem); struct mlx5_ib_dev *dev = mr_to_mdev(imr); - struct list_head destroy_list; struct mlx5_ib_mr *mtt; - struct mlx5_ib_mr *tmp; unsigned long idx; - INIT_LIST_HEAD(&destroy_list); - xa_erase(&dev->odp_mkeys, mlx5_base_mkey(imr->mmkey.key)); - /* - * This stops the SRCU protected page fault path from touching either - * the imr or any children. The page fault path can only reach the - * children xarray via the imr. - */ - synchronize_srcu(&dev->odp_srcu); - /* * All work on the prefetch list must be completed, xa_erase() prevented * new work from being created. */ - wait_event(imr->q_deferred_work, !atomic_read(&imr->num_deferred_work)); - + mlx5r_deref_wait_odp_mkey(&imr->mmkey); /* * At this point it is forbidden for any other thread to enter * pagefault_mr() on this imr. It is already forbidden to call * pagefault_mr() on an implicit child. Due to this additions to * implicit_children are prevented. + * In addition, any new call to destroy_unused_implicit_child_mr() + * may return immediately. */ - /* - * Block destroy_unused_implicit_child_mr() from incrementing - * num_deferred_work. - */ - xa_lock(&imr->implicit_children); - xa_for_each (&imr->implicit_children, idx, mtt) { - __xa_erase(&imr->implicit_children, idx); - list_add(&mtt->odp_destroy.elm, &destroy_list); - } - xa_unlock(&imr->implicit_children); - - /* - * Wait for any concurrent destroy_unused_implicit_child_mr() to - * complete. - */ - wait_event(imr->q_deferred_work, !atomic_read(&imr->num_deferred_work)); - /* * Fence the imr before we destroy the children. This allows us to * skip updating the XLT of the imr during destroy of the child mkey @@ -645,8 +597,10 @@ void mlx5_ib_free_implicit_mr(struct mlx5_ib_mr *imr) */ mlx5_mr_cache_invalidate(imr); - list_for_each_entry_safe (mtt, tmp, &destroy_list, odp_destroy.elm) + xa_for_each(&imr->implicit_children, idx, mtt) { + xa_erase(&imr->implicit_children, idx); free_implicit_child_mr(mtt, false); + } mlx5_mr_cache_free(dev, imr); ib_umem_odp_release(odp_imr); @@ -665,9 +619,7 @@ void mlx5_ib_fence_odp_mr(struct mlx5_ib_mr *mr) xa_erase(&mr_to_mdev(mr)->odp_mkeys, mlx5_base_mkey(mr->mmkey.key)); /* Wait for all running page-fault handlers to finish. */ - synchronize_srcu(&mr_to_mdev(mr)->odp_srcu); - - wait_event(mr->q_deferred_work, !atomic_read(&mr->num_deferred_work)); + mlx5r_deref_wait_odp_mkey(&mr->mmkey); dma_fence_odp_mr(mr); } @@ -686,10 +638,7 @@ void mlx5_ib_fence_dmabuf_mr(struct mlx5_ib_mr *mr) /* Prevent new page faults and prefetch requests from succeeding */ xa_erase(&mr_to_mdev(mr)->odp_mkeys, mlx5_base_mkey(mr->mmkey.key)); - /* Wait for all running page-fault handlers to finish. */ - synchronize_srcu(&mr_to_mdev(mr)->odp_srcu); - - wait_event(mr->q_deferred_work, !atomic_read(&mr->num_deferred_work)); + mlx5r_deref_wait_odp_mkey(&mr->mmkey); dma_resv_lock(umem_dmabuf->attach->dmabuf->resv, NULL); mlx5_mr_cache_invalidate(mr); @@ -780,8 +729,10 @@ static int pagefault_implicit_mr(struct mlx5_ib_mr *imr, struct mlx5_ib_mr *mtt; u64 len; + xa_lock(&imr->implicit_children); mtt = xa_load(&imr->implicit_children, idx); if (unlikely(!mtt)) { + xa_unlock(&imr->implicit_children); mtt = implicit_get_child_mr(imr, idx); if (IS_ERR(mtt)) { ret = PTR_ERR(mtt); @@ -789,6 +740,9 @@ static int pagefault_implicit_mr(struct mlx5_ib_mr *imr, } upd_start_idx = min(upd_start_idx, idx); upd_len = idx - upd_start_idx + 1; + } else { + refcount_inc(&mtt->mmkey.usecount); + xa_unlock(&imr->implicit_children); } umem_odp = to_ib_umem_odp(mtt->umem); @@ -797,6 +751,9 @@ static int pagefault_implicit_mr(struct mlx5_ib_mr *imr, ret = pagefault_real_mr(mtt, umem_odp, user_va, len, bytes_mapped, flags); + + mlx5r_deref_odp_mkey(&mtt->mmkey); + if (ret < 0) goto out; user_va += len; @@ -888,7 +845,6 @@ static int pagefault_mr(struct mlx5_ib_mr *mr, u64 io_virt, size_t bcnt, { struct ib_umem_odp *odp = to_ib_umem_odp(mr->umem); - lockdep_assert_held(&mr_to_mdev(mr)->odp_srcu); if (unlikely(io_virt < mr->mmkey.iova)) return -EFAULT; @@ -980,7 +936,7 @@ static int pagefault_single_data_segment(struct mlx5_ib_dev *dev, u32 *bytes_committed, u32 *bytes_mapped) { - int npages = 0, srcu_key, ret, i, outlen, cur_outlen = 0, depth = 0; + int npages = 0, ret, i, outlen, cur_outlen = 0, depth = 0; struct pf_frame *head = NULL, *frame; struct mlx5_core_mkey *mmkey; struct mlx5_ib_mr *mr; @@ -989,14 +945,14 @@ static int pagefault_single_data_segment(struct mlx5_ib_dev *dev, size_t offset; int ndescs; - srcu_key = srcu_read_lock(&dev->odp_srcu); - io_virt += *bytes_committed; bcnt -= *bytes_committed; next_mr: + xa_lock(&dev->odp_mkeys); mmkey = xa_load(&dev->odp_mkeys, mlx5_base_mkey(key)); if (!mmkey) { + xa_unlock(&dev->odp_mkeys); mlx5_ib_dbg( dev, "skipping non ODP MR (lkey=0x%06x) in page fault handler.\n", @@ -1009,12 +965,15 @@ static int pagefault_single_data_segment(struct mlx5_ib_dev *dev, * faulted. */ ret = 0; - goto srcu_unlock; + goto end; } + refcount_inc(&mmkey->usecount); + xa_unlock(&dev->odp_mkeys); + if (!mkey_is_eq(mmkey, key)) { mlx5_ib_dbg(dev, "failed to find mkey %x\n", key); ret = -EFAULT; - goto srcu_unlock; + goto end; } switch (mmkey->type) { @@ -1023,7 +982,7 @@ static int pagefault_single_data_segment(struct mlx5_ib_dev *dev, ret = pagefault_mr(mr, io_virt, bcnt, bytes_mapped, 0); if (ret < 0) - goto srcu_unlock; + goto end; mlx5_update_odp_stats(mr, faults, ret); @@ -1038,7 +997,7 @@ static int pagefault_single_data_segment(struct mlx5_ib_dev *dev, if (depth >= MLX5_CAP_GEN(dev->mdev, max_indirection)) { mlx5_ib_dbg(dev, "indirection level exceeded\n"); ret = -EFAULT; - goto srcu_unlock; + goto end; } outlen = MLX5_ST_SZ_BYTES(query_mkey_out) + @@ -1049,7 +1008,7 @@ static int pagefault_single_data_segment(struct mlx5_ib_dev *dev, out = kzalloc(outlen, GFP_KERNEL); if (!out) { ret = -ENOMEM; - goto srcu_unlock; + goto end; } cur_outlen = outlen; } @@ -1059,7 +1018,7 @@ static int pagefault_single_data_segment(struct mlx5_ib_dev *dev, ret = mlx5_core_query_mkey(dev->mdev, mmkey, out, outlen); if (ret) - goto srcu_unlock; + goto end; offset = io_virt - MLX5_GET64(query_mkey_out, out, memory_key_mkey_entry.start_addr); @@ -1073,7 +1032,7 @@ static int pagefault_single_data_segment(struct mlx5_ib_dev *dev, frame = kzalloc(sizeof(*frame), GFP_KERNEL); if (!frame) { ret = -ENOMEM; - goto srcu_unlock; + goto end; } frame->key = be32_to_cpu(pklm->key); @@ -1092,7 +1051,7 @@ static int pagefault_single_data_segment(struct mlx5_ib_dev *dev, default: mlx5_ib_dbg(dev, "wrong mkey type %d\n", mmkey->type); ret = -EFAULT; - goto srcu_unlock; + goto end; } if (head) { @@ -1105,10 +1064,13 @@ static int pagefault_single_data_segment(struct mlx5_ib_dev *dev, depth = frame->depth; kfree(frame); + mlx5r_deref_odp_mkey(mmkey); goto next_mr; } -srcu_unlock: +end: + if (mmkey) + mlx5r_deref_odp_mkey(mmkey); while (head) { frame = head; head = frame->next; @@ -1116,7 +1078,6 @@ static int pagefault_single_data_segment(struct mlx5_ib_dev *dev, } kfree(out); - srcu_read_unlock(&dev->odp_srcu, srcu_key); *bytes_committed = 0; return ret ? ret : npages; } @@ -1824,8 +1785,8 @@ static void destroy_prefetch_work(struct prefetch_mr_work *work) u32 i; for (i = 0; i < work->num_sge; ++i) - if (atomic_dec_and_test(&work->frags[i].mr->num_deferred_work)) - wake_up(&work->frags[i].mr->q_deferred_work); + mlx5r_deref_odp_mkey(&work->frags[i].mr->mmkey); + kvfree(work); } @@ -1835,24 +1796,30 @@ get_prefetchable_mr(struct ib_pd *pd, enum ib_uverbs_advise_mr_advice advice, { struct mlx5_ib_dev *dev = to_mdev(pd->device); struct mlx5_core_mkey *mmkey; - struct mlx5_ib_mr *mr; - - lockdep_assert_held(&dev->odp_srcu); + struct mlx5_ib_mr *mr = NULL; + xa_lock(&dev->odp_mkeys); mmkey = xa_load(&dev->odp_mkeys, mlx5_base_mkey(lkey)); if (!mmkey || mmkey->key != lkey || mmkey->type != MLX5_MKEY_MR) - return NULL; + goto end; mr = container_of(mmkey, struct mlx5_ib_mr, mmkey); - if (mr->ibmr.pd != pd) - return NULL; + if (mr->ibmr.pd != pd) { + mr = NULL; + goto end; + } /* prefetch with write-access must be supported by the MR */ if (advice == IB_UVERBS_ADVISE_MR_ADVICE_PREFETCH_WRITE && - !mr->umem->writable) - return NULL; + !mr->umem->writable) { + mr = NULL; + goto end; + } + refcount_inc(&mmkey->usecount); +end: + xa_unlock(&dev->odp_mkeys); return mr; } @@ -1860,17 +1827,12 @@ static void mlx5_ib_prefetch_mr_work(struct work_struct *w) { struct prefetch_mr_work *work = container_of(w, struct prefetch_mr_work, work); - struct mlx5_ib_dev *dev; u32 bytes_mapped = 0; - int srcu_key; int ret; u32 i; /* We rely on IB/core that work is executed if we have num_sge != 0 only. */ WARN_ON(!work->num_sge); - dev = mr_to_mdev(work->frags[0].mr); - /* SRCU should be held when calling to mlx5_odp_populate_xlt() */ - srcu_key = srcu_read_lock(&dev->odp_srcu); for (i = 0; i < work->num_sge; ++i) { ret = pagefault_mr(work->frags[i].mr, work->frags[i].io_virt, work->frags[i].length, &bytes_mapped, @@ -1879,7 +1841,6 @@ static void mlx5_ib_prefetch_mr_work(struct work_struct *w) continue; mlx5_update_odp_stats(work->frags[i].mr, prefetch, ret); } - srcu_read_unlock(&dev->odp_srcu, srcu_key); destroy_prefetch_work(work); } @@ -1903,9 +1864,6 @@ static bool init_prefetch_work(struct ib_pd *pd, work->num_sge = i; return false; } - - /* Keep the MR pointer will valid outside the SRCU */ - atomic_inc(&work->frags[i].mr->num_deferred_work); } work->num_sge = num_sge; return true; @@ -1916,42 +1874,35 @@ static int mlx5_ib_prefetch_sg_list(struct ib_pd *pd, u32 pf_flags, struct ib_sge *sg_list, u32 num_sge) { - struct mlx5_ib_dev *dev = to_mdev(pd->device); u32 bytes_mapped = 0; - int srcu_key; int ret = 0; u32 i; - srcu_key = srcu_read_lock(&dev->odp_srcu); for (i = 0; i < num_sge; ++i) { struct mlx5_ib_mr *mr; mr = get_prefetchable_mr(pd, advice, sg_list[i].lkey); - if (!mr) { - ret = -ENOENT; - goto out; - } + if (!mr) + return -ENOENT; ret = pagefault_mr(mr, sg_list[i].addr, sg_list[i].length, &bytes_mapped, pf_flags); - if (ret < 0) - goto out; + if (ret < 0) { + mlx5r_deref_odp_mkey(&mr->mmkey); + return ret; + } mlx5_update_odp_stats(mr, prefetch, ret); + mlx5r_deref_odp_mkey(&mr->mmkey); } - ret = 0; -out: - srcu_read_unlock(&dev->odp_srcu, srcu_key); - return ret; + return 0; } int mlx5_ib_advise_mr_prefetch(struct ib_pd *pd, enum ib_uverbs_advise_mr_advice advice, u32 flags, struct ib_sge *sg_list, u32 num_sge) { - struct mlx5_ib_dev *dev = to_mdev(pd->device); u32 pf_flags = 0; struct prefetch_mr_work *work; - int srcu_key; if (advice == IB_UVERBS_ADVISE_MR_ADVICE_PREFETCH) pf_flags |= MLX5_PF_FLAGS_DOWNGRADE; @@ -1967,13 +1918,10 @@ int mlx5_ib_advise_mr_prefetch(struct ib_pd *pd, if (!work) return -ENOMEM; - srcu_key = srcu_read_lock(&dev->odp_srcu); if (!init_prefetch_work(pd, advice, pf_flags, work, sg_list, num_sge)) { - srcu_read_unlock(&dev->odp_srcu, srcu_key); destroy_prefetch_work(work); return -EINVAL; } queue_work(system_unbound_wq, &work->work); - srcu_read_unlock(&dev->odp_srcu, srcu_key); return 0; } diff --git a/drivers/net/ethernet/mellanox/mlx5/core/mr.c b/drivers/net/ethernet/mellanox/mlx5/core/mr.c index 9eb51f06d3ae..50af84e76fb6 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/mr.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/mr.c @@ -56,6 +56,7 @@ int mlx5_core_create_mkey(struct mlx5_core_dev *dev, mkey->size = MLX5_GET64(mkc, mkc, len); mkey->key |= mlx5_idx_to_mkey(mkey_index); mkey->pd = MLX5_GET(mkc, mkc, pd); + init_waitqueue_head(&mkey->wait); mlx5_core_dbg(dev, "out 0x%x, mkey 0x%x\n", mkey_index, mkey->key); return 0; diff --git a/include/linux/mlx5/driver.h b/include/linux/mlx5/driver.h index 11558c2e99f0..b0a59a18a708 100644 --- a/include/linux/mlx5/driver.h +++ b/include/linux/mlx5/driver.h @@ -366,6 +366,8 @@ struct mlx5_core_mkey { u32 key; u32 pd; u32 type; + struct wait_queue_head wait; + refcount_t usecount; }; #define MLX5_24BIT_MASK ((1 << 24) - 1) From dc78074a808e2363fd0de92fbc5f5bf44c9d0db1 Mon Sep 17 00:00:00 2001 From: Bob Pearson Date: Fri, 5 Feb 2021 17:05:26 -0600 Subject: [PATCH 377/414] RDMA/rxe: Fix minor coding style issues checkpatch -f found 3 warnings in RDMA/rxe 1. a missing space following switch 2. return followed by else 3. use of strlcpy() instead of strscpy(). This patch fixes each of these. In ... } elseif (...) { ... return 0; } else ... The middle block can be safely moved since it is completely independent of the other code. Link: https://lore.kernel.org/r/20210205230525.49068-1-rpearson@hpe.com Signed-off-by: Bob Pearson Signed-off-by: Jason Gunthorpe --- drivers/infiniband/sw/rxe/rxe_qp.c | 2 +- drivers/infiniband/sw/rxe/rxe_verbs.c | 16 ++++++++++------ 2 files changed, 11 insertions(+), 7 deletions(-) diff --git a/drivers/infiniband/sw/rxe/rxe_qp.c b/drivers/infiniband/sw/rxe/rxe_qp.c index 65c8df812aeb..34ae957a315c 100644 --- a/drivers/infiniband/sw/rxe/rxe_qp.c +++ b/drivers/infiniband/sw/rxe/rxe_qp.c @@ -62,7 +62,7 @@ int rxe_qp_chk_init(struct rxe_dev *rxe, struct ib_qp_init_attr *init) struct rxe_port *port; int port_num = init->port_num; - switch(init->qp_type) { + switch (init->qp_type) { case IB_QPT_SMI: case IB_QPT_GSI: case IB_QPT_RC: diff --git a/drivers/infiniband/sw/rxe/rxe_verbs.c b/drivers/infiniband/sw/rxe/rxe_verbs.c index 7483a33bcec5..984909e03b35 100644 --- a/drivers/infiniband/sw/rxe/rxe_verbs.c +++ b/drivers/infiniband/sw/rxe/rxe_verbs.c @@ -566,6 +566,13 @@ static int init_send_wqe(struct rxe_qp *qp, const struct ib_send_wr *ibwr, init_send_wr(qp, &wqe->wr, ibwr); + /* local operation */ + if (unlikely(mask & WR_REG_MASK)) { + wqe->mask = mask; + wqe->state = wqe_state_posted; + return 0; + } + if (qp_type(qp) == IB_QPT_UD || qp_type(qp) == IB_QPT_SMI || qp_type(qp) == IB_QPT_GSI) @@ -581,13 +588,10 @@ static int init_send_wqe(struct rxe_qp *qp, const struct ib_send_wr *ibwr, p += sge->length; } - } else if (mask & WR_REG_MASK) { - wqe->mask = mask; - wqe->state = wqe_state_posted; - return 0; - } else + } else { memcpy(wqe->dma.sge, ibwr->sg_list, num_sge * sizeof(struct ib_sge)); + } wqe->iova = mask & WR_ATOMIC_MASK ? atomic_wr(ibwr)->remote_addr : mask & WR_READ_OR_WRITE_MASK ? rdma_wr(ibwr)->remote_addr : 0; @@ -1118,7 +1122,7 @@ int rxe_register_device(struct rxe_dev *rxe, const char *ibdev_name) struct ib_device *dev = &rxe->ib_dev; struct crypto_shash *tfm; - strlcpy(dev->node_desc, "rxe", sizeof(dev->node_desc)); + strscpy(dev->node_desc, "rxe", sizeof(dev->node_desc)); dev->node_type = RDMA_NODE_IB_CA; dev->phys_port_cnt = 1; From 086f580c015261b90276fee232e11d76b2d6ece6 Mon Sep 17 00:00:00 2001 From: Bob Pearson Date: Fri, 5 Feb 2021 18:24:37 -0600 Subject: [PATCH 378/414] RDMA/rxe: Cleanup init_send_wqe This patch changes the type of init_send_wqe in rxe_verbs.c to void since it always returns 0. It also separates out the code that copies inline data into the send wqe as copy_inline_data_to_wqe(). Link: https://lore.kernel.org/r/20210206002437.2756-1-rpearson@hpe.com Signed-off-by: Bob Pearson Signed-off-by: Jason Gunthorpe --- drivers/infiniband/sw/rxe/rxe_verbs.c | 42 ++++++++++++--------------- 1 file changed, 19 insertions(+), 23 deletions(-) diff --git a/drivers/infiniband/sw/rxe/rxe_verbs.c b/drivers/infiniband/sw/rxe/rxe_verbs.c index 984909e03b35..dee5e0e919d2 100644 --- a/drivers/infiniband/sw/rxe/rxe_verbs.c +++ b/drivers/infiniband/sw/rxe/rxe_verbs.c @@ -555,14 +555,24 @@ static void init_send_wr(struct rxe_qp *qp, struct rxe_send_wr *wr, } } -static int init_send_wqe(struct rxe_qp *qp, const struct ib_send_wr *ibwr, +static void copy_inline_data_to_wqe(struct rxe_send_wqe *wqe, + const struct ib_send_wr *ibwr) +{ + struct ib_sge *sge = ibwr->sg_list; + u8 *p = wqe->dma.inline_data; + int i; + + for (i = 0; i < ibwr->num_sge; i++, sge++) { + memcpy(p, (void *)(uintptr_t)sge->addr, sge->length); + p += sge->length; + } +} + +static void init_send_wqe(struct rxe_qp *qp, const struct ib_send_wr *ibwr, unsigned int mask, unsigned int length, struct rxe_send_wqe *wqe) { int num_sge = ibwr->num_sge; - struct ib_sge *sge; - int i; - u8 *p; init_send_wr(qp, &wqe->wr, ibwr); @@ -570,7 +580,7 @@ static int init_send_wqe(struct rxe_qp *qp, const struct ib_send_wr *ibwr, if (unlikely(mask & WR_REG_MASK)) { wqe->mask = mask; wqe->state = wqe_state_posted; - return 0; + return; } if (qp_type(qp) == IB_QPT_UD || @@ -578,20 +588,11 @@ static int init_send_wqe(struct rxe_qp *qp, const struct ib_send_wr *ibwr, qp_type(qp) == IB_QPT_GSI) memcpy(&wqe->av, &to_rah(ud_wr(ibwr)->ah)->av, sizeof(wqe->av)); - if (unlikely(ibwr->send_flags & IB_SEND_INLINE)) { - p = wqe->dma.inline_data; - - sge = ibwr->sg_list; - for (i = 0; i < num_sge; i++, sge++) { - memcpy(p, (void *)(uintptr_t)sge->addr, - sge->length); - - p += sge->length; - } - } else { + if (unlikely(ibwr->send_flags & IB_SEND_INLINE)) + copy_inline_data_to_wqe(wqe, ibwr); + else memcpy(wqe->dma.sge, ibwr->sg_list, num_sge * sizeof(struct ib_sge)); - } wqe->iova = mask & WR_ATOMIC_MASK ? atomic_wr(ibwr)->remote_addr : mask & WR_READ_OR_WRITE_MASK ? rdma_wr(ibwr)->remote_addr : 0; @@ -603,8 +604,6 @@ static int init_send_wqe(struct rxe_qp *qp, const struct ib_send_wr *ibwr, wqe->dma.sge_offset = 0; wqe->state = wqe_state_posted; wqe->ssn = atomic_add_return(1, &qp->ssn); - - return 0; } static int post_one_send(struct rxe_qp *qp, const struct ib_send_wr *ibwr, @@ -627,10 +626,7 @@ static int post_one_send(struct rxe_qp *qp, const struct ib_send_wr *ibwr, } send_wqe = producer_addr(sq->queue); - - err = init_send_wqe(qp, ibwr, mask, length, send_wqe); - if (unlikely(err)) - goto err1; + init_send_wqe(qp, ibwr, mask, length, send_wqe); advance_producer(sq->queue); spin_unlock_irqrestore(&qp->sq.sq_lock, flags); From a14e3caaaa72e9c5c91e823dde3383122215207d Mon Sep 17 00:00:00 2001 From: Sebastian Andrzej Siewior Date: Mon, 8 Feb 2021 20:33:47 +0100 Subject: [PATCH 379/414] RDMA/qedr: Remove in_irq() usage from debug output qedr_gsi_post_send() has a debug output which prints the return value of in_irq() and irqs_disabled(). The result of the in_irq(), even if invoked from an interrupt handler, is subject to change depending on the `threadirqs' command line switch. The result of irqs_disabled() is always be 1 because the function acquires spinlock_t with spin_lock_irqsave(). Remove in_irq() and irqs_disabled() from the debug output because it provides little value. Link: https://lore.kernel.org/r/20210208193347.383254-1-bigeasy@linutronix.de Signed-off-by: Sebastian Andrzej Siewior Signed-off-by: Jason Gunthorpe --- drivers/infiniband/hw/qedr/qedr_roce_cm.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/infiniband/hw/qedr/qedr_roce_cm.c b/drivers/infiniband/hw/qedr/qedr_roce_cm.c index f5542d703ef9..13e5e6bbec99 100644 --- a/drivers/infiniband/hw/qedr/qedr_roce_cm.c +++ b/drivers/infiniband/hw/qedr/qedr_roce_cm.c @@ -586,8 +586,8 @@ int qedr_gsi_post_send(struct ib_qp *ibqp, const struct ib_send_wr *wr, qp->wqe_wr_id[qp->sq.prod].wr_id = wr->wr_id; qedr_inc_sw_prod(&qp->sq); DP_DEBUG(qp->dev, QEDR_MSG_GSI, - "gsi post send: opcode=%d, in_irq=%ld, irqs_disabled=%d, wr_id=%llx\n", - wr->opcode, in_irq(), irqs_disabled(), wr->wr_id); + "gsi post send: opcode=%d, wr_id=%llx\n", wr->opcode, + wr->wr_id); } else { DP_ERR(dev, "gsi post send: failed to transmit (rc=%d)\n", rc); rc = -EAGAIN; From c70f51de85302e76a59f6c6ce3dcd27b6411d23b Mon Sep 17 00:00:00 2001 From: Patrisious Haddad Date: Tue, 9 Feb 2021 15:04:29 +0200 Subject: [PATCH 380/414] RDMA/mlx5: Support 400Gbps IB rate in mlx5 driver Support 400Gbps IB rate in mlx5 driver. Link: https://lore.kernel.org/r/20210209130429.698237-1-leon@kernel.org Signed-off-by: Patrisious Haddad Signed-off-by: Mark Zhang Reviewed-by: Maor Gottlieb Signed-off-by: Leon Romanovsky Signed-off-by: Jason Gunthorpe --- drivers/infiniband/hw/mlx5/qp.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/infiniband/hw/mlx5/qp.c b/drivers/infiniband/hw/mlx5/qp.c index 358c44c5a8fc..b15342a1fff3 100644 --- a/drivers/infiniband/hw/mlx5/qp.c +++ b/drivers/infiniband/hw/mlx5/qp.c @@ -3069,6 +3069,8 @@ static int ib_to_mlx5_rate_map(u8 rate) return 4; case IB_RATE_50_GBPS: return 5; + case IB_RATE_400_GBPS: + return 6; default: return rate + MLX5_STAT_RATE_OFFSET; } From e0c0840a46db9d50ba7391082d665d74f320c39f Mon Sep 17 00:00:00 2001 From: Seth Forshee Date: Thu, 28 Jan 2021 10:17:21 -0600 Subject: [PATCH 381/414] selftests/seccomp: Accept any valid fd in user_notification_addfd This test expects fds to have specific values, which works fine when the test is run standalone. However, the kselftest runner consumes a couple of extra fds for redirection when running tests, so the test fails when run via kselftest. Change the test to pass on any valid fd number. Signed-off-by: Seth Forshee Acked-by: Shuah Khan Acked-by: Kees Cook Signed-off-by: Shuah Khan --- tools/testing/selftests/seccomp/seccomp_bpf.c | 8 ++------ 1 file changed, 2 insertions(+), 6 deletions(-) diff --git a/tools/testing/selftests/seccomp/seccomp_bpf.c b/tools/testing/selftests/seccomp/seccomp_bpf.c index 26c72f2b61b1..9338df6f4ca8 100644 --- a/tools/testing/selftests/seccomp/seccomp_bpf.c +++ b/tools/testing/selftests/seccomp/seccomp_bpf.c @@ -4019,18 +4019,14 @@ TEST(user_notification_addfd) /* Verify we can set an arbitrary remote fd */ fd = ioctl(listener, SECCOMP_IOCTL_NOTIF_ADDFD, &addfd); - /* - * The child has fds 0(stdin), 1(stdout), 2(stderr), 3(memfd), - * 4(listener), so the newly allocated fd should be 5. - */ - EXPECT_EQ(fd, 5); + EXPECT_GE(fd, 0); EXPECT_EQ(filecmp(getpid(), pid, memfd, fd), 0); /* Verify we can set an arbitrary remote fd with large size */ memset(&big, 0x0, sizeof(big)); big.addfd = addfd; fd = ioctl(listener, SECCOMP_IOCTL_NOTIF_ADDFD_BIG, &big); - EXPECT_EQ(fd, 6); + EXPECT_GE(fd, 0); /* Verify we can set a specific remote fd */ addfd.newfd = 42; From 8a8109f303e25a27f92c1d8edd67d7cbbc60a4eb Mon Sep 17 00:00:00 2001 From: Muchun Song Date: Wed, 10 Feb 2021 11:48:23 +0800 Subject: [PATCH 382/414] printk: fix deadlock when kernel panic printk_safe_flush_on_panic() caused the following deadlock on our server: CPU0: CPU1: panic rcu_dump_cpu_stacks kdump_nmi_shootdown_cpus nmi_trigger_cpumask_backtrace register_nmi_handler(crash_nmi_callback) printk_safe_flush __printk_safe_flush raw_spin_lock_irqsave(&read_lock) // send NMI to other processors apic_send_IPI_allbutself(NMI_VECTOR) // NMI interrupt, dead loop crash_nmi_callback printk_safe_flush_on_panic printk_safe_flush __printk_safe_flush // deadlock raw_spin_lock_irqsave(&read_lock) DEADLOCK: read_lock is taken on CPU1 and will never get released. It happens when panic() stops a CPU by NMI while it has been in the middle of printk_safe_flush(). Handle the lock the same way as logbuf_lock. The printk_safe buffers are flushed only when both locks can be safely taken. It can avoid the deadlock _in this particular case_ at expense of losing contents of printk_safe buffers. Note: It would actually be safe to re-init the locks when all CPUs were stopped by NMI. But it would require passing this information from arch-specific code. It is not worth the complexity. Especially because logbuf_lock and printk_safe buffers have been obsoleted by the lockless ring buffer. Fixes: cf9b1106c81c ("printk/nmi: flush NMI messages on the system panic") Signed-off-by: Muchun Song Reviewed-by: Petr Mladek Cc: Acked-by: Sergey Senozhatsky Signed-off-by: Petr Mladek Link: https://lore.kernel.org/r/20210210034823.64867-1-songmuchun@bytedance.com --- kernel/printk/printk_safe.c | 16 ++++++++++++---- 1 file changed, 12 insertions(+), 4 deletions(-) diff --git a/kernel/printk/printk_safe.c b/kernel/printk/printk_safe.c index a0e6f746de6c..2e9e3ed7d63e 100644 --- a/kernel/printk/printk_safe.c +++ b/kernel/printk/printk_safe.c @@ -45,6 +45,8 @@ struct printk_safe_seq_buf { static DEFINE_PER_CPU(struct printk_safe_seq_buf, safe_print_seq); static DEFINE_PER_CPU(int, printk_context); +static DEFINE_RAW_SPINLOCK(safe_read_lock); + #ifdef CONFIG_PRINTK_NMI static DEFINE_PER_CPU(struct printk_safe_seq_buf, nmi_print_seq); #endif @@ -180,8 +182,6 @@ static void report_message_lost(struct printk_safe_seq_buf *s) */ static void __printk_safe_flush(struct irq_work *work) { - static raw_spinlock_t read_lock = - __RAW_SPIN_LOCK_INITIALIZER(read_lock); struct printk_safe_seq_buf *s = container_of(work, struct printk_safe_seq_buf, work); unsigned long flags; @@ -195,7 +195,7 @@ static void __printk_safe_flush(struct irq_work *work) * different CPUs. This is especially important when printing * a backtrace. */ - raw_spin_lock_irqsave(&read_lock, flags); + raw_spin_lock_irqsave(&safe_read_lock, flags); i = 0; more: @@ -232,7 +232,7 @@ static void __printk_safe_flush(struct irq_work *work) out: report_message_lost(s); - raw_spin_unlock_irqrestore(&read_lock, flags); + raw_spin_unlock_irqrestore(&safe_read_lock, flags); } /** @@ -278,6 +278,14 @@ void printk_safe_flush_on_panic(void) raw_spin_lock_init(&logbuf_lock); } + if (raw_spin_is_locked(&safe_read_lock)) { + if (num_online_cpus() > 1) + return; + + debug_locks_off(); + raw_spin_lock_init(&safe_read_lock); + } + printk_safe_flush(); } From 66040b2d5d41f85cb1a752a75260595344c5ec3b Mon Sep 17 00:00:00 2001 From: Sebastian Andrzej Siewior Date: Sat, 23 Jan 2021 21:10:25 +0100 Subject: [PATCH 383/414] smp: Process pending softirqs in flush_smp_call_function_from_idle() send_call_function_single_ipi() may wake an idle CPU without sending an IPI. The woken up CPU will process the SMP-functions in flush_smp_call_function_from_idle(). Any raised softirq from within the SMP-function call will not be processed. Should the CPU have no tasks assigned, then it will go back to idle with pending softirqs and the NOHZ will rightfully complain. Process pending softirqs on return from flush_smp_call_function_queue(). Fixes: b2a02fc43a1f4 ("smp: Optimize send_call_function_single_ipi()") Reported-by: Jens Axboe Signed-off-by: Sebastian Andrzej Siewior Signed-off-by: Peter Zijlstra (Intel) Link: https://lkml.kernel.org/r/20210123201027.3262800-2-bigeasy@linutronix.de --- kernel/smp.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/kernel/smp.c b/kernel/smp.c index 1b6070bf97bb..aeb0adfa0606 100644 --- a/kernel/smp.c +++ b/kernel/smp.c @@ -14,6 +14,7 @@ #include #include #include +#include #include #include #include @@ -449,6 +450,9 @@ void flush_smp_call_function_from_idle(void) local_irq_save(flags); flush_smp_call_function_queue(true); + if (local_softirq_pending()) + do_softirq(); + local_irq_restore(flags); } From f11e2bc682cc197e33bfd118178cadb61326dc0e Mon Sep 17 00:00:00 2001 From: Daniel Thompson Date: Wed, 10 Feb 2021 14:25:25 +0000 Subject: [PATCH 384/414] kgdb: Remove kgdb_schedule_breakpoint() To the very best of my knowledge there has never been any in-tree code that calls this function. It exists largely to support an out-of-tree driver that provides kgdb-over-ethernet using the netpoll API. kgdboe has been out-of-tree for more than 10 years and I don't recall any serious attempt to upstream it at any point in the last five. At this stage it looks better to stop carrying this code in the kernel and integrate the code into the out-of-tree driver instead. The long term trajectory for the kernel looks likely to include effort to remove or reduce the use of tasklets (something that has also been true for the last 10 years). Thus the main real reason for this patch is to make explicit that the in-tree kgdb features do not require tasklets. Signed-off-by: Daniel Thompson Link: https://lore.kernel.org/r/20210210142525.2876648-1-daniel.thompson@linaro.org Reviewed-by: Douglas Anderson Acked-by: Davidlohr Bueso Acked-by: Jason Wessel --- include/linux/kgdb.h | 1 - kernel/debug/debug_core.c | 26 -------------------------- 2 files changed, 27 deletions(-) diff --git a/include/linux/kgdb.h b/include/linux/kgdb.h index 0d6cf64c8bb1..0444b44bd156 100644 --- a/include/linux/kgdb.h +++ b/include/linux/kgdb.h @@ -325,7 +325,6 @@ extern char *kgdb_mem2hex(char *mem, char *buf, int count); extern int kgdb_hex2mem(char *buf, char *mem, int count); extern int kgdb_isremovedbreak(unsigned long addr); -extern void kgdb_schedule_breakpoint(void); extern int kgdb_has_hit_break(unsigned long addr); extern int diff --git a/kernel/debug/debug_core.c b/kernel/debug/debug_core.c index 7f22c1c0ffe8..b636d517c02c 100644 --- a/kernel/debug/debug_core.c +++ b/kernel/debug/debug_core.c @@ -119,7 +119,6 @@ static DEFINE_RAW_SPINLOCK(dbg_slave_lock); */ static atomic_t masters_in_kgdb; static atomic_t slaves_in_kgdb; -static atomic_t kgdb_break_tasklet_var; atomic_t kgdb_setting_breakpoint; struct task_struct *kgdb_usethread; @@ -1084,31 +1083,6 @@ static void kgdb_unregister_callbacks(void) } } -/* - * There are times a tasklet needs to be used vs a compiled in - * break point so as to cause an exception outside a kgdb I/O module, - * such as is the case with kgdboe, where calling a breakpoint in the - * I/O driver itself would be fatal. - */ -static void kgdb_tasklet_bpt(unsigned long ing) -{ - kgdb_breakpoint(); - atomic_set(&kgdb_break_tasklet_var, 0); -} - -static DECLARE_TASKLET_OLD(kgdb_tasklet_breakpoint, kgdb_tasklet_bpt); - -void kgdb_schedule_breakpoint(void) -{ - if (atomic_read(&kgdb_break_tasklet_var) || - atomic_read(&kgdb_active) != -1 || - atomic_read(&kgdb_setting_breakpoint)) - return; - atomic_inc(&kgdb_break_tasklet_var); - tasklet_schedule(&kgdb_tasklet_breakpoint); -} -EXPORT_SYMBOL_GPL(kgdb_schedule_breakpoint); - /** * kgdb_register_io_module - register KGDB IO module * @new_dbg_io_ops: the io ops vector From a15cb2c1658417f9e8c7e84fe5d6ee0b63cbb9b0 Mon Sep 17 00:00:00 2001 From: Colin Ian King Date: Wed, 10 Feb 2021 11:56:24 +0000 Subject: [PATCH 385/414] doc/admin-guide: fix spelling mistake: "perfomance" -> "performance" There is a spelling mistake in the perf-security documentation. Fix it. Signed-off-by: Colin Ian King Link: https://lore.kernel.org/r/20210210115624.53551-1-colin.king@canonical.com Signed-off-by: Jonathan Corbet --- Documentation/admin-guide/perf-security.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Documentation/admin-guide/perf-security.rst b/Documentation/admin-guide/perf-security.rst index 904e4eb37f99..34aa334320ca 100644 --- a/Documentation/admin-guide/perf-security.rst +++ b/Documentation/admin-guide/perf-security.rst @@ -72,7 +72,7 @@ monitoring and observability operations, thus, bypass *scope* permissions checks in the kernel. CAP_PERFMON implements the principle of least privilege [13]_ (POSIX 1003.1e: 2.2.2.39) for performance monitoring and observability operations in the kernel and provides a secure approach to -perfomance monitoring and observability in the system. +performance monitoring and observability in the system. For backward compatibility reasons the access to perf_events monitoring and observability operations is also open for CAP_SYS_ADMIN privileged From 3cae85f5f9e9d866488fbacb42232e518a30cc00 Mon Sep 17 00:00:00 2001 From: Florian Fainelli Date: Tue, 9 Feb 2021 09:23:48 -0800 Subject: [PATCH 386/414] Documentation/admin-guide: kernel-parameters: Update nohlt section Update the documentation regarding "nohlt" and indicate that it is not only for bugs, but can be useful to disable the architecture specific sleep instructions. ARM, ARM64, SuperH and Microblaze all use CONFIG_GENERIC_IDLE_POLL_SETUP which takes care of honoring the "hlt"/"nohlt" parameters. Signed-off-by: Florian Fainelli Acked-by: Randy Dunlap Link: https://lore.kernel.org/r/20210209172349.2249596-1-f.fainelli@gmail.com Signed-off-by: Jonathan Corbet --- Documentation/admin-guide/kernel-parameters.txt | 11 ++++++++--- 1 file changed, 8 insertions(+), 3 deletions(-) diff --git a/Documentation/admin-guide/kernel-parameters.txt b/Documentation/admin-guide/kernel-parameters.txt index 50d9b9bfcd6e..e938ccf3dad3 100644 --- a/Documentation/admin-guide/kernel-parameters.txt +++ b/Documentation/admin-guide/kernel-parameters.txt @@ -3266,9 +3266,14 @@ parameter, xsave area per process might occupy more memory on xsaves enabled systems. - nohlt [BUGS=ARM,SH] Tells the kernel that the sleep(SH) or - wfi(ARM) instruction doesn't work correctly and not to - use it. This is also useful when using JTAG debugger. + nohlt [ARM,ARM64,MICROBLAZE,SH] Forces the kernel to busy wait + in do_idle() and not use the arch_cpu_idle() + implementation; requires CONFIG_GENERIC_IDLE_POLL_SETUP + to be effective. This is useful on platforms where the + sleep(SH) or wfi(ARM,ARM64) instructions do not work + correctly or when doing power measurements to evalute + the impact of the sleep instructions. This is also + useful when using JTAG debugger. no_file_caps Tells the kernel not to honor file capabilities. The only way then for a file to be executed with privilege From b7592e5b82db19b72a34b471f3296ad3f651c8b9 Mon Sep 17 00:00:00 2001 From: Yorick de Wid Date: Mon, 8 Feb 2021 16:04:48 +0100 Subject: [PATCH 387/414] docs: Remove the Microsoft rhetoric There is no need to need to name Microsoft. The point is clear without that context. Signed-off-by: Yorick de Wid Link: https://lore.kernel.org/r/20210208150447.87104-1-ydewid@gmail.com Signed-off-by: Jonathan Corbet --- Documentation/process/coding-style.rst | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/Documentation/process/coding-style.rst b/Documentation/process/coding-style.rst index a1e061149e0d..42969ab37b34 100644 --- a/Documentation/process/coding-style.rst +++ b/Documentation/process/coding-style.rst @@ -323,8 +323,7 @@ that counts the number of active users, you should call that Encoding the type of a function into the name (so-called Hungarian notation) is asinine - the compiler knows the types anyway and can check -those, and it only confuses the programmer. No wonder Microsoft makes buggy -programs. +those, and it only confuses the programmer. LOCAL variable names should be short, and to the point. If you have some random integer loop counter, it should probably be called ``i``. From 3c2e0a489da6a7c48ad67a246c7a287fcb4a4607 Mon Sep 17 00:00:00 2001 From: Bhaskar Chowdhury Date: Fri, 5 Feb 2021 17:29:51 +0530 Subject: [PATCH 388/414] docs: kernel-hacking: be more civil Remove the f-bomb from locking.rst. Let's have a moment of silence, though, as we mark the passing of the last of Rusty's once plentiful profanities in this venerable document. Signed-off-by: Bhaskar Chowdhury Link: https://lore.kernel.org/r/20210205115951.1276526-1-unixbhaskar@gmail.com [jc: rewrote changelog] Signed-off-by: Jonathan Corbet --- Documentation/kernel-hacking/locking.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Documentation/kernel-hacking/locking.rst b/Documentation/kernel-hacking/locking.rst index c3448929a824..ed1284c6f078 100644 --- a/Documentation/kernel-hacking/locking.rst +++ b/Documentation/kernel-hacking/locking.rst @@ -958,7 +958,7 @@ grabs a read lock, searches a list, fails to find what it wants, drops the read lock, grabs a write lock and inserts the object has a race condition. -If you don't see why, please stay the fuck away from my code. +If you don't see why, please stay away from my code. Racing Timers: A Kernel Pastime ------------------------------- From 6778ff5b21bd8e78c8bd547fd66437cf2657fd9b Mon Sep 17 00:00:00 2001 From: Suravee Suthikulpanit Date: Mon, 8 Feb 2021 06:27:12 -0600 Subject: [PATCH 389/414] iommu/amd: Fix performance counter initialization Certain AMD platforms enable power gating feature for IOMMU PMC, which prevents the IOMMU driver from updating the counter while trying to validate the PMC functionality in the init_iommu_perf_ctr(). This results in disabling PMC support and the following error message: "AMD-Vi: Unable to read/write to IOMMU perf counter" To workaround this issue, disable power gating temporarily by programming the counter source to non-zero value while validating the counter, and restore the prior state afterward. Signed-off-by: Suravee Suthikulpanit Tested-by: Tj (Elloe Linux) Link: https://lore.kernel.org/r/20210208122712.5048-1-suravee.suthikulpanit@amd.com Bugzilla: https://bugzilla.kernel.org/show_bug.cgi?id=201753 Signed-off-by: Joerg Roedel --- drivers/iommu/amd/init.c | 45 ++++++++++++++++++++++++++++++---------- 1 file changed, 34 insertions(+), 11 deletions(-) diff --git a/drivers/iommu/amd/init.c b/drivers/iommu/amd/init.c index 5a6c511e91d1..5a0762377de5 100644 --- a/drivers/iommu/amd/init.c +++ b/drivers/iommu/amd/init.c @@ -12,6 +12,7 @@ #include #include #include +#include #include #include #include @@ -256,6 +257,8 @@ static enum iommu_init_state init_state = IOMMU_START_STATE; static int amd_iommu_enable_interrupts(void); static int __init iommu_go_to_state(enum iommu_init_state state); static void init_device_table_dma(void); +static int iommu_pc_get_set_reg(struct amd_iommu *iommu, u8 bank, u8 cntr, + u8 fxn, u64 *value, bool is_write); static bool amd_iommu_pre_enabled = true; @@ -1697,13 +1700,11 @@ static int __init init_iommu_all(struct acpi_table_header *table) return 0; } -static int iommu_pc_get_set_reg(struct amd_iommu *iommu, u8 bank, u8 cntr, - u8 fxn, u64 *value, bool is_write); - -static void init_iommu_perf_ctr(struct amd_iommu *iommu) +static void __init init_iommu_perf_ctr(struct amd_iommu *iommu) { + int retry; struct pci_dev *pdev = iommu->dev; - u64 val = 0xabcd, val2 = 0, save_reg = 0; + u64 val = 0xabcd, val2 = 0, save_reg, save_src; if (!iommu_feature(iommu, FEATURE_PC)) return; @@ -1711,17 +1712,39 @@ static void init_iommu_perf_ctr(struct amd_iommu *iommu) amd_iommu_pc_present = true; /* save the value to restore, if writable */ - if (iommu_pc_get_set_reg(iommu, 0, 0, 0, &save_reg, false)) + if (iommu_pc_get_set_reg(iommu, 0, 0, 0, &save_reg, false) || + iommu_pc_get_set_reg(iommu, 0, 0, 8, &save_src, false)) + goto pc_false; + + /* + * Disable power gating by programing the performance counter + * source to 20 (i.e. counts the reads and writes from/to IOMMU + * Reserved Register [MMIO Offset 1FF8h] that are ignored.), + * which never get incremented during this init phase. + * (Note: The event is also deprecated.) + */ + val = 20; + if (iommu_pc_get_set_reg(iommu, 0, 0, 8, &val, true)) goto pc_false; /* Check if the performance counters can be written to */ - if ((iommu_pc_get_set_reg(iommu, 0, 0, 0, &val, true)) || - (iommu_pc_get_set_reg(iommu, 0, 0, 0, &val2, false)) || - (val != val2)) - goto pc_false; + val = 0xabcd; + for (retry = 5; retry; retry--) { + if (iommu_pc_get_set_reg(iommu, 0, 0, 0, &val, true) || + iommu_pc_get_set_reg(iommu, 0, 0, 0, &val2, false) || + val2) + break; + + /* Wait about 20 msec for power gating to disable and retry. */ + msleep(20); + } /* restore */ - if (iommu_pc_get_set_reg(iommu, 0, 0, 0, &save_reg, true)) + if (iommu_pc_get_set_reg(iommu, 0, 0, 0, &save_reg, true) || + iommu_pc_get_set_reg(iommu, 0, 0, 8, &save_src, true)) + goto pc_false; + + if (val != val2) goto pc_false; pci_info(pdev, "IOMMU performance counters supported\n"); From 0a2efafbb1c752a7041652445bc1232114409633 Mon Sep 17 00:00:00 2001 From: Sebastian Andrzej Siewior Date: Sat, 23 Jan 2021 21:10:26 +0100 Subject: [PATCH 390/414] blk-mq: Always complete remote completions requests in softirq Controllers with multiple queues have their IRQ-handelers pinned to a CPU. The core shouldn't need to complete the request on a remote CPU. Remove this case and always raise the softirq to complete the request. Reviewed-by: Christoph Hellwig Reviewed-by: Daniel Wagner Signed-off-by: Sebastian Andrzej Siewior Signed-off-by: Jens Axboe --- block/blk-mq.c | 14 +------------- 1 file changed, 1 insertion(+), 13 deletions(-) diff --git a/block/blk-mq.c b/block/blk-mq.c index f285a9123a8b..90348ae51846 100644 --- a/block/blk-mq.c +++ b/block/blk-mq.c @@ -628,19 +628,7 @@ static void __blk_mq_complete_request_remote(void *data) { struct request *rq = data; - /* - * For most of single queue controllers, there is only one irq vector - * for handling I/O completion, and the only irq's affinity is set - * to all possible CPUs. On most of ARCHs, this affinity means the irq - * is handled on one specific CPU. - * - * So complete I/O requests in softirq context in case of single queue - * devices to avoid degrading I/O performance due to irqsoff latency. - */ - if (rq->q->nr_hw_queues == 1) - blk_mq_trigger_softirq(rq); - else - rq->q->mq_ops->complete(rq); + blk_mq_trigger_softirq(rq); } static inline bool blk_mq_complete_need_ipi(struct request *rq) From f9ab49184af093f0bf6c0e6583f5b25da2c09ff5 Mon Sep 17 00:00:00 2001 From: Sebastian Andrzej Siewior Date: Sat, 23 Jan 2021 21:10:27 +0100 Subject: [PATCH 391/414] blk-mq: Use llist_head for blk_cpu_done With llist_head it is possible to avoid the locking (the irq-off region) when items are added. This makes it possible to add items on a remote CPU without additional locking. llist_add() returns true if the list was previously empty. This can be used to invoke the SMP function call / raise sofirq only if the first item was added (otherwise it is already pending). This simplifies the code a little and reduces the IRQ-off regions. blk_mq_raise_softirq() needs a preempt-disable section to ensure the request is enqueued on the same CPU as the softirq is raised. Some callers (USB-storage) invoke this path in preemptible context. Reviewed-by: Christoph Hellwig Reviewed-by: Daniel Wagner Signed-off-by: Sebastian Andrzej Siewior Signed-off-by: Jens Axboe --- block/blk-mq.c | 97 ++++++++++++++++++------------------------ include/linux/blkdev.h | 2 +- 2 files changed, 42 insertions(+), 57 deletions(-) diff --git a/block/blk-mq.c b/block/blk-mq.c index 90348ae51846..463de2981df8 100644 --- a/block/blk-mq.c +++ b/block/blk-mq.c @@ -41,7 +41,7 @@ #include "blk-mq-sched.h" #include "blk-rq-qos.h" -static DEFINE_PER_CPU(struct list_head, blk_cpu_done); +static DEFINE_PER_CPU(struct llist_head, blk_cpu_done); static void blk_mq_poll_stats_start(struct request_queue *q); static void blk_mq_poll_stats_fn(struct blk_stat_callback *cb); @@ -567,68 +567,29 @@ void blk_mq_end_request(struct request *rq, blk_status_t error) } EXPORT_SYMBOL(blk_mq_end_request); -/* - * Softirq action handler - move entries to local list and loop over them - * while passing them to the queue registered handler. - */ -static __latent_entropy void blk_done_softirq(struct softirq_action *h) +static void blk_complete_reqs(struct llist_head *list) { - struct list_head *cpu_list, local_list; + struct llist_node *entry = llist_reverse_order(llist_del_all(list)); + struct request *rq, *next; - local_irq_disable(); - cpu_list = this_cpu_ptr(&blk_cpu_done); - list_replace_init(cpu_list, &local_list); - local_irq_enable(); - - while (!list_empty(&local_list)) { - struct request *rq; - - rq = list_entry(local_list.next, struct request, ipi_list); - list_del_init(&rq->ipi_list); + llist_for_each_entry_safe(rq, next, entry, ipi_list) rq->q->mq_ops->complete(rq); - } } -static void blk_mq_trigger_softirq(struct request *rq) +static __latent_entropy void blk_done_softirq(struct softirq_action *h) { - struct list_head *list; - unsigned long flags; - - local_irq_save(flags); - list = this_cpu_ptr(&blk_cpu_done); - list_add_tail(&rq->ipi_list, list); - - /* - * If the list only contains our just added request, signal a raise of - * the softirq. If there are already entries there, someone already - * raised the irq but it hasn't run yet. - */ - if (list->next == &rq->ipi_list) - raise_softirq_irqoff(BLOCK_SOFTIRQ); - local_irq_restore(flags); + blk_complete_reqs(this_cpu_ptr(&blk_cpu_done)); } static int blk_softirq_cpu_dead(unsigned int cpu) { - /* - * If a CPU goes away, splice its entries to the current CPU - * and trigger a run of the softirq - */ - local_irq_disable(); - list_splice_init(&per_cpu(blk_cpu_done, cpu), - this_cpu_ptr(&blk_cpu_done)); - raise_softirq_irqoff(BLOCK_SOFTIRQ); - local_irq_enable(); - + blk_complete_reqs(&per_cpu(blk_cpu_done, cpu)); return 0; } - static void __blk_mq_complete_request_remote(void *data) { - struct request *rq = data; - - blk_mq_trigger_softirq(rq); + __raise_softirq_irqoff(BLOCK_SOFTIRQ); } static inline bool blk_mq_complete_need_ipi(struct request *rq) @@ -657,6 +618,30 @@ static inline bool blk_mq_complete_need_ipi(struct request *rq) return cpu_online(rq->mq_ctx->cpu); } +static void blk_mq_complete_send_ipi(struct request *rq) +{ + struct llist_head *list; + unsigned int cpu; + + cpu = rq->mq_ctx->cpu; + list = &per_cpu(blk_cpu_done, cpu); + if (llist_add(&rq->ipi_list, list)) { + INIT_CSD(&rq->csd, __blk_mq_complete_request_remote, rq); + smp_call_function_single_async(cpu, &rq->csd); + } +} + +static void blk_mq_raise_softirq(struct request *rq) +{ + struct llist_head *list; + + preempt_disable(); + list = this_cpu_ptr(&blk_cpu_done); + if (llist_add(&rq->ipi_list, list)) + raise_softirq(BLOCK_SOFTIRQ); + preempt_enable(); +} + bool blk_mq_complete_request_remote(struct request *rq) { WRITE_ONCE(rq->state, MQ_RQ_COMPLETE); @@ -669,15 +654,15 @@ bool blk_mq_complete_request_remote(struct request *rq) return false; if (blk_mq_complete_need_ipi(rq)) { - INIT_CSD(&rq->csd, __blk_mq_complete_request_remote, rq); - smp_call_function_single_async(rq->mq_ctx->cpu, &rq->csd); - } else { - if (rq->q->nr_hw_queues > 1) - return false; - blk_mq_trigger_softirq(rq); + blk_mq_complete_send_ipi(rq); + return true; } - return true; + if (rq->q->nr_hw_queues == 1) { + blk_mq_raise_softirq(rq); + return true; + } + return false; } EXPORT_SYMBOL_GPL(blk_mq_complete_request_remote); @@ -3892,7 +3877,7 @@ static int __init blk_mq_init(void) int i; for_each_possible_cpu(i) - INIT_LIST_HEAD(&per_cpu(blk_cpu_done, i)); + init_llist_head(&per_cpu(blk_cpu_done, i)); open_softirq(BLOCK_SOFTIRQ, blk_done_softirq); cpuhp_setup_state_nocalls(CPUHP_BLOCK_SOFTIRQ_DEAD, diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h index f94ee3089e01..89a444c5a583 100644 --- a/include/linux/blkdev.h +++ b/include/linux/blkdev.h @@ -153,7 +153,7 @@ struct request { */ union { struct hlist_node hash; /* merge hash */ - struct list_head ipi_list; + struct llist_node ipi_list; }; /* From 13791c80b0cdf54d92fc54221cdf490683b109de Mon Sep 17 00:00:00 2001 From: John Ogness Date: Thu, 11 Feb 2021 18:37:52 +0106 Subject: [PATCH 392/414] printk: avoid prb_first_valid_seq() where possible If message sizes average larger than expected (more than 32 characters), the data_ring will wrap before the desc_ring. Once the data_ring wraps, it will start invalidating descriptors. These invalid descriptors hang around until they are eventually recycled when the desc_ring wraps. Readers do not care about invalid descriptors, but they still need to iterate past them. If the average message size is much larger than 32 characters, then there will be many invalid descriptors preceding the valid descriptors. The function prb_first_valid_seq() always begins at the oldest descriptor and searches for the first valid descriptor. This can be rather expensive for the above scenario. And, in fact, because of its heavy usage in /dev/kmsg, there have been reports of long delays and even RCU stalls. For code that does not need to search from the oldest record, replace prb_first_valid_seq() usage with prb_read_valid_*() functions, which provide a start sequence number to search from. Fixes: 896fbe20b4e2333fb55 ("printk: use the lockless ringbuffer") Reported-by: kernel test robot Reported-by: J. Avila Signed-off-by: John Ogness Reviewed-by: Petr Mladek Signed-off-by: Petr Mladek Link: https://lore.kernel.org/r/20210211173152.1629-1-john.ogness@linutronix.de --- kernel/printk/printk.c | 28 ++++++++++++++++++---------- 1 file changed, 18 insertions(+), 10 deletions(-) diff --git a/kernel/printk/printk.c b/kernel/printk/printk.c index c55cd1820689..c7239d169bbe 100644 --- a/kernel/printk/printk.c +++ b/kernel/printk/printk.c @@ -736,9 +736,9 @@ static ssize_t devkmsg_read(struct file *file, char __user *buf, logbuf_lock_irq(); } - if (user->seq < prb_first_valid_seq(prb)) { + if (r->info->seq != user->seq) { /* our last seen message is gone, return error and reset */ - user->seq = prb_first_valid_seq(prb); + user->seq = r->info->seq; ret = -EPIPE; logbuf_unlock_irq(); goto out; @@ -813,6 +813,7 @@ static loff_t devkmsg_llseek(struct file *file, loff_t offset, int whence) static __poll_t devkmsg_poll(struct file *file, poll_table *wait) { struct devkmsg_user *user = file->private_data; + struct printk_info info; __poll_t ret = 0; if (!user) @@ -821,9 +822,9 @@ static __poll_t devkmsg_poll(struct file *file, poll_table *wait) poll_wait(file, &log_wait, wait); logbuf_lock_irq(); - if (prb_read_valid(prb, user->seq, NULL)) { + if (prb_read_valid_info(prb, user->seq, &info, NULL)) { /* return error when data has vanished underneath us */ - if (user->seq < prb_first_valid_seq(prb)) + if (info.seq != user->seq) ret = EPOLLIN|EPOLLRDNORM|EPOLLERR|EPOLLPRI; else ret = EPOLLIN|EPOLLRDNORM; @@ -1560,6 +1561,7 @@ static void syslog_clear(void) int do_syslog(int type, char __user *buf, int len, int source) { + struct printk_info info; bool clear = false; static int saved_console_loglevel = LOGLEVEL_DEFAULT; int error; @@ -1630,9 +1632,14 @@ int do_syslog(int type, char __user *buf, int len, int source) /* Number of chars in the log buffer */ case SYSLOG_ACTION_SIZE_UNREAD: logbuf_lock_irq(); - if (syslog_seq < prb_first_valid_seq(prb)) { + if (!prb_read_valid_info(prb, syslog_seq, &info, NULL)) { + /* No unread messages. */ + logbuf_unlock_irq(); + return 0; + } + if (info.seq != syslog_seq) { /* messages are gone, move to first one */ - syslog_seq = prb_first_valid_seq(prb); + syslog_seq = info.seq; syslog_partial = 0; } if (source == SYSLOG_FROM_PROC) { @@ -1644,7 +1651,6 @@ int do_syslog(int type, char __user *buf, int len, int source) error = prb_next_seq(prb) - syslog_seq; } else { bool time = syslog_partial ? syslog_time : printk_time; - struct printk_info info; unsigned int line_count; u64 seq; @@ -3425,9 +3431,11 @@ bool kmsg_dump_get_buffer(struct kmsg_dumper *dumper, bool syslog, goto out; logbuf_lock_irqsave(flags); - if (dumper->cur_seq < prb_first_valid_seq(prb)) { - /* messages are gone, move to first available one */ - dumper->cur_seq = prb_first_valid_seq(prb); + if (prb_read_valid_info(prb, dumper->cur_seq, &info, NULL)) { + if (info.seq != dumper->cur_seq) { + /* messages are gone, move to first available one */ + dumper->cur_seq = info.seq; + } } /* last entry */ From 4e89a78779647ca7ee2967551c599633fe9d3647 Mon Sep 17 00:00:00 2001 From: Timur Tabi Date: Sun, 14 Feb 2021 10:13:46 -0600 Subject: [PATCH 393/414] lib: use KSTM_MODULE_GLOBALS macro in kselftest drivers Instead of defining the total/failed test counters manually, test drivers that are clients of kselftest should use the macro created for this purpose. Signed-off-by: Timur Tabi Reviewed-by: Petr Mladek Acked-by: Marco Elver Signed-off-by: Petr Mladek Link: https://lore.kernel.org/r/20210214161348.369023-2-timur@kernel.org --- lib/test_bitmap.c | 3 +-- lib/test_printf.c | 4 ++-- 2 files changed, 3 insertions(+), 4 deletions(-) diff --git a/lib/test_bitmap.c b/lib/test_bitmap.c index 4425a1dd4ef1..0ea0e8258f14 100644 --- a/lib/test_bitmap.c +++ b/lib/test_bitmap.c @@ -16,8 +16,7 @@ #include "../tools/testing/selftests/kselftest_module.h" -static unsigned total_tests __initdata; -static unsigned failed_tests __initdata; +KSTM_MODULE_GLOBALS(); static char pbl_buffer[PAGE_SIZE] __initdata; diff --git a/lib/test_printf.c b/lib/test_printf.c index 7ac87f18a10f..ad2bcfa8caa1 100644 --- a/lib/test_printf.c +++ b/lib/test_printf.c @@ -30,8 +30,8 @@ #define PAD_SIZE 16 #define FILL_CHAR '$' -static unsigned total_tests __initdata; -static unsigned failed_tests __initdata; +KSTM_MODULE_GLOBALS(); + static char *test_buffer __initdata; static char *alloced_buffer __initdata; From d9d4de2309cd1721421c6488f1bb5744d2c83a39 Mon Sep 17 00:00:00 2001 From: Timur Tabi Date: Sun, 14 Feb 2021 10:13:47 -0600 Subject: [PATCH 394/414] kselftest: add support for skipped tests Update the kselftest framework to allow client drivers to specify that some tests were skipped. Signed-off-by: Timur Tabi Reviewed-by: Petr Mladek Tested-by: Petr Mladek Acked-by: Marco Elver Signed-off-by: Petr Mladek Link: https://lore.kernel.org/r/20210214161348.369023-3-timur@kernel.org --- tools/testing/selftests/kselftest_module.h | 18 ++++++++++++------ 1 file changed, 12 insertions(+), 6 deletions(-) diff --git a/tools/testing/selftests/kselftest_module.h b/tools/testing/selftests/kselftest_module.h index e8eafaf0941a..e2ea41de3f35 100644 --- a/tools/testing/selftests/kselftest_module.h +++ b/tools/testing/selftests/kselftest_module.h @@ -11,7 +11,8 @@ #define KSTM_MODULE_GLOBALS() \ static unsigned int total_tests __initdata; \ -static unsigned int failed_tests __initdata +static unsigned int failed_tests __initdata; \ +static unsigned int skipped_tests __initdata #define KSTM_CHECK_ZERO(x) do { \ total_tests++; \ @@ -21,11 +22,16 @@ static unsigned int failed_tests __initdata } \ } while (0) -static inline int kstm_report(unsigned int total_tests, unsigned int failed_tests) +static inline int kstm_report(unsigned int total_tests, unsigned int failed_tests, + unsigned int skipped_tests) { - if (failed_tests == 0) - pr_info("all %u tests passed\n", total_tests); - else + if (failed_tests == 0) { + if (skipped_tests) { + pr_info("skipped %u tests\n", skipped_tests); + pr_info("remaining %u tests passed\n", total_tests); + } else + pr_info("all %u tests passed\n", total_tests); + } else pr_warn("failed %u out of %u tests\n", failed_tests, total_tests); return failed_tests ? -EINVAL : 0; @@ -36,7 +42,7 @@ static int __init __module##_init(void) \ { \ pr_info("loaded.\n"); \ selftest(); \ - return kstm_report(total_tests, failed_tests); \ + return kstm_report(total_tests, failed_tests, skipped_tests); \ } \ static void __exit __module##_exit(void) \ { \ From 5ead723a20e0447bc7db33dc3070b420e5f80aa6 Mon Sep 17 00:00:00 2001 From: Timur Tabi Date: Sun, 14 Feb 2021 10:13:48 -0600 Subject: [PATCH 395/414] lib/vsprintf: no_hash_pointers prints all addresses as unhashed If the no_hash_pointers command line parameter is set, then printk("%p") will print pointers as unhashed, which is useful for debugging purposes. This change applies to any function that uses vsprintf, such as print_hex_dump() and seq_buf_printf(). A large warning message is displayed if this option is enabled. Unhashed pointers expose kernel addresses, which can be a security risk. Also update test_printf to skip the hashed pointer tests if the command-line option is set. Signed-off-by: Timur Tabi Acked-by: Petr Mladek Acked-by: Randy Dunlap Acked-by: Sergey Senozhatsky Acked-by: Vlastimil Babka Acked-by: Marco Elver Signed-off-by: Petr Mladek Link: https://lore.kernel.org/r/20210214161348.369023-4-timur@kernel.org --- .../admin-guide/kernel-parameters.txt | 15 ++++++++ lib/test_printf.c | 8 +++++ lib/vsprintf.c | 36 +++++++++++++++++-- 3 files changed, 57 insertions(+), 2 deletions(-) diff --git a/Documentation/admin-guide/kernel-parameters.txt b/Documentation/admin-guide/kernel-parameters.txt index c722ec19cd00..c549bc789108 100644 --- a/Documentation/admin-guide/kernel-parameters.txt +++ b/Documentation/admin-guide/kernel-parameters.txt @@ -3281,6 +3281,21 @@ in certain environments such as networked servers or real-time systems. + no_hash_pointers + Force pointers printed to the console or buffers to be + unhashed. By default, when a pointer is printed via %p + format string, that pointer is "hashed", i.e. obscured + by hashing the pointer value. This is a security feature + that hides actual kernel addresses from unprivileged + users, but it also makes debugging the kernel more + difficult since unequal pointers can no longer be + compared. However, if this command-line option is + specified, then all normal pointers will have their true + value printed. Pointers printed via %pK may still be + hashed. This option should only be specified when + debugging the kernel. Please do not use on production + kernels. + nohibernate [HIBERNATION] Disable hibernation and resume. nohz= [KNL] Boottime enable/disable dynamic ticks diff --git a/lib/test_printf.c b/lib/test_printf.c index ad2bcfa8caa1..a6755798e9e6 100644 --- a/lib/test_printf.c +++ b/lib/test_printf.c @@ -35,6 +35,8 @@ KSTM_MODULE_GLOBALS(); static char *test_buffer __initdata; static char *alloced_buffer __initdata; +extern bool no_hash_pointers; + static int __printf(4, 0) __init do_test(int bufsize, const char *expect, int elen, const char *fmt, va_list ap) @@ -301,6 +303,12 @@ plain(void) { int err; + if (no_hash_pointers) { + pr_warn("skipping plain 'p' tests"); + skipped_tests += 2; + return; + } + err = plain_hash(); if (err) { pr_warn("plain 'p' does not appear to be hashed\n"); diff --git a/lib/vsprintf.c b/lib/vsprintf.c index 3b53c73580c5..41ddc353ebb8 100644 --- a/lib/vsprintf.c +++ b/lib/vsprintf.c @@ -2090,6 +2090,32 @@ char *fwnode_string(char *buf, char *end, struct fwnode_handle *fwnode, return widen_string(buf, buf - buf_start, end, spec); } +/* Disable pointer hashing if requested */ +bool no_hash_pointers __ro_after_init; +EXPORT_SYMBOL_GPL(no_hash_pointers); + +static int __init no_hash_pointers_enable(char *str) +{ + no_hash_pointers = true; + + pr_warn("**********************************************************\n"); + pr_warn("** NOTICE NOTICE NOTICE NOTICE NOTICE NOTICE NOTICE **\n"); + pr_warn("** **\n"); + pr_warn("** This system shows unhashed kernel memory addresses **\n"); + pr_warn("** via the console, logs, and other interfaces. This **\n"); + pr_warn("** might reduce the security of your system. **\n"); + pr_warn("** **\n"); + pr_warn("** If you see this message and you are not debugging **\n"); + pr_warn("** the kernel, report this immediately to your system **\n"); + pr_warn("** administrator! **\n"); + pr_warn("** **\n"); + pr_warn("** NOTICE NOTICE NOTICE NOTICE NOTICE NOTICE NOTICE **\n"); + pr_warn("**********************************************************\n"); + + return 0; +} +early_param("no_hash_pointers", no_hash_pointers_enable); + /* * Show a '%p' thing. A kernel extension is that the '%p' is followed * by an extra set of alphanumeric characters that are extended format @@ -2297,8 +2323,14 @@ char *pointer(const char *fmt, char *buf, char *end, void *ptr, } } - /* default is to _not_ leak addresses, hash before printing */ - return ptr_to_id(buf, end, ptr, spec); + /* + * default is to _not_ leak addresses, so hash before printing, + * unless no_hash_pointers is specified on the command line. + */ + if (unlikely(no_hash_pointers)) + return pointer_string(buf, end, ptr, spec); + else + return ptr_to_id(buf, end, ptr, spec); } /* From 633d61021298f690f823ff51bcdab906e3644fe1 Mon Sep 17 00:00:00 2001 From: Christoph Lameter Date: Thu, 28 Jan 2021 18:46:47 +0000 Subject: [PATCH 396/414] RDMA/ipoib: Remove racy Subnet Manager sendonly join checks When a system receives a REREG event from the SM, then the SM information in the kernel is marked as invalid and a request is sent to the SM to update the information. The SM information is invalid in that time period. However, receiving a REREG also occurs simultaneously in user space applications that are now trying to rejoin the multicast groups. Some of those may be sendonly multicast groups which are then failing. If the SM information is invalid then ib_sa_sendonly_fullmem_support() returns false. That is wrong because it just means that we do not know yet if the potentially new SM supports sendonly joins. Sendonly join was introduced in 2015 and all the Subnet managers have supported it ever since. So there is no point in checking if a subnet manager supports it. Should an old opensm get a request for a sendonly join then the request will fail. The code that is removed here accomodated that situation and fell back to a full join. Falling back to a full join is problematic in itself. The reason to use the sendonly join was to reduce the traffic on the Infiniband fabric otherwise one could have just stayed with the regular join. So this patch may cause users of very old opensms to discover that lots of traffic needlessly crosses their IB fabrics. Link: https://lore.kernel.org/r/alpine.DEB.2.22.394.2101281845160.13303@www.lameter.com Signed-off-by: Christoph Lameter Signed-off-by: Jason Gunthorpe --- drivers/infiniband/core/cma.c | 11 --------- drivers/infiniband/core/sa_query.c | 24 ------------------- drivers/infiniband/ulp/ipoib/ipoib.h | 1 - drivers/infiniband/ulp/ipoib/ipoib_main.c | 2 -- .../infiniband/ulp/ipoib/ipoib_multicast.c | 13 +--------- include/rdma/ib_sa.h | 4 ---- 6 files changed, 1 insertion(+), 54 deletions(-) diff --git a/drivers/infiniband/core/cma.c b/drivers/infiniband/core/cma.c index c51b84b2d2f3..58ee7004c8d8 100644 --- a/drivers/infiniband/core/cma.c +++ b/drivers/infiniband/core/cma.c @@ -4542,17 +4542,6 @@ static int cma_join_ib_multicast(struct rdma_id_private *id_priv, rec.pkey = cpu_to_be16(ib_addr_get_pkey(dev_addr)); rec.join_state = mc->join_state; - if ((rec.join_state == BIT(SENDONLY_FULLMEMBER_JOIN)) && - (!ib_sa_sendonly_fullmem_support(&sa_client, - id_priv->id.device, - id_priv->id.port_num))) { - dev_warn( - &id_priv->id.device->dev, - "RDMA CM: port %u Unable to multicast join: SM doesn't support Send Only Full Member option\n", - id_priv->id.port_num); - return -EOPNOTSUPP; - } - comp_mask = IB_SA_MCMEMBER_REC_MGID | IB_SA_MCMEMBER_REC_PORT_GID | IB_SA_MCMEMBER_REC_PKEY | IB_SA_MCMEMBER_REC_JOIN_STATE | IB_SA_MCMEMBER_REC_QKEY | IB_SA_MCMEMBER_REC_SL | diff --git a/drivers/infiniband/core/sa_query.c b/drivers/infiniband/core/sa_query.c index 5bd047042e68..9ef1a355131b 100644 --- a/drivers/infiniband/core/sa_query.c +++ b/drivers/infiniband/core/sa_query.c @@ -1951,30 +1951,6 @@ int ib_sa_guid_info_rec_query(struct ib_sa_client *client, } EXPORT_SYMBOL(ib_sa_guid_info_rec_query); -bool ib_sa_sendonly_fullmem_support(struct ib_sa_client *client, - struct ib_device *device, - u8 port_num) -{ - struct ib_sa_device *sa_dev = ib_get_client_data(device, &sa_client); - struct ib_sa_port *port; - bool ret = false; - unsigned long flags; - - if (!sa_dev) - return ret; - - port = &sa_dev->port[port_num - sa_dev->start_port]; - - spin_lock_irqsave(&port->classport_lock, flags); - if ((port->classport_info.valid) && - (port->classport_info.data.type == RDMA_CLASS_PORT_INFO_IB)) - ret = ib_get_cpi_capmask2(&port->classport_info.data.ib) - & IB_SA_CAP_MASK2_SENDONLY_FULL_MEM_SUPPORT; - spin_unlock_irqrestore(&port->classport_lock, flags); - return ret; -} -EXPORT_SYMBOL(ib_sa_sendonly_fullmem_support); - struct ib_classport_info_context { struct completion done; struct ib_sa_query *sa_query; diff --git a/drivers/infiniband/ulp/ipoib/ipoib.h b/drivers/infiniband/ulp/ipoib/ipoib.h index 3440dc48d02c..179ff1d068e5 100644 --- a/drivers/infiniband/ulp/ipoib/ipoib.h +++ b/drivers/infiniband/ulp/ipoib/ipoib.h @@ -413,7 +413,6 @@ struct ipoib_dev_priv { u64 hca_caps; struct ipoib_ethtool_st ethtool; unsigned int max_send_sge; - bool sm_fullmember_sendonly_support; const struct net_device_ops *rn_ops; }; diff --git a/drivers/infiniband/ulp/ipoib/ipoib_main.c b/drivers/infiniband/ulp/ipoib/ipoib_main.c index a6f413491321..e16b40c09f82 100644 --- a/drivers/infiniband/ulp/ipoib/ipoib_main.c +++ b/drivers/infiniband/ulp/ipoib/ipoib_main.c @@ -141,8 +141,6 @@ int ipoib_open(struct net_device *dev) set_bit(IPOIB_FLAG_ADMIN_UP, &priv->flags); - priv->sm_fullmember_sendonly_support = false; - if (ipoib_ib_dev_open(dev)) { if (!test_bit(IPOIB_PKEY_ASSIGNED, &priv->flags)) return 0; diff --git a/drivers/infiniband/ulp/ipoib/ipoib_multicast.c b/drivers/infiniband/ulp/ipoib/ipoib_multicast.c index e3e4447c0f51..5b3154503bf4 100644 --- a/drivers/infiniband/ulp/ipoib/ipoib_multicast.c +++ b/drivers/infiniband/ulp/ipoib/ipoib_multicast.c @@ -333,15 +333,6 @@ void ipoib_mcast_carrier_on_task(struct work_struct *work) ipoib_dbg(priv, "Keeping carrier off until IB port is active\n"); return; } - /* - * Check if can send sendonly MCG's with sendonly-fullmember join state. - * It done here after the successfully join to the broadcast group, - * because the broadcast group must always be joined first and is always - * re-joined if the SM changes substantially. - */ - priv->sm_fullmember_sendonly_support = - ib_sa_sendonly_fullmem_support(&ipoib_sa_client, - priv->ca, priv->port); /* * Take rtnl_lock to avoid racing with ipoib_stop() and * turning the carrier back on while a device is being @@ -537,9 +528,7 @@ static int ipoib_mcast_join(struct net_device *dev, struct ipoib_mcast *mcast) * most closely emulates the behavior, from a user space * application perspective, of Ethernet multicast operation. */ - if (test_bit(IPOIB_MCAST_FLAG_SENDONLY, &mcast->flags) && - priv->sm_fullmember_sendonly_support) - /* SM supports sendonly-fullmember, otherwise fallback to full-member */ + if (test_bit(IPOIB_MCAST_FLAG_SENDONLY, &mcast->flags)) rec.join_state = SENDONLY_FULLMEMBER_JOIN; } spin_unlock_irq(&priv->lock); diff --git a/include/rdma/ib_sa.h b/include/rdma/ib_sa.h index 693285e76f13..4c52c2fd22a1 100644 --- a/include/rdma/ib_sa.h +++ b/include/rdma/ib_sa.h @@ -547,10 +547,6 @@ int ib_sa_guid_info_rec_query(struct ib_sa_client *client, void *context), void *context, struct ib_sa_query **sa_query); -bool ib_sa_sendonly_fullmem_support(struct ib_sa_client *client, - struct ib_device *device, - u8 port_num); - static inline bool sa_path_is_roce(struct sa_path_rec *rec) { return ((rec->rec_type == SA_PATH_REC_TYPE_ROCE_V1) || From 229557230c760e25b6af79709aa85d30de4c8500 Mon Sep 17 00:00:00 2001 From: Lang Cheng Date: Sun, 7 Feb 2021 16:55:39 +0800 Subject: [PATCH 397/414] RDMA/hns: Remove unused member and variable of CMDQ last_status of structure hns_roce_v2_cmq has never been used, and the variable named 'complete' in __hns_roce_cmq_send() is meaningless. Fixes: a04ff739f2a9 ("RDMA/hns: Add command queue support for hip08 RoCE driver") Link: https://lore.kernel.org/r/1612688143-28226-2-git-send-email-liweihang@huawei.com Signed-off-by: Lang Cheng Signed-off-by: Weihang Li Signed-off-by: Jason Gunthorpe --- drivers/infiniband/hw/hns/hns_roce_hw_v2.c | 9 +++------ drivers/infiniband/hw/hns/hns_roce_hw_v2.h | 1 - 2 files changed, 3 insertions(+), 7 deletions(-) diff --git a/drivers/infiniband/hw/hns/hns_roce_hw_v2.c b/drivers/infiniband/hw/hns/hns_roce_hw_v2.c index d9f94b454085..4bbbc240e072 100644 --- a/drivers/infiniband/hw/hns/hns_roce_hw_v2.c +++ b/drivers/infiniband/hw/hns/hns_roce_hw_v2.c @@ -1314,7 +1314,6 @@ static int __hns_roce_cmq_send(struct hns_roce_dev *hr_dev, struct hns_roce_v2_priv *priv = hr_dev->priv; struct hns_roce_v2_cmq_ring *csq = &priv->cmq.csq; struct hns_roce_cmq_desc *desc_to_use; - bool complete = false; u32 timeout = 0; int handle = 0; u16 desc_ret; @@ -1361,7 +1360,6 @@ static int __hns_roce_cmq_send(struct hns_roce_dev *hr_dev, } if (hns_roce_cmq_csq_done(hr_dev)) { - complete = true; handle = 0; while (handle < num) { /* get the result of hardware write back */ @@ -1373,16 +1371,15 @@ static int __hns_roce_cmq_send(struct hns_roce_dev *hr_dev, ret = 0; else ret = -EIO; - priv->cmq.last_status = desc_ret; + ntc++; handle++; if (ntc == csq->desc_num) ntc = 0; } - } - - if (!complete) + } else { ret = -EAGAIN; + } /* clean the command send queue */ handle = hns_roce_cmq_csq_clean(hr_dev); diff --git a/drivers/infiniband/hw/hns/hns_roce_hw_v2.h b/drivers/infiniband/hw/hns/hns_roce_hw_v2.h index f29438cff456..631f6798a8fd 100644 --- a/drivers/infiniband/hw/hns/hns_roce_hw_v2.h +++ b/drivers/infiniband/hw/hns/hns_roce_hw_v2.h @@ -1890,7 +1890,6 @@ struct hns_roce_v2_cmq { struct hns_roce_v2_cmq_ring csq; struct hns_roce_v2_cmq_ring crq; u16 tx_timeout; - u16 last_status; }; enum hns_roce_link_table_type { From 8f86e2eadac968200a6ab1d7074fc0f5cbc1e075 Mon Sep 17 00:00:00 2001 From: Lang Cheng Date: Sun, 7 Feb 2021 16:55:40 +0800 Subject: [PATCH 398/414] RDMA/hns: Fixes missing error code of CMDQ When posting a multi-descriptors command, the error code of previous failed descriptors may be rewrote to 0 by a later successful descriptor. Fixes: a04ff739f2a9 ("RDMA/hns: Add command queue support for hip08 RoCE driver") Link: https://lore.kernel.org/r/1612688143-28226-3-git-send-email-liweihang@huawei.com Signed-off-by: Lang Cheng Signed-off-by: Weihang Li Signed-off-by: Jason Gunthorpe --- drivers/infiniband/hw/hns/hns_roce_hw_v2.c | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/drivers/infiniband/hw/hns/hns_roce_hw_v2.c b/drivers/infiniband/hw/hns/hns_roce_hw_v2.c index 4bbbc240e072..ba5272249dcd 100644 --- a/drivers/infiniband/hw/hns/hns_roce_hw_v2.c +++ b/drivers/infiniband/hw/hns/hns_roce_hw_v2.c @@ -1317,7 +1317,7 @@ static int __hns_roce_cmq_send(struct hns_roce_dev *hr_dev, u32 timeout = 0; int handle = 0; u16 desc_ret; - int ret = 0; + int ret; int ntc; spin_lock_bh(&csq->lock); @@ -1361,15 +1361,14 @@ static int __hns_roce_cmq_send(struct hns_roce_dev *hr_dev, if (hns_roce_cmq_csq_done(hr_dev)) { handle = 0; + ret = 0; while (handle < num) { /* get the result of hardware write back */ desc_to_use = &csq->desc[ntc]; desc[handle] = *desc_to_use; dev_dbg(hr_dev->dev, "Get cmq desc:\n"); desc_ret = le16_to_cpu(desc[handle].retval); - if (desc_ret == CMD_EXEC_SUCCESS) - ret = 0; - else + if (unlikely(desc_ret != CMD_EXEC_SUCCESS)) ret = -EIO; ntc++; From 563aeb226630610707980e8abe20af1e6f410ce4 Mon Sep 17 00:00:00 2001 From: Lang Cheng Date: Sun, 7 Feb 2021 16:55:41 +0800 Subject: [PATCH 399/414] RDMA/hns: Remove redundant operations on CMDQ CMDQ works serially, after each successful transmission, the head and tail pointers will be equal, so there is no need to check whether the queue is full. At the same time, since the descriptor of each transmission is new, there is no need to perform a cleanup operation. Then, the field named next_to_clean in structure hns_roce_v2_cmq_ring is redundant. Fixes: a04ff739f2a9 ("RDMA/hns: Add command queue support for hip08 RoCE driver") Link: https://lore.kernel.org/r/1612688143-28226-4-git-send-email-liweihang@huawei.com Signed-off-by: Lang Cheng Signed-off-by: Weihang Li Signed-off-by: Jason Gunthorpe --- drivers/infiniband/hw/hns/hns_roce_hw_v2.c | 54 ++-------------------- drivers/infiniband/hw/hns/hns_roce_hw_v2.h | 1 - 2 files changed, 5 insertions(+), 50 deletions(-) diff --git a/drivers/infiniband/hw/hns/hns_roce_hw_v2.c b/drivers/infiniband/hw/hns/hns_roce_hw_v2.c index ba5272249dcd..c15ae3e2e49a 100644 --- a/drivers/infiniband/hw/hns/hns_roce_hw_v2.c +++ b/drivers/infiniband/hw/hns/hns_roce_hw_v2.c @@ -1130,15 +1130,6 @@ static int hns_roce_v2_rst_process_cmd(struct hns_roce_dev *hr_dev) return 0; } -static int hns_roce_cmq_space(struct hns_roce_v2_cmq_ring *ring) -{ - int ntu = ring->next_to_use; - int ntc = ring->next_to_clean; - int used = (ntu - ntc + ring->desc_num) % ring->desc_num; - - return ring->desc_num - used - 1; -} - static int hns_roce_alloc_cmq_desc(struct hns_roce_dev *hr_dev, struct hns_roce_v2_cmq_ring *ring) { @@ -1178,7 +1169,6 @@ static int hns_roce_init_cmq_ring(struct hns_roce_dev *hr_dev, bool ring_type) &priv->cmq.csq : &priv->cmq.crq; ring->flag = ring_type; - ring->next_to_clean = 0; ring->next_to_use = 0; return hns_roce_alloc_cmq_desc(hr_dev, ring); @@ -1284,30 +1274,6 @@ static int hns_roce_cmq_csq_done(struct hns_roce_dev *hr_dev) return head == priv->cmq.csq.next_to_use; } -static int hns_roce_cmq_csq_clean(struct hns_roce_dev *hr_dev) -{ - struct hns_roce_v2_priv *priv = hr_dev->priv; - struct hns_roce_v2_cmq_ring *csq = &priv->cmq.csq; - struct hns_roce_cmq_desc *desc; - u16 ntc = csq->next_to_clean; - u32 head; - int clean = 0; - - desc = &csq->desc[ntc]; - head = roce_read(hr_dev, ROCEE_TX_CMQ_HEAD_REG); - while (head != ntc) { - memset(desc, 0, sizeof(*desc)); - ntc++; - if (ntc == csq->desc_num) - ntc = 0; - desc = &csq->desc[ntc]; - clean++; - } - csq->next_to_clean = ntc; - - return clean; -} - static int __hns_roce_cmq_send(struct hns_roce_dev *hr_dev, struct hns_roce_cmq_desc *desc, int num) { @@ -1322,15 +1288,6 @@ static int __hns_roce_cmq_send(struct hns_roce_dev *hr_dev, spin_lock_bh(&csq->lock); - if (num > hns_roce_cmq_space(csq)) { - spin_unlock_bh(&csq->lock); - return -EBUSY; - } - - /* - * Record the location of desc in the cmq for this time - * which will be use for hardware to write back - */ ntc = csq->next_to_use; while (handle < num) { @@ -1377,15 +1334,14 @@ static int __hns_roce_cmq_send(struct hns_roce_dev *hr_dev, ntc = 0; } } else { + /* FW/HW reset or incorrect number of desc */ + ntc = roce_read(hr_dev, ROCEE_TX_CMQ_HEAD_REG); + dev_warn(hr_dev->dev, "CMDQ move head from %d to %d\n", + csq->next_to_use, ntc); + csq->next_to_use = ntc; ret = -EAGAIN; } - /* clean the command send queue */ - handle = hns_roce_cmq_csq_clean(hr_dev); - if (handle != num) - dev_warn(hr_dev->dev, "Cleaned %d, need to clean %d\n", - handle, num); - spin_unlock_bh(&csq->lock); return ret; diff --git a/drivers/infiniband/hw/hns/hns_roce_hw_v2.h b/drivers/infiniband/hw/hns/hns_roce_hw_v2.h index 631f6798a8fd..d271a10cfaf4 100644 --- a/drivers/infiniband/hw/hns/hns_roce_hw_v2.h +++ b/drivers/infiniband/hw/hns/hns_roce_hw_v2.h @@ -1881,7 +1881,6 @@ struct hns_roce_v2_cmq_ring { u16 buf_size; u16 desc_num; int next_to_use; - int next_to_clean; u8 flag; spinlock_t lock; /* command queue lock */ }; From 292b3352bd5bd0abeba3e8e7b5ae5acb8f7df4e0 Mon Sep 17 00:00:00 2001 From: Lang Cheng Date: Sun, 7 Feb 2021 16:55:42 +0800 Subject: [PATCH 400/414] RDMA/hns: Adjust fields and variables about CMDQ tail/head The register 0x07014 is actually the head pointer of CMDQ, and 0x07010 means tail pointer. Current definitions are confusing, so rename them and related variables. The next_to_use of structure hns_roce_v2_cmq_ring has the same semantics as head, merge them into one member. The next_to_clean of structure hns_roce_v2_cmq_ring has the same semantics as tail. After deleting next_to_clean, tail should also be deleted. Link: https://lore.kernel.org/r/1612688143-28226-5-git-send-email-liweihang@huawei.com Signed-off-by: Lang Cheng Signed-off-by: Weihang Li Signed-off-by: Jason Gunthorpe --- drivers/infiniband/hw/hns/hns_roce_common.h | 4 +-- drivers/infiniband/hw/hns/hns_roce_hw_v2.c | 37 +++++++++++---------- drivers/infiniband/hw/hns/hns_roce_hw_v2.h | 3 -- 3 files changed, 21 insertions(+), 23 deletions(-) diff --git a/drivers/infiniband/hw/hns/hns_roce_common.h b/drivers/infiniband/hw/hns/hns_roce_common.h index 3ca6e88f77bf..23c438cef40d 100644 --- a/drivers/infiniband/hw/hns/hns_roce_common.h +++ b/drivers/infiniband/hw/hns/hns_roce_common.h @@ -364,8 +364,8 @@ #define ROCEE_TX_CMQ_BASEADDR_L_REG 0x07000 #define ROCEE_TX_CMQ_BASEADDR_H_REG 0x07004 #define ROCEE_TX_CMQ_DEPTH_REG 0x07008 -#define ROCEE_TX_CMQ_TAIL_REG 0x07010 -#define ROCEE_TX_CMQ_HEAD_REG 0x07014 +#define ROCEE_TX_CMQ_HEAD_REG 0x07010 +#define ROCEE_TX_CMQ_TAIL_REG 0x07014 #define ROCEE_RX_CMQ_BASEADDR_L_REG 0x07018 #define ROCEE_RX_CMQ_BASEADDR_H_REG 0x0701c diff --git a/drivers/infiniband/hw/hns/hns_roce_hw_v2.c b/drivers/infiniband/hw/hns/hns_roce_hw_v2.c index c15ae3e2e49a..3b89436b01c0 100644 --- a/drivers/infiniband/hw/hns/hns_roce_hw_v2.c +++ b/drivers/infiniband/hw/hns/hns_roce_hw_v2.c @@ -1169,7 +1169,7 @@ static int hns_roce_init_cmq_ring(struct hns_roce_dev *hr_dev, bool ring_type) &priv->cmq.csq : &priv->cmq.crq; ring->flag = ring_type; - ring->next_to_use = 0; + ring->head = 0; return hns_roce_alloc_cmq_desc(hr_dev, ring); } @@ -1268,10 +1268,10 @@ static void hns_roce_cmq_setup_basic_desc(struct hns_roce_cmq_desc *desc, static int hns_roce_cmq_csq_done(struct hns_roce_dev *hr_dev) { - u32 head = roce_read(hr_dev, ROCEE_TX_CMQ_HEAD_REG); + u32 tail = roce_read(hr_dev, ROCEE_TX_CMQ_TAIL_REG); struct hns_roce_v2_priv *priv = hr_dev->priv; - return head == priv->cmq.csq.next_to_use; + return tail == priv->cmq.csq.head; } static int __hns_roce_cmq_send(struct hns_roce_dev *hr_dev, @@ -1283,25 +1283,25 @@ static int __hns_roce_cmq_send(struct hns_roce_dev *hr_dev, u32 timeout = 0; int handle = 0; u16 desc_ret; + u32 tail; int ret; - int ntc; spin_lock_bh(&csq->lock); - ntc = csq->next_to_use; + tail = csq->head; while (handle < num) { - desc_to_use = &csq->desc[csq->next_to_use]; + desc_to_use = &csq->desc[csq->head]; *desc_to_use = desc[handle]; dev_dbg(hr_dev->dev, "set cmq desc:\n"); - csq->next_to_use++; - if (csq->next_to_use == csq->desc_num) - csq->next_to_use = 0; + csq->head++; + if (csq->head == csq->desc_num) + csq->head = 0; handle++; } /* Write to hardware */ - roce_write(hr_dev, ROCEE_TX_CMQ_TAIL_REG, csq->next_to_use); + roce_write(hr_dev, ROCEE_TX_CMQ_HEAD_REG, csq->head); /* * If the command is sync, wait for the firmware to write back, @@ -1321,24 +1321,25 @@ static int __hns_roce_cmq_send(struct hns_roce_dev *hr_dev, ret = 0; while (handle < num) { /* get the result of hardware write back */ - desc_to_use = &csq->desc[ntc]; + desc_to_use = &csq->desc[tail]; desc[handle] = *desc_to_use; dev_dbg(hr_dev->dev, "Get cmq desc:\n"); desc_ret = le16_to_cpu(desc[handle].retval); if (unlikely(desc_ret != CMD_EXEC_SUCCESS)) ret = -EIO; - ntc++; + tail++; handle++; - if (ntc == csq->desc_num) - ntc = 0; + if (tail == csq->desc_num) + tail = 0; } } else { /* FW/HW reset or incorrect number of desc */ - ntc = roce_read(hr_dev, ROCEE_TX_CMQ_HEAD_REG); - dev_warn(hr_dev->dev, "CMDQ move head from %d to %d\n", - csq->next_to_use, ntc); - csq->next_to_use = ntc; + tail = roce_read(hr_dev, ROCEE_TX_CMQ_TAIL_REG); + dev_warn(hr_dev->dev, "CMDQ move tail from %d to %d\n", + csq->head, tail); + csq->head = tail; + ret = -EAGAIN; } diff --git a/drivers/infiniband/hw/hns/hns_roce_hw_v2.h b/drivers/infiniband/hw/hns/hns_roce_hw_v2.h index d271a10cfaf4..7a0f8acc09f6 100644 --- a/drivers/infiniband/hw/hns/hns_roce_hw_v2.h +++ b/drivers/infiniband/hw/hns/hns_roce_hw_v2.h @@ -1876,11 +1876,8 @@ struct hns_roce_v2_cmq_ring { dma_addr_t desc_dma_addr; struct hns_roce_cmq_desc *desc; u32 head; - u32 tail; - u16 buf_size; u16 desc_num; - int next_to_use; u8 flag; spinlock_t lock; /* command queue lock */ }; From 5e9914c003885402a3eb138204dba3eea997ecde Mon Sep 17 00:00:00 2001 From: Lang Cheng Date: Sun, 7 Feb 2021 16:55:43 +0800 Subject: [PATCH 401/414] RDMA/hns: Refactor process of posting CMDQ Simplify __hns_roce_cmq_send() then remove the redundant variables. Link: https://lore.kernel.org/r/1612688143-28226-6-git-send-email-liweihang@huawei.com Signed-off-by: Lang Cheng Signed-off-by: Weihang Li Signed-off-by: Jason Gunthorpe --- drivers/infiniband/hw/hns/hns_roce_hw_v2.c | 42 +++++++++------------- 1 file changed, 17 insertions(+), 25 deletions(-) diff --git a/drivers/infiniband/hw/hns/hns_roce_hw_v2.c b/drivers/infiniband/hw/hns/hns_roce_hw_v2.c index 3b89436b01c0..012076aa35f5 100644 --- a/drivers/infiniband/hw/hns/hns_roce_hw_v2.c +++ b/drivers/infiniband/hw/hns/hns_roce_hw_v2.c @@ -1279,32 +1279,26 @@ static int __hns_roce_cmq_send(struct hns_roce_dev *hr_dev, { struct hns_roce_v2_priv *priv = hr_dev->priv; struct hns_roce_v2_cmq_ring *csq = &priv->cmq.csq; - struct hns_roce_cmq_desc *desc_to_use; u32 timeout = 0; - int handle = 0; u16 desc_ret; u32 tail; int ret; + int i; spin_lock_bh(&csq->lock); tail = csq->head; - while (handle < num) { - desc_to_use = &csq->desc[csq->head]; - *desc_to_use = desc[handle]; - dev_dbg(hr_dev->dev, "set cmq desc:\n"); - csq->head++; + for (i = 0; i < num; i++) { + csq->desc[csq->head++] = desc[i]; if (csq->head == csq->desc_num) csq->head = 0; - handle++; } /* Write to hardware */ roce_write(hr_dev, ROCEE_TX_CMQ_HEAD_REG, csq->head); - /* - * If the command is sync, wait for the firmware to write back, + /* If the command is sync, wait for the firmware to write back, * if multi descriptors to be sent, use the first one to check */ if (le16_to_cpu(desc->flag) & HNS_ROCE_CMD_FLAG_NO_INTR) { @@ -1312,26 +1306,24 @@ static int __hns_roce_cmq_send(struct hns_roce_dev *hr_dev, if (hns_roce_cmq_csq_done(hr_dev)) break; udelay(1); - timeout++; - } while (timeout < priv->cmq.tx_timeout); + } while (++timeout < priv->cmq.tx_timeout); } if (hns_roce_cmq_csq_done(hr_dev)) { - handle = 0; - ret = 0; - while (handle < num) { - /* get the result of hardware write back */ - desc_to_use = &csq->desc[tail]; - desc[handle] = *desc_to_use; - dev_dbg(hr_dev->dev, "Get cmq desc:\n"); - desc_ret = le16_to_cpu(desc[handle].retval); - if (unlikely(desc_ret != CMD_EXEC_SUCCESS)) - ret = -EIO; - - tail++; - handle++; + for (ret = 0, i = 0; i < num; i++) { + /* check the result of hardware write back */ + desc[i] = csq->desc[tail++]; if (tail == csq->desc_num) tail = 0; + + desc_ret = le16_to_cpu(desc[i].retval); + if (likely(desc_ret == CMD_EXEC_SUCCESS)) + continue; + + dev_err_ratelimited(hr_dev->dev, + "Cmdq IO error, opcode = %x, return = %x\n", + desc->opcode, desc_ret); + ret = -EIO; } } else { /* FW/HW reset or incorrect number of desc */ From bf656b029f88ca4b00e2b84c752813f2cb306174 Mon Sep 17 00:00:00 2001 From: Yixing Liu Date: Wed, 10 Feb 2021 10:33:44 +0800 Subject: [PATCH 402/414] RDMA/hns: Adjust definition of FRMR fields FRMR is not well-supported on HIP08, it is re-designed for HIP09 and the position of related fields is changed. Then the ULPs should be forbidden to use FRMR on older hardwares. Link: https://lore.kernel.org/r/1612924424-28217-1-git-send-email-liweihang@huawei.com Signed-off-by: Yixing Liu Signed-off-by: Weihang Li Signed-off-by: Jason Gunthorpe --- drivers/infiniband/hw/hns/hns_roce_hw_v2.c | 45 +++++++++++++--------- drivers/infiniband/hw/hns/hns_roce_hw_v2.h | 12 +++--- drivers/infiniband/hw/hns/hns_roce_main.c | 3 +- 3 files changed, 34 insertions(+), 26 deletions(-) diff --git a/drivers/infiniband/hw/hns/hns_roce_hw_v2.c b/drivers/infiniband/hw/hns/hns_roce_hw_v2.c index 012076aa35f5..c3934abeb260 100644 --- a/drivers/infiniband/hw/hns/hns_roce_hw_v2.c +++ b/drivers/infiniband/hw/hns/hns_roce_hw_v2.c @@ -99,16 +99,16 @@ static void set_frmr_seg(struct hns_roce_v2_rc_send_wqe *rc_sq_wqe, u64 pbl_ba; /* use ib_access_flags */ - roce_set_bit(rc_sq_wqe->byte_4, V2_RC_FRMR_WQE_BYTE_4_BIND_EN_S, - wr->access & IB_ACCESS_MW_BIND ? 1 : 0); - roce_set_bit(rc_sq_wqe->byte_4, V2_RC_FRMR_WQE_BYTE_4_ATOMIC_S, - wr->access & IB_ACCESS_REMOTE_ATOMIC ? 1 : 0); - roce_set_bit(rc_sq_wqe->byte_4, V2_RC_FRMR_WQE_BYTE_4_RR_S, - wr->access & IB_ACCESS_REMOTE_READ ? 1 : 0); - roce_set_bit(rc_sq_wqe->byte_4, V2_RC_FRMR_WQE_BYTE_4_RW_S, - wr->access & IB_ACCESS_REMOTE_WRITE ? 1 : 0); - roce_set_bit(rc_sq_wqe->byte_4, V2_RC_FRMR_WQE_BYTE_4_LW_S, - wr->access & IB_ACCESS_LOCAL_WRITE ? 1 : 0); + roce_set_bit(fseg->byte_40, V2_RC_FRMR_WQE_BYTE_40_BIND_EN_S, + !!(wr->access & IB_ACCESS_MW_BIND)); + roce_set_bit(fseg->byte_40, V2_RC_FRMR_WQE_BYTE_40_ATOMIC_S, + !!(wr->access & IB_ACCESS_REMOTE_ATOMIC)); + roce_set_bit(fseg->byte_40, V2_RC_FRMR_WQE_BYTE_40_RR_S, + !!(wr->access & IB_ACCESS_REMOTE_READ)); + roce_set_bit(fseg->byte_40, V2_RC_FRMR_WQE_BYTE_40_RW_S, + !!(wr->access & IB_ACCESS_REMOTE_WRITE)); + roce_set_bit(fseg->byte_40, V2_RC_FRMR_WQE_BYTE_40_LW_S, + !!(wr->access & IB_ACCESS_LOCAL_WRITE)); /* Data structure reuse may lead to confusion */ pbl_ba = mr->pbl_mtr.hem_cfg.root_ba; @@ -121,12 +121,10 @@ static void set_frmr_seg(struct hns_roce_v2_rc_send_wqe *rc_sq_wqe, rc_sq_wqe->va = cpu_to_le64(wr->mr->iova); fseg->pbl_size = cpu_to_le32(mr->npages); - roce_set_field(fseg->mode_buf_pg_sz, - V2_RC_FRMR_WQE_BYTE_40_PBL_BUF_PG_SZ_M, + roce_set_field(fseg->byte_40, V2_RC_FRMR_WQE_BYTE_40_PBL_BUF_PG_SZ_M, V2_RC_FRMR_WQE_BYTE_40_PBL_BUF_PG_SZ_S, to_hr_hw_page_shift(mr->pbl_mtr.hem_cfg.buf_pg_shift)); - roce_set_bit(fseg->mode_buf_pg_sz, - V2_RC_FRMR_WQE_BYTE_40_BLK_MODE_S, 0); + roce_set_bit(fseg->byte_40, V2_RC_FRMR_WQE_BYTE_40_BLK_MODE_S, 0); } static void set_atomic_seg(const struct ib_send_wr *wr, @@ -522,10 +520,12 @@ static inline int set_ud_wqe(struct hns_roce_qp *qp, return 0; } -static int set_rc_opcode(struct hns_roce_v2_rc_send_wqe *rc_sq_wqe, +static int set_rc_opcode(struct hns_roce_dev *hr_dev, + struct hns_roce_v2_rc_send_wqe *rc_sq_wqe, const struct ib_send_wr *wr) { u32 ib_op = wr->opcode; + int ret = 0; rc_sq_wqe->immtdata = get_immtdata(wr); @@ -545,7 +545,10 @@ static int set_rc_opcode(struct hns_roce_v2_rc_send_wqe *rc_sq_wqe, rc_sq_wqe->va = cpu_to_le64(atomic_wr(wr)->remote_addr); break; case IB_WR_REG_MR: - set_frmr_seg(rc_sq_wqe, reg_wr(wr)); + if (hr_dev->pci_dev->revision >= PCI_REVISION_ID_HIP09) + set_frmr_seg(rc_sq_wqe, reg_wr(wr)); + else + ret = -EOPNOTSUPP; break; case IB_WR_LOCAL_INV: roce_set_bit(rc_sq_wqe->byte_4, V2_RC_SEND_WQE_BYTE_4_SO_S, 1); @@ -554,19 +557,23 @@ static int set_rc_opcode(struct hns_roce_v2_rc_send_wqe *rc_sq_wqe, rc_sq_wqe->inv_key = cpu_to_le32(wr->ex.invalidate_rkey); break; default: - return -EINVAL; + ret = -EINVAL; } + if (unlikely(ret)) + return ret; + roce_set_field(rc_sq_wqe->byte_4, V2_RC_SEND_WQE_BYTE_4_OPCODE_M, V2_RC_SEND_WQE_BYTE_4_OPCODE_S, to_hr_opcode(ib_op)); - return 0; + return ret; } static inline int set_rc_wqe(struct hns_roce_qp *qp, const struct ib_send_wr *wr, void *wqe, unsigned int *sge_idx, unsigned int owner_bit) { + struct hns_roce_dev *hr_dev = to_hr_dev(qp->ibqp.device); struct hns_roce_v2_rc_send_wqe *rc_sq_wqe = wqe; unsigned int curr_idx = *sge_idx; unsigned int valid_num_sge; @@ -577,7 +584,7 @@ static inline int set_rc_wqe(struct hns_roce_qp *qp, rc_sq_wqe->msg_len = cpu_to_le32(msg_len); - ret = set_rc_opcode(rc_sq_wqe, wr); + ret = set_rc_opcode(hr_dev, rc_sq_wqe, wr); if (WARN_ON(ret)) return ret; diff --git a/drivers/infiniband/hw/hns/hns_roce_hw_v2.h b/drivers/infiniband/hw/hns/hns_roce_hw_v2.h index 7a0f8acc09f6..39621fb6ec16 100644 --- a/drivers/infiniband/hw/hns/hns_roce_hw_v2.h +++ b/drivers/infiniband/hw/hns/hns_roce_hw_v2.h @@ -1255,15 +1255,15 @@ struct hns_roce_v2_rc_send_wqe { #define V2_RC_SEND_WQE_BYTE_4_INLINE_S 12 -#define V2_RC_FRMR_WQE_BYTE_4_BIND_EN_S 19 +#define V2_RC_FRMR_WQE_BYTE_40_BIND_EN_S 10 -#define V2_RC_FRMR_WQE_BYTE_4_ATOMIC_S 20 +#define V2_RC_FRMR_WQE_BYTE_40_ATOMIC_S 11 -#define V2_RC_FRMR_WQE_BYTE_4_RR_S 21 +#define V2_RC_FRMR_WQE_BYTE_40_RR_S 12 -#define V2_RC_FRMR_WQE_BYTE_4_RW_S 22 +#define V2_RC_FRMR_WQE_BYTE_40_RW_S 13 -#define V2_RC_FRMR_WQE_BYTE_4_LW_S 23 +#define V2_RC_FRMR_WQE_BYTE_40_LW_S 14 #define V2_RC_SEND_WQE_BYTE_4_FLAG_S 31 @@ -1280,7 +1280,7 @@ struct hns_roce_v2_rc_send_wqe { struct hns_roce_wqe_frmr_seg { __le32 pbl_size; - __le32 mode_buf_pg_sz; + __le32 byte_40; }; #define V2_RC_FRMR_WQE_BYTE_40_PBL_BUF_PG_SZ_S 4 diff --git a/drivers/infiniband/hw/hns/hns_roce_main.c b/drivers/infiniband/hw/hns/hns_roce_main.c index c29215a8de3c..c9c0836394a2 100644 --- a/drivers/infiniband/hw/hns/hns_roce_main.c +++ b/drivers/infiniband/hw/hns/hns_roce_main.c @@ -201,7 +201,8 @@ static int hns_roce_query_device(struct ib_device *ib_dev, props->max_srq_sge = hr_dev->caps.max_srq_sges; } - if (hr_dev->caps.flags & HNS_ROCE_CAP_FLAG_FRMR) { + if (hr_dev->caps.flags & HNS_ROCE_CAP_FLAG_FRMR && + hr_dev->pci_dev->revision >= PCI_REVISION_ID_HIP09) { props->device_cap_flags |= IB_DEVICE_MEM_MGT_EXTENSIONS; props->max_fast_reg_page_list_len = HNS_ROCE_FRMR_MAX_PA; } From 1a93e848b730abd048c9b0fd60c6efb4da64ac17 Mon Sep 17 00:00:00 2001 From: Jiapeng Chong Date: Wed, 10 Feb 2021 17:38:21 +0800 Subject: [PATCH 403/414] RDMA/qedr: Use true and false for bool variable MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Fix the following coccicheck warning: ./drivers/infiniband/hw/qedr/qedr.h:629:9-10: WARNING: return of 0/1 in function 'qedr_qp_has_rq' with return type bool. ./drivers/infiniband/hw/qedr/qedr.h:620:9-10: WARNING: return of 0/1 in function 'qedr_qp_has_sq' with return type bool. Link: https://lore.kernel.org/r/1612949901-109873-1-git-send-email-jiapeng.chong@linux.alibaba.com Reported-by: Abaci Robot Signed-off-by: Jiapeng Chong Acked-by: Michal Kalderon  Signed-off-by: Jason Gunthorpe --- drivers/infiniband/hw/qedr/qedr.h | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/drivers/infiniband/hw/qedr/qedr.h b/drivers/infiniband/hw/qedr/qedr.h index 9dde70373a55..3cb4febaad0f 100644 --- a/drivers/infiniband/hw/qedr/qedr.h +++ b/drivers/infiniband/hw/qedr/qedr.h @@ -617,18 +617,18 @@ static inline bool qedr_qp_has_srq(struct qedr_qp *qp) static inline bool qedr_qp_has_sq(struct qedr_qp *qp) { if (qp->qp_type == IB_QPT_GSI || qp->qp_type == IB_QPT_XRC_TGT) - return 0; + return false; - return 1; + return true; } static inline bool qedr_qp_has_rq(struct qedr_qp *qp) { if (qp->qp_type == IB_QPT_GSI || qp->qp_type == IB_QPT_XRC_INI || qp->qp_type == IB_QPT_XRC_TGT || qedr_qp_has_srq(qp)) - return 0; + return false; - return 1; + return true; } static inline struct qedr_user_mmap_entry * From 168e4cd94983091a4a0c9b9de285ee15e3aa581c Mon Sep 17 00:00:00 2001 From: Leon Romanovsky Date: Wed, 10 Feb 2021 17:14:21 +0200 Subject: [PATCH 404/414] RDMA/core: Fix kernel doc warnings for ib_port_immutable_read() drivers/infiniband/core/device.c:859: warning: Function parameter or member 'dev' not described in 'ib_port_immutable_read' drivers/infiniband/core/device.c:859: warning: Function parameter or member 'port' not described in 'ib_port_immutable_read' Fixes: 7416790e2245 ("RDMA/core: Introduce and use API to read port immutable data") Link: https://lore.kernel.org/r/20210210151421.1108809-1-leon@kernel.org Reported-by: Stephen Rothwell Signed-off-by: Leon Romanovsky Signed-off-by: Jason Gunthorpe --- drivers/infiniband/core/device.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/drivers/infiniband/core/device.c b/drivers/infiniband/core/device.c index 051c018fb73c..aac0fe14e1d9 100644 --- a/drivers/infiniband/core/device.c +++ b/drivers/infiniband/core/device.c @@ -850,9 +850,9 @@ static int setup_port_data(struct ib_device *device) /** * ib_port_immutable_read() - Read rdma port's immutable data - * @dev - IB device - * @port - port number whose immutable data to read. It starts with index 1 and - * valid upto including rdma_end_port(). + * @dev: IB device + * @port: port number whose immutable data to read. It starts with index 1 and + * valid upto including rdma_end_port(). */ const struct ib_port_immutable* ib_port_immutable_read(struct ib_device *dev, unsigned int port) From fe454dc31e84f8c14cb8942fcb61666c9f40745b Mon Sep 17 00:00:00 2001 From: Avihai Horon Date: Thu, 11 Feb 2021 11:05:17 +0200 Subject: [PATCH 405/414] RDMA/ucma: Fix use-after-free bug in ucma_create_uevent MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit ucma_process_join() allocates struct ucma_multicast mc and frees it if an error occurs during its run. Specifically, if an error occurs in copy_to_user(), a use-after-free might happen in the following scenario: 1. mc struct is allocated. 2. rdma_join_multicast() is called and succeeds. During its run, cma_iboe_join_multicast() enqueues a work that will later use the aforementioned mc struct. 3. copy_to_user() is called and fails. 4. mc struct is deallocated. 5. The work that was enqueued by cma_iboe_join_multicast() is run and calls ucma_create_uevent() which tries to access mc struct (which is freed by now). Fix this bug by cancelling the work enqueued by cma_iboe_join_multicast(). Since cma_work_handler() frees struct cma_work, we don't use it in cma_iboe_join_multicast() so we can safely cancel the work later. The following syzkaller report revealed it: BUG: KASAN: use-after-free in ucma_create_uevent+0x2dd/0x;3f0 drivers/infiniband/core/ucma.c:272 Read of size 8 at addr ffff88810b3ad110 by task kworker/u8:1/108 CPU: 1 PID: 108 Comm: kworker/u8:1 Not tainted 5.10.0-rc6+ #257 Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS rel-1.13.0-0-gf21b5a4aeb02-prebuilt.qemu.org 04/01/2014 Workqueue: rdma_cm cma_work_handler Call Trace: __dump_stack lib/dump_stack.c:77 [inline] dump_stack+0xbe/0xf9 lib/dump_stack.c:118 print_address_description.constprop.0+0x3e/0×60 mm/kasan/report.c:385 __kasan_report mm/kasan/report.c:545 [inline] kasan_report.cold+0x1f/0×37 mm/kasan/report.c:562 ucma_create_uevent+0x2dd/0×3f0 drivers/infiniband/core/ucma.c:272 ucma_event_handler+0xb7/0×3c0 drivers/infiniband/core/ucma.c:349 cma_cm_event_handler+0x5d/0×1c0 drivers/infiniband/core/cma.c:1977 cma_work_handler+0xfa/0×190 drivers/infiniband/core/cma.c:2718 process_one_work+0x54c/0×930 kernel/workqueue.c:2272 worker_thread+0x82/0×830 kernel/workqueue.c:2418 kthread+0x1ca/0×220 kernel/kthread.c:292 ret_from_fork+0x1f/0×30 arch/x86/entry/entry_64.S:296 Allocated by task 359: kasan_save_stack+0x1b/0×40 mm/kasan/common.c:48 kasan_set_track mm/kasan/common.c:56 [inline] __kasan_kmalloc mm/kasan/common.c:461 [inline] __kasan_kmalloc.constprop.0+0xc2/0xd0 mm/kasan/common.c:434 kmalloc include/linux/slab.h:552 [inline] kzalloc include/linux/slab.h:664 [inline] ucma_process_join+0x16e/0×3f0 drivers/infiniband/core/ucma.c:1453 ucma_join_multicast+0xda/0×140 drivers/infiniband/core/ucma.c:1538 ucma_write+0x1f7/0×280 drivers/infiniband/core/ucma.c:1724 vfs_write fs/read_write.c:603 [inline] vfs_write+0x191/0×4c0 fs/read_write.c:585 ksys_write+0x1a1/0×1e0 fs/read_write.c:658 do_syscall_64+0x2d/0×40 arch/x86/entry/common.c:46 entry_SYSCALL_64_after_hwframe+0x44/0xa9 Freed by task 359: kasan_save_stack+0x1b/0×40 mm/kasan/common.c:48 kasan_set_track+0x1c/0×30 mm/kasan/common.c:56 kasan_set_free_info+0x1b/0×30 mm/kasan/generic.c:355 __kasan_slab_free+0x112/0×160 mm/kasan/common.c:422 slab_free_hook mm/slub.c:1544 [inline] slab_free_freelist_hook mm/slub.c:1577 [inline] slab_free mm/slub.c:3142 [inline] kfree+0xb3/0×3e0 mm/slub.c:4124 ucma_process_join+0x22d/0×3f0 drivers/infiniband/core/ucma.c:1497 ucma_join_multicast+0xda/0×140 drivers/infiniband/core/ucma.c:1538 ucma_write+0x1f7/0×280 drivers/infiniband/core/ucma.c:1724 vfs_write fs/read_write.c:603 [inline] vfs_write+0x191/0×4c0 fs/read_write.c:585 ksys_write+0x1a1/0×1e0 fs/read_write.c:658 do_syscall_64+0x2d/0×40 arch/x86/entry/common.c:46 entry_SYSCALL_64_after_hwframe+0x44/0xa9 The buggy address belongs to the object at ffff88810b3ad100 which belongs to the cache kmalloc-192 of size 192 The buggy address is located 16 bytes inside of 192-byte region [ffff88810b3ad100, ffff88810b3ad1c0) Fixes: b5de0c60cc30 ("RDMA/cma: Fix use after free race in roce multicast join") Link: https://lore.kernel.org/r/20210211090517.1278415-1-leon@kernel.org Reported-by: Amit Matityahu Signed-off-by: Avihai Horon Signed-off-by: Leon Romanovsky Signed-off-by: Jason Gunthorpe --- drivers/infiniband/core/cma.c | 70 ++++++++++++++++++++--------------- 1 file changed, 41 insertions(+), 29 deletions(-) diff --git a/drivers/infiniband/core/cma.c b/drivers/infiniband/core/cma.c index 58ee7004c8d8..94096511599f 100644 --- a/drivers/infiniband/core/cma.c +++ b/drivers/infiniband/core/cma.c @@ -352,7 +352,13 @@ struct ib_device *cma_get_ib_dev(struct cma_device *cma_dev) struct cma_multicast { struct rdma_id_private *id_priv; - struct ib_sa_multicast *sa_mc; + union { + struct ib_sa_multicast *sa_mc; + struct { + struct work_struct work; + struct rdma_cm_event event; + } iboe_join; + }; struct list_head list; void *context; struct sockaddr_storage addr; @@ -1823,6 +1829,8 @@ static void destroy_mc(struct rdma_id_private *id_priv, cma_igmp_send(ndev, &mgid, false); dev_put(ndev); } + + cancel_work_sync(&mc->iboe_join.work); } kfree(mc); } @@ -2683,6 +2691,28 @@ static int cma_query_ib_route(struct rdma_id_private *id_priv, return (id_priv->query_id < 0) ? id_priv->query_id : 0; } +static void cma_iboe_join_work_handler(struct work_struct *work) +{ + struct cma_multicast *mc = + container_of(work, struct cma_multicast, iboe_join.work); + struct rdma_cm_event *event = &mc->iboe_join.event; + struct rdma_id_private *id_priv = mc->id_priv; + int ret; + + mutex_lock(&id_priv->handler_mutex); + if (READ_ONCE(id_priv->state) == RDMA_CM_DESTROYING || + READ_ONCE(id_priv->state) == RDMA_CM_DEVICE_REMOVAL) + goto out_unlock; + + ret = cma_cm_event_handler(id_priv, event); + WARN_ON(ret); + +out_unlock: + mutex_unlock(&id_priv->handler_mutex); + if (event->event == RDMA_CM_EVENT_MULTICAST_JOIN) + rdma_destroy_ah_attr(&event->param.ud.ah_attr); +} + static void cma_work_handler(struct work_struct *_work) { struct cma_work *work = container_of(_work, struct cma_work, work); @@ -4478,10 +4508,7 @@ static int cma_ib_mc_handler(int status, struct ib_sa_multicast *multicast) cma_make_mc_event(status, id_priv, multicast, &event, mc); ret = cma_cm_event_handler(id_priv, &event); rdma_destroy_ah_attr(&event.param.ud.ah_attr); - if (ret) { - destroy_id_handler_unlock(id_priv); - return 0; - } + WARN_ON(ret); out: mutex_unlock(&id_priv->handler_mutex); @@ -4593,7 +4620,6 @@ static void cma_iboe_set_mgid(struct sockaddr *addr, union ib_gid *mgid, static int cma_iboe_join_multicast(struct rdma_id_private *id_priv, struct cma_multicast *mc) { - struct cma_work *work; struct rdma_dev_addr *dev_addr = &id_priv->id.route.addr.dev_addr; int err = 0; struct sockaddr *addr = (struct sockaddr *)&mc->addr; @@ -4607,10 +4633,6 @@ static int cma_iboe_join_multicast(struct rdma_id_private *id_priv, if (cma_zero_addr(addr)) return -EINVAL; - work = kzalloc(sizeof *work, GFP_KERNEL); - if (!work) - return -ENOMEM; - gid_type = id_priv->cma_dev->default_gid_type[id_priv->id.port_num - rdma_start_port(id_priv->cma_dev->device)]; cma_iboe_set_mgid(addr, &ib.rec.mgid, gid_type); @@ -4621,10 +4643,9 @@ static int cma_iboe_join_multicast(struct rdma_id_private *id_priv, if (dev_addr->bound_dev_if) ndev = dev_get_by_index(dev_addr->net, dev_addr->bound_dev_if); - if (!ndev) { - err = -ENODEV; - goto err_free; - } + if (!ndev) + return -ENODEV; + ib.rec.rate = iboe_get_rate(ndev); ib.rec.hop_limit = 1; ib.rec.mtu = iboe_get_mtu(ndev->mtu); @@ -4642,24 +4663,15 @@ static int cma_iboe_join_multicast(struct rdma_id_private *id_priv, err = -ENOTSUPP; } dev_put(ndev); - if (err || !ib.rec.mtu) { - if (!err) - err = -EINVAL; - goto err_free; - } + if (err || !ib.rec.mtu) + return err ?: -EINVAL; + rdma_ip2gid((struct sockaddr *)&id_priv->id.route.addr.src_addr, &ib.rec.port_gid); - work->id = id_priv; - INIT_WORK(&work->work, cma_work_handler); - cma_make_mc_event(0, id_priv, &ib, &work->event, mc); - /* Balances with cma_id_put() in cma_work_handler */ - cma_id_get(id_priv); - queue_work(cma_wq, &work->work); + INIT_WORK(&mc->iboe_join.work, cma_iboe_join_work_handler); + cma_make_mc_event(0, id_priv, &ib, &mc->iboe_join.event, mc); + queue_work(cma_wq, &mc->iboe_join.work); return 0; - -err_free: - kfree(work); - return err; } int rdma_join_multicast(struct rdma_cm_id *id, struct sockaddr *addr, From bf139b58af09eaed8828510adc094fc281deaf73 Mon Sep 17 00:00:00 2001 From: Bob Pearson Date: Thu, 11 Feb 2021 15:04:56 -0600 Subject: [PATCH 406/414] RDMA/rxe: Remove unused pkt->offset The pkt->offset field is never used except to assign it to 0. But it adds lots of unneeded code. This patch removes the field and related code. This causes a measurable improvement in performance. Link: https://lore.kernel.org/r/20210211210455.3274-1-rpearson@hpe.com Signed-off-by: Bob Pearson Signed-off-by: Jason Gunthorpe --- drivers/infiniband/sw/rxe/rxe_hdr.h | 178 +++++++++++++-------------- drivers/infiniband/sw/rxe/rxe_recv.c | 4 +- drivers/infiniband/sw/rxe/rxe_req.c | 1 - drivers/infiniband/sw/rxe/rxe_resp.c | 3 +- 4 files changed, 90 insertions(+), 96 deletions(-) diff --git a/drivers/infiniband/sw/rxe/rxe_hdr.h b/drivers/infiniband/sw/rxe/rxe_hdr.h index 3b483b75dfe3..e432f9e37795 100644 --- a/drivers/infiniband/sw/rxe/rxe_hdr.h +++ b/drivers/infiniband/sw/rxe/rxe_hdr.h @@ -22,7 +22,6 @@ struct rxe_pkt_info { u16 paylen; /* length of bth - icrc */ u8 port_num; /* port pkt received on */ u8 opcode; /* bth opcode of packet */ - u8 offset; /* bth offset from pkt->hdr */ }; /* Macros should be used only for received skb */ @@ -280,134 +279,134 @@ static inline void __bth_set_psn(void *arg, u32 psn) static inline u8 bth_opcode(struct rxe_pkt_info *pkt) { - return __bth_opcode(pkt->hdr + pkt->offset); + return __bth_opcode(pkt->hdr); } static inline void bth_set_opcode(struct rxe_pkt_info *pkt, u8 opcode) { - __bth_set_opcode(pkt->hdr + pkt->offset, opcode); + __bth_set_opcode(pkt->hdr, opcode); } static inline u8 bth_se(struct rxe_pkt_info *pkt) { - return __bth_se(pkt->hdr + pkt->offset); + return __bth_se(pkt->hdr); } static inline void bth_set_se(struct rxe_pkt_info *pkt, int se) { - __bth_set_se(pkt->hdr + pkt->offset, se); + __bth_set_se(pkt->hdr, se); } static inline u8 bth_mig(struct rxe_pkt_info *pkt) { - return __bth_mig(pkt->hdr + pkt->offset); + return __bth_mig(pkt->hdr); } static inline void bth_set_mig(struct rxe_pkt_info *pkt, u8 mig) { - __bth_set_mig(pkt->hdr + pkt->offset, mig); + __bth_set_mig(pkt->hdr, mig); } static inline u8 bth_pad(struct rxe_pkt_info *pkt) { - return __bth_pad(pkt->hdr + pkt->offset); + return __bth_pad(pkt->hdr); } static inline void bth_set_pad(struct rxe_pkt_info *pkt, u8 pad) { - __bth_set_pad(pkt->hdr + pkt->offset, pad); + __bth_set_pad(pkt->hdr, pad); } static inline u8 bth_tver(struct rxe_pkt_info *pkt) { - return __bth_tver(pkt->hdr + pkt->offset); + return __bth_tver(pkt->hdr); } static inline void bth_set_tver(struct rxe_pkt_info *pkt, u8 tver) { - __bth_set_tver(pkt->hdr + pkt->offset, tver); + __bth_set_tver(pkt->hdr, tver); } static inline u16 bth_pkey(struct rxe_pkt_info *pkt) { - return __bth_pkey(pkt->hdr + pkt->offset); + return __bth_pkey(pkt->hdr); } static inline void bth_set_pkey(struct rxe_pkt_info *pkt, u16 pkey) { - __bth_set_pkey(pkt->hdr + pkt->offset, pkey); + __bth_set_pkey(pkt->hdr, pkey); } static inline u32 bth_qpn(struct rxe_pkt_info *pkt) { - return __bth_qpn(pkt->hdr + pkt->offset); + return __bth_qpn(pkt->hdr); } static inline void bth_set_qpn(struct rxe_pkt_info *pkt, u32 qpn) { - __bth_set_qpn(pkt->hdr + pkt->offset, qpn); + __bth_set_qpn(pkt->hdr, qpn); } static inline int bth_fecn(struct rxe_pkt_info *pkt) { - return __bth_fecn(pkt->hdr + pkt->offset); + return __bth_fecn(pkt->hdr); } static inline void bth_set_fecn(struct rxe_pkt_info *pkt, int fecn) { - __bth_set_fecn(pkt->hdr + pkt->offset, fecn); + __bth_set_fecn(pkt->hdr, fecn); } static inline int bth_becn(struct rxe_pkt_info *pkt) { - return __bth_becn(pkt->hdr + pkt->offset); + return __bth_becn(pkt->hdr); } static inline void bth_set_becn(struct rxe_pkt_info *pkt, int becn) { - __bth_set_becn(pkt->hdr + pkt->offset, becn); + __bth_set_becn(pkt->hdr, becn); } static inline u8 bth_resv6a(struct rxe_pkt_info *pkt) { - return __bth_resv6a(pkt->hdr + pkt->offset); + return __bth_resv6a(pkt->hdr); } static inline void bth_set_resv6a(struct rxe_pkt_info *pkt) { - __bth_set_resv6a(pkt->hdr + pkt->offset); + __bth_set_resv6a(pkt->hdr); } static inline int bth_ack(struct rxe_pkt_info *pkt) { - return __bth_ack(pkt->hdr + pkt->offset); + return __bth_ack(pkt->hdr); } static inline void bth_set_ack(struct rxe_pkt_info *pkt, int ack) { - __bth_set_ack(pkt->hdr + pkt->offset, ack); + __bth_set_ack(pkt->hdr, ack); } static inline void bth_set_resv7(struct rxe_pkt_info *pkt) { - __bth_set_resv7(pkt->hdr + pkt->offset); + __bth_set_resv7(pkt->hdr); } static inline u32 bth_psn(struct rxe_pkt_info *pkt) { - return __bth_psn(pkt->hdr + pkt->offset); + return __bth_psn(pkt->hdr); } static inline void bth_set_psn(struct rxe_pkt_info *pkt, u32 psn) { - __bth_set_psn(pkt->hdr + pkt->offset, psn); + __bth_set_psn(pkt->hdr, psn); } static inline void bth_init(struct rxe_pkt_info *pkt, u8 opcode, int se, int mig, int pad, u16 pkey, u32 qpn, int ack_req, u32 psn) { - struct rxe_bth *bth = (struct rxe_bth *)(pkt->hdr + pkt->offset); + struct rxe_bth *bth = (struct rxe_bth *)(pkt->hdr); bth->opcode = opcode; bth->flags = (pad << 4) & BTH_PAD_MASK; @@ -448,14 +447,14 @@ static inline void __rdeth_set_een(void *arg, u32 een) static inline u8 rdeth_een(struct rxe_pkt_info *pkt) { - return __rdeth_een(pkt->hdr + pkt->offset - + rxe_opcode[pkt->opcode].offset[RXE_RDETH]); + return __rdeth_een(pkt->hdr + + rxe_opcode[pkt->opcode].offset[RXE_RDETH]); } static inline void rdeth_set_een(struct rxe_pkt_info *pkt, u32 een) { - __rdeth_set_een(pkt->hdr + pkt->offset - + rxe_opcode[pkt->opcode].offset[RXE_RDETH], een); + __rdeth_set_een(pkt->hdr + + rxe_opcode[pkt->opcode].offset[RXE_RDETH], een); } /****************************************************************************** @@ -499,26 +498,26 @@ static inline void __deth_set_sqp(void *arg, u32 sqp) static inline u32 deth_qkey(struct rxe_pkt_info *pkt) { - return __deth_qkey(pkt->hdr + pkt->offset - + rxe_opcode[pkt->opcode].offset[RXE_DETH]); + return __deth_qkey(pkt->hdr + + rxe_opcode[pkt->opcode].offset[RXE_DETH]); } static inline void deth_set_qkey(struct rxe_pkt_info *pkt, u32 qkey) { - __deth_set_qkey(pkt->hdr + pkt->offset - + rxe_opcode[pkt->opcode].offset[RXE_DETH], qkey); + __deth_set_qkey(pkt->hdr + + rxe_opcode[pkt->opcode].offset[RXE_DETH], qkey); } static inline u32 deth_sqp(struct rxe_pkt_info *pkt) { - return __deth_sqp(pkt->hdr + pkt->offset - + rxe_opcode[pkt->opcode].offset[RXE_DETH]); + return __deth_sqp(pkt->hdr + + rxe_opcode[pkt->opcode].offset[RXE_DETH]); } static inline void deth_set_sqp(struct rxe_pkt_info *pkt, u32 sqp) { - __deth_set_sqp(pkt->hdr + pkt->offset - + rxe_opcode[pkt->opcode].offset[RXE_DETH], sqp); + __deth_set_sqp(pkt->hdr + + rxe_opcode[pkt->opcode].offset[RXE_DETH], sqp); } /****************************************************************************** @@ -574,38 +573,38 @@ static inline void __reth_set_len(void *arg, u32 len) static inline u64 reth_va(struct rxe_pkt_info *pkt) { - return __reth_va(pkt->hdr + pkt->offset - + rxe_opcode[pkt->opcode].offset[RXE_RETH]); + return __reth_va(pkt->hdr + + rxe_opcode[pkt->opcode].offset[RXE_RETH]); } static inline void reth_set_va(struct rxe_pkt_info *pkt, u64 va) { - __reth_set_va(pkt->hdr + pkt->offset - + rxe_opcode[pkt->opcode].offset[RXE_RETH], va); + __reth_set_va(pkt->hdr + + rxe_opcode[pkt->opcode].offset[RXE_RETH], va); } static inline u32 reth_rkey(struct rxe_pkt_info *pkt) { - return __reth_rkey(pkt->hdr + pkt->offset - + rxe_opcode[pkt->opcode].offset[RXE_RETH]); + return __reth_rkey(pkt->hdr + + rxe_opcode[pkt->opcode].offset[RXE_RETH]); } static inline void reth_set_rkey(struct rxe_pkt_info *pkt, u32 rkey) { - __reth_set_rkey(pkt->hdr + pkt->offset - + rxe_opcode[pkt->opcode].offset[RXE_RETH], rkey); + __reth_set_rkey(pkt->hdr + + rxe_opcode[pkt->opcode].offset[RXE_RETH], rkey); } static inline u32 reth_len(struct rxe_pkt_info *pkt) { - return __reth_len(pkt->hdr + pkt->offset - + rxe_opcode[pkt->opcode].offset[RXE_RETH]); + return __reth_len(pkt->hdr + + rxe_opcode[pkt->opcode].offset[RXE_RETH]); } static inline void reth_set_len(struct rxe_pkt_info *pkt, u32 len) { - __reth_set_len(pkt->hdr + pkt->offset - + rxe_opcode[pkt->opcode].offset[RXE_RETH], len); + __reth_set_len(pkt->hdr + + rxe_opcode[pkt->opcode].offset[RXE_RETH], len); } /****************************************************************************** @@ -676,50 +675,50 @@ static inline void __atmeth_set_comp(void *arg, u64 comp) static inline u64 atmeth_va(struct rxe_pkt_info *pkt) { - return __atmeth_va(pkt->hdr + pkt->offset - + rxe_opcode[pkt->opcode].offset[RXE_ATMETH]); + return __atmeth_va(pkt->hdr + + rxe_opcode[pkt->opcode].offset[RXE_ATMETH]); } static inline void atmeth_set_va(struct rxe_pkt_info *pkt, u64 va) { - __atmeth_set_va(pkt->hdr + pkt->offset - + rxe_opcode[pkt->opcode].offset[RXE_ATMETH], va); + __atmeth_set_va(pkt->hdr + + rxe_opcode[pkt->opcode].offset[RXE_ATMETH], va); } static inline u32 atmeth_rkey(struct rxe_pkt_info *pkt) { - return __atmeth_rkey(pkt->hdr + pkt->offset - + rxe_opcode[pkt->opcode].offset[RXE_ATMETH]); + return __atmeth_rkey(pkt->hdr + + rxe_opcode[pkt->opcode].offset[RXE_ATMETH]); } static inline void atmeth_set_rkey(struct rxe_pkt_info *pkt, u32 rkey) { - __atmeth_set_rkey(pkt->hdr + pkt->offset - + rxe_opcode[pkt->opcode].offset[RXE_ATMETH], rkey); + __atmeth_set_rkey(pkt->hdr + + rxe_opcode[pkt->opcode].offset[RXE_ATMETH], rkey); } static inline u64 atmeth_swap_add(struct rxe_pkt_info *pkt) { - return __atmeth_swap_add(pkt->hdr + pkt->offset - + rxe_opcode[pkt->opcode].offset[RXE_ATMETH]); + return __atmeth_swap_add(pkt->hdr + + rxe_opcode[pkt->opcode].offset[RXE_ATMETH]); } static inline void atmeth_set_swap_add(struct rxe_pkt_info *pkt, u64 swap_add) { - __atmeth_set_swap_add(pkt->hdr + pkt->offset - + rxe_opcode[pkt->opcode].offset[RXE_ATMETH], swap_add); + __atmeth_set_swap_add(pkt->hdr + + rxe_opcode[pkt->opcode].offset[RXE_ATMETH], swap_add); } static inline u64 atmeth_comp(struct rxe_pkt_info *pkt) { - return __atmeth_comp(pkt->hdr + pkt->offset - + rxe_opcode[pkt->opcode].offset[RXE_ATMETH]); + return __atmeth_comp(pkt->hdr + + rxe_opcode[pkt->opcode].offset[RXE_ATMETH]); } static inline void atmeth_set_comp(struct rxe_pkt_info *pkt, u64 comp) { - __atmeth_set_comp(pkt->hdr + pkt->offset - + rxe_opcode[pkt->opcode].offset[RXE_ATMETH], comp); + __atmeth_set_comp(pkt->hdr + + rxe_opcode[pkt->opcode].offset[RXE_ATMETH], comp); } /****************************************************************************** @@ -780,26 +779,26 @@ static inline void __aeth_set_msn(void *arg, u32 msn) static inline u8 aeth_syn(struct rxe_pkt_info *pkt) { - return __aeth_syn(pkt->hdr + pkt->offset - + rxe_opcode[pkt->opcode].offset[RXE_AETH]); + return __aeth_syn(pkt->hdr + + rxe_opcode[pkt->opcode].offset[RXE_AETH]); } static inline void aeth_set_syn(struct rxe_pkt_info *pkt, u8 syn) { - __aeth_set_syn(pkt->hdr + pkt->offset - + rxe_opcode[pkt->opcode].offset[RXE_AETH], syn); + __aeth_set_syn(pkt->hdr + + rxe_opcode[pkt->opcode].offset[RXE_AETH], syn); } static inline u32 aeth_msn(struct rxe_pkt_info *pkt) { - return __aeth_msn(pkt->hdr + pkt->offset - + rxe_opcode[pkt->opcode].offset[RXE_AETH]); + return __aeth_msn(pkt->hdr + + rxe_opcode[pkt->opcode].offset[RXE_AETH]); } static inline void aeth_set_msn(struct rxe_pkt_info *pkt, u32 msn) { - __aeth_set_msn(pkt->hdr + pkt->offset - + rxe_opcode[pkt->opcode].offset[RXE_AETH], msn); + __aeth_set_msn(pkt->hdr + + rxe_opcode[pkt->opcode].offset[RXE_AETH], msn); } /****************************************************************************** @@ -825,14 +824,14 @@ static inline void __atmack_set_orig(void *arg, u64 orig) static inline u64 atmack_orig(struct rxe_pkt_info *pkt) { - return __atmack_orig(pkt->hdr + pkt->offset - + rxe_opcode[pkt->opcode].offset[RXE_ATMACK]); + return __atmack_orig(pkt->hdr + + rxe_opcode[pkt->opcode].offset[RXE_ATMACK]); } static inline void atmack_set_orig(struct rxe_pkt_info *pkt, u64 orig) { - __atmack_set_orig(pkt->hdr + pkt->offset - + rxe_opcode[pkt->opcode].offset[RXE_ATMACK], orig); + __atmack_set_orig(pkt->hdr + + rxe_opcode[pkt->opcode].offset[RXE_ATMACK], orig); } /****************************************************************************** @@ -858,14 +857,14 @@ static inline void __immdt_set_imm(void *arg, __be32 imm) static inline __be32 immdt_imm(struct rxe_pkt_info *pkt) { - return __immdt_imm(pkt->hdr + pkt->offset - + rxe_opcode[pkt->opcode].offset[RXE_IMMDT]); + return __immdt_imm(pkt->hdr + + rxe_opcode[pkt->opcode].offset[RXE_IMMDT]); } static inline void immdt_set_imm(struct rxe_pkt_info *pkt, __be32 imm) { - __immdt_set_imm(pkt->hdr + pkt->offset - + rxe_opcode[pkt->opcode].offset[RXE_IMMDT], imm); + __immdt_set_imm(pkt->hdr + + rxe_opcode[pkt->opcode].offset[RXE_IMMDT], imm); } /****************************************************************************** @@ -891,14 +890,14 @@ static inline void __ieth_set_rkey(void *arg, u32 rkey) static inline u32 ieth_rkey(struct rxe_pkt_info *pkt) { - return __ieth_rkey(pkt->hdr + pkt->offset - + rxe_opcode[pkt->opcode].offset[RXE_IETH]); + return __ieth_rkey(pkt->hdr + + rxe_opcode[pkt->opcode].offset[RXE_IETH]); } static inline void ieth_set_rkey(struct rxe_pkt_info *pkt, u32 rkey) { - __ieth_set_rkey(pkt->hdr + pkt->offset - + rxe_opcode[pkt->opcode].offset[RXE_IETH], rkey); + __ieth_set_rkey(pkt->hdr + + rxe_opcode[pkt->opcode].offset[RXE_IETH], rkey); } enum rxe_hdr_length { @@ -915,13 +914,12 @@ enum rxe_hdr_length { static inline size_t header_size(struct rxe_pkt_info *pkt) { - return pkt->offset + rxe_opcode[pkt->opcode].length; + return rxe_opcode[pkt->opcode].length; } static inline void *payload_addr(struct rxe_pkt_info *pkt) { - return pkt->hdr + pkt->offset - + rxe_opcode[pkt->opcode].offset[RXE_PAYLOAD]; + return pkt->hdr + rxe_opcode[pkt->opcode].offset[RXE_PAYLOAD]; } static inline size_t payload_size(struct rxe_pkt_info *pkt) diff --git a/drivers/infiniband/sw/rxe/rxe_recv.c b/drivers/infiniband/sw/rxe/rxe_recv.c index 8a48a33d587b..45d2f711bce2 100644 --- a/drivers/infiniband/sw/rxe/rxe_recv.c +++ b/drivers/infiniband/sw/rxe/rxe_recv.c @@ -353,9 +353,7 @@ void rxe_rcv(struct sk_buff *skb) __be32 *icrcp; u32 calc_icrc, pack_icrc; - pkt->offset = 0; - - if (unlikely(skb->len < pkt->offset + RXE_BTH_BYTES)) + if (unlikely(skb->len < RXE_BTH_BYTES)) goto drop; if (rxe_chk_dgid(rxe, skb) < 0) { diff --git a/drivers/infiniband/sw/rxe/rxe_req.c b/drivers/infiniband/sw/rxe/rxe_req.c index d4917646641a..889290793d75 100644 --- a/drivers/infiniband/sw/rxe/rxe_req.c +++ b/drivers/infiniband/sw/rxe/rxe_req.c @@ -375,7 +375,6 @@ static struct sk_buff *init_req_packet(struct rxe_qp *qp, pkt->psn = qp->req.psn; pkt->mask = rxe_opcode[opcode].mask; pkt->paylen = paylen; - pkt->offset = 0; pkt->wqe = wqe; /* init skb */ diff --git a/drivers/infiniband/sw/rxe/rxe_resp.c b/drivers/infiniband/sw/rxe/rxe_resp.c index 5fd26786d79b..1ae94f2cb336 100644 --- a/drivers/infiniband/sw/rxe/rxe_resp.c +++ b/drivers/infiniband/sw/rxe/rxe_resp.c @@ -586,11 +586,10 @@ static struct sk_buff *prepare_ack_packet(struct rxe_qp *qp, ack->qp = qp; ack->opcode = opcode; ack->mask = rxe_opcode[opcode].mask; - ack->offset = pkt->offset; ack->paylen = paylen; /* fill in bth using the request packet headers */ - memcpy(ack->hdr, pkt->hdr, pkt->offset + RXE_BTH_BYTES); + memcpy(ack->hdr, pkt->hdr, RXE_BTH_BYTES); bth_set_opcode(ack, opcode); bth_set_qpn(ack, qp->attr.dest_qp_num); From e6daa8f61d8def10f0619fe51b4c794f69598e4f Mon Sep 17 00:00:00 2001 From: Jack Wang Date: Fri, 12 Feb 2021 14:45:22 +0100 Subject: [PATCH 407/414] RDMA/rtrs-srv: Fix stack-out-of-bounds BUG: KASAN: stack-out-of-bounds in _mlx4_ib_post_send+0x1bd2/0x2770 [mlx4_ib] Read of size 4 at addr ffff8880d5a7f980 by task kworker/0:1H/565 CPU: 0 PID: 565 Comm: kworker/0:1H Tainted: G O 5.4.84-storage #5.4.84-1+feature+linux+5.4.y+dbg+20201216.1319+b6b887b~deb10 Hardware name: Supermicro H8QG6/H8QG6, BIOS 3.00 09/04/2012 Workqueue: ib-comp-wq ib_cq_poll_work [ib_core] Call Trace: dump_stack+0x96/0xe0 print_address_description.constprop.4+0x1f/0x300 ? irq_work_claim+0x2e/0x50 __kasan_report.cold.8+0x78/0x92 ? _mlx4_ib_post_send+0x1bd2/0x2770 [mlx4_ib] kasan_report+0x10/0x20 _mlx4_ib_post_send+0x1bd2/0x2770 [mlx4_ib] ? check_chain_key+0x1d7/0x2e0 ? _mlx4_ib_post_recv+0x630/0x630 [mlx4_ib] ? lockdep_hardirqs_on+0x1a8/0x290 ? stack_depot_save+0x218/0x56e ? do_profile_hits.isra.6.cold.13+0x1d/0x1d ? check_chain_key+0x1d7/0x2e0 ? save_stack+0x4d/0x80 ? save_stack+0x19/0x80 ? __kasan_slab_free+0x125/0x170 ? kfree+0xe7/0x3b0 rdma_write_sg+0x5b0/0x950 [rtrs_server] The problem is when we send imm_wr, the type should be ib_rdma_wr, so hw driver like mlx4 can do rdma_wr(wr), so fix it by use the ib_rdma_wr as type for imm_wr. Fixes: 9cb837480424 ("RDMA/rtrs: server: main functionality") Link: https://lore.kernel.org/r/20210212134525.103456-2-jinpu.wang@cloud.ionos.com Signed-off-by: Jack Wang Reviewed-by: Gioh Kim Reviewed-by: Leon Romanovsky Signed-off-by: Jason Gunthorpe --- drivers/infiniband/ulp/rtrs/rtrs-srv.c | 64 +++++++++++++------------- 1 file changed, 33 insertions(+), 31 deletions(-) diff --git a/drivers/infiniband/ulp/rtrs/rtrs-srv.c b/drivers/infiniband/ulp/rtrs/rtrs-srv.c index 918f1cf140cd..e13e91c2a44a 100644 --- a/drivers/infiniband/ulp/rtrs/rtrs-srv.c +++ b/drivers/infiniband/ulp/rtrs/rtrs-srv.c @@ -222,7 +222,8 @@ static int rdma_write_sg(struct rtrs_srv_op *id) dma_addr_t dma_addr = sess->dma_addr[id->msg_id]; struct rtrs_srv_mr *srv_mr; struct rtrs_srv *srv = sess->srv; - struct ib_send_wr inv_wr, imm_wr; + struct ib_send_wr inv_wr; + struct ib_rdma_wr imm_wr; struct ib_rdma_wr *wr = NULL; enum ib_send_flags flags; size_t sg_cnt; @@ -274,15 +275,15 @@ static int rdma_write_sg(struct rtrs_srv_op *id) if (need_inval && always_invalidate) { wr->wr.next = &rwr.wr; rwr.wr.next = &inv_wr; - inv_wr.next = &imm_wr; + inv_wr.next = &imm_wr.wr; } else if (always_invalidate) { wr->wr.next = &rwr.wr; - rwr.wr.next = &imm_wr; + rwr.wr.next = &imm_wr.wr; } else if (need_inval) { wr->wr.next = &inv_wr; - inv_wr.next = &imm_wr; + inv_wr.next = &imm_wr.wr; } else { - wr->wr.next = &imm_wr; + wr->wr.next = &imm_wr.wr; } /* * From time to time we have to post signaled sends, @@ -300,7 +301,7 @@ static int rdma_write_sg(struct rtrs_srv_op *id) inv_wr.ex.invalidate_rkey = rkey; } - imm_wr.next = NULL; + imm_wr.wr.next = NULL; if (always_invalidate) { struct rtrs_msg_rkey_rsp *msg; @@ -321,22 +322,22 @@ static int rdma_write_sg(struct rtrs_srv_op *id) list.addr = srv_mr->iu->dma_addr; list.length = sizeof(*msg); list.lkey = sess->s.dev->ib_pd->local_dma_lkey; - imm_wr.sg_list = &list; - imm_wr.num_sge = 1; - imm_wr.opcode = IB_WR_SEND_WITH_IMM; + imm_wr.wr.sg_list = &list; + imm_wr.wr.num_sge = 1; + imm_wr.wr.opcode = IB_WR_SEND_WITH_IMM; ib_dma_sync_single_for_device(sess->s.dev->ib_dev, srv_mr->iu->dma_addr, srv_mr->iu->size, DMA_TO_DEVICE); } else { - imm_wr.sg_list = NULL; - imm_wr.num_sge = 0; - imm_wr.opcode = IB_WR_RDMA_WRITE_WITH_IMM; + imm_wr.wr.sg_list = NULL; + imm_wr.wr.num_sge = 0; + imm_wr.wr.opcode = IB_WR_RDMA_WRITE_WITH_IMM; } - imm_wr.send_flags = flags; - imm_wr.ex.imm_data = cpu_to_be32(rtrs_to_io_rsp_imm(id->msg_id, + imm_wr.wr.send_flags = flags; + imm_wr.wr.ex.imm_data = cpu_to_be32(rtrs_to_io_rsp_imm(id->msg_id, 0, need_inval)); - imm_wr.wr_cqe = &io_comp_cqe; + imm_wr.wr.wr_cqe = &io_comp_cqe; ib_dma_sync_single_for_device(sess->s.dev->ib_dev, dma_addr, offset, DMA_BIDIRECTIONAL); @@ -363,7 +364,8 @@ static int send_io_resp_imm(struct rtrs_srv_con *con, struct rtrs_srv_op *id, { struct rtrs_sess *s = con->c.sess; struct rtrs_srv_sess *sess = to_srv_sess(s); - struct ib_send_wr inv_wr, imm_wr, *wr = NULL; + struct ib_send_wr inv_wr, *wr = NULL; + struct ib_rdma_wr imm_wr; struct ib_reg_wr rwr; struct rtrs_srv *srv = sess->srv; struct rtrs_srv_mr *srv_mr; @@ -400,15 +402,15 @@ static int send_io_resp_imm(struct rtrs_srv_con *con, struct rtrs_srv_op *id, if (need_inval && always_invalidate) { wr = &inv_wr; inv_wr.next = &rwr.wr; - rwr.wr.next = &imm_wr; + rwr.wr.next = &imm_wr.wr; } else if (always_invalidate) { wr = &rwr.wr; - rwr.wr.next = &imm_wr; + rwr.wr.next = &imm_wr.wr; } else if (need_inval) { wr = &inv_wr; - inv_wr.next = &imm_wr; + inv_wr.next = &imm_wr.wr; } else { - wr = &imm_wr; + wr = &imm_wr.wr; } /* * From time to time we have to post signalled sends, @@ -417,13 +419,13 @@ static int send_io_resp_imm(struct rtrs_srv_con *con, struct rtrs_srv_op *id, flags = (atomic_inc_return(&con->wr_cnt) % srv->queue_depth) ? 0 : IB_SEND_SIGNALED; imm = rtrs_to_io_rsp_imm(id->msg_id, errno, need_inval); - imm_wr.next = NULL; + imm_wr.wr.next = NULL; if (always_invalidate) { struct ib_sge list; struct rtrs_msg_rkey_rsp *msg; srv_mr = &sess->mrs[id->msg_id]; - rwr.wr.next = &imm_wr; + rwr.wr.next = &imm_wr.wr; rwr.wr.opcode = IB_WR_REG_MR; rwr.wr.wr_cqe = &local_reg_cqe; rwr.wr.num_sge = 0; @@ -440,21 +442,21 @@ static int send_io_resp_imm(struct rtrs_srv_con *con, struct rtrs_srv_op *id, list.addr = srv_mr->iu->dma_addr; list.length = sizeof(*msg); list.lkey = sess->s.dev->ib_pd->local_dma_lkey; - imm_wr.sg_list = &list; - imm_wr.num_sge = 1; - imm_wr.opcode = IB_WR_SEND_WITH_IMM; + imm_wr.wr.sg_list = &list; + imm_wr.wr.num_sge = 1; + imm_wr.wr.opcode = IB_WR_SEND_WITH_IMM; ib_dma_sync_single_for_device(sess->s.dev->ib_dev, srv_mr->iu->dma_addr, srv_mr->iu->size, DMA_TO_DEVICE); } else { - imm_wr.sg_list = NULL; - imm_wr.num_sge = 0; - imm_wr.opcode = IB_WR_RDMA_WRITE_WITH_IMM; + imm_wr.wr.sg_list = NULL; + imm_wr.wr.num_sge = 0; + imm_wr.wr.opcode = IB_WR_RDMA_WRITE_WITH_IMM; } - imm_wr.send_flags = flags; - imm_wr.wr_cqe = &io_comp_cqe; + imm_wr.wr.send_flags = flags; + imm_wr.wr.wr_cqe = &io_comp_cqe; - imm_wr.ex.imm_data = cpu_to_be32(imm); + imm_wr.wr.ex.imm_data = cpu_to_be32(imm); err = ib_post_send(id->con->c.qp, wr, NULL); if (unlikely(err)) From 03e9b33a0fd677f554b03352646c13459bf60458 Mon Sep 17 00:00:00 2001 From: Md Haris Iqbal Date: Fri, 12 Feb 2021 14:45:23 +0100 Subject: [PATCH 408/414] RDMA/rtrs: Only allow addition of path to an already established session While adding a path from the client side to an already established session, it was possible to provide the destination IP to a different server. This is dangerous. This commit adds an extra member to the rtrs_msg_conn_req structure, named first_conn; which is supposed to notify if the connection request is the first for that session or not. On the server side, if a session does not exist but the first_conn received inside the rtrs_msg_conn_req structure is 1, the connection request is failed. This signifies that the connection request is for an already existing session, and since the server did not find one, it is an wrong connection request. Fixes: 6a98d71daea1 ("RDMA/rtrs: client: main functionality") Fixes: 9cb837480424 ("RDMA/rtrs: server: main functionality") Link: https://lore.kernel.org/r/20210212134525.103456-3-jinpu.wang@cloud.ionos.com Signed-off-by: Md Haris Iqbal Reviewed-by: Lutz Pogrell Signed-off-by: Jack Wang Signed-off-by: Jason Gunthorpe --- drivers/infiniband/ulp/rtrs/rtrs-clt.c | 7 +++++++ drivers/infiniband/ulp/rtrs/rtrs-clt.h | 1 + drivers/infiniband/ulp/rtrs/rtrs-pri.h | 4 +++- drivers/infiniband/ulp/rtrs/rtrs-srv.c | 21 +++++++++++++++------ 4 files changed, 26 insertions(+), 7 deletions(-) diff --git a/drivers/infiniband/ulp/rtrs/rtrs-clt.c b/drivers/infiniband/ulp/rtrs/rtrs-clt.c index 7644c3f627ca..0a08b4b742a3 100644 --- a/drivers/infiniband/ulp/rtrs/rtrs-clt.c +++ b/drivers/infiniband/ulp/rtrs/rtrs-clt.c @@ -31,6 +31,8 @@ */ #define RTRS_RECONNECT_SEED 8 +#define FIRST_CONN 0x01 + MODULE_DESCRIPTION("RDMA Transport Client"); MODULE_LICENSE("GPL"); @@ -1660,6 +1662,7 @@ static int rtrs_rdma_route_resolved(struct rtrs_clt_con *con) .cid_num = cpu_to_le16(sess->s.con_num), .recon_cnt = cpu_to_le16(sess->s.recon_cnt), }; + msg.first_conn = sess->for_new_clt ? FIRST_CONN : 0; uuid_copy(&msg.sess_uuid, &sess->s.uuid); uuid_copy(&msg.paths_uuid, &clt->paths_uuid); @@ -1745,6 +1748,8 @@ static int rtrs_rdma_conn_established(struct rtrs_clt_con *con, scnprintf(sess->hca_name, sizeof(sess->hca_name), sess->s.dev->ib_dev->name); sess->s.src_addr = con->c.cm_id->route.addr.src_addr; + /* set for_new_clt, to allow future reconnect on any path */ + sess->for_new_clt = 1; } return 0; @@ -2662,6 +2667,8 @@ struct rtrs_clt *rtrs_clt_open(struct rtrs_clt_ops *ops, err = PTR_ERR(sess); goto close_all_sess; } + if (!i) + sess->for_new_clt = 1; list_add_tail_rcu(&sess->s.entry, &clt->paths_list); err = init_sess(sess); diff --git a/drivers/infiniband/ulp/rtrs/rtrs-clt.h b/drivers/infiniband/ulp/rtrs/rtrs-clt.h index a97a068c4c28..692bc83e1f09 100644 --- a/drivers/infiniband/ulp/rtrs/rtrs-clt.h +++ b/drivers/infiniband/ulp/rtrs/rtrs-clt.h @@ -143,6 +143,7 @@ struct rtrs_clt_sess { int max_send_sge; u32 flags; struct kobject kobj; + u8 for_new_clt; struct rtrs_clt_stats *stats; /* cache hca_port and hca_name to display in sysfs */ u8 hca_port; diff --git a/drivers/infiniband/ulp/rtrs/rtrs-pri.h b/drivers/infiniband/ulp/rtrs/rtrs-pri.h index d5621e6fad1b..8caad0a2322b 100644 --- a/drivers/infiniband/ulp/rtrs/rtrs-pri.h +++ b/drivers/infiniband/ulp/rtrs/rtrs-pri.h @@ -188,7 +188,9 @@ struct rtrs_msg_conn_req { __le16 recon_cnt; uuid_t sess_uuid; uuid_t paths_uuid; - u8 reserved[12]; + u8 first_conn : 1; + u8 reserved_bits : 7; + u8 reserved[11]; }; /** diff --git a/drivers/infiniband/ulp/rtrs/rtrs-srv.c b/drivers/infiniband/ulp/rtrs/rtrs-srv.c index e13e91c2a44a..fbe39360ff12 100644 --- a/drivers/infiniband/ulp/rtrs/rtrs-srv.c +++ b/drivers/infiniband/ulp/rtrs/rtrs-srv.c @@ -1333,7 +1333,8 @@ static void free_srv(struct rtrs_srv *srv) } static struct rtrs_srv *get_or_create_srv(struct rtrs_srv_ctx *ctx, - const uuid_t *paths_uuid) + const uuid_t *paths_uuid, + bool first_conn) { struct rtrs_srv *srv; int i; @@ -1346,12 +1347,20 @@ static struct rtrs_srv *get_or_create_srv(struct rtrs_srv_ctx *ctx, return srv; } } + /* + * If this request is not the first connection request from the + * client for this session then fail and return error. + */ + if (!first_conn) { + mutex_unlock(&ctx->srv_mutex); + return ERR_PTR(-ENXIO); + } /* need to allocate a new srv */ srv = kzalloc(sizeof(*srv), GFP_KERNEL); if (!srv) { mutex_unlock(&ctx->srv_mutex); - return NULL; + return ERR_PTR(-ENOMEM); } INIT_LIST_HEAD(&srv->paths_list); @@ -1386,7 +1395,7 @@ static struct rtrs_srv *get_or_create_srv(struct rtrs_srv_ctx *ctx, err_free_srv: kfree(srv); - return NULL; + return ERR_PTR(-ENOMEM); } static void put_srv(struct rtrs_srv *srv) @@ -1787,13 +1796,13 @@ static int rtrs_rdma_connect(struct rdma_cm_id *cm_id, goto reject_w_econnreset; } recon_cnt = le16_to_cpu(msg->recon_cnt); - srv = get_or_create_srv(ctx, &msg->paths_uuid); + srv = get_or_create_srv(ctx, &msg->paths_uuid, msg->first_conn); /* * "refcount == 0" happens if a previous thread calls get_or_create_srv * allocate srv, but chunks of srv are not allocated yet. */ - if (!srv || refcount_read(&srv->refcount) == 0) { - err = -ENOMEM; + if (IS_ERR(srv) || refcount_read(&srv->refcount) == 0) { + err = PTR_ERR(srv); goto reject_w_err; } mutex_lock(&srv->paths_mutex); From f7452a7e96c120d73100387d5f87de9fce7133cb Mon Sep 17 00:00:00 2001 From: Gioh Kim Date: Fri, 12 Feb 2021 14:45:24 +0100 Subject: [PATCH 409/414] RDMA/rtrs-srv: fix memory leak by missing kobject free kmemleak reported an error as below: unreferenced object 0xffff8880674b7640 (size 64): comm "kworker/4:1H", pid 113, jiffies 4296403507 (age 507.840s) hex dump (first 32 bytes): 69 70 3a 31 39 32 2e 31 36 38 2e 31 32 32 2e 31 ip:192.168.122.1 31 30 40 69 70 3a 31 39 32 2e 31 36 38 2e 31 32 10@ip:192.168.12 backtrace: [<0000000054413611>] kstrdup+0x2e/0x60 [<0000000078e3120a>] kobject_set_name_vargs+0x2f/0xb0 [<00000000ca2be3ee>] kobject_init_and_add+0xb0/0x120 [<0000000062ba5e78>] rtrs_srv_create_sess_files+0x14c/0x314 [rtrs_server] [<00000000b45b7217>] rtrs_srv_info_req_done+0x5b1/0x800 [rtrs_server] [<000000008fc5aa8f>] __ib_process_cq+0x94/0x100 [ib_core] [<00000000a9599cb4>] ib_cq_poll_work+0x32/0xc0 [ib_core] [<00000000cfc376be>] process_one_work+0x4bc/0x980 [<0000000016e5c96a>] worker_thread+0x78/0x5c0 [<00000000c20b8be0>] kthread+0x191/0x1e0 [<000000006c9c0003>] ret_from_fork+0x3a/0x50 It is caused by the not-freed kobject of rtrs_srv_sess. The kobject embedded in rtrs_srv_sess has ref-counter 2 after calling process_info_req(). Therefore it must call kobject_put twice. Currently it calls kobject_put only once at rtrs_srv_destroy_sess_files because kobject_del removes the state_in_sysfs flag and then kobject_put in free_sess() is not called. This patch moves kobject_del() into free_sess() so that the kobject of rtrs_srv_sess can be freed. And also this patch adds the missing call of sysfs_remove_group() to clean-up the sysfs directory. Fixes: 9cb837480424 ("RDMA/rtrs: server: main functionality") Link: https://lore.kernel.org/r/20210212134525.103456-4-jinpu.wang@cloud.ionos.com Signed-off-by: Gioh Kim Signed-off-by: Jack Wang Signed-off-by: Jason Gunthorpe --- drivers/infiniband/ulp/rtrs/rtrs-srv-sysfs.c | 2 +- drivers/infiniband/ulp/rtrs/rtrs-srv.c | 6 ++++-- 2 files changed, 5 insertions(+), 3 deletions(-) diff --git a/drivers/infiniband/ulp/rtrs/rtrs-srv-sysfs.c b/drivers/infiniband/ulp/rtrs/rtrs-srv-sysfs.c index 0a3886629cae..94e3f3290500 100644 --- a/drivers/infiniband/ulp/rtrs/rtrs-srv-sysfs.c +++ b/drivers/infiniband/ulp/rtrs/rtrs-srv-sysfs.c @@ -305,7 +305,7 @@ void rtrs_srv_destroy_sess_files(struct rtrs_srv_sess *sess) if (sess->kobj.state_in_sysfs) { kobject_del(&sess->stats->kobj_stats); kobject_put(&sess->stats->kobj_stats); - kobject_del(&sess->kobj); + sysfs_remove_group(&sess->kobj, &rtrs_srv_sess_attr_group); kobject_put(&sess->kobj); rtrs_srv_destroy_once_sysfs_root_folders(sess); diff --git a/drivers/infiniband/ulp/rtrs/rtrs-srv.c b/drivers/infiniband/ulp/rtrs/rtrs-srv.c index fbe39360ff12..eb17c3a08810 100644 --- a/drivers/infiniband/ulp/rtrs/rtrs-srv.c +++ b/drivers/infiniband/ulp/rtrs/rtrs-srv.c @@ -1475,10 +1475,12 @@ static bool __is_path_w_addr_exists(struct rtrs_srv *srv, static void free_sess(struct rtrs_srv_sess *sess) { - if (sess->kobj.state_in_sysfs) + if (sess->kobj.state_in_sysfs) { + kobject_del(&sess->kobj); kobject_put(&sess->kobj); - else + } else { kfree(sess); + } } static void rtrs_srv_close_work(struct work_struct *work) From e2853c49477d104c01d3c7944e1fb5074eb11d9f Mon Sep 17 00:00:00 2001 From: Gioh Kim Date: Fri, 12 Feb 2021 14:45:25 +0100 Subject: [PATCH 410/414] RDMA/rtrs-srv-sysfs: fix missing put_device put_device() decreases the ref-count and then the device will be cleaned-up, while at is also add missing put_device in rtrs_srv_create_once_sysfs_root_folders This patch solves a kmemleak error as below: unreferenced object 0xffff88809a7a0710 (size 8): comm "kworker/4:1H", pid 113, jiffies 4295833049 (age 6212.380s) hex dump (first 8 bytes): 62 6c 61 00 6b 6b 6b a5 bla.kkk. backtrace: [<0000000054413611>] kstrdup+0x2e/0x60 [<0000000078e3120a>] kobject_set_name_vargs+0x2f/0xb0 [<00000000f1a17a6b>] dev_set_name+0xab/0xe0 [<00000000d5502e32>] rtrs_srv_create_sess_files+0x2fb/0x314 [rtrs_server] [<00000000ed11a1ef>] rtrs_srv_info_req_done+0x631/0x800 [rtrs_server] [<000000008fc5aa8f>] __ib_process_cq+0x94/0x100 [ib_core] [<00000000a9599cb4>] ib_cq_poll_work+0x32/0xc0 [ib_core] [<00000000cfc376be>] process_one_work+0x4bc/0x980 [<0000000016e5c96a>] worker_thread+0x78/0x5c0 [<00000000c20b8be0>] kthread+0x191/0x1e0 [<000000006c9c0003>] ret_from_fork+0x3a/0x50 Fixes: baa5b28b7a47 ("RDMA/rtrs-srv: Replace device_register with device_initialize and device_add") Link: https://lore.kernel.org/r/20210212134525.103456-5-jinpu.wang@cloud.ionos.com Signed-off-by: Gioh Kim Signed-off-by: Jack Wang Reviewed-by: Leon Romanovsky Signed-off-by: Jason Gunthorpe --- drivers/infiniband/ulp/rtrs/rtrs-srv-sysfs.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/infiniband/ulp/rtrs/rtrs-srv-sysfs.c b/drivers/infiniband/ulp/rtrs/rtrs-srv-sysfs.c index 94e3f3290500..126a96e75c62 100644 --- a/drivers/infiniband/ulp/rtrs/rtrs-srv-sysfs.c +++ b/drivers/infiniband/ulp/rtrs/rtrs-srv-sysfs.c @@ -183,6 +183,7 @@ static int rtrs_srv_create_once_sysfs_root_folders(struct rtrs_srv_sess *sess) err = -ENOMEM; pr_err("kobject_create_and_add(): %d\n", err); device_del(&srv->dev); + put_device(&srv->dev); goto unlock; } dev_set_uevent_suppress(&srv->dev, false); @@ -208,6 +209,7 @@ rtrs_srv_destroy_once_sysfs_root_folders(struct rtrs_srv_sess *sess) kobject_put(srv->kobj_paths); mutex_unlock(&srv->paths_mutex); device_del(&srv->dev); + put_device(&srv->dev); } else { mutex_unlock(&srv->paths_mutex); } From 7232c132d13aafd178ba18c1099b2cb98d104b8c Mon Sep 17 00:00:00 2001 From: Tal Gilboa Date: Thu, 11 Feb 2021 10:55:49 +0200 Subject: [PATCH 411/414] RDMA/mlx5: Allow CQ creation without attached EQs The traditional DevX CQ creation flow goes through mlx5_core_create_cq() which checks that the given EQN corresponds to an existing EQ and attaches a devx handler to the EQN for the CQ. In some cases the EQ will not be a kernel EQ, but will be controlled by modify CQ, don't block creating these just because the EQN can't be found in the kernel. Link: https://lore.kernel.org/r/20210211085549.1277674-1-leon@kernel.org Signed-off-by: Tal Gilboa Signed-off-by: Leon Romanovsky Signed-off-by: Jason Gunthorpe --- drivers/infiniband/hw/mlx5/devx.c | 13 ++++++++++++- include/linux/mlx5/mlx5_ifc.h | 5 +++-- 2 files changed, 15 insertions(+), 3 deletions(-) diff --git a/drivers/infiniband/hw/mlx5/devx.c b/drivers/infiniband/hw/mlx5/devx.c index e39661db0d2f..ebc2a4355fa5 100644 --- a/drivers/infiniband/hw/mlx5/devx.c +++ b/drivers/infiniband/hw/mlx5/devx.c @@ -1439,6 +1439,16 @@ static void devx_cq_comp(struct mlx5_core_cq *mcq, struct mlx5_eqe *eqe) rcu_read_unlock(); } +static bool is_apu_thread_cq(struct mlx5_ib_dev *dev, const void *in) +{ + if (!MLX5_CAP_GEN(dev->mdev, apu) || + !MLX5_GET(cqc, MLX5_ADDR_OF(create_cq_in, in, cq_context), + apu_thread_cq)) + return false; + + return true; +} + static int UVERBS_HANDLER(MLX5_IB_METHOD_DEVX_OBJ_CREATE)( struct uverbs_attr_bundle *attrs) { @@ -1492,7 +1502,8 @@ static int UVERBS_HANDLER(MLX5_IB_METHOD_DEVX_OBJ_CREATE)( obj->flags |= DEVX_OBJ_FLAGS_DCT; err = mlx5_core_create_dct(dev, &obj->core_dct, cmd_in, cmd_in_len, cmd_out, cmd_out_len); - } else if (opcode == MLX5_CMD_OP_CREATE_CQ) { + } else if (opcode == MLX5_CMD_OP_CREATE_CQ && + !is_apu_thread_cq(dev, cmd_in)) { obj->flags |= DEVX_OBJ_FLAGS_CQ; obj->core_cq.comp = devx_cq_comp; err = mlx5_core_create_cq(dev->mdev, &obj->core_cq, diff --git a/include/linux/mlx5/mlx5_ifc.h b/include/linux/mlx5/mlx5_ifc.h index cf692fc17f41..768e097c633b 100644 --- a/include/linux/mlx5/mlx5_ifc.h +++ b/include/linux/mlx5/mlx5_ifc.h @@ -1634,7 +1634,8 @@ struct mlx5_ifc_cmd_hca_cap_bits { u8 sf_set_partition[0x1]; u8 reserved_at_682[0x1]; u8 log_max_sf[0x5]; - u8 reserved_at_688[0x8]; + u8 apu[0x1]; + u8 reserved_at_689[0x7]; u8 log_min_sf_size[0x8]; u8 max_num_sf_partitions[0x8]; @@ -3816,7 +3817,7 @@ struct mlx5_ifc_cqc_bits { u8 status[0x4]; u8 reserved_at_4[0x2]; u8 dbr_umem_valid[0x1]; - u8 reserved_at_7[0x1]; + u8 apu_thread_cq[0x1]; u8 cqe_sz[0x3]; u8 cc[0x1]; u8 reserved_at_c[0x1]; From 2fe8d4b87802dcde7fa015229c84bb726f631b4d Mon Sep 17 00:00:00 2001 From: Aharon Landau Date: Tue, 9 Feb 2021 15:11:07 +0200 Subject: [PATCH 412/414] RDMA/mlx5: Fail QP creation if the device can not support the CQE TS In ConnectX6Dx device, HW can work in real time timestamp mode according to the device capabilities per RQ/SQ/QP. When the flag IB_UVERBS_CQ_FLAGS_TIMESTAMP_COMPLETION is set, the user expect to get TS on the CQEs in free running format, so we need to fail the QP creation if the current mode of the device doesn't support it. Link: https://lore.kernel.org/r/20210209131107.698833-3-leon@kernel.org Signed-off-by: Aharon Landau Signed-off-by: Maor Gottlieb Signed-off-by: Leon Romanovsky Signed-off-by: Jason Gunthorpe --- drivers/infiniband/hw/mlx5/qp.c | 104 +++++++++++++++++++++++++++++--- 1 file changed, 96 insertions(+), 8 deletions(-) diff --git a/drivers/infiniband/hw/mlx5/qp.c b/drivers/infiniband/hw/mlx5/qp.c index 0cb7cc642d87..2e234fecc00e 100644 --- a/drivers/infiniband/hw/mlx5/qp.c +++ b/drivers/infiniband/hw/mlx5/qp.c @@ -1078,6 +1078,7 @@ static int _create_kernel_qp(struct mlx5_ib_dev *dev, qpc = MLX5_ADDR_OF(create_qp_in, *in, qpc); MLX5_SET(qpc, qpc, uar_page, uar_index); + MLX5_SET(qpc, qpc, ts_format, MLX5_QPC_TIMESTAMP_FORMAT_DEFAULT); MLX5_SET(qpc, qpc, log_page_size, qp->buf.page_shift - MLX5_ADAPTER_PAGE_SHIFT); /* Set "fast registration enabled" for all kernel QPs */ @@ -1172,10 +1173,72 @@ static void destroy_flow_rule_vport_sq(struct mlx5_ib_sq *sq) sq->flow_rule = NULL; } +static int get_rq_ts_format(struct mlx5_ib_dev *dev, struct mlx5_ib_cq *send_cq) +{ + bool fr_supported = + MLX5_CAP_GEN(dev->mdev, rq_ts_format) == + MLX5_RQ_TIMESTAMP_FORMAT_CAP_FREE_RUNNING || + MLX5_CAP_GEN(dev->mdev, rq_ts_format) == + MLX5_RQ_TIMESTAMP_FORMAT_CAP_FREE_RUNNING_AND_REAL_TIME; + + if (send_cq->create_flags & IB_UVERBS_CQ_FLAGS_TIMESTAMP_COMPLETION) { + if (!fr_supported) { + mlx5_ib_dbg(dev, "Free running TS format is not supported\n"); + return -EOPNOTSUPP; + } + return MLX5_RQC_TIMESTAMP_FORMAT_FREE_RUNNING; + } + return MLX5_RQC_TIMESTAMP_FORMAT_DEFAULT; +} + +static int get_sq_ts_format(struct mlx5_ib_dev *dev, struct mlx5_ib_cq *send_cq) +{ + bool fr_supported = + MLX5_CAP_GEN(dev->mdev, sq_ts_format) == + MLX5_SQ_TIMESTAMP_FORMAT_CAP_FREE_RUNNING || + MLX5_CAP_GEN(dev->mdev, sq_ts_format) == + MLX5_SQ_TIMESTAMP_FORMAT_CAP_FREE_RUNNING_AND_REAL_TIME; + + if (send_cq->create_flags & IB_UVERBS_CQ_FLAGS_TIMESTAMP_COMPLETION) { + if (!fr_supported) { + mlx5_ib_dbg(dev, "Free running TS format is not supported\n"); + return -EOPNOTSUPP; + } + return MLX5_SQC_TIMESTAMP_FORMAT_FREE_RUNNING; + } + return MLX5_SQC_TIMESTAMP_FORMAT_DEFAULT; +} + +static int get_qp_ts_format(struct mlx5_ib_dev *dev, struct mlx5_ib_cq *send_cq, + struct mlx5_ib_cq *recv_cq) +{ + bool fr_supported = + MLX5_CAP_ROCE(dev->mdev, qp_ts_format) == + MLX5_QP_TIMESTAMP_FORMAT_CAP_FREE_RUNNING || + MLX5_CAP_ROCE(dev->mdev, qp_ts_format) == + MLX5_QP_TIMESTAMP_FORMAT_CAP_FREE_RUNNING_AND_REAL_TIME; + int ts_format = MLX5_QPC_TIMESTAMP_FORMAT_DEFAULT; + + if (recv_cq && + recv_cq->create_flags & IB_UVERBS_CQ_FLAGS_TIMESTAMP_COMPLETION) + ts_format = MLX5_QPC_TIMESTAMP_FORMAT_FREE_RUNNING; + + if (send_cq && + send_cq->create_flags & IB_UVERBS_CQ_FLAGS_TIMESTAMP_COMPLETION) + ts_format = MLX5_QPC_TIMESTAMP_FORMAT_FREE_RUNNING; + + if (ts_format == MLX5_QPC_TIMESTAMP_FORMAT_FREE_RUNNING && + !fr_supported) { + mlx5_ib_dbg(dev, "Free running TS format is not supported\n"); + return -EOPNOTSUPP; + } + return ts_format; +} + static int create_raw_packet_qp_sq(struct mlx5_ib_dev *dev, struct ib_udata *udata, struct mlx5_ib_sq *sq, void *qpin, - struct ib_pd *pd) + struct ib_pd *pd, struct mlx5_ib_cq *cq) { struct mlx5_ib_ubuffer *ubuffer = &sq->ubuffer; __be64 *pas; @@ -1187,6 +1250,11 @@ static int create_raw_packet_qp_sq(struct mlx5_ib_dev *dev, int err; unsigned int page_offset_quantized; unsigned long page_size; + int ts_format; + + ts_format = get_sq_ts_format(dev, cq); + if (ts_format < 0) + return ts_format; sq->ubuffer.umem = ib_umem_get(&dev->ib_dev, ubuffer->buf_addr, ubuffer->buf_size, 0); @@ -1215,6 +1283,7 @@ static int create_raw_packet_qp_sq(struct mlx5_ib_dev *dev, if (MLX5_CAP_ETH(dev->mdev, multi_pkt_send_wqe)) MLX5_SET(sqc, sqc, allow_multi_pkt_send_wqe, 1); MLX5_SET(sqc, sqc, state, MLX5_SQC_STATE_RST); + MLX5_SET(sqc, sqc, ts_format, ts_format); MLX5_SET(sqc, sqc, user_index, MLX5_GET(qpc, qpc, user_index)); MLX5_SET(sqc, sqc, cqn, MLX5_GET(qpc, qpc, cqn_snd)); MLX5_SET(sqc, sqc, tis_lst_sz, 1); @@ -1263,7 +1332,7 @@ static void destroy_raw_packet_qp_sq(struct mlx5_ib_dev *dev, static int create_raw_packet_qp_rq(struct mlx5_ib_dev *dev, struct mlx5_ib_rq *rq, void *qpin, - struct ib_pd *pd) + struct ib_pd *pd, struct mlx5_ib_cq *cq) { struct mlx5_ib_qp *mqp = rq->base.container_mibqp; __be64 *pas; @@ -1274,9 +1343,14 @@ static int create_raw_packet_qp_rq(struct mlx5_ib_dev *dev, struct ib_umem *umem = rq->base.ubuffer.umem; unsigned int page_offset_quantized; unsigned long page_size = 0; + int ts_format; size_t inlen; int err; + ts_format = get_rq_ts_format(dev, cq); + if (ts_format < 0) + return ts_format; + page_size = mlx5_umem_find_best_quantized_pgoff(umem, wq, log_wq_pg_sz, MLX5_ADAPTER_PAGE_SHIFT, page_offset, 64, @@ -1296,6 +1370,7 @@ static int create_raw_packet_qp_rq(struct mlx5_ib_dev *dev, MLX5_SET(rqc, rqc, vsd, 1); MLX5_SET(rqc, rqc, mem_rq_type, MLX5_RQC_MEM_RQ_TYPE_MEMORY_RQ_INLINE); MLX5_SET(rqc, rqc, state, MLX5_RQC_STATE_RST); + MLX5_SET(rqc, rqc, ts_format, ts_format); MLX5_SET(rqc, rqc, flush_in_error_en, 1); MLX5_SET(rqc, rqc, user_index, MLX5_GET(qpc, qpc, user_index)); MLX5_SET(rqc, rqc, cqn, MLX5_GET(qpc, qpc, cqn_rcv)); @@ -1393,10 +1468,10 @@ static int create_raw_packet_qp_tir(struct mlx5_ib_dev *dev, } static int create_raw_packet_qp(struct mlx5_ib_dev *dev, struct mlx5_ib_qp *qp, - u32 *in, size_t inlen, - struct ib_pd *pd, + u32 *in, size_t inlen, struct ib_pd *pd, struct ib_udata *udata, - struct mlx5_ib_create_qp_resp *resp) + struct mlx5_ib_create_qp_resp *resp, + struct ib_qp_init_attr *init_attr) { struct mlx5_ib_raw_packet_qp *raw_packet_qp = &qp->raw_packet_qp; struct mlx5_ib_sq *sq = &raw_packet_qp->sq; @@ -1415,7 +1490,8 @@ static int create_raw_packet_qp(struct mlx5_ib_dev *dev, struct mlx5_ib_qp *qp, if (err) return err; - err = create_raw_packet_qp_sq(dev, udata, sq, in, pd); + err = create_raw_packet_qp_sq(dev, udata, sq, in, pd, + to_mcq(init_attr->send_cq)); if (err) goto err_destroy_tis; @@ -1437,7 +1513,8 @@ static int create_raw_packet_qp(struct mlx5_ib_dev *dev, struct mlx5_ib_qp *qp, rq->flags |= MLX5_IB_RQ_CVLAN_STRIPPING; if (qp->flags & IB_QP_CREATE_PCI_WRITE_END_PADDING) rq->flags |= MLX5_IB_RQ_PCI_WRITE_END_PADDING; - err = create_raw_packet_qp_rq(dev, rq, in, pd); + err = create_raw_packet_qp_rq(dev, rq, in, pd, + to_mcq(init_attr->recv_cq)); if (err) goto err_destroy_sq; @@ -1907,6 +1984,7 @@ static int create_user_qp(struct mlx5_ib_dev *dev, struct ib_pd *pd, struct mlx5_ib_cq *recv_cq; unsigned long flags; struct mlx5_ib_qp_base *base; + int ts_format; int mlx5_st; void *qpc; u32 *in; @@ -1944,6 +2022,13 @@ static int create_user_qp(struct mlx5_ib_dev *dev, struct ib_pd *pd, if (ucmd->sq_wqe_count > (1 << MLX5_CAP_GEN(mdev, log_max_qp_sz))) return -EINVAL; + if (init_attr->qp_type != IB_QPT_RAW_PACKET) { + ts_format = get_qp_ts_format(dev, to_mcq(init_attr->send_cq), + to_mcq(init_attr->recv_cq)); + if (ts_format < 0) + return ts_format; + } + err = _create_user_qp(dev, pd, qp, udata, init_attr, &in, ¶ms->resp, &inlen, base, ucmd); if (err) @@ -1992,6 +2077,9 @@ static int create_user_qp(struct mlx5_ib_dev *dev, struct ib_pd *pd, MLX5_SET(qpc, qpc, log_rq_size, ilog2(qp->rq.wqe_cnt)); } + if (init_attr->qp_type != IB_QPT_RAW_PACKET) + MLX5_SET(qpc, qpc, ts_format, ts_format); + MLX5_SET(qpc, qpc, rq_type, get_rx_type(qp, init_attr)); if (qp->sq.wqe_cnt) { @@ -2046,7 +2134,7 @@ static int create_user_qp(struct mlx5_ib_dev *dev, struct ib_pd *pd, qp->raw_packet_qp.sq.ubuffer.buf_addr = ucmd->sq_buf_addr; raw_packet_qp_copy_info(qp, &qp->raw_packet_qp); err = create_raw_packet_qp(dev, qp, in, inlen, pd, udata, - ¶ms->resp); + ¶ms->resp, init_attr); } else err = mlx5_qpc_create_qp(dev, &base->mqp, in, inlen, out); From 2b5715fc17386a6223490d5b8f08d031999b0c0b Mon Sep 17 00:00:00 2001 From: Nicolas Morey-Chaisemartin Date: Fri, 5 Feb 2021 09:14:28 +0100 Subject: [PATCH 413/414] RDMA/srp: Fix support for unpopulated and unbalanced NUMA nodes The current code computes a number of channels per SRP target and spreads them equally across all online NUMA nodes. Each channel is then assigned a CPU within this node. In the case of unbalanced, or even unpopulated nodes, some channels do not get a CPU associated and thus do not get connected. This causes the SRP connection to fail. This patch solves the issue by rewriting channel computation and allocation: - Drop channel to node/CPU association as it had no real effect on locality but added unnecessary complexity. - Tweak the number of channels allocated to reduce CPU contention when possible: - Up to one channel per CPU (instead of up to 4 by node) - At least 4 channels per node, unless ch_count module parameter is used. Link: https://lore.kernel.org/r/9cb4d9d3-30ad-2276-7eff-e85f7ddfb411@suse.com Signed-off-by: Nicolas Morey-Chaisemartin Reviewed-by: Bart Van Assche Signed-off-by: Jason Gunthorpe --- drivers/infiniband/ulp/srp/ib_srp.c | 108 ++++++++++++---------------- 1 file changed, 44 insertions(+), 64 deletions(-) diff --git a/drivers/infiniband/ulp/srp/ib_srp.c b/drivers/infiniband/ulp/srp/ib_srp.c index 5492b66a8153..31f8aa2c40ed 100644 --- a/drivers/infiniband/ulp/srp/ib_srp.c +++ b/drivers/infiniband/ulp/srp/ib_srp.c @@ -3628,7 +3628,7 @@ static ssize_t srp_create_target(struct device *dev, struct srp_rdma_ch *ch; struct srp_device *srp_dev = host->srp_dev; struct ib_device *ibdev = srp_dev->dev; - int ret, node_idx, node, cpu, i; + int ret, i, ch_idx; unsigned int max_sectors_per_mr, mr_per_cmd = 0; bool multich = false; uint32_t max_iu_len; @@ -3753,81 +3753,61 @@ static ssize_t srp_create_target(struct device *dev, goto out; ret = -ENOMEM; - if (target->ch_count == 0) + if (target->ch_count == 0) { target->ch_count = - max_t(unsigned int, num_online_nodes(), - min(ch_count ?: - min(4 * num_online_nodes(), - ibdev->num_comp_vectors), - num_online_cpus())); + min(ch_count ?: + max(4 * num_online_nodes(), + ibdev->num_comp_vectors), + num_online_cpus()); + } + target->ch = kcalloc(target->ch_count, sizeof(*target->ch), GFP_KERNEL); if (!target->ch) goto out; - node_idx = 0; - for_each_online_node(node) { - const int ch_start = (node_idx * target->ch_count / - num_online_nodes()); - const int ch_end = ((node_idx + 1) * target->ch_count / - num_online_nodes()); - const int cv_start = node_idx * ibdev->num_comp_vectors / - num_online_nodes(); - const int cv_end = (node_idx + 1) * ibdev->num_comp_vectors / - num_online_nodes(); - int cpu_idx = 0; + for (ch_idx = 0; ch_idx < target->ch_count; ++ch_idx) { + ch = &target->ch[ch_idx]; + ch->target = target; + ch->comp_vector = ch_idx % ibdev->num_comp_vectors; + spin_lock_init(&ch->lock); + INIT_LIST_HEAD(&ch->free_tx); + ret = srp_new_cm_id(ch); + if (ret) + goto err_disconnect; - for_each_online_cpu(cpu) { - if (cpu_to_node(cpu) != node) - continue; - if (ch_start + cpu_idx >= ch_end) - continue; - ch = &target->ch[ch_start + cpu_idx]; - ch->target = target; - ch->comp_vector = cv_start == cv_end ? cv_start : - cv_start + cpu_idx % (cv_end - cv_start); - spin_lock_init(&ch->lock); - INIT_LIST_HEAD(&ch->free_tx); - ret = srp_new_cm_id(ch); - if (ret) - goto err_disconnect; + ret = srp_create_ch_ib(ch); + if (ret) + goto err_disconnect; - ret = srp_create_ch_ib(ch); - if (ret) - goto err_disconnect; + ret = srp_alloc_req_data(ch); + if (ret) + goto err_disconnect; - ret = srp_alloc_req_data(ch); - if (ret) - goto err_disconnect; + ret = srp_connect_ch(ch, max_iu_len, multich); + if (ret) { + char dst[64]; - ret = srp_connect_ch(ch, max_iu_len, multich); - if (ret) { - char dst[64]; - - if (target->using_rdma_cm) - snprintf(dst, sizeof(dst), "%pIS", - &target->rdma_cm.dst); - else - snprintf(dst, sizeof(dst), "%pI6", - target->ib_cm.orig_dgid.raw); - shost_printk(KERN_ERR, target->scsi_host, - PFX "Connection %d/%d to %s failed\n", - ch_start + cpu_idx, - target->ch_count, dst); - if (node_idx == 0 && cpu_idx == 0) { - goto free_ch; - } else { - srp_free_ch_ib(target, ch); - srp_free_req_data(target, ch); - target->ch_count = ch - target->ch; - goto connected; - } + if (target->using_rdma_cm) + snprintf(dst, sizeof(dst), "%pIS", + &target->rdma_cm.dst); + else + snprintf(dst, sizeof(dst), "%pI6", + target->ib_cm.orig_dgid.raw); + shost_printk(KERN_ERR, target->scsi_host, + PFX "Connection %d/%d to %s failed\n", + ch_idx, + target->ch_count, dst); + if (ch_idx == 0) { + goto free_ch; + } else { + srp_free_ch_ib(target, ch); + srp_free_req_data(target, ch); + target->ch_count = ch - target->ch; + goto connected; } - - multich = true; - cpu_idx++; } - node_idx++; + multich = true; } connected: From ed408529679737a9a7ad816c8de5d59ba104bb11 Mon Sep 17 00:00:00 2001 From: Jack Wang Date: Tue, 16 Feb 2021 15:38:07 +0100 Subject: [PATCH 414/414] RDMA/rtrs-srv: Do not pass a valid pointer to PTR_ERR() smatch gives the warning: drivers/infiniband/ulp/rtrs/rtrs-srv.c:1805 rtrs_rdma_connect() warn: passing zero to 'PTR_ERR' Which is trying to say smatch has shown that srv is not an error pointer and thus cannot be passed to PTR_ERR. The solution is to move the list_add() down after full initilization of rtrs_srv. To avoid holding the srv_mutex too long, only hold it during the list operation as suggested by Leon. Fixes: 03e9b33a0fd6 ("RDMA/rtrs: Only allow addition of path to an already established session") Link: https://lore.kernel.org/r/20210216143807.65923-1-jinpu.wang@cloud.ionos.com Reported-by: kernel test robot Reported-by: Dan Carpenter Signed-off-by: Jack Wang Signed-off-by: Jason Gunthorpe --- drivers/infiniband/ulp/rtrs/rtrs-srv.c | 20 +++++++------------- 1 file changed, 7 insertions(+), 13 deletions(-) diff --git a/drivers/infiniband/ulp/rtrs/rtrs-srv.c b/drivers/infiniband/ulp/rtrs/rtrs-srv.c index eb17c3a08810..d071809e3ed2 100644 --- a/drivers/infiniband/ulp/rtrs/rtrs-srv.c +++ b/drivers/infiniband/ulp/rtrs/rtrs-srv.c @@ -1347,21 +1347,18 @@ static struct rtrs_srv *get_or_create_srv(struct rtrs_srv_ctx *ctx, return srv; } } + mutex_unlock(&ctx->srv_mutex); /* * If this request is not the first connection request from the * client for this session then fail and return error. */ - if (!first_conn) { - mutex_unlock(&ctx->srv_mutex); + if (!first_conn) return ERR_PTR(-ENXIO); - } /* need to allocate a new srv */ srv = kzalloc(sizeof(*srv), GFP_KERNEL); - if (!srv) { - mutex_unlock(&ctx->srv_mutex); + if (!srv) return ERR_PTR(-ENOMEM); - } INIT_LIST_HEAD(&srv->paths_list); mutex_init(&srv->paths_mutex); @@ -1371,8 +1368,6 @@ static struct rtrs_srv *get_or_create_srv(struct rtrs_srv_ctx *ctx, srv->ctx = ctx; device_initialize(&srv->dev); srv->dev.release = rtrs_srv_dev_release; - list_add(&srv->ctx_list, &ctx->srv_list); - mutex_unlock(&ctx->srv_mutex); srv->chunks = kcalloc(srv->queue_depth, sizeof(*srv->chunks), GFP_KERNEL); @@ -1385,6 +1380,9 @@ static struct rtrs_srv *get_or_create_srv(struct rtrs_srv_ctx *ctx, goto err_free_chunks; } refcount_set(&srv->refcount, 1); + mutex_lock(&ctx->srv_mutex); + list_add(&srv->ctx_list, &ctx->srv_list); + mutex_unlock(&ctx->srv_mutex); return srv; @@ -1799,11 +1797,7 @@ static int rtrs_rdma_connect(struct rdma_cm_id *cm_id, } recon_cnt = le16_to_cpu(msg->recon_cnt); srv = get_or_create_srv(ctx, &msg->paths_uuid, msg->first_conn); - /* - * "refcount == 0" happens if a previous thread calls get_or_create_srv - * allocate srv, but chunks of srv are not allocated yet. - */ - if (IS_ERR(srv) || refcount_read(&srv->refcount) == 0) { + if (IS_ERR(srv)) { err = PTR_ERR(srv); goto reject_w_err; }